1 // license:BSD-3-Clause
2 // copyright-holders:Samuele Zannoli
3 #include "emu.h"
4 #include "video/poly.h"
5 #include "bitmap.h"
6 #include "includes/xbox_nv2a.h"
7 #include <bitset>
8 #include <cfloat>
9
10 //#define LOG_NV2A
11 #define DEBUG_CHECKS // enable for debugging
12
13 char const *const vertex_program_disassembler::srctypes[] = { "??", "Rn", "Vn", "Cn" };
14 char const *const vertex_program_disassembler::scaops[] = { "NOP", "IMV", "RCP", "RCC", "RSQ", "EXP", "LOG", "LIT", "???", "???", "???", "???", "???", "???", "???", "???", "???" };
15 int const vertex_program_disassembler::scapar2[] = { 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
16 char const *const vertex_program_disassembler::vecops[] = { "NOP", "MOV", "MUL", "ADD", "MAD", "DP3", "DPH", "DP4", "DST", "MIN", "MAX", "SLT", "SGE", "ARL", "???", "???", "???" };
17 int const vertex_program_disassembler::vecpar2[] = { 0, 4, 6, 5, 7, 6, 6, 6, 6, 6, 6, 6, 6, 4, 0, 0, 0 };
18 char const *const vertex_program_disassembler::vecouts[] = { "oPos", "???", "???", "oD0", "oD1", "oFog", "oPts", "oB0", "oB1", "oT0", "oT1", "oT2", "oT3" };
19 char const vertex_program_disassembler::compchar[] = { 'x', 'y', 'z', 'w' };
20
21 /*
22 Each vertex program instruction is a 128-bit word made up of the following fields:
23 d f
24 w b i
25 o i e
26 r t l
27 d s d
28 +-+-----+-------
29 |0|31-0 |not used
30 +-+-----+-------
31 | |31-29|not used
32 | +-----+-------
33 | |28-25|scalar operation
34 | +-----+-------
35 | |24-21|vectorial operation
36 | +-----+-------
37 | |20-13|index for source constant C[]
38 | +-----+-------
39 | |12-9 |input vector index
40 | +-----+-------
41 |1| 8 |parameter A:sign
42 | +-----+-------
43 | | 7-6 |parameter A:swizzle x
44 | +-----+-------
45 | | 5-4 |parameter A:swizzle y
46 | +-----+-------
47 | | 3-2 |parameter A:swizzle z
48 | +-----+-------
49 | | 1-0 |parameter A:swizzle w
50 |-+-----+-------
51 | |31-28|parameter A:parameter Rn index
52 | +-----+-------
53 | |27-26|parameter A:input type 1:Rn 2:Vn 3:C[n]
54 | +-----+-------
55 | | 25 |parameter B:sign
56 | +-----+-------
57 | |24-23|parameter B:swizzle x
58 | +-----+-------
59 | |22-21|parameter B:swizzle y
60 | +-----+-------
61 | |20-19|parameter B:swizzle z
62 | +-----+-------
63 |2|18-17|parameter B:swizzle w
64 | +-----+-------
65 | |16-13|parameter B:parameter Rn index
66 | +-----+-------
67 | |12-11|parameter B:input type 1:Rn 2:Vn 3:C[n]
68 | +-----+-------
69 | | 10 |parameter C:sign
70 | +-----+-------
71 | | 9-8 |parameter C:swizzle x
72 | +-----+-------
73 | | 7-6 |parameter C:swizzle y
74 | +-----+-------
75 | | 5-4 |parameter C:swizzle z
76 | +-----+-------
77 | | 3-2 |parameter C:swizzle w
78 | +-----+-------
79 | | 1-0 |
80 |-+ |parameter C:parameter Rn index
81 | |31-30|
82 | +-----+-------
83 | |29-28|parameter C:input type 1:Rn 2:Vn 3:C[n]
84 | +-----+-------
85 | |27-24|output Rn mask from vectorial operation
86 | +-----+-------
87 | |23-20|output Rn index from vectorial operation
88 | +-----+-------
89 | |19-16|output Rn mask from scalar operation
90 | +-----+-------
91 |3|15-12|output vector write mask
92 | +-----+-------
93 | | 11 |1:output is output vector 0:output is constant C[]
94 | +-----+-------
95 | |10-3 |output vector/constant index
96 | +-----+-------
97 | | 2 |0:output Rn from vectorial operation 1:output Rn from scalar operation
98 | +-----+-------
99 | | 1 |1:add a0x to index for source constant C[]
100 | +-----+-------
101 | | 0 |1:end of program
102 +-+-----+-------
103 Each vertex program instruction can generate up to three destination values using up to three source values.
104 The first possible destination is to Rn from a vectorial operation.
105 The second possible destination is to a vertex shader output or C[n] from a vectorial or scalar operation.
106 The third possible destination is to Rn from a scalar operation.
107 */
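/*
To summarize the table above (a reading aid; the layout matches the srcbits[]
extraction in decodefields() below): the three source parameters A, B and C each
occupy 15 bits that straddle the 32-bit dwords of the instruction:
  parameter A = dword 1 bits 8-0  joined with dword 2 bits 31-26
  parameter B = dword 2 bits 25-11
  parameter C = dword 2 bits 10-0 joined with dword 3 bits 31-28
Each 15-bit group then decodes, from the most significant bit down, as:
sign (1), swizzle x/y/z/w (2 each), Rn index (4), input type (2).
*/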
108 void vertex_program_disassembler::decodefields(unsigned int *dwords, int offset, fields &decoded)
109 {
110 unsigned int srcbits[3];
111 int a;
112
113 srcbits[0] = ((dwords[1 + offset] & 0x1ff) << 6) | (dwords[2 + offset] >> 26);
114 srcbits[1] = (dwords[2 + offset] >> 11) & 0x7fff;
115 srcbits[2] = ((dwords[2 + offset] & 0x7ff) << 4) | (dwords[3 + offset] >> 28);
116 decoded.ScaOperation = (int)(dwords[1 + offset] >> 25) & 0xf;
117 decoded.VecOperation = (int)(dwords[1 + offset] >> 21) & 0xf;
118 decoded.SourceConstantIndex = (int)(dwords[1 + offset] >> 13) & 0xff;
119 decoded.InputIndex = (int)(dwords[1 + offset] >> 9) & 0xf;
120 for (a = 0; a < 3; a++)
121 {
122 decoded.src[a].Sign = (int)(srcbits[a] >> 14) & 1;
123 decoded.src[a].SwizzleX = (int)(srcbits[a] >> 12) & 3;
124 decoded.src[a].SwizzleY = (int)(srcbits[a] >> 10) & 3;
125 decoded.src[a].SwizzleZ = (int)(srcbits[a] >> 8) & 3;
126 decoded.src[a].SwizzleW = (int)(srcbits[a] >> 6) & 3;
127 decoded.src[a].TempIndex = (int)(srcbits[a] >> 2) & 0xf;
128 decoded.src[a].ParameterType = (int)(srcbits[a] >> 0) & 3;
129 }
130
131 decoded.VecTempWriteMask = (int)(dwords[3 + offset] >> 24) & 0xf;
132 decoded.VecTempIndex = (int)(dwords[3 + offset] >> 20) & 0xf;
133 decoded.ScaTempWriteMask = (int)(dwords[3 + offset] >> 16) & 0xf;
134 decoded.OutputWriteMask = (int)(dwords[3 + offset] >> 12) & 0xf;
135 decoded.OutputSelect = (int)(dwords[3 + offset] >> 11) & 0x1;
136 decoded.OutputIndex = (int)(dwords[3 + offset] >> 3) & 0xff;
137 decoded.MultiplexerControl = (int)(dwords[3 + offset] >> 2) & 0x1;
138 decoded.Usea0x = (int)(dwords[3 + offset] >> 1) & 0x1;
139 decoded.EndOfProgram = (int)(dwords[3 + offset] >> 0) & 0x1;
140 }
141
142 int vertex_program_disassembler::disassemble_mask(int mask, char *s)
143 {
144 int l;
145
146 *s = 0;
147 if (mask == 15)
148 return 0;
149 s[0] = '.';
150 l = 1;
151 if ((mask & 8) != 0)
152 {
153 s[l] = 'x';
154 l++;
155 }
156 if ((mask & 4) != 0)
157 {
158 s[l] = 'y';
159 l++;
160 }
161 if ((mask & 2) != 0)
162 {
163 s[l] = 'z';
164 l++;
165 }
166 if ((mask & 1) != 0)
167 {
168 s[l] = 'w';
169 l++;
170 }
171 s[l] = 0;
172 return l;
173 }
174
175 int vertex_program_disassembler::disassemble_swizzle(sourcefields f, char *s)
176 {
177 int t, l;
178
179 t = 4;
180 if (f.SwizzleW == 3)
181 {
182 t = t - 1;
183 if (f.SwizzleZ == 2)
184 {
185 t = t - 1;
186 if (f.SwizzleY == 1)
187 {
188 t = t - 1;
189 if (f.SwizzleX == 0)
190 {
191 t = t - 1;
192 }
193 }
194 }
195 }
196 *s = 0;
197 if (t == 0)
198 return 0;
199 s[0] = '.';
200 l = 1;
201 if (t > 0)
202 {
203 s[l] = compchar[f.SwizzleX];
204 l++;
205 }
206 if (t > 1)
207 {
208 s[l] = compchar[f.SwizzleY];
209 l++;
210 }
211 if (t > 2)
212 {
213 s[l] = compchar[f.SwizzleZ];
214 l++;
215 }
216 if (t > 3)
217 {
218 s[l] = compchar[f.SwizzleW];
219 l++;
220 }
221 s[l] = 0;
222 return l;
223 }
224
225 int vertex_program_disassembler::disassemble_source(sourcefields f, fields fi, char *s)
226 {
227 int l;
228
229 if (f.ParameterType == 0) {
230 strcpy(s, ",???");
231 return 4;
232 }
233 l = 0;
234 if (f.Sign != 0) {
235 s[l] = '-';
236 l++;
237 }
238 if (f.ParameterType == 1) {
239 s[l] = 'r';
240 l = l + 1 + sprintf(s + l + 1, "%d", f.TempIndex);
241 }
242 else if (f.ParameterType == 2){
243 s[l] = 'v';
244 l = l + 1 + sprintf(s + l + 1, "%d", fi.InputIndex);
245 }
246 else
247 {
248 if (fi.Usea0x != 0)
249 {
250 if (fi.SourceConstantIndex >= 96) {
251 strcpy(s + l, "c[");
252 l = l + 2;
253 l = l + sprintf(s + l, "%d", fi.SourceConstantIndex - 96);
254 strcpy(s + l, "+a0.x]");
255 l = l + 6;
256 }
257 else {
258 strcpy(s + l, "c[a0.x");
259 l = l + 6;
260 l = l + sprintf(s + l, "%d", fi.SourceConstantIndex - 96);
261 s[l] = ']';
262 l++;
263 }
264 }
265 else {
266 strcpy(s + l, "c[");
267 l = l + 2;
268 l = l + sprintf(s + l, "%d", fi.SourceConstantIndex - 96);
269 s[l] = ']';
270 l++;
271 }
272 }
273 l = l + disassemble_swizzle(f, s + l);
274 s[l] = 0;
275 return l;
276 }
277
278 int vertex_program_disassembler::disassemble_output(fields f, char *s)
279 {
280 int l;
281
282 if (f.OutputSelect == 1) {
283 strcpy(s, vecouts[f.OutputIndex]);
284 return strlen(s);
285 }
286 else {
287 strcpy(s, "c[");
288 l = 2;
289 l = l + sprintf(s + l, "%d", f.OutputIndex - 96);
290 s[l] = ']';
291 l++;
292 }
293 s[l] = 0;
294 return l;
295 }
296
297 int vertex_program_disassembler::output_types(fields f, int *o)
298 {
299 o[0] = o[1] = o[2] = o[3] = o[4] = o[5] = 0;
300 if ((f.VecOperation > 0) && (f.VecTempWriteMask != 0))
301 o[0] = 1;
302 if ((f.VecOperation > 0) && (f.OutputWriteMask != 0) && (f.MultiplexerControl == 0))
303 o[1] = 1;
304 if ((f.ScaOperation > 0) && (f.OutputWriteMask != 0) && (f.MultiplexerControl == 1))
305 o[2] = 1;
306 if ((f.ScaOperation > 0) && (f.ScaTempWriteMask != 0))
307 o[3] = 1;
308 if (f.VecOperation == 13)
309 o[4] = 1;
310 if (f.EndOfProgram == 1)
311 o[5] = 1;
312 return o[0] + o[1] + o[2] + o[3] + o[4] + o[5];
313 }
314
315 int vertex_program_disassembler::disassemble(unsigned int *instruction, char *line)
316 {
317 int b, p;
318 char *c;
319
320 if (state == 0) {
321 decodefields(instruction, 0, f);
322 output_types(f, o);
323 state = 1;
324 }
325 if (o[0] != 0)
326 {
327 o[0] = 0;
328 c = line;
329 strcpy(c, vecops[f.VecOperation]);
330 c = c + strlen(c);
331 strcpy(c, " r");
332 c = c + 2;
333 c = c + sprintf(c, "%d", f.VecTempIndex);
334 c = c + disassemble_mask(f.VecTempWriteMask, c);
335 b = 0;
336 for (p = 4; p != 0; p = p >> 1)
337 {
338 if ((vecpar2[f.VecOperation] & p) != 0) {
339 c[0] = ',';
340 c++;
341 c = c + disassemble_source(f.src[b], f, c);
342 }
343 b++;
344 }
345 *c = 0;
346 return 1;
347 }
348 if (o[1] != 0)
349 {
350 o[1] = 0;
351 c = line;
352 strcpy(c, vecops[f.VecOperation]);
353 c = c + strlen(c);
354 *c = ' ';
355 c++;
356 c = c + disassemble_output(f, c);
357 c = c + disassemble_mask(f.OutputWriteMask, c);
358 b = 0;
359 for (p = 4; p != 0; p = p >> 1)
360 {
361 if ((vecpar2[f.VecOperation] & p) != 0) {
362 *c = ',';
363 c++;
364 c = c + disassemble_source(f.src[b], f, c);
365 }
366 b++;
367 }
368 *c = 0;
369 return 1;
370 }
371 if (o[2] != 0)
372 {
373 o[2] = 0;
374 c = line;
375 strcpy(c, scaops[f.ScaOperation]);
376 c = c + strlen(c);
377 *c = ' ';
378 c++;
379 c = c + disassemble_output(f, c);
380 c = c + disassemble_mask(f.OutputWriteMask, c);
381 b = 0;
382 for (p = 4; p != 0; p = p >> 1)
383 {
384 if ((scapar2[f.ScaOperation] & p) != 0) {
385 *c = ',';
386 c++;
387 c = c + disassemble_source(f.src[b], f, c);
388 }
389 b++;
390 }
391 *c = 0;
392 return 1;
393 }
394 if (o[3] != 0)
395 {
396 if (f.VecOperation > 0)
397 b = 1;
398 else
399 b = f.VecTempIndex;
400 o[3] = 0;
401 c = line;
402 strcpy(c, scaops[f.ScaOperation]);
403 c = c + strlen(c);
404 strcpy(c, " r");
405 c = c + 2;
406 c = c + sprintf(c, "%d", b);
407 c = c + disassemble_mask(f.ScaTempWriteMask, c);
408 b = 0;
409 for (p = 4; p != 0; p = p >> 1)
410 {
411 if ((scapar2[f.ScaOperation] & p) != 0) {
412 *c = ',';
413 c++;
414 c = c + disassemble_source(f.src[b], f, c);
415 }
416 b++;
417 }
418 *c = 0;
419 return 1;
420 }
421 if (o[4] != 0)
422 {
423 o[4] = 0;
424 c = line;
425 c = c + sprintf(c, "MOV a0.x,");
426 c = c + disassemble_source(f.src[0], f, c);
427 *c = 0;
428 return 1;
429 }
430 if (o[5] != 0)
431 {
432 o[5] = 0;
433 strcpy(line, "END");
434 return 1;
435 }
436 state = 0;
437 return 0;
438 }
439
440 vertex_program_simulator::vertex_program_simulator()
441 {
442 for (auto & elem : op)
443 elem.modified = 0;
444 initialize_constants();
445 }
446
447 void vertex_program_simulator::set_data(vertex_nv *in, vertex_nv *out)
448 {
449 input = in;
450 output = out;
451 }
452
453 void vertex_program_simulator::reset()
454 {
455 ip = 0;
456 a0x = 0;
457 initialize_outputs();
458 initialize_temps();
459 }
460
461 void vertex_program_simulator::decode_instruction(int address)
462 {
463 instruction *i;
464
465 i = &op[address];
466 i->d.NegateA = i->i[1] & (1 << 8);
467 i->d.ParameterTypeA = (i->i[2] >> 26) & 3;
468 i->d.TempIndexA = (i->i[2] >> 28) & 15;
469 i->d.SwizzleA[0] = (i->i[1] >> 6) & 3;
470 i->d.SwizzleA[1] = (i->i[1] >> 4) & 3;
471 i->d.SwizzleA[2] = (i->i[1] >> 2) & 3;
472 i->d.SwizzleA[3] = (i->i[1] >> 0) & 3;
473 i->d.NegateB = i->i[2] & (1 << 25);
474 i->d.ParameterTypeB = (i->i[2] >> 11) & 3;
475 i->d.TempIndexB = (i->i[2] >> 13) & 15;
476 i->d.SwizzleB[0] = (i->i[2] >> 23) & 3;
477 i->d.SwizzleB[1] = (i->i[2] >> 21) & 3;
478 i->d.SwizzleB[2] = (i->i[2] >> 19) & 3;
479 i->d.SwizzleB[3] = (i->i[2] >> 17) & 3;
480 i->d.NegateC = i->i[2] & (1 << 10);
481 i->d.ParameterTypeC = (i->i[3] >> 28) & 3;
482 i->d.TempIndexC = ((i->i[2] & 3) << 2) + (i->i[3] >> 30);
483 i->d.SwizzleC[0] = (i->i[2] >> 8) & 3;
484 i->d.SwizzleC[1] = (i->i[2] >> 6) & 3;
485 i->d.SwizzleC[2] = (i->i[2] >> 4) & 3;
486 i->d.SwizzleC[3] = (i->i[2] >> 2) & 3;
487 i->d.VecOperation = (VectorialOperation)((i->i[1] >> 21) & 15);
488 i->d.ScaOperation = (ScalarOperation)((i->i[1] >> 25) & 15);
489 i->d.OutputWriteMask = ((i->i[3] >> 12) & 15);
490 i->d.MultiplexerControl = i->i[3] & 4; // 0 : output Rn from vectorial operation 4 : output Rn from scalar operation
491 i->d.VecTempIndex = (i->i[3] >> 20) & 15;
492 i->d.OutputIndex = (i->i[3] >> 3) & 255;
493 i->d.OutputSelect = i->i[3] & 0x800;
494 i->d.VecTempWriteMask = (i->i[3] >> 24) & 15;
495 i->d.ScaTempWriteMask = (i->i[3] >> 16) & 15;
496 i->d.InputIndex = (i->i[1] >> 9) & 15;
497 i->d.SourceConstantIndex = (i->i[1] >> 13) & 255;
498 i->d.Usea0x = i->i[3] & 2;
499 i->d.EndOfProgram = i->i[3] & 1;
500 }
501
502 int vertex_program_simulator::step()
503 {
504 int p1, p2;
505 float tmp[3 * 4] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
506 float tmpv[4] = { 0, 0, 0, 0};
507 float tmps[4] = { 0, 0, 0, 0};
508 instruction::decoded *d;
509
510 #if 0 // useful while debugging to see what instruction is being executed
511 static int debugvpi = 0;
512 char disbuffer[256];
513 if (debugvpi) {
514 char *pp;
515 vertex_program_disassembler vdis;
516
517 pp = disbuffer;
518 while (vdis.disassemble(op[ip].i, pp) != 0) {
519 pp = pp + strlen(pp);
520 *pp = '\n';
521 pp++;
522 *pp = 0;
523 printf("%s", disbuffer);
524 }
525 }
526 #endif
527
528 if (op[ip].modified)
529 decode_instruction(ip);
530 d = &(op[ip].d);
531 // prepare inputs
532 // input A
533 generate_input(&tmp[0], d->NegateA, d->ParameterTypeA, d->TempIndexA, d->SwizzleA);
534 // input B
535 generate_input(&tmp[4], d->NegateB, d->ParameterTypeB, d->TempIndexB, d->SwizzleB);
536 // input C
537 generate_input(&tmp[8], d->NegateC, d->ParameterTypeC, d->TempIndexC, d->SwizzleC);
538 // compute 2 instructions
539 // vectorial
540 compute_vectorial_operation(tmpv, d->VecOperation, tmp);
541 // scalar
542 compute_scalar_operation(tmps, d->ScaOperation, tmp);
543 // assign destinations
544 if (d->VecOperation != VecNOP) {
545 if (d->VecOperation == VecARL)
546 //o[4] = 1;
547 a0x = (int)tmpv[0];
548 else {
549 if (d->VecTempWriteMask != 0) { // assign to Rn
550 //o[0] = 1;
551 assign_register(d->VecTempIndex, tmpv, d->VecTempWriteMask);
552 }
553 if ((d->OutputWriteMask != 0) && (d->MultiplexerControl == 0)) {
554 //o[1] = 1;
555 if (d->OutputSelect) { // assign to output
556 assign_output(d->OutputIndex, tmpv, d->OutputWriteMask);
557 // remember, output position == r12
558 if (d->OutputIndex == 0)
559 for (p1 = 0; p1 < 4; p1++) {
560 r_register[12].fv[p1] = output->attribute[d->OutputIndex].fv[p1];
561 }
562 }
563 else { // assign to constant
564 assign_constant(d->OutputIndex, tmpv, d->OutputWriteMask);
565 }
566 }
567 }
568 }
569 if (d->ScaOperation != ScaNOP) {
570 if (d->ScaTempWriteMask != 0) { // assign to Rn
571 //o[3] = 1;
572 if (d->VecOperation != VecNOP)
573 p2 = 1;
574 else
575 p2 = d->VecTempIndex;
576 assign_register(p2, tmps, d->ScaTempWriteMask);
577 }
578 if ((d->OutputWriteMask != 0) && (d->MultiplexerControl != 0)) { // assign to output
579 //o[2] = 1;
580 assign_output(d->OutputIndex, tmps, d->OutputWriteMask);
581 // remember, output position == r12
582 if (d->OutputIndex == 0) {
583 for (p1 = 0; p1 < 4; p1++) {
584 r_register[12].fv[p1] = output->attribute[d->OutputIndex].fv[p1];
585 }
586 }
587 }
588 }
589 return d->EndOfProgram;
590 }
591
592 void vertex_program_simulator::execute()
593 {
594 int c;
595
596 c = 0;
597 do {
598 c = step();
599 ip++;
600 } while (c == 0);
601 }
602
603 void vertex_program_simulator::jump(int address)
604 {
605 ip = address;
606 }
607
608 void vertex_program_simulator::process(int address, vertex_nv *in, vertex_nv *out, int count)
609 {
610 #if 0 // useful while debugging to see what is being executed
611 static int debugvps = 0;
612 if (debugvps) {
613 FILE *f;
614 char *pp;
615 vertex_program_disassembler vdis;
616 char disbuffer[128];
617
618 debugvps--;
619 if ((f = fopen("vertexshader_debug.txt", "wb")) != nullptr) {
620 jump(address);
621 fprintf(f, "SHADER:\n");
622 for (int t = 0; t < 128; t++) {
623 pp = disbuffer;
624 while (vdis.disassemble(op[ip + t].i, pp) != 0) {
625 pp = pp + strlen(pp);
626 *pp = '\n';
627 pp++;
628 *pp = 0;
629 }
630 fprintf(f, "%08X %08X %08X %s", op[ip + t].i[1], op[ip + t].i[2], op[ip + t].i[3], disbuffer);
631 if (op[ip + t].i[3] & 1)
632 break;
633 }
634 fprintf(f, "INPUTS:\n");
635 for (int t = 0; t < 16; t++)
636 fprintf(f, "v%d %f %f %f %f\n", t, in->attribute[t].fv[0], in->attribute[t].fv[1], in->attribute[t].fv[2], in->attribute[t].fv[3]);
637 fprintf(f, "CONSTANTS:\n");
638 for (int t = 0; t < 192; t++)
639 fprintf(f, "c[%d] %f %f %f %f\n", t - 96, c_constant[t].fv[0], c_constant[t].fv[1], c_constant[t].fv[2], c_constant[t].fv[3]);
640 fclose(f);
641 }
642 }
643 #endif
644 set_data(in, out);
645 while (count > 0) {
646 reset();
647 jump(address);
648 execute();
649 input++;
650 output++;
651 count--;
652 }
653 }
654
655 int vertex_program_simulator::status()
656 {
657 return ip;
658 }
659
660 void vertex_program_simulator::initialize_outputs()
661 {
662 for (int n = 0; n < 16; n++) {
663 output->attribute[n].fv[0] = output->attribute[n].fv[1] = output->attribute[n].fv[2] = 0;
664 output->attribute[n].fv[3] = 1;
665 }
666 }
667
668 void vertex_program_simulator::initialize_temps()
669 {
670 for (auto & elem : r_register) {
671 for (int m = 0; m < 4; m++)
672 elem.fv[m] = 0;
673 }
674 }
675
676 void vertex_program_simulator::initialize_constants()
677 {
678 for (auto & elem : c_constant) {
679 for (int m = 0; m < 4;m++)
680 elem.fv[m] = 0;
681 }
682 }
683
684 void vertex_program_simulator::generate_input(float t[4], int sign, int type, int temp, int swizzle[4])
685 {
686 float sgn = 1;
687
688 if (sign)
689 sgn = -1;
690 if (type == 1) {
691 t[0] = sgn * r_register[temp].fv[swizzle[0]];
692 t[1] = sgn * r_register[temp].fv[swizzle[1]];
693 t[2] = sgn * r_register[temp].fv[swizzle[2]];
694 t[3] = sgn * r_register[temp].fv[swizzle[3]];
695 }
696 else if (type == 2) {
697 int InputIndex = op[ip].d.InputIndex;
698 t[0] = sgn * input->attribute[InputIndex].fv[swizzle[0]];
699 t[1] = sgn * input->attribute[InputIndex].fv[swizzle[1]];
700 t[2] = sgn * input->attribute[InputIndex].fv[swizzle[2]];
701 t[3] = sgn * input->attribute[InputIndex].fv[swizzle[3]];
702 }
703 else if (type == 3) {
704 int SourceConstantIndex = op[ip].d.SourceConstantIndex;
705 if (op[ip].d.Usea0x)
706 SourceConstantIndex = SourceConstantIndex + a0x;
707 t[0] = sgn * c_constant[SourceConstantIndex].fv[swizzle[0]];
708 t[1] = sgn * c_constant[SourceConstantIndex].fv[swizzle[1]];
709 t[2] = sgn * c_constant[SourceConstantIndex].fv[swizzle[2]];
710 t[3] = sgn * c_constant[SourceConstantIndex].fv[swizzle[3]];
711 }
712 }
713
714 void vertex_program_simulator::compute_vectorial_operation(float t_out[4], int instruction, float par_in[3 * 4])
715 {
716 const int p1_A = 0;
717 const int p2_B = 4;
718 const int p3_C = 8;
719
720 // t_out <= instruction(par_in)
721 switch (instruction) {
722 case 0: // "NOP"
723 break;
724 case 1: // "MOV"
725 t_out[0] = par_in[p1_A + 0];
726 t_out[1] = par_in[p1_A + 1];
727 t_out[2] = par_in[p1_A + 2];
728 t_out[3] = par_in[p1_A + 3];
729 break;
730 case 2: // "MUL"
731 t_out[0] = par_in[p1_A + 0] * par_in[p2_B + 0];
732 t_out[1] = par_in[p1_A + 1] * par_in[p2_B + 1];
733 t_out[2] = par_in[p1_A + 2] * par_in[p2_B + 2];
734 t_out[3] = par_in[p1_A + 3] * par_in[p2_B + 3];
735 break;
736 case 3: // "ADD"
737 t_out[0] = par_in[p1_A + 0] + par_in[p3_C + 0];
738 t_out[1] = par_in[p1_A + 1] + par_in[p3_C + 1];
739 t_out[2] = par_in[p1_A + 2] + par_in[p3_C + 2];
740 t_out[3] = par_in[p1_A + 3] + par_in[p3_C + 3];
741 break;
742 case 4: // "MAD"
743 t_out[0] = par_in[p1_A + 0] * par_in[p2_B + 0] + par_in[p3_C + 0];
744 t_out[1] = par_in[p1_A + 1] * par_in[p2_B + 1] + par_in[p3_C + 1];
745 t_out[2] = par_in[p1_A + 2] * par_in[p2_B + 2] + par_in[p3_C + 2];
746 t_out[3] = par_in[p1_A + 3] * par_in[p2_B + 3] + par_in[p3_C + 3];
747 break;
748 case 5: // "DP3"
749 t_out[0] = par_in[p1_A + 0] * par_in[p2_B + 0] + par_in[p1_A + 1] * par_in[p2_B + 1] + par_in[p1_A + 2] * par_in[p2_B + 2];
750 t_out[1] = t_out[2] = t_out[3] = t_out[0];
751 break;
752 case 6: // "DPH"
753 t_out[0] = par_in[p1_A + 0] * par_in[p2_B + 0] + par_in[p1_A + 1] * par_in[p2_B + 1] + par_in[p1_A + 2] * par_in[p2_B + 2] + par_in[p2_B + 3];
754 t_out[1] = t_out[2] = t_out[3] = t_out[0];
755 break;
756 case 7: // "DP4"
757 t_out[0] = par_in[p1_A + 0] * par_in[p2_B + 0] + par_in[p1_A + 1] * par_in[p2_B + 1] + par_in[p1_A + 2] * par_in[p2_B + 2] + par_in[p1_A + 3] * par_in[p2_B + 3];
758 t_out[1] = t_out[2] = t_out[3] = t_out[0];
759 break;
760 case 8: // "DST"
761 t_out[0] = 1.0;
762 t_out[1] = par_in[p1_A + 1] * par_in[p2_B + 1];
763 t_out[2] = par_in[p1_A + 2];
764 t_out[3] = par_in[p2_B + 3];
765 break;
766 case 9: // "MIN"
767 t_out[0] = fmin(par_in[p1_A + 0], par_in[p2_B + 0]);
768 t_out[1] = fmin(par_in[p1_A + 1], par_in[p2_B + 1]);
769 t_out[2] = fmin(par_in[p1_A + 2], par_in[p2_B + 2]);
770 t_out[3] = fmin(par_in[p1_A + 3], par_in[p2_B + 3]);
771 break;
772 case 10: // "MAX"
773 t_out[0] = fmax(par_in[p1_A + 0], par_in[p2_B + 0]);
774 t_out[1] = fmax(par_in[p1_A + 1], par_in[p2_B + 1]);
775 t_out[2] = fmax(par_in[p1_A + 2], par_in[p2_B + 2]);
776 t_out[3] = fmax(par_in[p1_A + 3], par_in[p2_B + 3]);
777 break;
778 case 11: // "SLT"
779 t_out[0] = (par_in[p1_A + 0] < par_in[p2_B + 0]) ? 1.0 : 0;
780 t_out[1] = (par_in[p1_A + 1] < par_in[p2_B + 1]) ? 1.0 : 0;
781 t_out[2] = (par_in[p1_A + 2] < par_in[p2_B + 2]) ? 1.0 : 0;
782 t_out[3] = (par_in[p1_A + 3] < par_in[p2_B + 3]) ? 1.0 : 0;
783 break;
784 case 12: // "SGE"
785 t_out[0] = (par_in[p1_A + 0] >= par_in[p2_B + 0]) ? 1.0 : 0;
786 t_out[1] = (par_in[p1_A + 1] >= par_in[p2_B + 1]) ? 1.0 : 0;
787 t_out[2] = (par_in[p1_A + 2] >= par_in[p2_B + 2]) ? 1.0 : 0;
788 t_out[3] = (par_in[p1_A + 3] >= par_in[p2_B + 3]) ? 1.0 : 0;
789 break;
790 case 13: // "ARL"
791 t_out[0] = par_in[p1_A + 0];
792 }
793 }
794
795 void vertex_program_simulator::compute_scalar_operation(float t_out[4], int instruction, float par_in[3 * 4])
796 {
797 //const int p1_A = 0;
798 //const int p2_B = 4;
799 const int p3_C = 8;
800 union {
801 float f;
802 unsigned int i;
803 } t;
804 int e;
805
806 // t_out <= instruction(par_in)
807 switch (instruction) {
808 case 0: // "NOP"
809 break;
810 case 1: // "IMV"
811 t_out[0] = par_in[p3_C + 0];
812 t_out[1] = par_in[p3_C + 1];
813 t_out[2] = par_in[p3_C + 2];
814 t_out[3] = par_in[p3_C + 3];
815 break;
816 case 2: // "RCP"
817 if (par_in[p3_C + 0] == 0)
818 t.f = std::numeric_limits<float>::infinity();
819 else if (par_in[p3_C + 0] == 1.0f)
820 t.f = 1.0f;
821 else
822 t.f = 1.0f / par_in[p3_C + 0];
823 t_out[0] = t_out[1] = t_out[2] = t_out[3] = t.f;
824 break;
825 case 3: // "RCC"
826 t.f = par_in[p3_C + 0];
827 if ((t.f < 0) && (t.f > -5.42101e-20f))
828 t.f = -5.42101e-20f;
829 else if ((t.f >= 0) && (t.f < 5.42101e-20f))
830 t.f = 5.42101e-20f;
831 if (t.f != 1.0f)
832 t.f = 1.0f / t.f;
833 t_out[0] = t_out[1] = t_out[2] = t_out[3] = t.f;
834 break;
835 case 4: // "RSQ"
836 t_out[0] = t_out[1] = t_out[2] = t_out[3] = 1.0f / sqrtf(fabsf(par_in[p3_C + 0]));
837 break;
838 case 5: // "EXP"
839 t_out[0] = pow(2, floor(par_in[p3_C + 0]));
840 t_out[1] = par_in[p3_C + 0] - floorf(par_in[p3_C + 0]);
841 t.f = pow(2, par_in[p3_C + 0]);
842 t.i = t.i & 0xffffff00;
843 t_out[2] = t.f;
844 t_out[3] = 1.0;
845 break;
846 case 6: // "LOG"
847 t_out[1] = frexp(par_in[p3_C + 0], &e)*2.0; // frexp gives mantissa as 0.5....1
848 t_out[0] = e - 1;
849 #ifndef __OS2__
850 t.f = log2(fabsf(par_in[p3_C + 0]));
851 #else
852 static double log_2 = 0.0;
853 if (log_2 == 0.0)
854 log_2 = log(2);
855 t.f = log(abs(par_in[p3_C + 0])) / log_2;
856 #endif
857 t.i = t.i & 0xffffff00;
858 t_out[2] = t.f;
859 t_out[3] = 1.0;
860 break;
861 case 7: // "LIT"
862 t_out[0] = 1.0;
863 t_out[1] = fmax(0, fmin(par_in[p3_C + 0], 1.0f));
864 t_out[2] = par_in[p3_C + 0] > 0 ? pow(fmax(par_in[p3_C + 1], 0), par_in[p3_C + 3]) : 0;
865 t_out[3] = 1.0;
866 break;
867 }
868 }
869
870 void vertex_program_simulator::assign_output(int index, float t[4], int mask)
871 {
872 for (int p1 = 0; p1 < 4; p1++) {
873 if (mask & 8)
874 output->attribute[index].fv[p1] = t[p1];
875 mask = mask << 1;
876 }
877 }
878
879 void vertex_program_simulator::assign_register(int index, float t[4], int mask)
880 {
881 for (int p1 = 0; p1 < 4; p1++) {
882 if (mask & 8)
883 r_register[index].fv[p1] = t[p1];
884 mask = mask << 1;
885 }
886 }
887
888 void vertex_program_simulator::assign_constant(int index, float t[4], int mask)
889 {
890 for (int p1 = 0; p1 < 4; p1++) {
891 if (mask & 8)
892 c_constant[index].fv[p1] = t[p1];
893 mask = mask << 1;
894 }
895 }
896
897 /*
898 * Graphics
899 */
900
901 uint32_t nv2a_renderer::dilate0(uint32_t value, int bits) // dilate first "bits" bits in "value"
902 {
903 uint32_t x, m1, m2, m3;
904 int a;
905
906 x = value;
907 for (a = 0; a < bits; a++)
908 {
909 m2 = 1 << (a << 1);
910 m1 = m2 - 1;
911 m3 = (~m1) << 1;
912 x = (x & m1) + (x & m2) + ((x & m3) << 1);
913 }
914 return x;
915 }
916
917 uint32_t nv2a_renderer::dilate1(uint32_t value, int bits) // dilate first "bits" bits in "value"
918 {
919 uint32_t x, m1, m2, m3;
920 int a;
921
922 x = value;
923 for (a = 0; a < bits; a++)
924 {
925 m2 = 1 << (a << 1);
926 m1 = m2 - 1;
927 m3 = (~m1) << 1;
928 x = (x & m1) + ((x & m2) << 1) + ((x & m3) << 1);
929 }
930 return x;
931 }
932
933 void nv2a_renderer::computedilated(void)
934 {
935 int a, b;
936
937 for (b = 0; b < 16; b++)
938 for (a = 0; a < 2048; a++) {
939 dilated0[b][a] = dilate0(a, b);
940 dilated1[b][a] = dilate1(a, b);
941 }
942 for (b = 0; b < 16; b++)
943 for (a = 0; a < 16; a++)
944 dilatechose[(b << 4) + a] = (a < b ? a : b);
945 }
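/*
A note on the tables built above (derived from dilate0/dilate1): dilate0 moves bit i of
its argument to bit 2*i and dilate1 moves bit i to bit 2*i+1, so
dilated0[b][x] + dilated1[b][y] interleaves the x and y coordinate bits (Morton order).
This sum is the offset used below to address swizzled textures and render targets.
Small worked example with bits=2: dilate0(3, 2) = 0b0101 = 5 and dilate1(3, 2) = 0b1010 = 10,
so texel (x=3, y=3) of a 4x4 swizzled surface lands at offset 15.
*/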
946
947 inline uint8_t *nv2a_renderer::direct_access_ptr(offs_t address)
948 {
949 #ifdef DEBUG_CHECKS
950 if (address >= 512*1024*1024)
951 machine().logerror("Bad address in direct_access_ptr !\n");
952 #endif
953 return basemempointer + address;
954 }
955
956 nv2a_renderer::COMMAND nv2a_renderer::geforce_commandkind(uint32_t word)
957 {
958 if ((word & 0x00000003) == 0x00000002)
959 return COMMAND::CALL;
960 if ((word & 0x00000003) == 0x00000001)
961 return COMMAND::JUMP;
962 if ((word & 0xE0030003) == 0x40000000)
963 return COMMAND::NON_INCREASING;
964 if ((word & 0xE0000003) == 0x20000000)
965 return COMMAND::OLD_JUMP;
966 if ((word & 0xFFFF0003) == 0x00030000)
967 return COMMAND::LONG_NON_INCREASING;
968 if ((word & 0xFFFFFFFF) == 0x00020000)
969 return COMMAND::RETURN;
970 if ((word & 0xFFFF0003) == 0x00010000)
971 return COMMAND::SLI_CONDITIONAL;
972 if ((word & 0xE0030003) == 0x00000000)
973 return COMMAND::INCREASING;
974 return COMMAND::INVALID;
975 }
976
977 uint32_t nv2a_renderer::geforce_object_offset(uint32_t handle)
978 {
979 uint32_t h = ((((handle >> 11) ^ handle) >> 11) ^ handle) & 0x7ff;
980 uint32_t o = (pfifo[0x210 / 4] & 0x1ff) << 8; // 0x1ff is not certain
981 uint32_t e = o + h * 8; // at 0xfd000000+0x00700000
982 uint32_t w;
983
984 if (ramin[e / 4] != handle) {
985 // this should never happen
986 for (uint32_t aa = o / 4; aa < (sizeof(ramin) / 4); aa = aa + 2) {
987 if (ramin[aa] == handle) {
988 e = aa * 4;
989 }
990 }
991 }
992 w = ramin[e / 4 + 1];
993 return (w & 0xffff) * 0x10; // 0xffff is not certain
994 }
995
996 void nv2a_renderer::geforce_read_dma_object(uint32_t handle, uint32_t &offset, uint32_t &size)
997 {
998 //uint32_t objclass,pt_present,pt_linear,access,target,rorw;
999 uint32_t dma_adjust, dma_frame;
1000 uint32_t o = geforce_object_offset(handle);
1001
1002 o = o / 4;
1003 //objclass=ramin[o] & 0xfff;
1004 //pt_present=(ramin[o] >> 12) & 1;
1005 //pt_linear=(ramin[o] >> 13) & 1;
1006 //access=(ramin[o] >> 14) & 3;
1007 //target=(ramin[o] >> 16) & 3;
1008 dma_adjust = (ramin[o] >> 20) & 0xfff;
1009 size = ramin[o + 1];
1010 //rorw=ramin[o+2] & 1;
1011 dma_frame = ramin[o + 2] & 0xfffff000;
1012 offset = dma_frame + dma_adjust;
1013 }
1014
1015 /*void debug(uint32_t *bmp, int width, int height, float x1, float y1, float x2, float y2, uint32_t color)
1016 {
1017 int xx1,yy1,xx2,yy2;
1018
1019 xx1=x1;
1020 xx2=x2;
1021 yy1=y1;
1022 yy2=y2;
1023 if (xx1 == xx2) {
1024 if (yy1 > yy2) {
1025 int t=yy1;
1026 yy1=yy2;
1027 yy2=t;
1028 }
1029 for (int y=yy1;y <= yy2;y++) {
1030 *(bmp+y*width+xx1) = color;
1031 }
1032 } else if (yy1 == yy2) {
1033 if (xx1 > xx2) {
1034 int t=xx1;
1035 xx1=xx2;
1036 xx2=t;
1037 }
1038 for (int x=xx1;x <= xx2;x++)
1039 *(bmp+yy1*width+x) = color;
1040 }
1041 }*/
1042
1043 inline uint32_t convert_a4r4g4b4_a8r8g8b8(uint32_t a4r4g4b4)
1044 {
1045 uint32_t a8r8g8b8;
1046 int ca, cr, cg, cb;
1047
1048 cb = pal4bit(a4r4g4b4 & 0x000f);
1049 cg = pal4bit((a4r4g4b4 & 0x00f0) >> 4);
1050 cr = pal4bit((a4r4g4b4 & 0x0f00) >> 8);
1051 ca = pal4bit((a4r4g4b4 & 0xf000) >> 12);
1052 a8r8g8b8 = (ca << 24) | (cr << 16) | (cg << 8) | (cb); // color converted to 8 bits per component
1053 return a8r8g8b8;
1054 }
1055
1056 inline uint32_t convert_a1r5g5b5_a8r8g8b8(uint32_t a1r5g5b5)
1057 {
1058 uint32_t a8r8g8b8;
1059 int ca, cr, cg, cb;
1060
1061 cb = pal5bit(a1r5g5b5 & 0x001f);
1062 cg = pal5bit((a1r5g5b5 & 0x03e0) >> 5);
1063 cr = pal5bit((a1r5g5b5 & 0x7c00) >> 10);
1064 ca = a1r5g5b5 & 0x8000 ? 0xff : 0;
1065 a8r8g8b8 = (ca << 24) | (cr << 16) | (cg << 8) | (cb); // color converted to 8 bits per component
1066 return a8r8g8b8;
1067 }
1068
1069 inline uint32_t convert_r5g6b5_r8g8b8(uint32_t r5g6b5)
1070 {
1071 uint32_t r8g8b8;
1072 int cr, cg, cb;
1073
1074 cb = pal5bit(r5g6b5 & 0x001f);
1075 cg = pal6bit((r5g6b5 & 0x07e0) >> 5);
1076 cr = pal5bit((r5g6b5 & 0xf800) >> 11);
1077 r8g8b8 = (cr << 16) | (cg << 8) | (cb); // color converted to 8 bits per component
1078 return r8g8b8;
1079 }
1080
1081 uint32_t nv2a_renderer::texture_get_texel(int number, int x, int y)
1082 {
1083 uint32_t to, s, c, sa, ca;
1084 uint32_t a4r4g4b4, a1r5g5b5, r5g6b5;
1085 int bx, by;
1086 int color0, color1, color0m2, color1m2, alpha0, alpha1;
1087 uint32_t codes;
1088 uint64_t alphas;
1089 int cr, cg, cb;
1090 int sizeu, sizev;
1091
1092 if (texture[number].rectangle == false) {
1093 sizeu = texture[number].sizes;
1094 sizev = texture[number].sizet;
1095 }
1096 else {
1097 sizeu = texture[number].rectwidth;
1098 sizev = texture[number].rectheight;
1099 }
1100 switch (texture[number].addrmodes) {
1101 default:
1102 case 1: // wrap
1103 x = x % sizeu;
1104 if (x < 0)
1105 x = sizeu + x;
1106 break;
1107 case 3: // clamp
1108 if (x < 0)
1109 x = 0;
1110 if (x >= sizeu)
1111 x = sizeu - 1;
1112 break;
1113 }
1114 switch (texture[number].addrmodet) {
1115 default:
1116 case 1: // wrap
1117 y = y % sizev;
1118 if (y < 0)
1119 y = sizev + y;
1120 break;
1121 case 3: // clamp
1122 if (y < 0)
1123 y = 0;
1124 if (y >= sizev)
1125 y = sizev - 1;
1126 break;
1127 }
1128 switch (texture[number].format) {
1129 case NV2A_TEX_FORMAT::A8R8G8B8:
1130 to = dilated0[texture[number].dilate][x] + dilated1[texture[number].dilate][y]; // offset of texel in texture memory
1131 return *(((uint32_t *)texture[number].buffer) + to); // get texel color
1132 case NV2A_TEX_FORMAT::X8R8G8B8:
1133 to = dilated0[texture[number].dilate][x] + dilated1[texture[number].dilate][y]; // offset of texel in texture memory
1134 return 0xff000000 | (*(((uint32_t*)texture[number].buffer) + to) & 0xffffff); // get texel color
1135 case NV2A_TEX_FORMAT::DXT1:
1136 bx = x >> 2;
1137 by = y >> 2;
1138 x = x & 3;
1139 y = y & 3;
1140 to = bx + by*(sizeu >> 2);
1141 color0 = *((uint16_t *)(((uint64_t *)texture[number].buffer) + to) + 0);
1142 color1 = *((uint16_t *)(((uint64_t *)texture[number].buffer) + to) + 1);
1143 codes = *((uint32_t *)(((uint64_t *)texture[number].buffer) + to) + 1);
1144 s = (y << 3) + (x << 1);
1145 c = (codes >> s) & 3;
1146 c = c + (color0 > color1 ? 0 : 4);
1147 color0m2 = color0 << 1;
1148 color1m2 = color1 << 1;
1149 switch (c) {
1150 case 0:
1151 return 0xff000000 + convert_r5g6b5_r8g8b8(color0);
1152 case 1:
1153 return 0xff000000 + convert_r5g6b5_r8g8b8(color1);
1154 case 2:
1155 cb = pal5bit(((color0m2 & 0x003e) + (color1 & 0x001f)) / 3);
1156 cg = pal6bit(((color0m2 & 0x0fc0) + (color1 & 0x07e0)) / 3 >> 5);
1157 cr = pal5bit(((color0m2 & 0x1f000) + color1) / 3 >> 11);
1158 return 0xff000000 | (cr << 16) | (cg << 8) | (cb);
1159 case 3:
1160 cb = pal5bit(((color1m2 & 0x003e) + (color0 & 0x001f)) / 3);
1161 cg = pal6bit(((color1m2 & 0x0fc0) + (color0 & 0x07e0)) / 3 >> 5);
1162 cr = pal5bit(((color1m2 & 0x1f000) + color0) / 3 >> 11);
1163 return 0xff000000 | (cr << 16) | (cg << 8) | (cb);
1164 case 4:
1165 return 0xff000000 + convert_r5g6b5_r8g8b8(color0);
1166 case 5:
1167 return 0xff000000 + convert_r5g6b5_r8g8b8(color1);
1168 case 6:
1169 cb = pal5bit(((color0 & 0x001f) + (color1 & 0x001f)) / 2);
1170 cg = pal6bit(((color0 & 0x07e0) + (color1 & 0x07e0)) / 2 >> 5);
1171 cr = pal5bit(((color0 & 0xf800) + (color1 & 0xf800)) / 2 >> 11);
1172 return 0xff000000 | (cr << 16) | (cg << 8) | (cb);
1173 default:
1174 return 0xff000000;
1175 }
1176 case NV2A_TEX_FORMAT::DXT3:
1177 bx = x >> 2;
1178 by = y >> 2;
1179 x = x & 3;
1180 y = y & 3;
1181 to = (bx + by*(sizeu >> 2)) << 1;
1182 color0 = *((uint16_t *)(((uint64_t *)texture[number].buffer) + to) + 4);
1183 color1 = *((uint16_t *)(((uint64_t *)texture[number].buffer) + to) + 5);
1184 codes = *((uint32_t *)(((uint64_t *)texture[number].buffer) + to) + 3);
1185 alphas = *(((uint64_t *)texture[number].buffer) + to);
1186 s = (y << 3) + (x << 1);
1187 sa = ((y << 2) + x) << 2;
1188 c = (codes >> s) & 3;
1189 ca = (alphas >> sa) & 15;
1190 switch (c) {
1191 case 0:
1192 return ((ca + (ca << 4)) << 24) + convert_r5g6b5_r8g8b8(color0);
1193 case 1:
1194 return ((ca + (ca << 4)) << 24) + convert_r5g6b5_r8g8b8(color1);
1195 case 2:
1196 cb = pal5bit((2 * (color0 & 0x001f) + (color1 & 0x001f)) / 3);
1197 cg = pal6bit((2 * (color0 & 0x07e0) + (color1 & 0x07e0)) / 3 >> 5);
1198 cr = pal5bit((2 * (color0 & 0xf800) + (color1 & 0xf800)) / 3 >> 11);
1199 return ((ca + (ca << 4)) << 24) | (cr << 16) | (cg << 8) | (cb);
1200 default:
1201 cb = pal5bit(((color0 & 0x001f) + 2 * (color1 & 0x001f)) / 3);
1202 cg = pal6bit(((color0 & 0x07e0) + 2 * (color1 & 0x07e0)) / 3 >> 5);
1203 cr = pal5bit(((color0 & 0xf800) + 2 * (color1 & 0xf800)) / 3 >> 11);
1204 return ((ca + (ca << 4)) << 24) | (cr << 16) | (cg << 8) | (cb);
1205 }
1206 case NV2A_TEX_FORMAT::A4R4G4B4:
1207 to = dilated0[texture[number].dilate][x] + dilated1[texture[number].dilate][y]; // offset of texel in texture memory
1208 a4r4g4b4 = *(((uint16_t *)texture[number].buffer) + to); // get texel color
1209 return convert_a4r4g4b4_a8r8g8b8(a4r4g4b4);
1210 case NV2A_TEX_FORMAT::A8:
1211 to = dilated0[texture[number].dilate][x] + dilated1[texture[number].dilate][y]; // offset of texel in texture memory
1212 c = *(((uint8_t*)texture[number].buffer) + to); // get texel color
1213 return c << 24;
1214 case NV2A_TEX_FORMAT::A1R5G5B5:
1215 to = dilated0[texture[number].dilate][x] + dilated1[texture[number].dilate][y]; // offset of texel in texture memory
1216 a1r5g5b5 = *(((uint16_t *)texture[number].buffer) + to); // get texel color
1217 return convert_a1r5g5b5_a8r8g8b8(a1r5g5b5);
1218 case NV2A_TEX_FORMAT::R5G6B5:
1219 to = dilated0[texture[number].dilate][x] + dilated1[texture[number].dilate][y]; // offset of texel in texture memory
1220 r5g6b5 = *(((uint16_t *)texture[number].buffer) + to); // get texel color
1221 return 0xff000000 + convert_r5g6b5_r8g8b8(r5g6b5);
1222 case NV2A_TEX_FORMAT::R8G8B8_RECT:
1223 to = texture[number].rectangle_pitch*y + (x << 2);
1224 return *((uint32_t *)(((uint8_t *)texture[number].buffer) + to));
1225 case NV2A_TEX_FORMAT::A8R8G8B8_RECT:
1226 to = texture[number].rectangle_pitch*y + (x << 2);
1227 return *((uint32_t *)(((uint8_t *)texture[number].buffer) + to));
1228 case NV2A_TEX_FORMAT::DXT5:
1229 bx = x >> 2;
1230 by = y >> 2;
1231 x = x & 3;
1232 y = y & 3;
1233 to = (bx + by*(sizeu >> 2)) << 1;
1234 color0 = *((uint16_t *)(((uint64_t *)texture[number].buffer) + to) + 4);
1235 color1 = *((uint16_t *)(((uint64_t *)texture[number].buffer) + to) + 5);
1236 codes = *((uint32_t *)(((uint64_t *)texture[number].buffer) + to) + 3);
1237 alpha0 = *((uint8_t *)(((uint64_t *)texture[number].buffer) + to) + 0);
1238 alpha1 = *((uint8_t *)(((uint64_t *)texture[number].buffer) + to) + 1);
1239 alphas = *(((uint64_t *)texture[number].buffer) + to);
1240 s = (y << 3) + (x << 1);
1241 sa = ((y << 2) + x) * 3;
1242 c = (codes >> s) & 3;
1243 ca = (alphas >> sa) & 7;
1244 ca = ca + (alpha0 > alpha1 ? 0 : 8);
1245 switch (ca) {
1246 case 0:
1247 ca = alpha0;
1248 break;
1249 case 1:
1250 ca = alpha1;
1251 break;
1252 case 2:
1253 ca = (6 * alpha0 + 1 * alpha1) / 7;
1254 break;
1255 case 3:
1256 ca = (5 * alpha0 + 2 * alpha1) / 7;
1257 break;
1258 case 4:
1259 ca = (4 * alpha0 + 3 * alpha1) / 7;
1260 break;
1261 case 5:
1262 ca = (3 * alpha0 + 4 * alpha1) / 7;
1263 break;
1264 case 6:
1265 ca = (2 * alpha0 + 5 * alpha1) / 7;
1266 break;
1267 case 7:
1268 ca = (1 * alpha0 + 6 * alpha1) / 7;
1269 break;
1270 case 8:
1271 ca = alpha0;
1272 break;
1273 case 9:
1274 ca = alpha1;
1275 break;
1276 case 10:
1277 ca = (4 * alpha0 + 1 * alpha1) / 5;
1278 break;
1279 case 11:
1280 ca = (3 * alpha0 + 2 * alpha1) / 5;
1281 break;
1282 case 12:
1283 ca = (2 * alpha0 + 3 * alpha1) / 5;
1284 break;
1285 case 13:
1286 ca = (1 * alpha0 + 4 * alpha1) / 5;
1287 break;
1288 case 14:
1289 ca = 0;
1290 break;
1291 case 15:
1292 ca = 255;
1293 break;
1294 }
1295 switch (c) {
1296 case 0:
1297 return (ca << 24) + convert_r5g6b5_r8g8b8(color0);
1298 case 1:
1299 return (ca << 24) + convert_r5g6b5_r8g8b8(color1);
1300 case 2:
1301 cb = pal5bit((2 * (color0 & 0x001f) + (color1 & 0x001f)) / 3);
1302 cg = pal6bit((2 * (color0 & 0x07e0) + (color1 & 0x07e0)) / 3 >> 5);
1303 cr = pal5bit((2 * (color0 & 0xf800) + (color1 & 0xf800)) / 3 >> 11);
1304 return (ca << 24) | (cr << 16) | (cg << 8) | (cb);
1305 default:
1306 cb = pal5bit(((color0 & 0x001f) + 2 * (color1 & 0x001f)) / 3);
1307 cg = pal6bit(((color0 & 0x07e0) + 2 * (color1 & 0x07e0)) / 3 >> 5);
1308 cr = pal5bit(((color0 & 0xf800) + 2 * (color1 & 0xf800)) / 3 >> 11);
1309 return (ca << 24) | (cr << 16) | (cg << 8) | (cb);
1310 }
1311 default:
1312 return 0xff00ff00;
1313 }
1314 }
1315
1316 inline uint8_t *nv2a_renderer::read_pixel(int x, int y, int32_t c[4])
1317 {
1318 uint32_t offset;
1319 uint32_t color;
1320 uint32_t *addr;
1321 uint16_t *addr16;
1322 uint8_t *addr8;
1323
1324 if (type_rendertarget == NV2A_RT_TYPE::SWIZZLED)
1325 offset = (dilated0[dilate_rendertarget][x] + dilated1[dilate_rendertarget][y]) * bytespixel_rendertarget;
1326 else // type_rendertarget == LINEAR
1327 offset = pitch_rendertarget * y + x * bytespixel_rendertarget;
1328 #ifdef DEBUG_CHECKS
1329 if (offset >= size_rendertarget)
1330 {
1331 machine().logerror("Bad offset computed in read_pixel !\n");
1332 offset = 0;
1333 }
1334 #endif
1335 switch (colorformat_rendertarget) {
1336 case NV2A_COLOR_FORMAT::R5G6B5:
1337 addr16 = (uint16_t *)((uint8_t *)rendertarget + offset);
1338 color = *addr16;
1339 c[3] = 0xff;
1340 c[2] = pal5bit((color & 0xf800) >> 11);
1341 c[1] = pal6bit((color & 0x07e0) >> 5);
1342 c[0] = pal5bit(color & 0x1f);
1343 return (uint8_t *)addr16;
1344 case NV2A_COLOR_FORMAT::X8R8G8B8_Z8R8G8B8:
1345 case NV2A_COLOR_FORMAT::X8R8G8B8_X8R8G8B8:
1346 addr = (uint32_t *)((uint8_t *)rendertarget + offset);
1347 color = *addr;
1348
1349 c[3] = 0xff;
1350 c[2] = (color >> 16) & 255;
1351 c[1] = (color >> 8) & 255;
1352 c[0] = color & 255;
1353 return (uint8_t *)addr;
1354 case NV2A_COLOR_FORMAT::A8R8G8B8:
1355 addr = (uint32_t *)((uint8_t *)rendertarget + offset);
1356 color = *addr;
1357 c[3] = color >> 24;
1358 c[2] = (color >> 16) & 255;
1359 c[1] = (color >> 8) & 255;
1360 c[0] = color & 255;
1361 return (uint8_t *)addr;
1362 case NV2A_COLOR_FORMAT::B8:
1363 addr8 = (uint8_t *)rendertarget + offset;
1364 c[0] = *addr8;
1365 c[1] = c[2] = 0;
1366 c[3] = 0xff;
1367 return addr8;
1368 default:
1369 return nullptr;
1370 }
1371 return nullptr;
1372 }
1373
1374 void nv2a_renderer::write_pixel(int x, int y, uint32_t color, int z)
1375 {
1376 uint8_t *addr;
1377 uint32_t *daddr32;
1378 uint16_t *daddr16;
1379 uint32_t depthandstencil;
1380 int32_t c[4], fb[4], s[4], d[4], cc[4];
1381 uint32_t depth, stencil, stenc, stenv;
1382 uint32_t udepth;
1383 bool stencil_passed;
1384 bool depth_passed;
1385
1386 if ((z > 0xffffff) || (z < 0) || (x < 0))
1387 return;
1388 udepth = (uint32_t)z;
1389 fb[3] = fb[2] = fb[1] = fb[0] = 0;
1390 addr = nullptr;
1391 if (color_mask != 0)
1392 addr = read_pixel(x, y, fb);
1393 if (depthformat_rendertarget == NV2A_RT_DEPTH_FORMAT::Z24S8) {
1394 #ifdef DEBUG_CHECKS
1395 if (((pitch_depthbuffer / 4) * y + x) >= size_depthbuffer)
1396 {
1397 machine().logerror("Bad depthbuffer offset computed in write_pixel !\n");
1398 return;
1399 }
1400 #endif
1401 daddr32 = depthbuffer + (pitch_depthbuffer / 4) * y + x;
1402 depthandstencil = *daddr32;
1403 depth = depthandstencil >> 8;
1404 stencil = depthandstencil & 255;
1405 daddr16 = nullptr;
1406 }
1407 else if (depthformat_rendertarget == NV2A_RT_DEPTH_FORMAT::Z16) {
1408 #ifdef DEBUG_CHECKS
1409 if (((pitch_depthbuffer / 2) * y + x) >= size_depthbuffer)
1410 {
1411 machine().logerror("Bad depthbuffer offset computed in write_pixel !\n");
1412 return;
1413 }
1414 #endif
1415 daddr16 = (uint16_t *)depthbuffer + (pitch_depthbuffer / 2) * y + x;
1416 depthandstencil = *daddr16;
1417 depth = (depthandstencil << 8) | 0xff;
1418 stencil = 0;
1419 daddr32 = nullptr;
1420 }
1421 else {
1422 daddr32 = nullptr;
1423 daddr16 = nullptr;
1424 depth = 0xffffff;
1425 stencil = 0;
1426 }
1427 c[3] = color >> 24;
1428 c[2] = (color >> 16) & 255;
1429 c[1] = (color >> 8) & 255;
1430 c[0] = color & 255;
1431 cc[3] = blend_color >> 24;
1432 cc[2] = (blend_color >> 16) & 255;
1433 cc[1] = (blend_color >> 8) & 255;
1434 cc[0] = blend_color & 255;
1435 // ownership test and scissor test not done
1436 // alpha test
1437 if (alpha_test_enabled) {
1438 switch (alpha_func) {
1439 case NV2A_COMPARISON_OP::NEVER:
1440 return;
1441 case NV2A_COMPARISON_OP::ALWAYS:
1442 default:
1443 break;
1444 case NV2A_COMPARISON_OP::LESS:
1445 if (c[3] >= alpha_reference)
1446 return;
1447 break;
1448 case NV2A_COMPARISON_OP::LEQUAL:
1449 if (c[3] > alpha_reference)
1450 return;
1451 break;
1452 case NV2A_COMPARISON_OP::EQUAL:
1453 if (c[3] != alpha_reference)
1454 return;
1455 break;
1456 case NV2A_COMPARISON_OP::GEQUAL:
1457 if (c[3] < alpha_reference)
1458 return;
1459 break;
1460 case NV2A_COMPARISON_OP::GREATER:
1461 if (c[3] <= alpha_reference)
1462 return;
1463 break;
1464 case NV2A_COMPARISON_OP::NOTEQUAL:
1465 if (c[3] == alpha_reference)
1466 return;
1467 break;
1468 }
1469 }
1470 // stencil test
1471 stencil_passed = true;
1472 if (stencil_test_enabled) {
1473 stenc=stencil_mask & stencil_ref;
1474 stenv=stencil_mask & stencil;
1475 switch (stencil_func) {
1476 case NV2A_COMPARISON_OP::NEVER:
1477 stencil_passed = false;
1478 break;
1479 case NV2A_COMPARISON_OP::LESS:
1480 if (stenc >= stenv)
1481 stencil_passed = false;
1482 break;
1483 case NV2A_COMPARISON_OP::EQUAL:
1484 if (stenc != stenv)
1485 stencil_passed = false;
1486 break;
1487 case NV2A_COMPARISON_OP::LEQUAL:
1488 if (stenc > stenv)
1489 stencil_passed = false;
1490 break;
1491 case NV2A_COMPARISON_OP::GREATER:
1492 if (stenc <= stenv)
1493 stencil_passed = false;
1494 break;
1495 case NV2A_COMPARISON_OP::NOTEQUAL:
1496 if (stenc == stenv)
1497 stencil_passed = false;
1498 break;
1499 case NV2A_COMPARISON_OP::GEQUAL:
1500 if (stenc < stenv)
1501 stencil_passed = false;
1502 break;
1503 case NV2A_COMPARISON_OP::ALWAYS:
1504 default:
1505 break;
1506 }
1507 if (stencil_passed == false) {
1508 switch (stencil_op_fail) {
1509 case NV2A_STENCIL_OP::ZEROOP:
1510 stencil = 0;
1511 break;
1512 case NV2A_STENCIL_OP::INVERTOP:
1513 stencil = stencil ^ 255;
1514 break;
1515 case NV2A_STENCIL_OP::KEEP:
1516 default:
1517 break;
1518 case NV2A_STENCIL_OP::REPLACE:
1519 stencil = stencil_ref;
1520 break;
1521 case NV2A_STENCIL_OP::INCR:
1522 if (stencil < 255)
1523 stencil++;
1524 break;
1525 case NV2A_STENCIL_OP::DECR:
1526 if (stencil > 0)
1527 stencil--;
1528 break;
1529 case NV2A_STENCIL_OP::INCR_WRAP:
1530 if (stencil < 255)
1531 stencil++;
1532 else
1533 stencil = 0;
1534 break;
1535 case NV2A_STENCIL_OP::DECR_WRAP:
1536 if (stencil > 0)
1537 stencil--;
1538 else
1539 stencil = 255;
1540 break;
1541 }
1542 if (depthformat_rendertarget == NV2A_RT_DEPTH_FORMAT::Z24S8) {
1543 depthandstencil = (depth << 8) | stencil;
1544 *daddr32 = depthandstencil;
1545 }
1546 else if (depthformat_rendertarget == NV2A_RT_DEPTH_FORMAT::Z16) {
1547 depthandstencil = depth >> 8;
1548 *daddr16 = (uint16_t)depthandstencil;
1549 }
1550 return;
1551 }
1552 }
1553 // depth buffer test
1554 depth_passed = true;
1555 if (depth_test_enabled) {
1556 switch (depth_function) {
1557 case NV2A_COMPARISON_OP::NEVER:
1558 depth_passed = false;
1559 break;
1560 case NV2A_COMPARISON_OP::LESS:
1561 if (udepth >= depth)
1562 depth_passed = false;
1563 break;
1564 case NV2A_COMPARISON_OP::EQUAL:
1565 if (udepth != depth)
1566 depth_passed = false;
1567 break;
1568 case NV2A_COMPARISON_OP::LEQUAL:
1569 if (udepth > depth)
1570 depth_passed = false;
1571 break;
1572 case NV2A_COMPARISON_OP::GREATER:
1573 if (udepth <= depth)
1574 depth_passed = false;
1575 break;
1576 case NV2A_COMPARISON_OP::NOTEQUAL:
1577 if (udepth == depth)
1578 depth_passed = false;
1579 break;
1580 case NV2A_COMPARISON_OP::GEQUAL:
1581 if (udepth < depth)
1582 depth_passed = false;
1583 break;
1584 case NV2A_COMPARISON_OP::ALWAYS:
1585 default:
1586 break;
1587 }
1588 if (depth_passed == false) {
1589 switch (stencil_op_zfail) {
1590 case NV2A_STENCIL_OP::ZEROOP:
1591 stencil = 0;
1592 break;
1593 case NV2A_STENCIL_OP::INVERTOP:
1594 stencil = stencil ^ 255;
1595 break;
1596 case NV2A_STENCIL_OP::KEEP:
1597 default:
1598 break;
1599 case NV2A_STENCIL_OP::REPLACE:
1600 stencil = stencil_ref;
1601 break;
1602 case NV2A_STENCIL_OP::INCR:
1603 if (stencil < 255)
1604 stencil++;
1605 break;
1606 case NV2A_STENCIL_OP::DECR:
1607 if (stencil > 0)
1608 stencil--;
1609 break;
1610 case NV2A_STENCIL_OP::INCR_WRAP:
1611 if (stencil < 255)
1612 stencil++;
1613 else
1614 stencil = 0;
1615 break;
1616 case NV2A_STENCIL_OP::DECR_WRAP:
1617 if (stencil > 0)
1618 stencil--;
1619 else
1620 stencil = 255;
1621 break;
1622 }
1623 if (depthformat_rendertarget == NV2A_RT_DEPTH_FORMAT::Z24S8) {
1624 depthandstencil = (depth << 8) | stencil;
1625 *daddr32 = depthandstencil;
1626 }
1627 else if (depthformat_rendertarget == NV2A_RT_DEPTH_FORMAT::Z16) {
1628 depthandstencil = depth >> 8;
1629 *daddr16 = (uint16_t)depthandstencil;
1630 }
1631 return;
1632 }
1633 switch (stencil_op_zpass) {
1634 case NV2A_STENCIL_OP::ZEROOP:
1635 stencil = 0;
1636 break;
1637 case NV2A_STENCIL_OP::INVERTOP:
1638 stencil = stencil ^ 255;
1639 break;
1640 case NV2A_STENCIL_OP::KEEP:
1641 default:
1642 break;
1643 case NV2A_STENCIL_OP::REPLACE:
1644 stencil = stencil_ref;
1645 break;
1646 case NV2A_STENCIL_OP::INCR:
1647 if (stencil < 255)
1648 stencil++;
1649 break;
1650 case NV2A_STENCIL_OP::DECR:
1651 if (stencil > 0)
1652 stencil--;
1653 break;
1654 case NV2A_STENCIL_OP::INCR_WRAP:
1655 if (stencil < 255)
1656 stencil++;
1657 else
1658 stencil = 0;
1659 break;
1660 case NV2A_STENCIL_OP::DECR_WRAP:
1661 if (stencil > 0)
1662 stencil--;
1663 else
1664 stencil = 255;
1665 break;
1666 }
1667 }
1668 // blending
1669 if (blending_enabled) {
1670 switch (blend_function_source) {
1671 case NV2A_BLEND_FACTOR::ZERO:
1672 s[3] = s[2] = s[1] = s[0] = 0;
1673 break;
1674 case NV2A_BLEND_FACTOR::ONE:
1675 default:
1676 s[3] = s[2] = s[1] = s[0] = 255;
1677 break;
1678 case NV2A_BLEND_FACTOR::DST_COLOR:
1679 s[3] = fb[3];
1680 s[2] = fb[2];
1681 s[1] = fb[1];
1682 s[0] = fb[0];
1683 break;
1684 case NV2A_BLEND_FACTOR::ONE_MINUS_DST_COLOR:
1685 s[3] = fb[3] ^ 255;
1686 s[2] = fb[2] ^ 255;
1687 s[1] = fb[1] ^ 255;
1688 s[0] = fb[0] ^ 255;
1689 break;
1690 case NV2A_BLEND_FACTOR::SRC_ALPHA:
1691 s[3] = s[2] = s[1] = s[0] = c[3];
1692 break;
1693 case NV2A_BLEND_FACTOR::ONE_MINUS_SRC_ALPHA:
1694 s[3] = s[2] = s[1] = s[0] = c[3] ^ 255;
1695 break;
1696 case NV2A_BLEND_FACTOR::DST_ALPHA:
1697 s[3] = s[2] = s[1] = s[0] = fb[3];
1698 break;
1699 case NV2A_BLEND_FACTOR::ONE_MINUS_DST_ALPHA:
1700 s[3] = s[2] = s[1] = s[0] = fb[3] ^ 255;
1701 break;
1702 case NV2A_BLEND_FACTOR::CONSTANT_COLOR:
1703 s[3] = cc[3];
1704 s[2] = cc[2];
1705 s[1] = cc[1];
1706 s[0] = cc[0];
1707 break;
1708 case NV2A_BLEND_FACTOR::ONE_MINUS_CONSTANT_COLOR:
1709 s[3] = cc[3] ^ 255;
1710 s[2] = cc[2] ^ 255;
1711 s[1] = cc[1] ^ 255;
1712 s[0] = cc[0] ^ 255;
1713 break;
1714 case NV2A_BLEND_FACTOR::CONSTANT_ALPHA:
1715 s[3] = s[2] = s[1] = s[0] = cc[3];
1716 break;
1717 case NV2A_BLEND_FACTOR::ONE_MINUS_CONSTANT_ALPHA:
1718 s[3] = s[2] = s[1] = s[0] = cc[3] ^ 255;
1719 break;
1720 case NV2A_BLEND_FACTOR::SRC_ALPHA_SATURATE:
1721 s[3] = 255;
1722 if (c[3] < (fb[3] ^ 255))
1723 s[2] = c[3];
1724 else
1725 s[2] = fb[3];
1726 s[1] = s[0] = s[2];
1727 break;
1728 }
1729 switch (blend_function_destination) {
1730 case NV2A_BLEND_FACTOR::ZERO:
1731 default:
1732 d[3] = d[2] = d[1] = d[0] = 0;
1733 break;
1734 case NV2A_BLEND_FACTOR::ONE:
1735 d[3] = d[2] = d[1] = d[0] = 255;
1736 break;
1737 case NV2A_BLEND_FACTOR::SRC_COLOR:
1738 d[3] = c[3];
1739 d[2] = c[2];
1740 d[1] = c[1];
1741 d[0] = c[0];
1742 break;
1743 case NV2A_BLEND_FACTOR::ONE_MINUS_SRC_COLOR:
1744 d[3] = c[3] ^ 255;
1745 d[2] = c[2] ^ 255;
1746 d[1] = c[1] ^ 255;
1747 d[0] = c[0] ^ 255;
1748 break;
1749 case NV2A_BLEND_FACTOR::SRC_ALPHA:
1750 d[3] = d[2] = d[1] = d[0] = c[3];
1751 break;
1752 case NV2A_BLEND_FACTOR::ONE_MINUS_SRC_ALPHA:
1753 d[3] = d[2] = d[1] = d[0] = c[3] ^ 255;
1754 break;
1755 case NV2A_BLEND_FACTOR::DST_ALPHA:
1756 d[3] = d[2] = d[1] = d[0] = fb[3];
1757 break;
1758 case NV2A_BLEND_FACTOR::ONE_MINUS_DST_ALPHA:
1759 d[3] = d[2] = d[1] = d[0] = fb[3] ^ 255;
1760 break;
1761 case NV2A_BLEND_FACTOR::CONSTANT_COLOR:
1762 d[3] = cc[3];
1763 d[2] = cc[2];
1764 d[1] = cc[1];
1765 d[0] = cc[0];
1766 break;
1767 case NV2A_BLEND_FACTOR::ONE_MINUS_CONSTANT_COLOR:
1768 d[3] = cc[3] ^ 255;
1769 d[2] = cc[2] ^ 255;
1770 d[1] = cc[1] ^ 255;
1771 d[0] = cc[0] ^ 255;
1772 break;
1773 case NV2A_BLEND_FACTOR::CONSTANT_ALPHA:
1774 d[3] = d[2] = d[1] = d[0] = cc[3];
1775 break;
1776 case NV2A_BLEND_FACTOR::ONE_MINUS_CONSTANT_ALPHA:
1777 d[3] = d[2] = d[1] = d[0] = cc[3] ^ 255;
1778 break;
1779 }
1780 switch (blend_equation) {
1781 case NV2A_BLEND_EQUATION::FUNC_ADD:
1782 c[3] = (c[3] * s[3] + fb[3] * d[3]) / 255;
1783 if (c[3] > 255)
1784 c[3] = 255;
1785 c[2] = (c[2] * s[2] + fb[2] * d[2]) / 255;
1786 if (c[2] > 255)
1787 c[2] = 255;
1788 c[1] = (c[1] * s[1] + fb[1] * d[1]) / 255;
1789 if (c[1] > 255)
1790 c[1] = 255;
1791 c[0] = (c[0] * s[0] + fb[0] * d[0]) / 255;
1792 if (c[0] > 255)
1793 c[0] = 255;
1794 break;
1795 case NV2A_BLEND_EQUATION::FUNC_SUBTRACT:
1796 c[3] = (c[3] * s[3] - fb[3] * d[3]) / 255;
1797 if (c[3] < 0)
1798 c[3] = 255;
1799 c[2] = (c[2] * s[2] - fb[2] * d[2]) / 255;
1800 if (c[2] < 0)
1801 c[2] = 255;
1802 c[1] = (c[1] * s[1] - fb[1] * d[1]) / 255;
1803 if (c[1] < 0)
1804 c[1] = 255;
1805 c[0] = (c[0] * s[0] - fb[0] * d[0]) / 255;
1806 if (c[0] < 0)
1807 c[0] = 255;
1808 break;
1809 case NV2A_BLEND_EQUATION::FUNC_REVERSE_SUBTRACT:
1810 c[3] = (fb[3] * d[3] - c[3] * s[3]) / 255;
1811 if (c[3] < 0)
1812 c[3] = 255;
1813 c[2] = (fb[2] * d[2] - c[2] * s[2]) / 255;
1814 if (c[2] < 0)
1815 c[2] = 255;
1816 c[1] = (fb[1] * d[1] - c[1] * s[1]) / 255;
1817 if (c[1] < 0)
1818 c[1] = 255;
1819 c[0] = (fb[0] * d[0] - c[0] * s[0]) / 255;
1820 if (c[0] < 0)
1821 c[0] = 255;
1822 break;
1823 case NV2A_BLEND_EQUATION::MIN:
1824 c[3] = s[3];
1825 if (d[3] < c[3])
1826 c[3] = d[3];
1827 c[2] = s[2];
1828 if (d[2] < c[2])
1829 c[2] = d[2];
1830 c[1] = s[1];
1831 if (d[1] < c[1])
1832 c[1] = d[1];
1833 c[0] = s[0];
1834 if (d[0] < c[0])
1835 c[0] = d[0];
1836 break;
1837 case NV2A_BLEND_EQUATION::MAX:
1838 c[3] = s[3];
1839 if (d[3] > c[3])
1840 c[3] = d[3];
1841 c[2] = s[2];
1842 if (d[2] > c[2])
1843 c[2] = d[2];
1844 c[1] = s[1];
1845 if (d[1] > c[1])
1846 c[1] = d[1];
1847 c[0] = s[0];
1848 if (d[0] > c[0])
1849 c[0] = d[0];
1850 break;
1851 }
1852 }
1853 // dithering not done
1854 // logical operation
1855 if (logical_operation_enabled) {
1856 switch (logical_operation) {
1857 case NV2A_LOGIC_OP::CLEAR:
1858 c[3] = 0;
1859 c[2] = 0;
1860 c[1] = 0;
1861 c[0] = 0;
1862 break;
1863 case NV2A_LOGIC_OP::AND:
1864 c[3] = c[3] & fb[3];
1865 c[2] = c[2] & fb[2];
1866 c[1] = c[1] & fb[1];
1867 c[0] = c[0] & fb[0];
1868 break;
1869 case NV2A_LOGIC_OP::AND_REVERSE:
1870 c[3] = c[3] & (fb[3] ^ 255);
1871 c[2] = c[2] & (fb[2] ^ 255);
1872 c[1] = c[1] & (fb[1] ^ 255);
1873 c[0] = c[0] & (fb[0] ^ 255);
1874 break;
1875 case NV2A_LOGIC_OP::COPY:
1876 default:
1877 break;
1878 case NV2A_LOGIC_OP::AND_INVERTED:
1879 c[3] = (c[3] ^ 255) & fb[3];
1880 c[2] = (c[2] ^ 255) & fb[2];
1881 c[1] = (c[1] ^ 255) & fb[1];
1882 c[0] = (c[0] ^ 255) & fb[0];
1883 break;
1884 case NV2A_LOGIC_OP::NOOP:
1885 c[3] = fb[3];
1886 c[2] = fb[2];
1887 c[1] = fb[1];
1888 c[0] = fb[0];
1889 break;
1890 case NV2A_LOGIC_OP::XOR:
1891 c[3] = c[3] ^ fb[3];
1892 c[2] = c[2] ^ fb[2];
1893 c[1] = c[1] ^ fb[1];
1894 c[0] = c[0] ^ fb[0];
1895 break;
1896 case NV2A_LOGIC_OP::OR:
1897 c[3] = c[3] | fb[3];
1898 c[2] = c[2] | fb[2];
1899 c[1] = c[1] | fb[1];
1900 c[0] = c[0] | fb[0];
1901 break;
1902 case NV2A_LOGIC_OP::NOR:
1903 c[3] = (c[3] | fb[3]) ^ 255;
1904 c[2] = (c[2] | fb[2]) ^ 255;
1905 c[1] = (c[1] | fb[1]) ^ 255;
1906 c[0] = (c[0] | fb[0]) ^ 255;
1907 break;
1908 case NV2A_LOGIC_OP::EQUIV:
1909 c[3] = (c[3] ^ fb[3]) ^ 255;
1910 c[2] = (c[2] ^ fb[2]) ^ 255;
1911 c[1] = (c[1] ^ fb[1]) ^ 255;
1912 c[0] = (c[0] ^ fb[0]) ^ 255;
1913 break;
1914 case NV2A_LOGIC_OP::INVERT:
1915 c[3] = fb[3] ^ 255;
1916 c[2] = fb[2] ^ 255;
1917 c[1] = fb[1] ^ 255;
1918 c[0] = fb[0] ^ 255;
1919 break;
1920 case NV2A_LOGIC_OP::OR_REVERSE:
1921 c[3] = c[3] | (fb[3] ^ 255);
1922 c[2] = c[2] | (fb[2] ^ 255);
1923 c[1] = c[1] | (fb[1] ^ 255);
1924 c[0] = c[0] | (fb[0] ^ 255);
1925 break;
1926 case NV2A_LOGIC_OP::COPY_INVERTED:
1927 c[3] = c[3] ^ 255;
1928 c[2] = c[2] ^ 255;
1929 c[1] = c[1] ^ 255;
1930 c[0] = c[0] ^ 255;
1931 break;
1932 case NV2A_LOGIC_OP::OR_INVERTED:
1933 c[3] = (c[3] ^ 255) | fb[3];
1934 c[2] = (c[2] ^ 255) | fb[2];
1935 c[1] = (c[1] ^ 255) | fb[1];
1936 c[0] = (c[0] ^ 255) | fb[0];
1937 break;
1938 case NV2A_LOGIC_OP::NAND:
1939 c[3] = (c[3] & fb[3]) ^ 255;
1940 c[2] = (c[2] & fb[2]) ^ 255;
1941 c[1] = (c[1] & fb[1]) ^ 255;
1942 c[0] = (c[0] & fb[0]) ^ 255;
1943 break;
1944 case NV2A_LOGIC_OP::SET:
1945 c[3] = 255;
1946 c[2] = 255;
1947 c[1] = 255;
1948 c[0] = 255;
1949 break;
1950 }
1951 }
1952 if (color_mask != 0) {
1953 uint32_t ct,ft,w;
1954
1955 ct = ((uint32_t)c[3] << 24) | ((uint32_t)c[2] << 16) | ((uint32_t)c[1] << 8) | (uint32_t)c[0];
1956 ft = ((uint32_t)fb[3] << 24) | ((uint32_t)fb[2] << 16) | ((uint32_t)fb[1] << 8) | (uint32_t)fb[0];
1957 w = (ft & ~color_mask) | (ct & color_mask);
1958
1959 /* for debugging
1960 if (w == 0x94737d7b)
1961 x++;
1962 */
1963 switch (colorformat_rendertarget) {
1964 case NV2A_COLOR_FORMAT::R5G6B5:
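				// the 32-bit argb working value is packed down to 5:6:5 by keeping the top bits of each 8-bit channel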
1965 w = ((w >> 8) & 0xf800) + ((w >> 5) & 0x7e0) + ((w >> 3) & 0x1f);
1966 *((uint16_t *)addr) = (uint16_t)w;
1967 break;
1968 case NV2A_COLOR_FORMAT::X8R8G8B8_Z8R8G8B8:
1969 case NV2A_COLOR_FORMAT::X8R8G8B8_X8R8G8B8:
1970 *((uint32_t *)addr) = w;
1971 break;
1972 case NV2A_COLOR_FORMAT::A8R8G8B8:
1973 *((uint32_t *)addr) = w;
1974 break;
1975 case NV2A_COLOR_FORMAT::B8:
1976 *addr = (uint8_t)w;
1977 break;
1978 default:
1979 return;
1980 }
1981 }
1982 if (depth_write_enabled)
1983 depth = udepth;
1984 if (depthformat_rendertarget == NV2A_RT_DEPTH_FORMAT::Z24S8) {
1985 depthandstencil = (depth << 8) | stencil;
1986 *daddr32 = depthandstencil;
1987 }
1988 else if (depthformat_rendertarget == NV2A_RT_DEPTH_FORMAT::Z16) {
1989 depthandstencil = depth >> 8;
1990 *daddr16 = (uint16_t)depthandstencil;
1991 }
1992 }
1993
1994 void nv2a_renderer::render_color(int32_t scanline, const nv2a_rasterizer::extent_t &extent, const nvidia_object_data &objectdata, int threadid)
1995 {
1996 int x, lx;
1997
1998 lx = limits_rendertarget.right();
1999 if ((extent.startx < 0) && (extent.stopx <= 0))
2000 return;
2001 if ((extent.startx > lx) && (extent.stopx > lx))
2002 return;
2003 x = extent.stopx - extent.startx; // number of pixels to draw (start inclusive, end exclusive)
2004 if (extent.stopx > lx)
2005 x = x - (extent.stopx - lx - 1);
2006 x--;
2007 while (x >= 0) {
2008 double zf;
2009 uint32_t a8r8g8b8;
2010 int z;
2011 int ca, cr, cg, cb;
2012 int xp = extent.startx + x; // x coordinate of current pixel
2013
2014 z = (extent.param[(int)VERTEX_PARAMETER::PARAM_Z].start + (double)x * extent.param[(int)VERTEX_PARAMETER::PARAM_Z].dpdx);
2015 zf = (extent.param[(int)VERTEX_PARAMETER::PARAM_1W].start + (double)x * extent.param[(int)VERTEX_PARAMETER::PARAM_1W].dpdx);
2016 zf = 1.0f / zf;
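		// vertex attributes are interpolated as attribute/w; zf is the reciprocal of the interpolated 1/w
		// (i.e. w at this pixel), so multiplying by it restores perspective-correct values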
2017 cb = ((extent.param[(int)VERTEX_PARAMETER::PARAM_COLOR_B].start + (double)x * extent.param[(int)VERTEX_PARAMETER::PARAM_COLOR_B].dpdx)) * zf * 255.0f;
2018 cg = ((extent.param[(int)VERTEX_PARAMETER::PARAM_COLOR_G].start + (double)x * extent.param[(int)VERTEX_PARAMETER::PARAM_COLOR_G].dpdx)) * zf * 255.0f;
2019 cr = ((extent.param[(int)VERTEX_PARAMETER::PARAM_COLOR_R].start + (double)x * extent.param[(int)VERTEX_PARAMETER::PARAM_COLOR_R].dpdx)) * zf * 255.0f;
2020 ca = ((extent.param[(int)VERTEX_PARAMETER::PARAM_COLOR_A].start + (double)x * extent.param[(int)VERTEX_PARAMETER::PARAM_COLOR_A].dpdx)) * zf * 255.0f;
2021 if (cb > 255)
2022 cb = 255;
2023 if (cb < 0)
2024 cb = 0;
2025 if (cg > 255)
2026 cg = 255;
2027 if (cg < 0)
2028 cg = 0;
2029 if (cr > 255)
2030 cr = 255;
2031 if (cr < 0)
2032 cr = 0;
2033 if (ca > 255)
2034 ca = 255;
2035 if (ca < 0)
2036 ca = 0;
2037 a8r8g8b8 = (ca << 24) | (cr << 16) | (cg << 8) | cb; // pixel color obtained by interpolating the colors of the vertices
2038 write_pixel(xp, scanline, a8r8g8b8, z);
2039 x--;
2040 }
2041 }
2042
2043 void nv2a_renderer::render_texture_simple(int32_t scanline, const nv2a_rasterizer::extent_t &extent, const nvidia_object_data &objectdata, int threadid)
2044 {
2045 int x, lx;
2046 uint32_t a8r8g8b8;
2047
2048 if (!objectdata.data->texture[0].enabled) {
2049 return;
2050 }
2051 lx = limits_rendertarget.right();
2052 if ((extent.startx < 0) && (extent.stopx <= 0))
2053 return;
2054 if ((extent.startx > lx) && (extent.stopx > lx))
2055 return;
2056 x = extent.stopx - extent.startx; // number of pixels to draw (start inclusive, end exclusive)
2057 if (extent.stopx > lx)
2058 x = x - (extent.stopx - lx - 1);
2059 x--;
2060 while (x >= 0) {
2061 float zf;
2062 double spf, tpf;
2063 //double rpf, qpf; // disabled to remove "set but not used" warning
2064 int sp, tp;
2065 int z;
2066 int xp = extent.startx + x; // x coordinate of current pixel
2067
2068 z = (extent.param[(int)VERTEX_PARAMETER::PARAM_Z].start + (double)x * extent.param[(int)VERTEX_PARAMETER::PARAM_Z].dpdx);
2069 zf = (extent.param[(int)VERTEX_PARAMETER::PARAM_1W].start + (double)x * extent.param[(int)VERTEX_PARAMETER::PARAM_1W].dpdx);
2070 zf = 1.0f / zf;
2071 spf = (extent.param[(int)VERTEX_PARAMETER::PARAM_TEXTURE0_S].start + (double)x * extent.param[(int)VERTEX_PARAMETER::PARAM_TEXTURE0_S].dpdx) * zf;
2072 tpf = (extent.param[(int)VERTEX_PARAMETER::PARAM_TEXTURE0_T].start + (double)x * extent.param[(int)VERTEX_PARAMETER::PARAM_TEXTURE0_T].dpdx) * zf;
2073 //rpf = (extent.param[(int)VERTEX_PARAMETER::PARAM_TEXTURE0_R].start + (double)x * extent.param[(int)VERTEX_PARAMETER::PARAM_TEXTURE0_R].dpdx) * zf;
2074 //qpf = (extent.param[(int)VERTEX_PARAMETER::PARAM_TEXTURE0_Q].start + (double)x * extent.param[(int)VERTEX_PARAMETER::PARAM_TEXTURE0_Q].dpdx) * zf;
2075 if (objectdata.data->texture[0].rectangle == false) {
2076 sp = spf * (double)(objectdata.data->texture[0].sizes - 1); // x coordinate of texel in texture
2077 tp = tpf * (double)(objectdata.data->texture[0].sizet - 1); // y coordinate of texel in texture
2078 }
2079 else {
2080 sp = spf;
2081 tp = tpf;
2082 }
2083 a8r8g8b8 = texture_get_texel(0, sp, tp);
2084 write_pixel(xp, scanline, a8r8g8b8, z);
2085 x--;
2086 }
2087 }
2088
2089 void nv2a_renderer::render_register_combiners(int32_t scanline, const nv2a_rasterizer::extent_t &extent, const nvidia_object_data &objectdata, int threadid)
2090 {
2091 int x, lx, xp;
2092 int tc[4];
2093 float colorf[7][4];
2094 uint32_t color[6];
2095 uint32_t a8r8g8b8;
2096 int z;
2097 int n;
2098
2099 color[0] = color[1] = color[2] = color[3] = color[4] = color[5] = 0;
2100
2101 lx = limits_rendertarget.right();
2102 if ((extent.startx < 0) && (extent.stopx <= 0))
2103 return;
2104 if ((extent.startx > lx) && (extent.stopx > lx))
2105 return;
2106 x = extent.stopx - extent.startx; // number of pixels to draw (start inclusive, end exclusive)
2107 if (extent.stopx > lx)
2108 x = x - (extent.stopx - lx - 1);
2109 x--;
2110 while (x >= 0) {
2111 float zf;
2112
2113 xp = extent.startx + x;
2114 // 1: fetch data
2115 // 1.1: interpolated color from vertices
2116 z = (extent.param[(int)VERTEX_PARAMETER::PARAM_Z].start + (double)x * extent.param[(int)VERTEX_PARAMETER::PARAM_Z].dpdx);
2117 zf = (extent.param[(int)VERTEX_PARAMETER::PARAM_1W].start + (double)x * extent.param[(int)VERTEX_PARAMETER::PARAM_1W].dpdx);
2118 zf = 1.0f / zf;
2119 colorf[0][0] = (extent.param[(int)VERTEX_PARAMETER::PARAM_COLOR_R].start + (double)x * extent.param[(int)VERTEX_PARAMETER::PARAM_COLOR_R].dpdx) * zf;
2120 colorf[0][1] = (extent.param[(int)VERTEX_PARAMETER::PARAM_COLOR_G].start + (double)x * extent.param[(int)VERTEX_PARAMETER::PARAM_COLOR_G].dpdx) * zf;
2121 colorf[0][2] = (extent.param[(int)VERTEX_PARAMETER::PARAM_COLOR_B].start + (double)x * extent.param[(int)VERTEX_PARAMETER::PARAM_COLOR_B].dpdx) * zf;
2122 colorf[0][3] = (extent.param[(int)VERTEX_PARAMETER::PARAM_COLOR_A].start + (double)x * extent.param[(int)VERTEX_PARAMETER::PARAM_COLOR_A].dpdx) * zf;
2123 colorf[1][0] = (extent.param[(int)VERTEX_PARAMETER::PARAM_SECONDARY_COLOR_R].start + (double)x * extent.param[(int)VERTEX_PARAMETER::PARAM_SECONDARY_COLOR_R].dpdx) * zf;
2124 colorf[1][1] = (extent.param[(int)VERTEX_PARAMETER::PARAM_SECONDARY_COLOR_G].start + (double)x * extent.param[(int)VERTEX_PARAMETER::PARAM_SECONDARY_COLOR_G].dpdx) * zf;
2125 colorf[1][2] = (extent.param[(int)VERTEX_PARAMETER::PARAM_SECONDARY_COLOR_B].start + (double)x * extent.param[(int)VERTEX_PARAMETER::PARAM_SECONDARY_COLOR_B].dpdx) * zf;
2126 colorf[1][3] = (extent.param[(int)VERTEX_PARAMETER::PARAM_SECONDARY_COLOR_A].start + (double)x * extent.param[(int)VERTEX_PARAMETER::PARAM_SECONDARY_COLOR_A].dpdx) * zf;
2127 // 1.2: coordinates for each of the 4 possible textures
2128 for (n = 0; n < 4; n++) {
2129 colorf[n + 2][0] = (extent.param[(int)VERTEX_PARAMETER::PARAM_TEXTURE0_S + n * 4].start + (double)x * extent.param[(int)VERTEX_PARAMETER::PARAM_TEXTURE0_S + n * 4].dpdx) * zf;
2130 colorf[n + 2][1] = (extent.param[(int)VERTEX_PARAMETER::PARAM_TEXTURE0_T + n * 4].start + (double)x * extent.param[(int)VERTEX_PARAMETER::PARAM_TEXTURE0_T + n * 4].dpdx) * zf;
2131 colorf[n + 2][2] = (extent.param[(int)VERTEX_PARAMETER::PARAM_TEXTURE0_R + n * 4].start + (double)x * extent.param[(int)VERTEX_PARAMETER::PARAM_TEXTURE0_R + n * 4].dpdx) * zf;
2132 colorf[n + 2][3] = (extent.param[(int)VERTEX_PARAMETER::PARAM_TEXTURE0_Q + n * 4].start + (double)x * extent.param[(int)VERTEX_PARAMETER::PARAM_TEXTURE0_Q + n * 4].dpdx) * zf;
2133 }
2134 // 1.3: fog
2135 combiner_argb8_float(fog_color, colorf[6]);
2136 colorf[6][3] = 1.0f; // should it be from the ofog output of the vertex shader ?
2137 // 1.4: colors from textures
2138 for (n = 0; n < 4; n++) {
2139 if (texture[n].mode == 1) {
2140 if (texture[n].rectangle == false) {
2141 tc[0] = colorf[n + 2][0] * (float)(objectdata.data->texture[n].sizes - 1);
2142 tc[1] = colorf[n + 2][1] * (float)(objectdata.data->texture[n].sizet - 1);
2143 }
2144 else {
2145 tc[0] = colorf[n + 2][0];
2146 tc[1] = colorf[n + 2][1];
2147 }
2148 a8r8g8b8 = texture_get_texel(n, tc[0], tc[1]);
2149 combiner_argb8_float(a8r8g8b8, colorf[n + 2]);
2150 }
2151 else if (texture[n].mode == 4)
2152 ; // nothing
2153 else
2154 combiner_argb8_float(0xff000000, colorf[n + 2]);
2155 }
2156 // 2: compute
2157 // 2.1: initialize
2158 combiner_initialize_registers(threadid, colorf);
2159 		// 2.2: general combiner stages
2160 for (n = 0; n < combiner.setup.stages; n++) {
2161 // 2.2.1 initialize
2162 combiner_initialize_stage(threadid, n);
2163 // 2.2.2 map inputs
2164 combiner_map_stage_input(threadid, n);
2165 // 2.2.3 compute possible outputs
2166 combiner_compute_rgb_outputs(threadid, n);
2167 combiner_compute_alpha_outputs(threadid, n);
2168 // 2.2.4 map outputs to registers
2169 combiner_map_stage_output(threadid, n);
2170 }
2171 		// 2.3: final combiner stage
2172 combiner_initialize_final(threadid);
2173 combiner_map_final_input(threadid);
2174 combiner_final_output(threadid);
2175 a8r8g8b8 = combiner_float_argb8(combiner.work[threadid].output);
2176 // 3: write pixel
2177 write_pixel(xp, scanline, a8r8g8b8, z);
2178 x--;
2179 }
2180 }
2181
2182 #if 0
2183 const char *rc_mapping_str[] = {
2184 "UNSIGNED_IDENTITY",
2185 "UNSIGNED_INVERT",
2186 "EXPAND_NORMAL",
2187 "EXPAND_NEGATE",
2188 "HALF_BIAS_NORMAL",
2189 "HALF_BIAS_NEGATE",
2190 "SIGNED_IDENTITY",
2191 "SIGNED_NEGATE"
2192 };
2193
2194 const char *rc_usage_rgb_str[] = {
2195 "RGB",
2196 "ALPHA"
2197 };
2198
2199 const char *rc_usage_alpha_str[] = {
2200 "BLUE",
2201 "ALPHA"
2202 };
2203
2204 const char *rc_variable_str[] = {
2205 "ZERO",
2206 "CONSTANT_COLOR0",
2207 "CONSTANT_COLOR1",
2208 "FOG",
2209 "PRIMARY_COLOR",
2210 "SECONDARY_COLOR",
2211 "???",
2212 "???",
2213 "TEXTURE0",
2214 "TEXTURE1",
2215 "TEXTURE2",
2216 "TEXTURE3",
2217 "SPARE0",
2218 "SPARE1",
2219 "SPARE0_PLUS_SECONDARY_COLOR",
2220 "E_TIMES_F"
2221 };
2222
2223 const char *rc_bias_str[] = {
2224 "NONE",
2225 "BIAS_BY_NEGATIVE_ONE_HALF"
2226 };
2227
2228 const char *rc_scale_str[] = {
2229 "NONE",
2230 "SCALE_BY_TWO",
2231 "SCALE_BY_FOUR",
2232 "SCALE_BY_ONE_HALF"
2233 };
2234
2235 /* Dump the current setup of the register combiners */
2236 void dumpcombiners(uint32_t *m)
2237 {
2238 int a, b, n, v;
2239
2240 n = m[0x1e60 / 4] & 0xf;
2241 printf("Combiners active: %d\n\r", n);
2242 for (a = 0; a < n; a++) {
2243 printf("Combiner %d\n\r", a + 1);
2244 printf(" RC_IN_ALPHA %08X\n\r", m[0x0260 / 4 + a]);
2245 for (b = 24; b >= 0; b = b - 8) {
2246 v = (m[0x0260 / 4 + a] >> b) & 0xf;
2247 printf(" %c_INPUT %s\n\r", 'A' + 3 - b / 8, rc_variable_str[v]);
2248 v = (m[0x0260 / 4 + a] >> (b + 4)) & 1;
2249 printf(" %c_COMPONENT_USAGE %s\n\r", 'A' + 3 - b / 8, rc_usage_alpha_str[v]);
2250 v = (m[0x0260 / 4 + a] >> (b + 5)) & 7;
2251 printf(" %c_MAPPING %s\n\r", 'A' + 3 - b / 8, rc_mapping_str[v]);
2252 }
2253 printf(" RC_IN_RGB %08X\n\r", m[0x0ac0 / 4 + a]);
2254 for (b = 24; b >= 0; b = b - 8) {
2255 v = (m[0x0ac0 / 4 + a] >> b) & 0xf;
2256 printf(" %c_INPUT %s\n\r", 'A' + 3 - b / 8, rc_variable_str[v]);
2257 v = (m[0x0ac0 / 4 + a] >> (b + 4)) & 1;
2258 printf(" %c_COMPONENT_USAGE %s\n\r", 'A' + 3 - b / 8, rc_usage_rgb_str[v]);
2259 v = (m[0x0ac0 / 4 + a] >> (b + 5)) & 7;
2260 printf(" %c_MAPPING %s\n\r", 'A' + 3 - b / 8, rc_mapping_str[v]);
2261 }
2262 printf(" RC_OUT_ALPHA %08X\n\r", m[0x0aa0 / 4 + a]);
2263 v = m[0x0aa0 / 4 + a] & 0xf;
2264 printf(" CD_OUTPUT %s\n\r", rc_variable_str[v]);
2265 v = (m[0x0aa0 / 4 + a] >> 4) & 0xf;
2266 printf(" AB_OUTPUT %s\n\r", rc_variable_str[v]);
2267 v = (m[0x0aa0 / 4 + a] >> 8) & 0xf;
2268 printf(" SUM_OUTPUT %s\n\r", rc_variable_str[v]);
2269 v = (m[0x0aa0 / 4 + a] >> 12) & 1;
2270 printf(" CD_DOT_PRODUCT %d\n\r", v);
2271 v = (m[0x0aa0 / 4 + a] >> 13) & 1;
2272 printf(" AB_DOT_PRODUCT %d\n\r", v);
2273 v = (m[0x0aa0 / 4 + a] >> 14) & 1;
2274 printf(" MUX_SUM %d\n\r", v);
2275 v = (m[0x0aa0 / 4 + a] >> 15) & 1;
2276 printf(" BIAS %s\n\r", rc_bias_str[v]);
2277 v = (m[0x0aa0 / 4 + a] >> 16) & 3;
2278 printf(" SCALE %s\n\r", rc_scale_str[v]);
2279 //v=(m[0x0aa0/4+a] >> 27) & 7;
2280 printf(" RC_OUT_RGB %08X\n\r", m[0x1e40 / 4 + a]);
2281 v = m[0x1e40 / 4 + a] & 0xf;
2282 printf(" CD_OUTPUT %s\n\r", rc_variable_str[v]);
2283 v = (m[0x1e40 / 4 + a] >> 4) & 0xf;
2284 printf(" AB_OUTPUT %s\n\r", rc_variable_str[v]);
2285 v = (m[0x1e40 / 4 + a] >> 8) & 0xf;
2286 printf(" SUM_OUTPUT %s\n\r", rc_variable_str[v]);
2287 v = (m[0x1e40 / 4 + a] >> 12) & 1;
2288 printf(" CD_DOT_PRODUCT %d\n\r", v);
2289 v = (m[0x1e40 / 4 + a] >> 13) & 1;
2290 printf(" AB_DOT_PRODUCT %d\n\r", v);
2291 v = (m[0x1e40 / 4 + a] >> 14) & 1;
2292 printf(" MUX_SUM %d\n\r", v);
2293 v = (m[0x1e40 / 4 + a] >> 15) & 1;
2294 printf(" BIAS %s\n\r", rc_bias_str[v]);
2295 v = (m[0x1e40 / 4 + a] >> 16) & 3;
2296 printf(" SCALE %s\n\r", rc_scale_str[v]);
2297 //v=(m[0x1e40/4+a] >> 27) & 7;
2298 printf("\n\r");
2299 }
2300 printf("Combiner final %08X %08X\n\r", m[0x0288 / 4], m[0x028c / 4]);
2301 for (a = 24; a >= 0; a = a - 8) {
2302 n = (m[0x0288 / 4] >> a) & 0xf;
2303 printf(" %c_INPUT %s\n\r", 'A' + 3 - a / 8, rc_variable_str[n]);
2304 n = (m[0x0288 / 4] >> (a + 4)) & 1;
2305 printf(" %c_COMPONENT_USAGE %s\n\r", 'A' + 3 - a / 8, rc_usage_rgb_str[n]);
2306 n = (m[0x0288 / 4] >> (a + 5)) & 7;
2307 printf(" %c_MAPPING %s\n\r", 'A' + 3 - a / 8, rc_mapping_str[n]);
2308 }
2309 for (a = 24; a >= 8; a = a - 8) {
2310 n = (m[0x028c / 4] >> a) & 0xf;
2311 printf(" %c_INPUT %s\n\r", 'E' + 3 - a / 8, rc_variable_str[n]);
2312 n = (m[0x028c / 4] >> (a + 4)) & 1;
2313 printf(" %c_COMPONENT_USAGE %s\n\r", 'E' + 3 - a / 8, rc_usage_rgb_str[n]);
2314 n = (m[0x028c / 4] >> (a + 5)) & 7;
2315 printf(" %c_MAPPING %s\n\r", 'E' + 3 - a / 8, rc_mapping_str[n]);
2316 }
2317 n = (m[0x028c / 4] >> 7) & 1;
2318 printf(" color sum clamp: %d\n\r", n);
2319 }
2320 #endif
2321
2322 void nv2a_renderer::extract_packed_float(uint32_t data, float &first, float &second, float &third)
2323 {
2324 float f1, f2, f3;
2325 int p1, p2, p3;
2326
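	// data packs three signed normalized components: bits 10-0 and 21-11 are 11-bit two's complement
	// fields scaled by 1/1023, bits 31-22 a 10-bit field scaled by 1/511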
2327 p1 = data & 0x7ff;
2328 if (p1 & 0x400)
2329 f1 = (float)(p1 - 0x800) / 1023.0;
2330 else
2331 f1 = (float)p1 / 1023.0;
2332 p2 = (data >> 11) & 0x7ff;
2333 if (p2 & 0x400)
2334 f2 = (float)(p2 - 0x800) / 1023.0;
2335 else
2336 f2 = (float)p2 / 1023.0;
2337 p3 = (data >> 22) & 0x3ff;
2338 if (p3 & 0x200)
2339 f3 = (float)(p3 - 0x400) / 511.0;
2340 else
2341 f3 = (float)p3 / 511.0;
2342 first = f1;
2343 second = f2;
2344 third = f3;
2345 }
2346
2347 void nv2a_renderer::read_vertex(address_space &space, offs_t address, vertex_nv &vertex, int attrib)
2348 {
2349 uint32_t u;
2350
2351 switch (vertexbuffer.type[attrib])
2352 {
2353 case 0x02: // none
2354 return;
2355 case 0x12: // float1
2356 vertex.attribute[attrib].iv[0] = space.read_dword(address + 0);
2357 vertex.attribute[attrib].fv[1] = 0;
2358 vertex.attribute[attrib].fv[2] = 0;
2359 vertex.attribute[attrib].fv[3] = 1.0;
2360 break;
2361 case 0x16: // normpacked3
2362 u = space.read_dword(address + 0);
2363 extract_packed_float(u, vertex.attribute[attrib].fv[0], vertex.attribute[attrib].fv[1], vertex.attribute[attrib].fv[2]);
2364 vertex.attribute[attrib].fv[3] = 1.0;
2365 break;
2366 case 0x22: // float2
2367 vertex.attribute[attrib].iv[0] = space.read_dword(address + 0);
2368 vertex.attribute[attrib].iv[1] = space.read_dword(address + 4);
2369 vertex.attribute[attrib].fv[2] = 0;
2370 vertex.attribute[attrib].fv[3] = 1.0;
2371 break;
2372 case 0x32: // float3
2373 vertex.attribute[attrib].iv[0] = space.read_dword(address + 0);
2374 vertex.attribute[attrib].iv[1] = space.read_dword(address + 4);
2375 vertex.attribute[attrib].iv[2] = space.read_dword(address + 8);
2376 vertex.attribute[attrib].fv[3] = 1.0;
2377 break;
2378 case 0x40: // d3dcolor
2379 u = space.read_dword(address + 0);
2380 // aarrggbb -> (rr, gg, bb, aa)
2381 vertex.attribute[attrib].fv[2] = (u & 0xff) / 255.0;
2382 u = u >> 8;
2383 vertex.attribute[attrib].fv[1] = (u & 0xff) / 255.0;
2384 u = u >> 8;
2385 vertex.attribute[attrib].fv[0] = (u & 0xff) / 255.0;
2386 u = u >> 8;
2387 vertex.attribute[attrib].fv[3] = (u & 0xff) / 255.0;
2388 break;
2389 case 0x42: // float4
2390 vertex.attribute[attrib].iv[0] = space.read_dword(address + 0);
2391 vertex.attribute[attrib].iv[1] = space.read_dword(address + 4);
2392 vertex.attribute[attrib].iv[2] = space.read_dword(address + 8);
2393 vertex.attribute[attrib].iv[3] = space.read_dword(address + 12);
2394 break;
2395 default:
2396 machine().logerror("Yet unsupported vertex data type %x\n\r", vertexbuffer.type[attrib]);
2397 return;
2398 }
2399 }
2400
2401 /* Read vertex data from system memory. Methods 0x1800 and 0x1808 */
2402 int nv2a_renderer::read_vertices_0x180x(address_space &space, int destination, uint32_t address, int limit)
2403 {
2404 uint32_t m, n;
2405 int a, b;
2406
2407 n = destination;
2408 for (m = 0; m < limit; m++) {
2409 memcpy(&vertex_software[n], &persistvertexattr, sizeof(persistvertexattr));
2410 b = vertexbuffer.enabled;
2411 for (a = 0; a < 16; a++) {
2412 if (b & 1) {
2413 read_vertex(space, vertexbuffer.address[a] + vertex_indexes[indexesleft_first] * vertexbuffer.stride[a], vertex_software[n], a);
2414 }
2415 b = b >> 1;
2416 }
2417 n = (n + 1) & 1023;
2418 indexesleft_first = (indexesleft_first + 1) & 1023;
2419 indexesleft_count--;
2420 }
2421 return limit;
2422 }
2423
2424 /* Read vertex data from system memory. Method 0x1810 */
2425 int nv2a_renderer::read_vertices_0x1810(address_space &space, int destination, int offset, int limit)
2426 {
2427 uint32_t m, n;
2428 int a, b;
2429
2430 n = destination;
2431 for (m = 0; m < limit; m++) {
2432 memcpy(&vertex_software[n], &persistvertexattr, sizeof(persistvertexattr));
2433 b = vertexbuffer.enabled;
2434 for (a = 0; a < 16; a++) {
2435 if (b & 1) {
2436 read_vertex(space, vertexbuffer.address[a] + (m + offset) * vertexbuffer.stride[a], vertex_software[n], a);
2437 }
2438 b = b >> 1;
2439 }
2440 n = (n + 1) & 1023;
2441 }
2442 return m;
2443 }
2444
2445 /* Read vertex data from system memory. Method 0x1818 */
2446 int nv2a_renderer::read_vertices_0x1818(address_space &space, int destination, uint32_t address, int limit)
2447 {
2448 uint32_t m, n, vwords;
2449 int a, b;
2450
2451 n = destination;
2452 vwords = vertexbuffer.offset[16];
2453 for (m = 0; m < limit; m++) {
2454 memcpy(&vertex_software[n], &persistvertexattr, sizeof(persistvertexattr));
2455 b = vertexbuffer.enabled;
2456 for (a = 0; a < 16; a++) {
2457 if (b & 1) {
2458 read_vertex(space, address + vertexbuffer.offset[a] * 4, vertex_software[n], a);
2459 }
2460 b = b >> 1;
2461 }
2462 n = (n + 1) & 1023;
2463 address = address + vwords * 4;
2464 }
2465 return (int)(m*vwords);
2466 }
2467
2468 void nv2a_renderer::compute_supersample_factors(float &horizontal, float &vertical)
2469 {
2470 float mx, my;
2471
2472 mx = 1;
2473 my = 1;
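	// the selector takes bit 0 of antialias_control as bit 2 and the render target antialiasing
	// mode as bits 1-0: cases 0/4 give 1x1, 1/5 give 2x1 and 2/6 give 2x2 supersampling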
2474 switch (((antialias_control & 1) << 2) | antialiasing_rendertarget)
2475 {
2476 case 0:
2477 mx = my = 1;
2478 break;
2479 case 1:
2480 mx = 2; my = 1;
2481 break;
2482 case 2:
2483 mx = my = 2;
2484 break;
2485 case 4:
2486 mx = my = 1;
2487 break;
2488 case 5:
2489 mx = 2;
2490 my = 1;
2491 break;
2492 case 6:
2493 mx = 2;
2494 my = 2;
2495 break;
2496 default:
2497 mx = my = 1;
2498 }
2499 horizontal = mx;
2500 vertical = my;
2501 }
2502
2503 void nv2a_renderer::convert_vertices(vertex_nv *source, nv2avertex_t *destination)
2504 {
2505 vertex_nv vert;
2506 int u;
2507 float v[4];
2508 double c;
2509
2510 // take each vertex with its attributes and obtain data for drawing
2511 // should use either the vertex program or transformation matrices
2512 if (vertex_pipeline == 4) {
2513 // transformation matrices
2514 // this part needs more testing
2515 for (int i = 0; i < 4; i++) {
2516 v[i] = 0;
2517 for (int j = 0; j < 4; j++)
2518 v[i] += matrix.composite[i][j] * source->attribute[0].fv[j];
2519 };
2520 destination->w = v[3];
2521 destination->x = (v[0] / v[3]) * supersample_factor_x; // source->attribute[0].fv[0];
2522 destination->y = (v[1] / v[3]) * supersample_factor_y; // source->attribute[0].fv[1];
2523 c = v[3];
2524 if (c == 0)
2525 c = FLT_MIN;
2526 c = 1.0f / c;
2527 destination->p[(int)VERTEX_PARAMETER::PARAM_1W] = c;
2528 destination->p[(int)VERTEX_PARAMETER::PARAM_Z] = v[2] * c;
2529 destination->p[(int)VERTEX_PARAMETER::PARAM_COLOR_R] = source->attribute[3].fv[0] * c;
2530 destination->p[(int)VERTEX_PARAMETER::PARAM_COLOR_G] = source->attribute[3].fv[1] * c;
2531 destination->p[(int)VERTEX_PARAMETER::PARAM_COLOR_B] = source->attribute[3].fv[2] * c;
2532 destination->p[(int)VERTEX_PARAMETER::PARAM_COLOR_A] = source->attribute[3].fv[3] * c;
2533 destination->p[(int)VERTEX_PARAMETER::PARAM_SECONDARY_COLOR_R] = source->attribute[4].fv[0] * c;
2534 destination->p[(int)VERTEX_PARAMETER::PARAM_SECONDARY_COLOR_G] = source->attribute[4].fv[1] * c;
2535 destination->p[(int)VERTEX_PARAMETER::PARAM_SECONDARY_COLOR_B] = source->attribute[4].fv[2] * c;
2536 destination->p[(int)VERTEX_PARAMETER::PARAM_SECONDARY_COLOR_A] = source->attribute[4].fv[3] * c;
2537 for (u = 0; u < 4; u++) {
2538 destination->p[(int)VERTEX_PARAMETER::PARAM_TEXTURE0_S + u * 4] = source->attribute[9 + u].fv[0] * c;
2539 destination->p[(int)VERTEX_PARAMETER::PARAM_TEXTURE0_T + u * 4] = source->attribute[9 + u].fv[1] * c;
2540 destination->p[(int)VERTEX_PARAMETER::PARAM_TEXTURE0_R + u * 4] = source->attribute[9 + u].fv[2] * c;
2541 destination->p[(int)VERTEX_PARAMETER::PARAM_TEXTURE0_Q + u * 4] = source->attribute[9 + u].fv[3] * c;
2542 }
2543 }
2544 else {
2545 // vertex program
2546 // run vertex program
2547 vertexprogram.exec.process(vertexprogram.start_instruction, source, &vert, 1);
2548 // the output of the vertex program has the perspective divide, viewport scale and offset already applied
2549 // copy data for poly.h
2550 destination->w = vert.attribute[0].fv[3];
2551 destination->x = (vert.attribute[0].fv[0] - 0.53125f) * supersample_factor_x;
2552 destination->y = (vert.attribute[0].fv[1] - 0.53125f) * supersample_factor_y;
2553 c = destination->w;
2554 if (c == 0)
2555 c = FLT_MIN;
2556 c = 1.0f / c;
2557 destination->p[(int)VERTEX_PARAMETER::PARAM_1W] = c;
2558 destination->p[(int)VERTEX_PARAMETER::PARAM_Z] = vert.attribute[0].fv[2]; // already divided by w
2559 destination->p[(int)VERTEX_PARAMETER::PARAM_COLOR_R] = vert.attribute[3].fv[0] * c;
2560 destination->p[(int)VERTEX_PARAMETER::PARAM_COLOR_G] = vert.attribute[3].fv[1] * c;
2561 destination->p[(int)VERTEX_PARAMETER::PARAM_COLOR_B] = vert.attribute[3].fv[2] * c;
2562 destination->p[(int)VERTEX_PARAMETER::PARAM_COLOR_A] = vert.attribute[3].fv[3] * c;
2563 destination->p[(int)VERTEX_PARAMETER::PARAM_SECONDARY_COLOR_R] = vert.attribute[4].fv[0] * c;
2564 destination->p[(int)VERTEX_PARAMETER::PARAM_SECONDARY_COLOR_G] = vert.attribute[4].fv[1] * c;
2565 destination->p[(int)VERTEX_PARAMETER::PARAM_SECONDARY_COLOR_B] = vert.attribute[4].fv[2] * c;
2566 destination->p[(int)VERTEX_PARAMETER::PARAM_SECONDARY_COLOR_A] = vert.attribute[4].fv[3] * c;
2567 for (u = 0; u < 4; u++) {
2568 destination->p[(int)VERTEX_PARAMETER::PARAM_TEXTURE0_S + u * 4] = vert.attribute[9 + u].fv[0] * c;
2569 destination->p[(int)VERTEX_PARAMETER::PARAM_TEXTURE0_T + u * 4] = vert.attribute[9 + u].fv[1] * c;
2570 destination->p[(int)VERTEX_PARAMETER::PARAM_TEXTURE0_R + u * 4] = vert.attribute[9 + u].fv[2] * c;
2571 destination->p[(int)VERTEX_PARAMETER::PARAM_TEXTURE0_Q + u * 4] = vert.attribute[9 + u].fv[3] * c;
2572 }
2573 }
2574 }
2575
2576 void nv2a_renderer::clear_render_target(int what, uint32_t value)
2577 {
2578 int xi, yi, xf, yf;
2579 int x, y;
2580 uint32_t color;
2581 uint8_t *addr;
2582 uint32_t mask;
2583 uint32_t offset;
2584
2585 if (what == 0)
2586 return;
2587 mask = 0;
2588 switch (colorformat_rendertarget) {
2589 case NV2A_COLOR_FORMAT::R5G6B5:
2590 if (what & 1)
2591 mask = 0x1f;
2592 if (what & 2)
2593 mask = mask | 0x07e0;
2594 if (what & 4)
2595 mask = mask | 0xf800;
2596 break;
2597 case NV2A_COLOR_FORMAT::X8R8G8B8_Z8R8G8B8:
2598 case NV2A_COLOR_FORMAT::X8R8G8B8_X8R8G8B8:
2599 for (x = 3; x >= 0; x--) {
2600 if (what & 8)
2601 mask = (mask << 8) | 255;
2602 what = what << 1;
2603 }
2604 break;
2605 case NV2A_COLOR_FORMAT::A8R8G8B8:
2606 for (x = 3; x >= 0; x--) {
2607 if (what & 8)
2608 mask = (mask << 8) | 255;
2609 what = what << 1;
2610 }
2611 break;
2612 case NV2A_COLOR_FORMAT::B8:
2613 if (what & 1)
2614 mask = 255;
2615 break;
2616 default:
2617 return;
2618 }
2619 xi = clear_rendertarget.left()*supersample_factor_x;
2620 yi = clear_rendertarget.top()*supersample_factor_y;
2621 xf = clear_rendertarget.right()*supersample_factor_x;
2622 yf = clear_rendertarget.bottom()*supersample_factor_y;
2623 if ((xi < limits_rendertarget.left()) && (xf < limits_rendertarget.left()))
2624 return;
2625 if ((xi > limits_rendertarget.right()) && (xf > limits_rendertarget.right()))
2626 return;
2627 if ((yi < limits_rendertarget.top()) && (yf < limits_rendertarget.top()))
2628 return;
2629 if ((yi > limits_rendertarget.bottom()) && (yf > limits_rendertarget.bottom()))
2630 return;
2631 if (xi < limits_rendertarget.left())
2632 xi = limits_rendertarget.left();
2633 if (xf > limits_rendertarget.right())
2634 xf = limits_rendertarget.right();
2635 if (yi < limits_rendertarget.top())
2636 yi = limits_rendertarget.top();
2637 if (yf > limits_rendertarget.bottom())
2638 yf = limits_rendertarget.bottom();
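	// compute the address of the last pixel so the range can be validated below: swizzled targets
	// interleave the x and y bits through the dilate lookup tables, linear targets use pitch * y + x * bytes per pixel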
2639 if (type_rendertarget == NV2A_RT_TYPE::SWIZZLED)
2640 offset = (dilated0[dilate_rendertarget][xf] + dilated1[dilate_rendertarget][yf]) * bytespixel_rendertarget;
2641 else // type_rendertarget == LINEAR
2642 offset = pitch_rendertarget * yf + xf * bytespixel_rendertarget;
2643 addr = (uint8_t *)rendertarget + offset;
2644 if ((addr < basemempointer) || (addr > topmempointer))
2645 {
2646 machine().logerror("Bad memory pointer computed in clear_render_target !\n");
2647 return;
2648 }
2649
2650 for (y = yi; y <= yf; y++)
2651 for (x = xi; x <= xf; x++) {
2652 if (type_rendertarget == NV2A_RT_TYPE::SWIZZLED)
2653 offset = (dilated0[dilate_rendertarget][x] + dilated1[dilate_rendertarget][y]) * bytespixel_rendertarget;
2654 else // type_rendertarget == LINEAR
2655 offset = pitch_rendertarget * y + x * bytespixel_rendertarget;
2656 switch (colorformat_rendertarget) {
2657 case NV2A_COLOR_FORMAT::R5G6B5:
2658 addr = (uint8_t *)rendertarget + offset;
2659 color = *((uint16_t *)addr);
2660 break;
2661 case NV2A_COLOR_FORMAT::X8R8G8B8_Z8R8G8B8:
2662 case NV2A_COLOR_FORMAT::X8R8G8B8_X8R8G8B8:
2663 addr = (uint8_t *)rendertarget + offset;
2664 color = *((uint32_t *)addr);
2665 break;
2666 case NV2A_COLOR_FORMAT::A8R8G8B8:
2667 addr = (uint8_t *)rendertarget + offset;
2668 color = *((uint32_t *)addr);
2669 break;
2670 case NV2A_COLOR_FORMAT::B8:
2671 addr = (uint8_t *)rendertarget + offset;
2672 color = *addr;
2673 break;
2674 default:
2675 return;
2676 }
2677 color = (color & ~mask) | (value & mask);
2678 switch (colorformat_rendertarget) {
2679 case NV2A_COLOR_FORMAT::R5G6B5:
2680 *((uint16_t *)addr) = color;
2681 break;
2682 case NV2A_COLOR_FORMAT::X8R8G8B8_Z8R8G8B8:
2683 case NV2A_COLOR_FORMAT::X8R8G8B8_X8R8G8B8:
2684 *((uint32_t *)addr) = color;
2685 break;
2686 case NV2A_COLOR_FORMAT::A8R8G8B8:
2687 *((uint32_t *)addr) = color;
2688 break;
2689 case NV2A_COLOR_FORMAT::B8:
2690 *addr = color;
2691 break;
2692 default:
2693 return;
2694 }
2695 }
2696 #ifdef LOG_NV2A
2697 printf("clearscreen\n\r");
2698 #endif
2699 }
2700
2701 void nv2a_renderer::clear_depth_buffer(int what, uint32_t value)
2702 {
2703 int xi, yi, xf, yf;
2704 int x, y;
2705 uint32_t color;
2706 uint8_t *addr;
2707 uint32_t mask;
2708 uint32_t offset;
2709 uint32_t bpp;
2710
2711 if (what == 0)
2712 return;
2713 mask = 0;
2714 switch (depthformat_rendertarget) {
2715 case NV2A_RT_DEPTH_FORMAT::Z24S8:
2716 if (what & 1)
2717 mask = 0xffffff00;
2718 if (what & 2)
2719 mask = mask | 0xff;
2720 bpp = 4;
2721 break;
2722 case NV2A_RT_DEPTH_FORMAT::Z16:
2723 if (what & 1)
2724 mask = 0xffff;
2725 bpp = 2;
2726 break;
2727 default:
2728 return;
2729 }
2730 xi = clear_rendertarget.left()*supersample_factor_x;
2731 yi = clear_rendertarget.top()*supersample_factor_y;
2732 xf = clear_rendertarget.right()*supersample_factor_x;
2733 yf = clear_rendertarget.bottom()*supersample_factor_y;
2734 if ((xi < limits_rendertarget.left()) && (xf < limits_rendertarget.left()))
2735 return;
2736 if ((xi > limits_rendertarget.right()) && (xf > limits_rendertarget.right()))
2737 return;
2738 if ((yi < limits_rendertarget.top()) && (yf < limits_rendertarget.top()))
2739 return;
2740 if ((yi > limits_rendertarget.bottom()) && (yf > limits_rendertarget.bottom()))
2741 return;
2742 if (xi < limits_rendertarget.left())
2743 xi = limits_rendertarget.left();
2744 if (xf > limits_rendertarget.right())
2745 xf = limits_rendertarget.right();
2746 if (yi < limits_rendertarget.top())
2747 yi = limits_rendertarget.top();
2748 if (yf > limits_rendertarget.bottom())
2749 yf = limits_rendertarget.bottom();
2750 offset = pitch_depthbuffer * yf + xf * bpp;
2751 addr = (uint8_t *)depthbuffer + offset;
2752 if ((addr < basemempointer) || (addr > topmempointer))
2753 {
2754 machine().logerror("Bad memory pointer computed in clear_depth_buffer !\n");
2755 return;
2756 }
2757
2758 for (y = yi; y <= yf; y++)
2759 for (x = xi; x <= xf; x++) {
2760 offset = pitch_depthbuffer * y + x * bpp;
2761 switch (depthformat_rendertarget) {
2762 case NV2A_RT_DEPTH_FORMAT::Z16:
2763 addr = (uint8_t *)depthbuffer + offset;
2764 color = *((uint16_t *)addr);
2765 break;
2766 case NV2A_RT_DEPTH_FORMAT::Z24S8:
2767 addr = (uint8_t *)depthbuffer + offset;
2768 color = *((uint32_t *)addr);
2769 break;
2770 default:
2771 return;
2772 }
2773 color = (color & ~mask) | (value & mask);
2774 switch (depthformat_rendertarget) {
2775 case NV2A_RT_DEPTH_FORMAT::Z16:
2776 addr = (uint8_t *)depthbuffer + offset;
2777 *((uint16_t *)addr) = color;
2778 break;
2779 case NV2A_RT_DEPTH_FORMAT::Z24S8:
2780 addr = (uint8_t *)depthbuffer + offset;
2781 *((uint32_t *)addr) = color;
2782 break;
2783 default:
2784 return;
2785 }
2786 }
2787 }
2788
2789 uint32_t nv2a_renderer::render_triangle_culling(const rectangle &cliprect, nv2avertex_t &_v1, nv2avertex_t &_v2, nv2avertex_t &_v3)
2790 {
2791 float areax2;
2792 NV2A_GL_CULL_FACE face = NV2A_GL_CULL_FACE::FRONT;
2793
2794 if (backface_culling_enabled == false)
2795 return rasterizer.render_triangle(cliprect, render_spans_callback, (int)VERTEX_PARAMETER::ALL, _v1, _v2, _v3);
2796 if (backface_culling_culled == NV2A_GL_CULL_FACE::FRONT_AND_BACK)
2797 {
2798 triangles_bfculled++;
2799 return 0;
2800 }
2801 areax2 = _v1.x*(_v2.y - _v3.y) + _v2.x*(_v3.y - _v1.y) + _v3.x*(_v1.y - _v2.y);
2802 if (areax2 == 0.0f) {
2803 triangles_bfculled++;
2804 return 0;
2805 }
2806 if (backface_culling_winding == NV2A_GL_FRONT_FACE::CCW)
2807 {
2808 if (-areax2 <= 0)
2809 face = NV2A_GL_CULL_FACE::BACK;
2810 else
2811 face = NV2A_GL_CULL_FACE::FRONT;
2812 } else
2813 {
2814 if (areax2 <= 0)
2815 face = NV2A_GL_CULL_FACE::BACK;
2816 else
2817 face = NV2A_GL_CULL_FACE::FRONT;
2818 }
2819 if (face == NV2A_GL_CULL_FACE::FRONT)
2820 if (backface_culling_culled == NV2A_GL_CULL_FACE::BACK)
2821 return rasterizer.render_triangle(cliprect, render_spans_callback, (int)VERTEX_PARAMETER::ALL, _v1, _v2, _v3);
2822 if (face == NV2A_GL_CULL_FACE::BACK)
2823 if (backface_culling_culled == NV2A_GL_CULL_FACE::FRONT)
2824 return rasterizer.render_triangle(cliprect, render_spans_callback, (int)VERTEX_PARAMETER::ALL, _v1, _v2, _v3);
2825 triangles_bfculled++;
2826 return 0;
2827 }
2828
2829 int nv2a_renderer::clip_triangle_w(nv2avertex_t vi[3], nv2avertex_t *vo)
2830 {
2831 int idx_prev, idx_curr;
2832 int neg_prev, neg_curr;
2833 double tfactor;
2834 int idx;
2835 const double wthreshold = 0.000001;
2836
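	// clip the triangle against the plane w = wthreshold, walking the edges Sutherland-Hodgman
	// style; attributes are interpolated linearly and at most 4 output vertices are produced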
2837 idx_prev = 2;
2838 idx_curr = 0;
2839 idx = 0;
2840 neg_prev = vi[idx_prev].w < wthreshold ? 1 : 0;
2841 while (idx_curr < 3)
2842 {
2843 neg_curr = vi[idx_curr].w < wthreshold ? 1 : 0;
2844 if (neg_curr ^ neg_prev)
2845 {
2846 tfactor = (wthreshold - vi[idx_prev].w) / (vi[idx_curr].w - vi[idx_prev].w);
2847 // compute values for the new intermediate point
2848 vo[idx].x = ((vi[idx_curr].x - vi[idx_prev].x) * tfactor) + vi[idx_prev].x;
2849 vo[idx].y = ((vi[idx_curr].y - vi[idx_prev].y) * tfactor) + vi[idx_prev].y;
2850 vo[idx].w = ((vi[idx_curr].w - vi[idx_prev].w) * tfactor) + vi[idx_prev].w;
2851 for (int n = 0; n < (int)VERTEX_PARAMETER::PARAM_Z; n++)
2852 vo[idx].p[n] = ((vi[idx_curr].p[n] - vi[idx_prev].p[n]) * tfactor) + vi[idx_prev].p[n];
2853 vo[idx].p[(int)VERTEX_PARAMETER::PARAM_Z] = ((vi[idx_curr].p[(int)VERTEX_PARAMETER::PARAM_Z] - vi[idx_prev].p[(int)VERTEX_PARAMETER::PARAM_Z]) * tfactor) + vi[idx_prev].p[(int)VERTEX_PARAMETER::PARAM_Z];
2854 vo[idx].p[(int)VERTEX_PARAMETER::PARAM_1W] = 1.0f / vo[idx].w;
2855 idx++;
2856 }
2857 if (neg_curr == 0)
2858 {
2859 vo[idx].x = vi[idx_curr].x;
2860 vo[idx].y = vi[idx_curr].y;
2861 vo[idx].w = vi[idx_curr].w;
2862 for (int n = 0; n < (int)VERTEX_PARAMETER::PARAM_Z; n++)
2863 vo[idx].p[n] = vi[idx_curr].p[n];
2864 vo[idx].p[(int)VERTEX_PARAMETER::PARAM_Z] = vi[idx_curr].p[(int)VERTEX_PARAMETER::PARAM_Z];
2865 vo[idx].p[(int)VERTEX_PARAMETER::PARAM_1W] = 1.0f / vo[idx].w;
2866 idx++;
2867 }
2868 neg_prev = neg_curr;
2869 idx_prev = idx_curr;
2870 idx_curr++;
2871 }
2872 return idx;
2873 }
2874
2875 uint32_t nv2a_renderer::render_triangle_clipping(const rectangle &cliprect, nv2avertex_t &_v1, nv2avertex_t &_v2, nv2avertex_t &_v3)
2876 {
2877 nv2avertex_t *vp[3];
2878 nv2avertex_t vi[3];
2879 nv2avertex_t vo[8];
2880 int nv;
2881 double c;
2882
2883 if ((_v1.w > 0) && (_v2.w > 0) && (_v3.w > 0))
2884 return render_triangle_culling(cliprect, _v1, _v2, _v3);
2885 if (enable_clipping_w == false)
2886 return 0;
2887 if ((_v1.w <= 0) && (_v2.w <= 0) && (_v3.w <= 0))
2888 return 0;
2889 // assign the elements of the pointer array
2890 vp[0] = &_v1;
2891 vp[1] = &_v2;
2892 vp[2] = &_v3;
2893 	// go back to the state before the perspective divide
2894 if (vertex_pipeline == 4)
2895 {
2896 for (int n = 0; n < 3; n++)
2897 {
2898 c = vp[n]->w;
2899 vi[n].w = c;
2900 vi[n].x = (vp[n]->x / (double)supersample_factor_x) * c;
2901 vi[n].y = (vp[n]->y / (double)supersample_factor_y) * c;
2902 for (int nn = 0; nn <= (int)VERTEX_PARAMETER::PARAM_Z; nn++)
2903 vi[n].p[nn] = vp[n]->p[nn] * c;
2904 }
2905 } else
2906 {
2907 for (int n = 0; n < 3; n++)
2908 {
2909 c = vp[n]->w;
2910 vi[n].w = c;
2911 			// remove perspective-correct interpolation
2912 for (int nn = 0; nn < (int)VERTEX_PARAMETER::PARAM_Z; nn++)
2913 vi[n].p[nn] = vp[n]->p[nn] * c;
2914 // remove supersample
2915 vi[n].x = (vp[n]->x / supersample_factor_x) + 0.53125f;
2916 vi[n].y = (vp[n]->y / supersample_factor_y) + 0.53125f;
2917 // remove translate
2918 vi[n].x = vi[n].x - matrix.translate[0];
2919 vi[n].y = vi[n].y - matrix.translate[1];
2920 vi[n].p[(int)VERTEX_PARAMETER::PARAM_Z] = vp[n]->p[(int)VERTEX_PARAMETER::PARAM_Z] - matrix.translate[2];
2921 // remove perspective divide
2922 vi[n].x = vi[n].x * c;
2923 vi[n].y = vi[n].y * c;
2924 vi[n].p[(int)VERTEX_PARAMETER::PARAM_Z] = vi[n].p[(int)VERTEX_PARAMETER::PARAM_Z] * c;
2925 }
2926 }
2927 // do the clipping
2928 nv = clip_triangle_w(vi, vo);
2929 // screen coordinates for the new points
2930 if (vertex_pipeline == 4)
2931 {
2932 for (int n = 0; n < nv; n++)
2933 {
2934 c = 1 / vo[n].w;
2935 vo[n].x = vo[n].x * (double)supersample_factor_x * c;
2936 vo[n].y = vo[n].y * (double)supersample_factor_y * c;
2937 for (int nn = 0; nn <= (int)VERTEX_PARAMETER::PARAM_Z; nn++)
2938 vo[n].p[nn] = vo[n].p[nn] * c;
2939 }
2940 } else
2941 {
2942 for (int n = 0; n < nv; n++)
2943 {
2944 c = 1 / vo[n].w;
2945 // apply perspective divide
2946 vo[n].x = vo[n].x * c;
2947 vo[n].y = vo[n].y * c;
2948 vo[n].p[(int)VERTEX_PARAMETER::PARAM_Z] = vo[n].p[(int)VERTEX_PARAMETER::PARAM_Z] * c;
2949 // apply translate
2950 vo[n].x = vo[n].x + matrix.translate[0];
2951 vo[n].y = vo[n].y + matrix.translate[1];
2952 vo[n].p[(int)VERTEX_PARAMETER::PARAM_Z] = vo[n].p[(int)VERTEX_PARAMETER::PARAM_Z] + matrix.translate[2];
2953 // apply supersample
2954 vo[n].x = (vo[n].x - 0.53125f) * supersample_factor_x;
2955 vo[n].y = (vo[n].y - 0.53125f) * supersample_factor_y;
2956 			// apply perspective-correct interpolation
2957 for (int nn = 0; nn < (int)VERTEX_PARAMETER::PARAM_Z; nn++)
2958 vo[n].p[nn] = vo[n].p[nn] * c;
2959 }
2960 }
2961 for (int n = 1; n <= (nv - 2); n++)
2962 render_triangle_culling(cliprect, vo[0], vo[n], vo[n + 1]);
2963 return 0;
2964 }
2965
2966 void nv2a_renderer::assemble_primitive(int source, int count)
2967 {
2968 uint32_t pc = primitives_count;
2969 vertex_nv *v;
2970
2971 for (; count > 0; count--) {
2972 v = &vertex_software[source];
2973 if (primitive_type == NV2A_BEGIN_END::QUADS) {
2974 convert_vertices(v, vertex_xy + ((vertex_count + vertex_accumulated) & 1023));
2975 vertex_accumulated++;
2976 if (vertex_accumulated == 4) {
2977 primitives_count++;
2978 vertex_accumulated = 0;
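				// a quad is split into the two triangles (0,1,2) and (0,2,3)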
2979 render_triangle_clipping(limits_rendertarget, vertex_xy[vertex_count], vertex_xy[vertex_count + 1], vertex_xy[vertex_count + 2]);
2980 render_triangle_clipping(limits_rendertarget, vertex_xy[vertex_count], vertex_xy[vertex_count + 2], vertex_xy[vertex_count + 3]);
2981 vertex_count = (vertex_count + 4) & 1023;
2982 rasterizer.wait();
2983 }
2984 }
2985 else if (primitive_type == NV2A_BEGIN_END::TRIANGLES) {
2986 convert_vertices(v, vertex_xy + ((vertex_count + vertex_accumulated) & 1023));
2987 vertex_accumulated++;
2988 if (vertex_accumulated == 3) {
2989 primitives_count++;
2990 vertex_accumulated = 0;
2991 render_triangle_clipping(limits_rendertarget, vertex_xy[vertex_count], vertex_xy[(vertex_count + 1) & 1023], vertex_xy[(vertex_count + 2) & 1023]); // 4 rgba, 4 texture units 2 uv
2992 vertex_count = (vertex_count + 3) & 1023;
2993 rasterizer.wait();
2994 }
2995 }
2996 else if (primitive_type == NV2A_BEGIN_END::TRIANGLE_FAN) {
2997 if (vertex_accumulated == 0)
2998 {
2999 convert_vertices(v, vertex_xy + 1024);
3000 vertex_accumulated = 1;
3001 }
3002 else if (vertex_accumulated == 1)
3003 {
3004 convert_vertices(v, vertex_xy);
3005 vertex_accumulated = 2;
3006 vertex_count = 1;
3007 }
3008 else
3009 {
3010 primitives_count++;
3011 // if software sends the vertices 0 1 2 3 4 5 6
3012 // hardware will draw triangles made by (0,1,2) (0,2,3) (0,3,4) (0,4,5) (0,5,6)
3013 convert_vertices(v, vertex_xy + vertex_count);
3014 render_triangle_clipping(limits_rendertarget, vertex_xy[1024], vertex_xy[(vertex_count - 1) & 1023], vertex_xy[vertex_count]);
3015 vertex_count = (vertex_count + 1) & 1023;
3016 rasterizer.wait();
3017 }
3018 }
3019 else if (primitive_type == NV2A_BEGIN_END::TRIANGLE_STRIP) {
3020 if (vertex_accumulated == 0)
3021 {
3022 convert_vertices(v, vertex_xy);
3023 vertex_accumulated = 1;
3024 }
3025 else if (vertex_accumulated == 1)
3026 {
3027 convert_vertices(v, vertex_xy + 1);
3028 vertex_accumulated = 2;
3029 vertex_count = 2;
3030 }
3031 else
3032 {
3033 primitives_count++;
3034 // if software sends the vertices 0 1 2 3 4 5 6
3035 // hardware will draw triangles made by (0,1,2) (1,3,2) (2,3,4) (3,5,4) (4,5,6)
3036 convert_vertices(v, vertex_xy + vertex_count);
3037 if ((vertex_count & 1) == 0)
3038 render_triangle_clipping(limits_rendertarget, vertex_xy[(vertex_count - 2) & 1023], vertex_xy[(vertex_count - 1) & 1023], vertex_xy[vertex_count]);
3039 else
3040 render_triangle_clipping(limits_rendertarget, vertex_xy[(vertex_count - 2) & 1023], vertex_xy[vertex_count], vertex_xy[(vertex_count - 1) & 1023]);
3041 vertex_count = (vertex_count + 1) & 1023;
3042 rasterizer.wait();
3043 }
3044 }
3045 else if (primitive_type == NV2A_BEGIN_END::QUAD_STRIP) {
3046 if (vertex_accumulated == 0)
3047 {
3048 convert_vertices(v, vertex_xy);
3049 vertex_accumulated = 1;
3050 }
3051 else if (vertex_accumulated == 1)
3052 {
3053 convert_vertices(v, vertex_xy + 1);
3054 vertex_accumulated = 2;
3055 vertex_count = 0;
3056 }
3057 else
3058 {
3059 convert_vertices(v, vertex_xy + ((vertex_count + vertex_accumulated) & 1023));
3060 vertex_accumulated++;
3061 if (vertex_accumulated == 4)
3062 {
3063 primitives_count++;
3064 // if software sends the vertices 0 1 2 3 4 5 6 7
3065 // hardware will draw triangles made by (0,1,2) (2,1,3) (2,3,4) (4,3,5) (4,5,6) (6,5,7)
3066 render_triangle_clipping(limits_rendertarget, vertex_xy[vertex_count], vertex_xy[(vertex_count + 1) & 1023], vertex_xy[(vertex_count + 2) & 1023]);
3067 render_triangle_clipping(limits_rendertarget, vertex_xy[(vertex_count + 2) & 1023], vertex_xy[(vertex_count + 1) & 1023], vertex_xy[(vertex_count + 3) & 1023]);
3068 vertex_accumulated = 2;
3069 vertex_count = (vertex_count + 2) & 1023;
3070 rasterizer.wait();
3071 }
3072 }
3073 }
3074 else {
3075 if (vertex_count == 0)
3076 machine().logerror("Unsupported primitive %d\n", int(primitive_type));
3077 vertex_count++;
3078 }
3079 source = (source + 1) & 1023;
3080 }
3081 primitives_total_count += primitives_count - pc;
3082 }
3083
3084 void nv2a_renderer::process_persistent_vertex()
3085 {
3086 memcpy(&vertex_software[1025], &persistvertexattr, sizeof(persistvertexattr));
3087 assemble_primitive(1025, 1);
3088 }
3089
3090 void nv2a_renderer::compute_limits_rendertarget(uint32_t chanel, uint32_t subchannel)
3091 {
3092 uint32_t data;
3093 int x, w;
3094 int y, h;
3095
3096 data = channel[chanel][subchannel].object.method[0x0200 / 4];
3097 x = data & 0xffff;
3098 w = (data >> 16) & 0xffff;
3099 x = x*supersample_factor_x;
3100 w = w*supersample_factor_x;
3101 limits_rendertarget.setx(x, x + w - 1);
3102 data = channel[chanel][subchannel].object.method[0x0204 / 4];
3103 y = data & 0xffff;
3104 h = (data >> 16) & 0xffff;
3105 y = y*supersample_factor_y;
3106 h = h*supersample_factor_y;
3107 limits_rendertarget.sety(y, y + h - 1);
3108 }
3109
3110 void nv2a_renderer::compute_size_rendertarget(uint32_t chanel, uint32_t subchannel)
3111 {
3112 size_rendertarget = pitch_rendertarget*(limits_rendertarget.bottom() + 1);
3113 size_depthbuffer = pitch_depthbuffer*(limits_rendertarget.bottom() + 1);
3114 }
3115
3116 int nv2a_renderer::execute_method(address_space &space, uint32_t chanel, uint32_t subchannel, uint32_t method, uint32_t address, int &countlen)
3117 {
3118 uint32_t data;
3119
3120 data = space.read_dword(address);
3121 channel[chanel][subchannel].object.method[method / 4] = data;
3122 #ifdef LOG_NV2A
3123 //printf("A:%08X CH=%02d SCH=%02d MTHD:%08X D:%08X\n\r",address,chanel,subchannel,maddress,data);
3124 #endif
3125 if (channel[chanel][subchannel].object.objclass == 0x97)
3126 return execute_method_3d(space, chanel, subchannel, method, address, data, countlen);
3127 if (channel[chanel][subchannel].object.objclass == 0x39) // 0180
3128 return execute_method_m2mf(space, chanel, subchannel, method, address, data, countlen);
3129 if (channel[chanel][subchannel].object.objclass == 0x62) // 0184 0188
3130 return execute_method_surf2d(space, chanel, subchannel, method, address, data, countlen);
3131 if (channel[chanel][subchannel].object.objclass == 0x9f) // 019c 02fc
3132 return execute_method_blit(space, chanel, subchannel, method, address, data, countlen);
3133 return 0;
3134 }
3135
3136 int nv2a_renderer::execute_method_3d(address_space& space, uint32_t chanel, uint32_t subchannel, uint32_t maddress, uint32_t address, uint32_t data, int &countlen)
3137 {
3138 if ((chanel != 0) || (subchannel != 0))
3139 return 0;
3140 if (maddress == 0x17fc) {
3141 #if 0 // useful while debugging to see what coordinates have been used
3142 static int debugvc = 0;
3143 if (debugvc)
3144 if (data == 0)
3145 {
3146 printf("%d %d\n\r", (int)primitive_type, vertex_first);
3147 for (int n = 0; n < vertex_first; n++)
3148 {
3149 if (indexesleft_count > 0)
3150 printf("%d i:%d ", n, vertex_indexes[n]);
3151 else
3152 printf("%d ", n);
3153 printf("X:%f Y:%f Z:%f W:%f x:%f y:%f\n\r", vertex_software[n].attribute[0].fv[0], vertex_software[n].attribute[0].fv[1], vertex_software[n].attribute[0].fv[2], vertex_software[n].attribute[0].fv[3], vertex_xy[n].x, vertex_xy[n].y);
3154 }
3155 }
3156 #endif
3157 vertex_count = 0;
3158 vertex_first = 0;
3159 vertex_accumulated = 0;
3160 indexesleft_count = 0;
3161 indexesleft_first = 0;
3162 primitives_count = 0;
3163 primitive_type = (NV2A_BEGIN_END)data;
3164 if (data == 0)
3165 primitives_batches_count++;
3166 else
3167 {
3168 if (((channel[chanel][subchannel].object.method[0x1e60 / 4] & 7) > 0) && (combiner.used != 0))
3169 render_spans_callback = nv2a_rasterizer::render_delegate(&nv2a_renderer::render_register_combiners, this);
3170 else if (texture[0].enabled)
3171 render_spans_callback = nv2a_rasterizer::render_delegate(&nv2a_renderer::render_texture_simple, this);
3172 else
3173 render_spans_callback = nv2a_rasterizer::render_delegate(&nv2a_renderer::render_color, this);
3174 }
3175 countlen--;
3176 }
3177 if (maddress == 0x1810) {
3178 // draw vertices
3179 int offset, count;
3180 uint32_t n;
3181
3182 offset = data & 0xffffff;
3183 count = (data >> 24) & 0xff;
3184 #ifdef LOG_NV2A
3185 printf("vertex %d %d\n\r", offset, count);
3186 #endif
3187 for (n = 0; n <= count; n++) {
3188 read_vertices_0x1810(space, vertex_first, n + offset, 1);
3189 assemble_primitive(vertex_first, 1);
3190 vertex_first = (vertex_first + 1) & 1023;
3191 }
3192 countlen--;
3193 }
3194 if ((maddress == 0x1800) || (maddress == 0x1808)) {
3195 int mult;
3196
3197 if (maddress == 0x1800)
3198 mult = 2;
3199 else
3200 mult = 1;
3201 // vertices are selected from the vertex buffer using an array of indexes
3202 	// each dword after 1800 contains two 16 bit index values to select the vertices
3203 	// each dword after 1808 contains a 32 bit index value to select the vertices
3204 while (countlen > 0) {
3205 int n;
3206
3207 data = space.read_dword(address);
3208 n = indexesleft_first + indexesleft_count;
3209 if (mult == 2) {
3210 vertex_indexes[n & 1023] = data & 0xffff;
3211 vertex_indexes[(n + 1) & 1023] = (data >> 16) & 0xffff;
3212 indexesleft_count = indexesleft_count + 2;
3213 }
3214 else {
3215 vertex_indexes[n & 1023] = data;
3216 indexesleft_count = indexesleft_count + 1;
3217 }
3218 address += 4;
3219 countlen--;
3220 read_vertices_0x180x(space, vertex_first, address, mult);
3221 assemble_primitive(vertex_first, mult);
3222 vertex_first = (vertex_first + mult) & 1023;
3223 }
3224 }
3225 if (maddress == 0x1818) {
3226 if (countlen == 0)
3227 machine().logerror("Method 0x1818 with 0 vertices\n");
3228 // vertices are taken from the next words, not from a vertex buffer
3229 // first send primitive type with 17fc
3230 // then countlen number of dwords with 1818
3231 // end with 17fc primitive type 0
3232 	// at 1760, 16 words specify the vertex format: for each possible vertex attribute, the number of components (0 = not present) and the type of each
3233 while (countlen > 0) {
3234 int c;
3235
3236 c = read_vertices_0x1818(space, vertex_first, address, 1);
3237 countlen = countlen - c;
3238 if (countlen < 0) {
3239 machine().logerror("Method 0x1818 missing %d words\n", -countlen);
3240 countlen = 0;
3241 break;
3242 }
3243 address = address + c * 4;
3244 assemble_primitive(vertex_first, 1);
3245 vertex_first = (vertex_first + 1) & 1023;
3246 }
3247 }
3248 if ((maddress >= 0x1880) && (maddress < 0x1900))
3249 {
3250 int v = maddress - 0x1880; // 16 couples,2 float per couple,16*2*4=128
3251 int attr = v >> 3;
3252 int comp = (v >> 2) & 1;
3253
3254 persistvertexattr.attribute[attr].iv[comp] = data;
3255 if (comp == 1)
3256 {
3257 persistvertexattr.attribute[attr].fv[2] = 0;
3258 persistvertexattr.attribute[attr].fv[3] = 1;
3259 if (attr == 0)
3260 process_persistent_vertex();
3261 }
3262 }
3263 if ((maddress >= 0x1900) && (maddress < 0x1940))
3264 {
3265 int v = maddress - 0x1900; // 16 dwords,2 values per dword
3266 int attr = v >> 2;
3267 uint16_t d1 = data & 0xffff;
3268 uint16_t d2 = data >> 16;
3269
3270 persistvertexattr.attribute[attr].fv[0] = (float)((int16_t)d1);
3271 persistvertexattr.attribute[attr].fv[1] = (float)((int16_t)d2);
3272 persistvertexattr.attribute[attr].fv[2] = 0;
3273 persistvertexattr.attribute[attr].fv[3] = 1;
3274 if (attr == 0)
3275 process_persistent_vertex();
3276 }
3277 if ((maddress >= 0x1940) && (maddress < 0x1980))
3278 {
3279 int v = maddress - 0x1940; // 16 dwords,4 values per dword
3280 int attr = v >> 2;
3281 uint8_t d1 = data & 255;
3282 uint8_t d2 = (data >> 8) & 255;
3283 uint8_t d3 = (data >> 16) & 255;
3284 uint8_t d4 = data >> 24;
3285
3286 		// when sending a color, the dword layout is aabbggrr
3287 persistvertexattr.attribute[attr].fv[0] = (float)d1 / 255.0;
3288 persistvertexattr.attribute[attr].fv[1] = (float)d2 / 255.0;
3289 persistvertexattr.attribute[attr].fv[2] = (float)d3 / 255.0;
3290 persistvertexattr.attribute[attr].fv[3] = (float)d4 / 255.0;
3291 if (attr == 0)
3292 process_persistent_vertex();
3293 }
3294 if ((maddress >= 0x1980) && (maddress < 0x1a00))
3295 {
3296 int v = maddress - 0x1980; // 16 couples,4 values per couple,16*2*4=128
3297 int attr = v >> 3;
3298 int comp = (v >> 1) & 2;
3299 uint16_t d1 = data & 0xffff;
3300 uint16_t d2 = data >> 16;
3301
3302 persistvertexattr.attribute[attr].fv[comp] = (float)((int16_t)d1);
3303 persistvertexattr.attribute[attr].fv[comp+1] = (float)((int16_t)d2);
3304 if (comp == 2)
3305 if (attr == 0)
3306 process_persistent_vertex();
3307 }
3308 if ((maddress >= 0x1a00) && (maddress < 0x1b00))
3309 {
3310 int v = maddress - 0x1a00; // 16 groups,4 float per group
3311 int attr = v >> 4;
3312 int comp = (v >> 2) & 3;
3313
3314 persistvertexattr.attribute[attr].iv[comp] = data;
3315 if (comp == 3)
3316 if (attr == 0)
3317 process_persistent_vertex();
3318 }
3319 if ((maddress >= 0x1518) && (maddress < 0x1528))
3320 {
3321 int v = maddress - 0x1518;
3322 int comp = v >> 2;
3323
3324 persistvertexattr.attribute[(int)NV2A_VERTEX_ATTR::POS].iv[comp] = data;
3325 if (comp == 3)
3326 process_persistent_vertex();
3327 }
3328 else if ((maddress >= 0x1500) && (maddress < 0x1590))
3329 {
3330 machine().logerror("Yet unsupported method %x\n\r", maddress);
3331 }
3332 if ((maddress >= 0x1720) && (maddress < 0x1760)) {
3333 int bit = maddress / 4 - 0x1720 / 4;
3334
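		// bit 31 selects which DMA object the 28-bit offset is relative to: the one set with
		// method 0x01a0 (dma_offset[7]) or the one set with method 0x019c (dma_offset[6])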
3335 if (data & 0x80000000)
3336 vertexbuffer.address[bit] = (data & 0x0fffffff) + dma_offset[7];
3337 else
3338 vertexbuffer.address[bit] = (data & 0x0fffffff) + dma_offset[6];
3339 }
3340 if ((maddress >= 0x1760) && (maddress < 0x17A0)) {
3341 int bit = maddress / 4 - 0x1760 / 4;
3342
3343 vertexbuffer.type[bit] = data & 255;
3344 vertexbuffer.stride[bit] = (data >> 8) & 255;
3345 switch (vertexbuffer.type[bit])
3346 {
3347 case 0x02: // none
3348 vertexbuffer.words[bit] = 0;
3349 break;
3350 case 0x12: // float1
3351 vertexbuffer.words[bit] = 1;
3352 break;
3353 case 0x16: // normpacked3
3354 vertexbuffer.words[bit] = 1;
3355 break;
3356 case 0x22: // float2
3357 vertexbuffer.words[bit] = 2;
3358 break;
3359 case 0x32: // float3
3360 vertexbuffer.words[bit] = 3;
3361 break;
3362 case 0x40: // d3dcolor
3363 vertexbuffer.words[bit] = 1;
3364 break;
3365 case 0x42: // float4
3366 vertexbuffer.words[bit] = 4;
3367 break;
3368 default:
3369 machine().logerror("Yet unsupported vertex data type %x\n\r", vertexbuffer.type[bit]);
3370 vertexbuffer.words[bit] = 0;
3371 }
3372 if (vertexbuffer.words[bit] > 0)
3373 vertexbuffer.enabled |= (1 << bit);
3374 else
3375 vertexbuffer.enabled &= ~(1 << bit);
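		// recompute the per-attribute dword offsets inside a vertex as a running sum of the enabled
		// attribute sizes; offset[16] becomes the total dword size of a vertex, used by method 0x1818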
3376 vertexbuffer.offset[0] = 0;
3377 for (int n = bit + 1; n <= 16; n++) {
3378 if ((vertexbuffer.enabled & (1 << (n - 1))) != 0)
3379 vertexbuffer.offset[n] = vertexbuffer.offset[n - 1] + vertexbuffer.words[n - 1];
3380 else
3381 vertexbuffer.offset[n] = vertexbuffer.offset[n - 1];
3382 }
3383 countlen--;
3384 }
3385 if ((maddress == 0x1d6c) || (maddress == 0x1a4))
3386 countlen--;
3387 if (maddress == 0x0308) {
3388 backface_culling_enabled = data != 0 ? true : false;
3389 }
3390 if (maddress == 0x03a0) {
3391 backface_culling_winding = (NV2A_GL_FRONT_FACE)data;
3392 }
3393 if (maddress == 0x039c) {
3394 backface_culling_culled = (NV2A_GL_CULL_FACE)data;
3395 }
3396 if (maddress == 0x0180) {
3397 geforce_read_dma_object(data, dma_offset[0], dma_size[0]);
3398 }
3399 if (maddress == 0x0184) {
3400 geforce_read_dma_object(data, dma_offset[1], dma_size[1]);
3401 }
3402 if (maddress == 0x0188) {
3403 geforce_read_dma_object(data, dma_offset[2], dma_size[2]);
3404 }
3405 if (maddress == 0x0190) {
3406 geforce_read_dma_object(data, dma_offset[3], dma_size[3]);
3407 }
3408 if (maddress == 0x0194) {
3409 geforce_read_dma_object(data, dma_offset[4], dma_size[4]);
3410 }
3411 if (maddress == 0x0198) {
3412 geforce_read_dma_object(data, dma_offset[5], dma_size[5]);
3413 }
3414 if (maddress == 0x019c) {
3415 geforce_read_dma_object(data, dma_offset[6], dma_size[6]);
3416 }
3417 if (maddress == 0x01a0) {
3418 geforce_read_dma_object(data, dma_offset[7], dma_size[7]);
3419 }
3420 if (maddress == 0x01a4) {
3421 geforce_read_dma_object(data, dma_offset[8], dma_size[8]);
3422 }
3423 if (maddress == 0x01a8) {
3424 geforce_read_dma_object(data, dma_offset[9], dma_size[9]);
3425 }
3426 if (maddress == 0x1d70) {
3427 		// with 1d70 write the value at offset [1d6c] inside dma object [1a4]
3428 uint32_t offset, base;
3429 uint32_t dmahand, dmaoff, smasiz;
3430
3431 offset = channel[chanel][subchannel].object.method[0x1d6c / 4];
3432 dmahand = channel[chanel][subchannel].object.method[0x1a4 / 4];
3433 geforce_read_dma_object(dmahand, dmaoff, smasiz);
3434 base = dmaoff;
3435 space.write_dword(base + offset, data);
3436 // software expects to find the parameter of this method at pgraph offset b10
3437 pgraph[0xb10 / 4] = data << 2;
3438 countlen--;
3439 }
3440 if (maddress == 0x1d7c) {
3441 antialias_control = data;
3442 compute_supersample_factors(supersample_factor_x, supersample_factor_y);
3443 compute_limits_rendertarget(chanel, subchannel);
3444 countlen--;
3445 }
3446 if (maddress == 0x1d98) {
3447 int x, w;
3448
3449 x = data & 0xffff;
3450 w = (data >> 16) & 0xffff;
3451 clear_rendertarget.setx(x, w);
3452 countlen--;
3453 }
3454 if (maddress == 0x1d9c) {
3455 int y, h;
3456
3457 y = data & 0xffff;
3458 h = (data >> 16) & 0xffff;
3459 clear_rendertarget.sety(y, h);
3460 countlen--;
3461 }
3462 if (maddress == 0x1d94) {
3463 // possible buffers: color, depth, stencil
3464 // clear framebuffer
3465 clear_render_target((data >> 4) & 15, channel[chanel][subchannel].object.method[0x1d90 / 4]);
3466 clear_depth_buffer(data & 3, channel[chanel][subchannel].object.method[0x1d8c / 4]);
3467 countlen--;
3468 }
3469 if ((maddress >= 0x02c0) && (maddress < 0x2e0)) {
3470 int x, w, i;
3471
3472 i = (maddress - 0x2c0) / 4;
3473 x = data & 0xffff;
3474 w = (data >> 16) & 0xffff;
3475 clippingwindows[i].setx(x, x + w - 1);
3476 }
3477 if ((maddress >= 0x02e0) && (maddress < 0x300)) {
3478 int y, h, i;
3479
3480 i = (maddress - 0x2e0) / 4;
3481 y = data & 0xffff;
3482 h = (data >> 16) & 0xffff;
3483 clippingwindows[i].sety(y, y + h - 1);
3484 }
3485 if (maddress == 0x0200) {
3486 compute_limits_rendertarget(chanel, subchannel);
3487 compute_size_rendertarget(chanel, subchannel);
3488 }
3489 if (maddress == 0x0204) {
3490 compute_limits_rendertarget(chanel, subchannel);
3491 compute_size_rendertarget(chanel, subchannel);
3492 }
3493 if (maddress == 0x0208) {
3494 log2height_rendertarget = (data >> 24) & 255;
3495 log2width_rendertarget = (data >> 16) & 255;
3496 antialiasing_rendertarget = (data >> 12) & 15;
3497 type_rendertarget = (NV2A_RT_TYPE)((data >> 8) & 15);
3498 depthformat_rendertarget = (NV2A_RT_DEPTH_FORMAT)((data >> 4) & 15);
3499 colorformat_rendertarget = (NV2A_COLOR_FORMAT)((data >> 0) & 15);
3500 compute_supersample_factors(supersample_factor_x, supersample_factor_y);
3501 compute_limits_rendertarget(chanel, subchannel);
3502 compute_size_rendertarget(chanel, subchannel);
3503 /* for debugging
3504 if (limits_rendertarget.max_x == 1023)
3505 type_rendertarget = NV2A_RT_TYPE::LINEAR;
3506 */
3507 switch (colorformat_rendertarget) {
3508 case NV2A_COLOR_FORMAT::R5G6B5:
3509 bytespixel_rendertarget = 2;
3510 break;
3511 case NV2A_COLOR_FORMAT::X8R8G8B8_Z8R8G8B8:
3512 case NV2A_COLOR_FORMAT::X8R8G8B8_X8R8G8B8:
3513 case NV2A_COLOR_FORMAT::A8R8G8B8:
3514 bytespixel_rendertarget = 4;
3515 break;
3516 case NV2A_COLOR_FORMAT::B8:
3517 bytespixel_rendertarget = 1;
3518 break;
3519 default:
3520 machine().logerror("Unknown render target color format %d\n\r", int(colorformat_rendertarget));
3521 bytespixel_rendertarget = 4;
3522 break;
3523 }
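// dilatechose presumably holds precomputed dilate (swizzle) parameters indexed by
// (log2 width, log2 height); they are looked up here for use when the render target is
// addressed in swizzled rather than linear order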
3524 dilate_rendertarget = dilatechose[(log2width_rendertarget << 4) + log2height_rendertarget];
3525 }
3526 if (maddress == 0x020c) {
3527 pitch_rendertarget=data & 0xffff;
3528 pitch_depthbuffer=(data >> 16) & 0xffff;
3529 compute_size_rendertarget(chanel, subchannel);
3530 #ifdef LOG_NV2A
3531 printf("Pitch color %04X zbuffer %04X\n\r", pitch_rendertarget, pitch_depthbuffer);
3532 #endif
3533 countlen--;
3534 }
3535 if (maddress == 0x0100) {
3536 countlen--;
3537 if (data != 0) {
3538 #ifdef LOG_NV2A
3539 machine().logerror("Software method %04x\n", data);
3540 #endif
3541 pgraph[0x704 / 4] = 0x100 | (chanel << 20) | (subchannel << 16);
3542 pgraph[0x708 / 4] = data;
3543 pgraph[0x100 / 4] |= 1;
3544 pgraph[0x108 / 4] |= 1;
3545 if (update_interrupts() == true)
3546 irq_callback(1); // IRQ 3
3547 else
3548 irq_callback(0); // IRQ 3
3549 return 2;
3550 }
3551 else
3552 return 0;
3553 }
3554 if (maddress == 0x0130) {
3555 countlen--;
3556 if (enable_waitvblank == true)
3557 return 1; // block until next vblank
3558 else
3559 return 0;
3560 }
3561 if (maddress == 0x1d8c) {
3562 countlen--;
3563 // it is used to specify the clear value for the depth buffer (zbuffer)
3564 // but also as a parameter for interrupt routines
3565 pgraph[0x1a88 / 4] = data;
3566 }
3567 if (maddress == 0x1d90) {
3568 countlen--;
3569 // it is used to specify the clear value for the color buffer
3570 // but also as a parameter for interrupt routines
3571 pgraph[0x186c / 4] = data;
3572 }
3573 if (maddress == 0x0210) {
3574 // framebuffer offset
3575 old_rendertarget = rendertarget;
3576 // To see it with the image watch extension: @mem(0x000002d2263af060, UINT8, 4, 640, 480, 2560)
3577 rendertarget = (uint32_t *)direct_access_ptr(data);
3578 #ifdef LOG_NV2A
3579 printf("Render target at %08X\n\r", data);
3580 #endif
3581 countlen--;
3582 }
3583 if (maddress == 0x0214) {
3584 // zbuffer offset ?
3585 depthbuffer = (uint32_t *)direct_access_ptr(data);
3586 #ifdef LOG_NV2A
3587 printf("Depth buffer at %08X\n\r",data);
3588 #endif
3589 if ((data == 0) || (data > 0x7ffffffc))
3590 depth_write_enabled = false;
3591 else if (channel[chanel][subchannel].object.method[0x035c / 4] != 0)
3592 depth_write_enabled = true;
3593 else
3594 depth_write_enabled = false;
3595 countlen--;
3596 }
3597 if (maddress == 0x0300) {
3598 alpha_test_enabled = data != 0;
3599 }
3600 if (maddress == 0x033c) {
3601 alpha_func = (NV2A_COMPARISON_OP)data;
3602 }
3603 if (maddress == 0x0340) {
3604 alpha_reference = data;
3605 }
3606 if (maddress == 0x0304) {
3607 if (logical_operation_enabled)
3608 blending_enabled = false;
3609 else
3610 blending_enabled = data != 0;
3611 }
3612 if (maddress == 0x030c) {
3613 depth_test_enabled = data != 0;
3614 }
3615 if (maddress == 0x0354) {
3616 depth_function = (NV2A_COMPARISON_OP)data;
3617 }
3618 if (maddress == 0x0358) {
3619 //color_mask = data;
3620 if (data & 0x000000ff)
3621 data |= 0x000000ff;
3622 if (data & 0x0000ff00)
3623 data |= 0x0000ff00;
3624 if (data & 0x00ff0000)
3625 data |= 0x00ff0000;
3626 if (data & 0xff000000)
3627 data |= 0xff000000;
3628 color_mask = data;
3629 }
3630 if (maddress == 0x035c) {
3631 uint32_t g = channel[chanel][subchannel].object.method[0x0214 / 4];
3632 depth_write_enabled = data != 0;
3633 if ((g == 0) || (g > 0x7ffffffc))
3634 depth_write_enabled = false;
3635 }
3636 if (maddress == 0x032c) {
3637 stencil_test_enabled = data != 0;
3638 }
3639 if (maddress == 0x0364) {
3640 stencil_func = (NV2A_COMPARISON_OP)data;
3641 }
3642 if (maddress == 0x0368) {
3643 if (data > 255)
3644 data = 255;
3645 stencil_ref = data;
3646 }
3647 if (maddress == 0x036c) {
3648 stencil_mask = data;
3649 }
3650 if (maddress == 0x0370) {
3651 stencil_op_fail = (NV2A_STENCIL_OP)data;
3652 }
3653 if (maddress == 0x0374) {
3654 stencil_op_zfail = (NV2A_STENCIL_OP)data;
3655 }
3656 if (maddress == 0x0378) {
3657 stencil_op_zpass = (NV2A_STENCIL_OP)data;
3658 }
3659 if (maddress == 0x0344) {
3660 blend_function_source = (NV2A_BLEND_FACTOR)data;
3661 }
3662 if (maddress == 0x0348) {
3663 blend_function_destination = (NV2A_BLEND_FACTOR)data;
3664 }
3665 if (maddress == 0x034c) {
3666 blend_color = data;
3667 }
3668 if (maddress == 0x0350) {
3669 blend_equation = (NV2A_BLEND_EQUATION)data;
3670 }
3671 if (maddress == 0x0d40) {
3672 if (data != 0)
3673 blending_enabled = false;
3674 else
3675 blending_enabled = channel[chanel][subchannel].object.method[0x0304 / 4] != 0;
3676 logical_operation_enabled = data != 0;
3677 }
3678 if (maddress == 0x0d44) {
3679 logical_operation = (NV2A_LOGIC_OP)data;
3680 }
3681 // Texture Units
3682 if ((maddress >= 0x1b00) && (maddress < 0x1c00)) {
3683 int unit;//,off;
3684
3685 unit = (maddress >> 6) & 3;
3686 //off=maddress & 0xc0;
3687 maddress = maddress & ~0xc0;
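// methods 0x1b00-0x1bff are laid out as four 0x40-byte blocks, one per texture unit:
// bits 6-7 of the offset select the unit and are masked off above, so the comparisons
// below see the unit-0 method numbers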
3688 if (maddress == 0x1b00) {
3689 uint32_t offset;//,base;
3690 //uint32_t dmahand,dmaoff,dmasiz;
3691
3692 offset = data;
3693 texture[unit].buffer = direct_access_ptr(offset);
3694 /*if (dma0 != 0) {
3695 dmahand=channel[channel][subchannel].object.method[0x184/4];
3696 geforce_read_dma_object(dmahand,dmaoff,dmasiz);
3697 } else if (dma1 != 0) {
3698 dmahand=channel[channel][subchannel].object.method[0x188/4];
3699 geforce_read_dma_object(dmahand,dmaoff,dmasiz);
3700 }*/
3701 }
3702 if (maddress == 0x1b04) {
3703 int basesizeu, basesizev, basesizew, format;
3704 bool rectangle;
3705
3706 texture[unit].dma0 = (data >> 0) & 1;
3707 texture[unit].dma1 = (data >> 1) & 1;
3708 texture[unit].cubic = (data >> 2) & 1;
3709 texture[unit].noborder = (data >> 3) & 1;
3710 texture[unit].dims = (data >> 4) & 15;
3711 texture[unit].mipmap = (data >> 19) & 1;
3712 format = (data >> 8) & 255;
3713 basesizeu = (data >> 20) & 15;
3714 basesizev = (data >> 24) & 15;
3715 basesizew = (data >> 28) & 15;
3716 texture[unit].sizes = 1 << basesizeu;
3717 texture[unit].sizet = 1 << basesizev;
3718 texture[unit].sizer = 1 << basesizew;
3719 texture[unit].dilate = dilatechose[(basesizeu << 4) + basesizev];
3720 texture[unit].format = (NV2A_TEX_FORMAT)format;
3721 switch (texture[unit].format)
3722 {
3723 case NV2A_TEX_FORMAT::A1R5G5B5_RECT:
3724 case NV2A_TEX_FORMAT::R5G6B5_RECT:
3725 case NV2A_TEX_FORMAT::A8R8G8B8_RECT:
3726 case NV2A_TEX_FORMAT::DSDT8_RECT:
3727 case NV2A_TEX_FORMAT::A4R4G4B4_RECT:
3728 case NV2A_TEX_FORMAT::R8G8B8_RECT:
3729 case NV2A_TEX_FORMAT::A8L8_RECT:
3730 case NV2A_TEX_FORMAT::Z24_RECT:
3731 case NV2A_TEX_FORMAT::Z16_RECT:
3732 case NV2A_TEX_FORMAT::HILO16_RECT:
3733 case NV2A_TEX_FORMAT::SIGNED_HILO8_RECT:
3734 rectangle = true;
3735 break;
3736 default:
3737 rectangle = false;
3738 }
3739 texture[unit].rectangle = rectangle;
3740 if (debug_grab_texttype == format) {
3741 FILE *f;
3742 int written;
3743
3744 debug_grab_texttype = -1;
3745 f = fopen(debug_grab_textfile, "wb");
3746 if (f) {
3747 written = (int)fwrite(texture[unit].buffer, 1, texture[unit].sizes * texture[unit].sizet * 4, f); // element size 1 so the return value is the byte count logged below
3748 fclose(f);
3749 machine().logerror("Written %d bytes of texture to specified file\n", written);
3750 }
3751 else
3752 machine().logerror("Unable to save texture to specified file\n");
3753 }
3754 }
3755 if (maddress == 0x1b08) {
3756 texture[unit].addrmodes = (data >> 0) & 15;
3757 texture[unit].addrmodet = (data >> 8) & 15;
3758 texture[unit].addrmoder = (data >> 16) & 15;
3759 }
3760 if (maddress == 0x1b0c) {
3761 texture[unit].colorkey = (data >> 0) & 3;
3762 texture[unit].imagefield = (data >> 3) & 1;
3763 texture[unit].aniso = (data >> 4) & 3;
3764 texture[unit].mipmapmaxlod = (data >> 6) & 0xfff;
3765 texture[unit].mipmapminlod = (data >> 18) & 0xfff;
3766 // enable texture ?
3767 texture[unit].enabled = (data >> 30) & 3;
3768 }
3769 if (maddress == 0x1b10) {
3770 texture[unit].rectangle_pitch = data >> 16;
3771 }
3772 if (maddress == 0x1b1c) {
3773 texture[unit].rectheight = data & 0xffff;
3774 texture[unit].rectwidth = data >> 16;
3775 }
3776 countlen--;
3777 }
3778 if (maddress == 0x1e70) {
3779 texture[0].mode = data & 31;
3780 texture[1].mode = (data >> 5) & 31;
3781 texture[2].mode = (data >> 10) & 31;
3782 texture[3].mode = (data >> 15) & 31;
3783 }
3784 // projection matrix
3785 if ((maddress >= 0x0440) && (maddress < 0x0480)) {
3786 maddress = (maddress - 0x0440) / 4;
3787 *(uint32_t *)(&matrix.projection[maddress >> 2][maddress & 3]) = data;
3788 countlen--;
3789 }
3790 // modelview matrix
3791 if ((maddress >= 0x0480) && (maddress < 0x04c0)) {
3792 maddress = (maddress - 0x0480) / 4;
3793 /* the modelview matrix is obtained by direct3d by multiplying the world matrix and the view matrix
3794 modelview = world * view
3795 given a point in 3d space with coordinates x, y and z, to find the transformed coordinates
3796 first create a row vector with components (x,y,z,1) then multiply the vector by the matrix
3797 transformed = rowvector * matrix
3798 in direct3d the matrix is stored as the sequence (first digit row, second digit column)
3799 11 12 13 14
3800 21 22 23 24
3801 31 32 33 34
3802 41 42 43 44
3803 but it is sent transposed as the sequence
3804 11 21 31 41 12 22 32 42 13 23 33 43 14 24 34 44
3805 so in matrix.modelview[x][y] x is the column and y is the row of the direct3d matrix
3806 */
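// worked example (not from the source): maddress 6 is the seventh dword of the transposed
// stream, direct3d element _32 (row 3, column 2); it lands in matrix.modelview[1][2]
// since maddress >> 2 = 1 (column) and maddress & 3 = 2 (row)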
3807 *(uint32_t *)(&matrix.modelview[maddress >> 2][maddress & 3]) = data;
3808 countlen--;
3809 }
3810 // inverse modelview matrix
3811 if ((maddress >= 0x0580) && (maddress < 0x05c0)) {
3812 maddress = (maddress - 0x0580) / 4;
3813 *(uint32_t *)(&matrix.modelview_inverse[maddress >> 2][maddress & 3]) = data;
3814 countlen--;
3815 }
3816 // composite matrix
3817 if ((maddress >= 0x0680) && (maddress < 0x06c0)) {
3818 maddress = (maddress - 0x0680) / 4;
3819 /* the composite matrix is computed by direct3d by multiplying the
3820 world, view, projection and viewport matrices
3821 composite = world * view * projection * viewport
3822 the viewport matrix applies the viewport scale and offset
3823 */
3824 *(uint32_t *)(&matrix.composite[maddress >> 2][maddress & 3]) = data;
3825 countlen--;
3826 }
3827 // viewport translate
3828 if ((maddress >= 0x0a20) && (maddress < 0x0a30)) {
3829 maddress = (maddress - 0x0a20) / 4;
3830 *(uint32_t *)(&matrix.translate[maddress]) = data;
3831 // set corresponding vertex shader constant too
3832 vertexprogram.exec.c_constant[59].iv(maddress, data); // constant -37
3833 #ifdef LOG_NV2A
3834 if (maddress == 3)
3835 machine().logerror("viewport translate = {%f %f %f %f}\n", matrix.translate[0], matrix.translate[1], matrix.translate[2], matrix.translate[3]);
3836 #endif
3837 countlen--;
3838 }
3839 // viewport scale
3840 if ((maddress >= 0x0af0) && (maddress < 0x0b00)) {
3841 maddress = (maddress - 0x0af0) / 4;
3842 *(uint32_t *)(&matrix.scale[maddress]) = data;
3843 // set corresponding vertex shader constant too
3844 vertexprogram.exec.c_constant[58].iv(maddress, data); // constant -38
3845 #ifdef LOG_NV2A
3846 if (maddress == 3)
3847 machine().logerror("viewport scale = {%f %f %f %f}\n", matrix.scale[0], matrix.scale[1], matrix.scale[2], matrix.scale[3]);
3848 #endif
3849 countlen--;
3850 }
3851 // Vertex program (shader)
3852 if (maddress == 0x1e94) {
3853 /*if (data == 2)
3854 machine().logerror("Enabled vertex program\n");
3855 else if (data == 4)
3856 machine().logerror("Enabled fixed function pipeline\n");
3857 else if (data == 6)
3858 machine().logerror("Enabled both fixed function pipeline and vertex program ?\n");
3859 else
3860 machine().logerror("Unknown value %d to method 0x1e94\n",data);*/
3861 vertex_pipeline = data & 6;
3862 countlen--;
3863 }
3864 if (maddress == 0x1e9c) {
3865 //machine().logerror("VP_UPLOAD_FROM_ID %d\n",data);
3866 vertexprogram.upload_instruction_index = data;
3867 vertexprogram.upload_instruction_component = 0;
3868 countlen--;
3869 }
3870 if (maddress == 0x1ea0) {
3871 //machine().logerror("VP_START_FROM_ID %d\n",data);
3872 vertexprogram.instructions = vertexprogram.upload_instruction_index;
3873 vertexprogram.start_instruction = data;
3874 countlen--;
3875 }
3876 if (maddress == 0x1ea4) {
3877 //machine().logerror("VP_UPLOAD_CONST_ID %d\n",data);
3878 vertexprogram.upload_parameter_index = data;
3879 vertexprogram.upload_parameter_component = 0;
3880 countlen--;
3881 }
3882 if ((maddress >= 0x0b00) && (maddress < 0x0b80)) {
3883 //machine().logerror("VP_UPLOAD_INST\n");
3884 if (vertexprogram.upload_instruction_index < 256) {
3885 vertexprogram.exec.op[vertexprogram.upload_instruction_index].i[vertexprogram.upload_instruction_component] = data;
3886 vertexprogram.exec.op[vertexprogram.upload_instruction_index].modified |= (1 << vertexprogram.upload_instruction_component);
3887 }
3888 else
3889 machine().logerror("Need to increase size of vertexprogram.instruction to %d\n\r", vertexprogram.upload_instruction_index);
3890 if ((vertexprogram.upload_instruction_index < 256) && (vertexprogram.exec.op[vertexprogram.upload_instruction_index].modified == 15)) { // skip when the index overflows the table, as logged above
3891 vertexprogram.exec.op[vertexprogram.upload_instruction_index].modified = 0;
3892 vertexprogram.exec.decode_instruction(vertexprogram.upload_instruction_index);
3893 }
3894 vertexprogram.upload_instruction_component++;
3895 if (vertexprogram.upload_instruction_component >= 4) {
3896 vertexprogram.upload_instruction_component = 0;
3897 vertexprogram.upload_instruction_index++;
3898 }
3899 }
3900 if ((maddress >= 0x0b80) && (maddress < 0x0c00)) {
3901 //machine().logerror("VP_UPLOAD_CONST\n");
3902 if (vertexprogram.upload_parameter_index < 192) {
3903 vertexprogram.exec.c_constant[vertexprogram.upload_parameter_index].iv(vertexprogram.upload_parameter_component, data);
3904 }
3905 else
3906 machine().logerror("Need to increase size of vertexprogram.parameter to %d\n\r", vertexprogram.upload_parameter_index);
3907 vertexprogram.upload_parameter_component++;
3908 if (vertexprogram.upload_parameter_component >= 4) {
3909 #ifdef LOG_NV2A
3910 if ((vertexprogram.upload_parameter_index == 58) || (vertexprogram.upload_parameter_index == 59))
3911 machine().logerror("vp constant %d (%s) = {%f %f %f %f}\n", vertexprogram.upload_parameter_index,
3912 vertexprogram.upload_parameter_index == 58 ? "viewport scale" : "viewport translate",
3913 vertexprogram.exec.c_constant[vertexprogram.upload_parameter_index].fv[0],
3914 vertexprogram.exec.c_constant[vertexprogram.upload_parameter_index].fv[1],
3915 vertexprogram.exec.c_constant[vertexprogram.upload_parameter_index].fv[2],
3916 vertexprogram.exec.c_constant[vertexprogram.upload_parameter_index].fv[3]);
3917 #endif
3918 vertexprogram.upload_parameter_component = 0;
3919 vertexprogram.upload_parameter_index++;
3920 }
3921 }
3922 if ((maddress >= 0x1e80) && (maddress < 0x1e90)) {
3923 machine().logerror("Setting v0 vertex program input component %d to %f\n", (maddress - 0x1e80) / 4, *((float *)&data));
3924 }
3925 if (maddress == 0x1e90) {
3926 machine().logerror("Received explicit method to run vertex program\n");
3927 }
3928 if (maddress == 0x02a8) {
3929 fog_color = data;
3930 }
3931 // Register combiners
3932 if (maddress == 0x0288) {
3933 combiner.setup.final.mapin_rgb.D_input = (Combiner::InputRegister)(data & 15);
3934 combiner.setup.final.mapin_rgb.D_component = (data >> 4) & 1;
3935 combiner.setup.final.mapin_rgb.D_mapping = (Combiner::MapFunction)((data >> 5) & 7);
3936 combiner.setup.final.mapin_rgb.C_input = (Combiner::InputRegister)((data >> 8) & 15);
3937 combiner.setup.final.mapin_rgb.C_component = (data >> 12) & 1;
3938 combiner.setup.final.mapin_rgb.C_mapping = (Combiner::MapFunction)((data >> 13) & 7);
3939 combiner.setup.final.mapin_rgb.B_input = (Combiner::InputRegister)((data >> 16) & 15);
3940 combiner.setup.final.mapin_rgb.B_component = (data >> 20) & 1;
3941 combiner.setup.final.mapin_rgb.B_mapping = (Combiner::MapFunction)((data >> 21) & 7);
3942 combiner.setup.final.mapin_rgb.A_input = (Combiner::InputRegister)((data >> 24) & 15);
3943 combiner.setup.final.mapin_rgb.A_component = (data >> 28) & 1;
3944 combiner.setup.final.mapin_rgb.A_mapping = (Combiner::MapFunction)((data >> 29) & 7);
3945 countlen--;
3946 }
3947 if (maddress == 0x028c) {
3948 combiner.setup.final.color_sum_clamp = (data >> 7) & 1;
3949 combiner.setup.final.mapin_alpha.G_input = (Combiner::InputRegister)((data >> 8) & 15);
3950 combiner.setup.final.mapin_alpha.G_component = (data >> 12) & 1;
3951 combiner.setup.final.mapin_alpha.G_mapping = (Combiner::MapFunction)((data >> 13) & 7);
3952 combiner.setup.final.mapin_rgb.F_input = (Combiner::InputRegister)((data >> 16) & 15);
3953 combiner.setup.final.mapin_rgb.F_component = (data >> 20) & 1;
3954 combiner.setup.final.mapin_rgb.F_mapping = (Combiner::MapFunction)((data >> 21) & 7);
3955 combiner.setup.final.mapin_rgb.E_input = (Combiner::InputRegister)((data >> 24) & 15);
3956 combiner.setup.final.mapin_rgb.E_component = (data >> 28) & 1;
3957 combiner.setup.final.mapin_rgb.E_mapping = (Combiner::MapFunction)((data >> 29) & 7);
3958 countlen--;
3959 }
3960 if ((maddress >= 0x0260) && (maddress < 0x0280)) {
3961 int n;
3962
3963 n = (maddress - 0x0260) >> 2;
3964 combiner.setup.stage[n].mapin_alpha.D_input = (Combiner::InputRegister)(data & 15);
3965 combiner.setup.stage[n].mapin_alpha.D_component = (data >> 4) & 1;
3966 combiner.setup.stage[n].mapin_alpha.D_mapping = (Combiner::MapFunction)((data >> 5) & 7);
3967 combiner.setup.stage[n].mapin_alpha.C_input = (Combiner::InputRegister)((data >> 8) & 15);
3968 combiner.setup.stage[n].mapin_alpha.C_component = (data >> 12) & 1;
3969 combiner.setup.stage[n].mapin_alpha.C_mapping = (Combiner::MapFunction)((data >> 13) & 7);
3970 combiner.setup.stage[n].mapin_alpha.B_input = (Combiner::InputRegister)((data >> 16) & 15);
3971 combiner.setup.stage[n].mapin_alpha.B_component = (data >> 20) & 1;
3972 combiner.setup.stage[n].mapin_alpha.B_mapping = (Combiner::MapFunction)((data >> 21) & 7);
3973 combiner.setup.stage[n].mapin_alpha.A_input = (Combiner::InputRegister)((data >> 24) & 15);
3974 combiner.setup.stage[n].mapin_alpha.A_component = (data >> 28) & 1;
3975 combiner.setup.stage[n].mapin_alpha.A_mapping = (Combiner::MapFunction)((data >> 29) & 7);
3976 countlen--;
3977 }
3978 if ((maddress >= 0x0ac0) && (maddress < 0x0ae0)) {
3979 int n;
3980
3981 n = (maddress - 0x0ac0) >> 2;
3982 combiner.setup.stage[n].mapin_rgb.D_input = (Combiner::InputRegister)(data & 15);
3983 combiner.setup.stage[n].mapin_rgb.D_component = (data >> 4) & 1;
3984 combiner.setup.stage[n].mapin_rgb.D_mapping = (Combiner::MapFunction)((data >> 5) & 7);
3985 combiner.setup.stage[n].mapin_rgb.C_input = (Combiner::InputRegister)((data >> 8) & 15);
3986 combiner.setup.stage[n].mapin_rgb.C_component = (data >> 12) & 1;
3987 combiner.setup.stage[n].mapin_rgb.C_mapping = (Combiner::MapFunction)((data >> 13) & 7);
3988 combiner.setup.stage[n].mapin_rgb.B_input = (Combiner::InputRegister)((data >> 16) & 15);
3989 combiner.setup.stage[n].mapin_rgb.B_component = (data >> 20) & 1;
3990 combiner.setup.stage[n].mapin_rgb.B_mapping = (Combiner::MapFunction)((data >> 21) & 7);
3991 combiner.setup.stage[n].mapin_rgb.A_input = (Combiner::InputRegister)((data >> 24) & 15);
3992 combiner.setup.stage[n].mapin_rgb.A_component = (data >> 28) & 1;
3993 combiner.setup.stage[n].mapin_rgb.A_mapping = (Combiner::MapFunction)((data >> 29) & 7);
3994 countlen--;
3995 }
3996 if ((maddress >= 0x0a60) && (maddress < 0x0a80)) {
3997 int n;
3998
3999 n = (maddress - 0x0a60) >> 2;
4000 combiner_argb8_float(data, combiner.setup.stage[n].constantcolor0);
4001 countlen--;
4002 }
4003 if ((maddress >= 0x0a80) && (maddress < 0x0aa0)) {
4004 int n;
4005
4006 n = (maddress - 0x0a80) >> 2;
4007 combiner_argb8_float(data, combiner.setup.stage[n].constantcolor1);
4008 countlen--;
4009 }
4010 if ((maddress >= 0x0aa0) && (maddress < 0x0ac0)) {
4011 int n;
4012
4013 n = (maddress - 0x0aa0) >> 2;
4014 combiner.setup.stage[n].mapout_alpha.CD_output = (Combiner::InputRegister)(data & 15);
4015 combiner.setup.stage[n].mapout_alpha.AB_output = (Combiner::InputRegister)((data >> 4) & 15);
4016 combiner.setup.stage[n].mapout_alpha.SUM_output = (Combiner::InputRegister)((data >> 8) & 15);
4017 combiner.setup.stage[n].mapout_alpha.CD_dotproduct = (data >> 12) & 1;
4018 combiner.setup.stage[n].mapout_alpha.AB_dotproduct = (data >> 13) & 1;
4019 combiner.setup.stage[n].mapout_alpha.muxsum = (data >> 14) & 1;
4020 combiner.setup.stage[n].mapout_alpha.bias = (data >> 15) & 1;
4021 combiner.setup.stage[n].mapout_alpha.scale = (data >> 16) & 3;
4022 //combiner.=(data >> 27) & 7;
4023 countlen--;
4024 }
4025 if (maddress == 0x1e20) {
4026 combiner_argb8_float(data, combiner.setup.final.constantcolor0);
4027 countlen--;
4028 }
4029 if (maddress == 0x1e24) {
4030 combiner_argb8_float(data, combiner.setup.final.constantcolor1);
4031 countlen--;
4032 }
4033 if ((maddress >= 0x1e40) && (maddress < 0x1e60)) {
4034 int n;
4035
4036 n = (maddress - 0x1e40) >> 2;
4037 combiner.setup.stage[n].mapout_rgb.CD_output = (Combiner::InputRegister)(data & 15);
4038 combiner.setup.stage[n].mapout_rgb.AB_output = (Combiner::InputRegister)((data >> 4) & 15);
4039 combiner.setup.stage[n].mapout_rgb.SUM_output = (Combiner::InputRegister)((data >> 8) & 15);
4040 combiner.setup.stage[n].mapout_rgb.CD_dotproduct = (data >> 12) & 1;
4041 combiner.setup.stage[n].mapout_rgb.AB_dotproduct = (data >> 13) & 1;
4042 combiner.setup.stage[n].mapout_rgb.muxsum = (data >> 14) & 1;
4043 combiner.setup.stage[n].mapout_rgb.bias = (data >> 15) & 1;
4044 combiner.setup.stage[n].mapout_rgb.scale = (data >> 16) & 3;
4045 //combiner.=(data >> 27) & 7;
4046 countlen--;
4047 }
4048 if (maddress == 0x1e60) {
4049 combiner.setup.stages = data & 15;
4050 countlen--;
4051 }
4052 return 0;
4053 }
4054
4055 int nv2a_renderer::execute_method_m2mf(address_space &space, uint32_t chanel, uint32_t subchannel, uint32_t method, uint32_t address, uint32_t data, int &countlen)
4056 {
4057 if (method == 0x0180) {
4058 #ifdef LOG_NV2A
4059 machine().logerror("m2mf method 0180 notify\n");
4060 #endif
4061 geforce_read_dma_object(data, dma_offset[10], dma_size[10]);
4062 }
4063 return 0;
4064 }
4065
4066 int nv2a_renderer::execute_method_surf2d(address_space &space, uint32_t chanel, uint32_t subchannel, uint32_t method, uint32_t address, uint32_t data, int &countlen)
4067 {
4068 if (method == 0x0184) {
4069 #ifdef LOG_NV2A
4070 machine().logerror("surf2d method 0184 source\n");
4071 #endif
4072 geforce_read_dma_object(data, dma_offset[11], dma_size[11]);
4073 }
4074 if (method == 0x0188) {
4075 #ifdef LOG_NV2A
4076 machine().logerror("surf2d method 0188 destination\n");
4077 #endif
4078 geforce_read_dma_object(data, dma_offset[12], dma_size[12]);
4079 }
4080 if (method == 0x0300) {
4081 bitblit.format = data; // 0xa is a8r8g8b8
4082 }
4083 if (method == 0x0304) {
4084 bitblit.pitch_source = data & 0xffff;
4085 bitblit.pitch_destination = data >> 16;
4086 }
4087 if (method == 0x0308) {
4088 bitblit.source_address = dma_offset[11] + data;
4089 }
4090 if (method == 0x030c) {
4091 bitblit.destination_address = dma_offset[12] + data;
4092 }
4093 return 0;
4094 }
4095
4096 int nv2a_renderer::execute_method_blit(address_space &space, uint32_t chanel, uint32_t subchannel, uint32_t method, uint32_t address, uint32_t data, int &countlen)
4097 {
4098 if (method == 0x019c) {
4099 #ifdef LOG_NV2A
4100 machine().logerror("blit method 019c surface objecct handle %d\n", data); // set to 0x11
4101 #endif
4102 }
4103 if (method == 0x02fc) {
4104 #ifdef LOG_NV2A
4105 machine().logerror("blit method 02fc operation %d\n", data); // 3 is copy from source to destination
4106 #endif
4107 bitblit.op = data;
4108 }
4109 if (method == 0x0300) {
4110 bitblit.sourcex = data & 0xffff;
4111 bitblit.sourcey = data >> 16;
4112 }
4113 if (method == 0x0304) {
4114 bitblit.destinationx = data & 0xffff;
4115 bitblit.destinationy = data >> 16;
4116 }
4117 if (method == 0x0308) {
4118 bitblit.width = data & 0xffff;
4119 bitblit.heigth = data >> 16;
4120 surface_2d_blit();
4121 }
4122 return 0;
4123 }
4124
4125 void nv2a_renderer::surface_2d_blit()
4126 {
4127 int x, y;
4128 uint32_t *src, *dest;
4129 uint32_t *srcrow, *destrow;
4130
4131 if (bitblit.format != 0xa) {
4132 machine().logerror("Unsupported format %d in surface_2d_blit\n", bitblit.format);
4133 return;
4134 }
4135 srcrow = (uint32_t *)direct_access_ptr(bitblit.source_address + bitblit.pitch_source * bitblit.sourcey + bitblit.sourcex * 4);
4136 destrow = (uint32_t *)direct_access_ptr(bitblit.destination_address + bitblit.pitch_destination * bitblit.destinationy + bitblit.destinationx * 4);
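// pitches are in bytes; the >> 2 at the end of the row loop converts them to a stride in
// 32-bit pixels for the uint32_t row pointers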
4137 for (y = 0; y < bitblit.heigth; y++) {
4138 src = srcrow;
4139 dest = destrow;
4140 for (x = 0; x < bitblit.width; x++) {
4141 *dest = *src;
4142 dest++;
4143 src++;
4144 }
4145 srcrow += bitblit.pitch_source >> 2;
4146 destrow += bitblit.pitch_destination >> 2;
4147 }
4148 }
4149
4150 bool nv2a_renderer::toggle_register_combiners_usage()
4151 {
4152 combiner.used = 1 - combiner.used;
4153 return combiner.used != 0;
4154 }
4155
4156 bool nv2a_renderer::toggle_wait_vblank_support()
4157 {
4158 enable_waitvblank = !enable_waitvblank;
4159 return enable_waitvblank;
4160 }
4161
4162 bool nv2a_renderer::toggle_clipping_w_support()
4163 {
4164 enable_clipping_w = !enable_clipping_w;
4165 return enable_clipping_w;
4166 }
4167
4168 void nv2a_renderer::debug_grab_texture(int type, const char *filename)
4169 {
4170 debug_grab_texttype = type;
4171 if (debug_grab_textfile == nullptr)
4172 debug_grab_textfile = (char *)malloc(128);
4173 strncpy(debug_grab_textfile, filename, 127);
debug_grab_textfile[127] = 0; // ensure termination when filename has 127 or more characters
4174 }
4175
4176 void nv2a_renderer::debug_grab_vertex_program_slot(int slot, uint32_t *instruction)
4177 {
4178 if (slot >= 1024 / 4)
4179 return;
4180 instruction[0] = vertexprogram.exec.op[slot].i[0];
4181 instruction[1] = vertexprogram.exec.op[slot].i[1];
4182 instruction[2] = vertexprogram.exec.op[slot].i[2];
4183 instruction[3] = vertexprogram.exec.op[slot].i[3];
4184 }
4185
4186 void nv2a_renderer::combiner_argb8_float(uint32_t color, float reg[4])
4187 {
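// unpack a packed ARGB8 value (a in bits 24-31, r 16-23, g 8-15, b 0-7) into reg[] as
// r, g, b, a floats normalized to 0..1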
4188 reg[2] = (float)(color & 0xff) / 255.0f;
4189 reg[1] = (float)((color >> 8) & 0xff) / 255.0f;
4190 reg[0] = (float)((color >> 16) & 0xff) / 255.0f;
4191 reg[3] = (float)((color >> 24) & 0xff) / 255.0f;
4192 }
4193
4194 uint32_t nv2a_renderer::combiner_float_argb8(float reg[4])
4195 {
4196 uint32_t r, g, b, a;
4197
4198 a = reg[3] * 255.0f;
4199 b = reg[2] * 255.0f;
4200 g = reg[1] * 255.0f;
4201 r = reg[0] * 255.0f;
4202 return (a << 24) | (r << 16) | (g << 8) | b;
4203 }
4204
4205 float nv2a_renderer::combiner_map_input_select(int id, Combiner::InputRegister code, int index)
4206 {
4207 switch ((int)code) {
4208 case 0:
4209 default:
4210 return combiner.work[id].registers.zero[index];
4211 case 1:
4212 return combiner.work[id].registers.color0[index];
4213 case 2:
4214 return combiner.work[id].registers.color1[index];
4215 case 3:
4216 return combiner.work[id].registers.fogcolor[index];
4217 case 4:
4218 return combiner.work[id].registers.primarycolor[index];
4219 case 5:
4220 return combiner.work[id].registers.secondarycolor[index];
4221 case 8:
4222 return combiner.work[id].registers.texture0color[index];
4223 case 9:
4224 return combiner.work[id].registers.texture1color[index];
4225 case 10:
4226 return combiner.work[id].registers.texture2color[index];
4227 case 11:
4228 return combiner.work[id].registers.texture3color[index];
4229 case 12:
4230 return combiner.work[id].registers.spare0[index];
4231 case 13:
4232 return combiner.work[id].registers.spare1[index];
4233 case 14:
4234 return combiner.work[id].variables.sumclamp[index];
4235 case 15:
4236 return combiner.work[id].variables.EF[index];
4237 }
4238
4239 // never executed
4240 //return 0;
4241 }
4242
4243 float *nv2a_renderer::combiner_map_input_select_array(int id, Combiner::InputRegister code)
4244 {
4245 switch ((int)code) {
4246 case 0:
4247 default:
4248 return combiner.work[id].registers.zero;
4249 case 1:
4250 return combiner.work[id].registers.color0;
4251 case 2:
4252 return combiner.work[id].registers.color1;
4253 case 3:
4254 return combiner.work[id].registers.fogcolor;
4255 case 4:
4256 return combiner.work[id].registers.primarycolor;
4257 case 5:
4258 return combiner.work[id].registers.secondarycolor;
4259 case 8:
4260 return combiner.work[id].registers.texture0color;
4261 case 9:
4262 return combiner.work[id].registers.texture1color;
4263 case 10:
4264 return combiner.work[id].registers.texture2color;
4265 case 11:
4266 return combiner.work[id].registers.texture3color;
4267 case 12:
4268 return combiner.work[id].registers.spare0;
4269 case 13:
4270 return combiner.work[id].registers.spare1;
4271 case 14:
4272 return combiner.work[id].variables.sumclamp;
4273 case 15:
4274 return combiner.work[id].variables.EF;
4275 }
4276
4277 // never executed
4278 //return 0;
4279 }
4280
4281 float *nv2a_renderer::combiner_map_output_select_array(int id, Combiner::InputRegister code)
4282 {
4283 switch ((int)code) {
4284 case 0:
4285 return nullptr;
4286 case 1:
4287 return nullptr;
4288 case 2:
4289 return nullptr;
4290 case 3:
4291 return nullptr;
4292 case 4:
4293 return combiner.work[id].registers.primarycolor;
4294 case 5:
4295 return combiner.work[id].registers.secondarycolor;
4296 case 8:
4297 return combiner.work[id].registers.texture0color;
4298 case 9:
4299 return combiner.work[id].registers.texture1color;
4300 case 10:
4301 return combiner.work[id].registers.texture2color;
4302 case 11:
4303 return combiner.work[id].registers.texture3color;
4304 case 12:
4305 return combiner.work[id].registers.spare0;
4306 case 13:
4307 return combiner.work[id].registers.spare1;
4308 case 14:
4309 return nullptr;
4310 case 15:
4311 default:
4312 return nullptr;
4313 }
4314 }
4315
4316 float nv2a_renderer::combiner_map_input_function(Combiner::MapFunction code, float value)
4317 {
4318 switch ((int)code) {
4319 case 0: // unsigned identity
4320 return std::max(0.0f, value);
4321 case 1: // unsigned invert
4322 return 1.0f - std::min(std::max(value, 0.0f), 1.0f);
4323 case 2: // expand normal
4324 return 2.0f * std::max(0.0f, value) - 1.0f;
4325 case 3: // expand negate
4326 return -2.0f * std::max(0.0f, value) + 1.0f;
4327 case 4: // half bias normal
4328 return std::max(0.0f, value) - 0.5f;
4329 case 5: // half bias negate
4330 return -std::max(0.0f, value) + 0.5f;
4331 case 6: // signed identity
4332 return value;
4333 case 7: // signed negate
4334 default:
4335 return -value;
4336 }
4337
4338 // never executed
4339 //return 0;
4340 }
4341
4342 void nv2a_renderer::combiner_map_input_function_array(Combiner::MapFunction code, float *data)
4343 {
4344 switch ((int)code) {
4345 case 0:
4346 data[0] = std::max(0.0f, data[0]);
4347 data[1] = std::max(0.0f, data[1]);
4348 data[2] = std::max(0.0f, data[2]);
4349 break;
4350 case 1:
4351 data[0] = 1.0f - std::min(std::max(data[0], 0.0f), 1.0f);
4352 data[1] = 1.0f - std::min(std::max(data[1], 0.0f), 1.0f);
4353 data[2] = 1.0f - std::min(std::max(data[2], 0.0f), 1.0f);
4354 break;
4355 case 2:
4356 data[0] = 2.0f * std::max(0.0f, data[0]) - 1.0f;
4357 data[1] = 2.0f * std::max(0.0f, data[1]) - 1.0f;
4358 data[2] = 2.0f * std::max(0.0f, data[2]) - 1.0f;
4359 break;
4360 case 3:
4361 data[0] = -2.0f * std::max(0.0f, data[0]) + 1.0f;
4362 data[1] = -2.0f * std::max(0.0f, data[1]) + 1.0f;
4363 data[2] = -2.0f * std::max(0.0f, data[2]) + 1.0f;
4364 break;
4365 case 4:
4366 data[0] = std::max(0.0f, data[0]) - 0.5f;
4367 data[1] = std::max(0.0f, data[1]) - 0.5f;
4368 data[2] = std::max(0.0f, data[2]) - 0.5f;
4369 break;
4370 case 5:
4371 data[0] = -std::max(0.0f, data[0]) + 0.5f;
4372 data[1] = -std::max(0.0f, data[1]) + 0.5f;
4373 data[2] = -std::max(0.0f, data[2]) + 0.5f;
4374 break;
4375 case 6:
4376 return;
4377 case 7:
4378 default:
4379 data[0] = -data[0];
4380 data[1] = -data[1];
4381 data[2] = -data[2];
4382 break;
4383 }
4384 }
4385
4386 void nv2a_renderer::combiner_initialize_registers(int id, float rgba[6][4])
4387 {
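// note: fogcolor is read from rgba[6] in the loop below, so callers must supply at least
// seven rows even though the parameter is declared as rgba[6][4]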
4388 for (int n = 0; n < 4; n++) {
4389 combiner.work[id].registers.primarycolor[n] = rgba[0][n];
4390 combiner.work[id].registers.secondarycolor[n] = rgba[1][n];
4391 combiner.work[id].registers.texture0color[n] = rgba[2][n];
4392 combiner.work[id].registers.texture1color[n] = rgba[3][n];
4393 combiner.work[id].registers.texture2color[n] = rgba[4][n];
4394 combiner.work[id].registers.texture3color[n] = rgba[5][n];
4395 combiner.work[id].registers.fogcolor[n] = rgba[6][n];
4396 }
4397 combiner.work[id].registers.spare0[3] = combiner.work[id].registers.texture0color[3]; // alpha of spare 0 must be the alpha of the pixel from texture 0
4398 combiner.work[id].registers.zero[0] = combiner.work[id].registers.zero[1] = combiner.work[id].registers.zero[2] = combiner.work[id].registers.zero[3] = 0;
4399 }
4400
4401 void nv2a_renderer::combiner_initialize_stage(int id, int stage_number)
4402 {
4403 int n = stage_number;
4404
4405 // put register_constantcolor0 in register_color0
4406 combiner.work[id].registers.color0[0] = combiner.setup.stage[n].constantcolor0[0];
4407 combiner.work[id].registers.color0[1] = combiner.setup.stage[n].constantcolor0[1];
4408 combiner.work[id].registers.color0[2] = combiner.setup.stage[n].constantcolor0[2];
4409 combiner.work[id].registers.color0[3] = combiner.setup.stage[n].constantcolor0[3];
4410 // put register_constantcolor1 in register_color1
4411 combiner.work[id].registers.color1[0] = combiner.setup.stage[n].constantcolor1[0];
4412 combiner.work[id].registers.color1[1] = combiner.setup.stage[n].constantcolor1[1];
4413 combiner.work[id].registers.color1[2] = combiner.setup.stage[n].constantcolor1[2];
4414 combiner.work[id].registers.color1[3] = combiner.setup.stage[n].constantcolor1[3];
4415 }
4416
4417 void nv2a_renderer::combiner_initialize_final(int id)
4418 {
4419 // put register_constantcolor0 in register_color0
4420 combiner.work[id].registers.color0[0] = combiner.setup.final.constantcolor0[0];
4421 combiner.work[id].registers.color0[1] = combiner.setup.final.constantcolor0[1];
4422 combiner.work[id].registers.color0[2] = combiner.setup.final.constantcolor0[2];
4423 combiner.work[id].registers.color0[3] = combiner.setup.final.constantcolor0[3];
4424 // put register_constantcolor1 in register_color1
4425 combiner.work[id].registers.color1[0] = combiner.setup.final.constantcolor1[0];
4426 combiner.work[id].registers.color1[1] = combiner.setup.final.constantcolor1[1];
4427 combiner.work[id].registers.color1[2] = combiner.setup.final.constantcolor1[2];
4428 combiner.work[id].registers.color1[3] = combiner.setup.final.constantcolor1[3];
4429 }
4430
4431 void nv2a_renderer::combiner_map_stage_input(int id, int stage_number)
4432 {
4433 int n = stage_number;
4434 int c, d, i;
4435 float v, *pv;
4436
4437 // rgb portion
4438 // A
4439 // get pointer to rgb components of selected input register
4440 pv = combiner_map_input_select_array(id, combiner.setup.stage[n].mapin_rgb.A_input);
4441 c = combiner.setup.stage[n].mapin_rgb.A_component * 3;
4442 i = combiner.setup.stage[n].mapin_rgb.A_component ^ 1;
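// indexing trick: when the component bit is 0, c starts at 0 and i is 1 so the loop reads
// r,g,b; when it is 1, c starts at 3 and i is 0 so the alpha channel is replicated into
// all three components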
4443 // copy components to A
4444 for (d = 0; d < 3; d++) {
4445 combiner.work[id].variables.A[d] = pv[c];
4446 c += i;
4447 }
4448 // apply mapping function
4449 combiner_map_input_function_array(combiner.setup.stage[n].mapin_rgb.A_mapping, combiner.work[id].variables.A);
4450 // B
4451 pv = combiner_map_input_select_array(id, combiner.setup.stage[n].mapin_rgb.B_input);
4452 c = combiner.setup.stage[n].mapin_rgb.B_component * 3;
4453 i = combiner.setup.stage[n].mapin_rgb.B_component ^ 1;
4454 for (d = 0; d < 3; d++) {
4455 combiner.work[id].variables.B[d] = pv[c];
4456 c += i;
4457 }
4458 combiner_map_input_function_array(combiner.setup.stage[n].mapin_rgb.B_mapping, combiner.work[id].variables.B);
4459 // C
4460 pv = combiner_map_input_select_array(id, combiner.setup.stage[n].mapin_rgb.C_input);
4461 c = combiner.setup.stage[n].mapin_rgb.C_component * 3;
4462 i = combiner.setup.stage[n].mapin_rgb.C_component ^ 1;
4463 for (d = 0; d < 3; d++) {
4464 combiner.work[id].variables.C[d] = pv[c];
4465 c += i;
4466 }
4467 combiner_map_input_function_array(combiner.setup.stage[n].mapin_rgb.C_mapping, combiner.work[id].variables.C);
4468 // D
4469 pv = combiner_map_input_select_array(id, combiner.setup.stage[n].mapin_rgb.D_input);
4470 c = combiner.setup.stage[n].mapin_rgb.D_component * 3;
4471 i = combiner.setup.stage[n].mapin_rgb.D_component ^ 1;
4472 for (d = 0; d < 3; d++) {
4473 combiner.work[id].variables.D[d] = pv[c];
4474 c += i;
4475 }
4476 combiner_map_input_function_array(combiner.setup.stage[n].mapin_rgb.D_mapping, combiner.work[id].variables.D);
4477
4478 // alpha portion
4479 // A
4480 // get component (blue or alpha) from selected input
4481 v = combiner_map_input_select(id, combiner.setup.stage[n].mapin_alpha.A_input, 2 + combiner.setup.stage[n].mapin_alpha.A_component);
4482 // copy component to A
4483 combiner.work[id].variables.A[3] = combiner_map_input_function(combiner.setup.stage[n].mapin_alpha.A_mapping, v);
4484 // B
4485 v = combiner_map_input_select(id, combiner.setup.stage[n].mapin_alpha.B_input, 2 + combiner.setup.stage[n].mapin_alpha.B_component);
4486 combiner.work[id].variables.B[3] = combiner_map_input_function(combiner.setup.stage[n].mapin_alpha.B_mapping, v);
4487 // C
4488 v = combiner_map_input_select(id, combiner.setup.stage[n].mapin_alpha.C_input, 2 + combiner.setup.stage[n].mapin_alpha.C_component);
4489 combiner.work[id].variables.C[3] = combiner_map_input_function(combiner.setup.stage[n].mapin_alpha.C_mapping, v);
4490 // D
4491 v = combiner_map_input_select(id, combiner.setup.stage[n].mapin_alpha.D_input, 2 + combiner.setup.stage[n].mapin_alpha.D_component);
4492 combiner.work[id].variables.D[3] = combiner_map_input_function(combiner.setup.stage[n].mapin_alpha.D_mapping, v);
4493 }
4494
4495 void nv2a_renderer::combiner_map_stage_output(int id, int stage_number)
4496 {
4497 int n = stage_number;
4498 float *f;
4499
4500 // rgb
4501 f = combiner_map_output_select_array(id, combiner.setup.stage[n].mapout_rgb.AB_output);
4502 if (f) {
4503 f[0] = combiner.work[id].functions.RGBop1[0];
4504 f[1] = combiner.work[id].functions.RGBop1[1];
4505 f[2] = combiner.work[id].functions.RGBop1[2];
4506 }
4507 f = combiner_map_output_select_array(id, combiner.setup.stage[n].mapout_rgb.CD_output);
4508 if (f) {
4509 f[0] = combiner.work[id].functions.RGBop2[0];
4510 f[1] = combiner.work[id].functions.RGBop2[1];
4511 f[2] = combiner.work[id].functions.RGBop2[2];
4512 }
4513 if ((combiner.setup.stage[n].mapout_rgb.AB_dotproduct | combiner.setup.stage[n].mapout_rgb.CD_dotproduct) == 0) {
4514 f = combiner_map_output_select_array(id, combiner.setup.stage[n].mapout_rgb.SUM_output);
4515 if (f) {
4516 f[0] = combiner.work[id].functions.RGBop3[0];
4517 f[1] = combiner.work[id].functions.RGBop3[1];
4518 f[2] = combiner.work[id].functions.RGBop3[2];
4519 }
4520 }
4521 // alpha
4522 f = combiner_map_output_select_array(id, combiner.setup.stage[n].mapout_alpha.AB_output);
4523 if (f)
4524 f[3] = combiner.work[id].functions.Aop1;
4525 f = combiner_map_output_select_array(id, combiner.setup.stage[n].mapout_alpha.CD_output);
4526 if (f)
4527 f[3] = combiner.work[id].functions.Aop2;
4528 f = combiner_map_output_select_array(id, combiner.setup.stage[n].mapout_alpha.SUM_output);
4529 if (f)
4530 f[3] = combiner.work[id].functions.Aop3;
4531 }
4532
4533 void nv2a_renderer::combiner_map_final_input(int id)
4534 {
4535 int c, d, i;
4536 float *pv;
4537
4538 // E
4539 pv = combiner_map_input_select_array(id, combiner.setup.final.mapin_rgb.E_input);
4540 c = combiner.setup.final.mapin_rgb.E_component * 3;
4541 i = combiner.setup.final.mapin_rgb.E_component ^ 1;
4542 for (d = 0; d < 3; d++) {
4543 combiner.work[id].variables.E[d] = pv[c];
4544 c += i;
4545 }
4546 combiner_map_input_function_array(combiner.setup.final.mapin_rgb.E_mapping, combiner.work[id].variables.E);
4547 // F
4548 pv = combiner_map_input_select_array(id, combiner.setup.final.mapin_rgb.F_input);
4549 c = combiner.setup.final.mapin_rgb.F_component * 3;
4550 i = combiner.setup.final.mapin_rgb.F_component ^ 1;
4551 for (d = 0; d < 3; d++) {
4552 combiner.work[id].variables.F[d] = pv[c];
4553 c += i;
4554 }
4555 combiner_map_input_function_array(combiner.setup.final.mapin_rgb.F_mapping, combiner.work[id].variables.F);
4556 // EF
4557 combiner.work[id].variables.EF[0] = combiner.work[id].variables.E[0] * combiner.work[id].variables.F[0];
4558 combiner.work[id].variables.EF[1] = combiner.work[id].variables.E[1] * combiner.work[id].variables.F[1];
4559 combiner.work[id].variables.EF[2] = combiner.work[id].variables.E[2] * combiner.work[id].variables.F[2];
4560 // sumclamp
4561 combiner.work[id].variables.sumclamp[0] = std::max(0.0f, combiner.work[id].registers.spare0[0]) + std::max(0.0f, combiner.work[id].registers.secondarycolor[0]);
4562 combiner.work[id].variables.sumclamp[1] = std::max(0.0f, combiner.work[id].registers.spare0[1]) + std::max(0.0f, combiner.work[id].registers.secondarycolor[1]);
4563 combiner.work[id].variables.sumclamp[2] = std::max(0.0f, combiner.work[id].registers.spare0[2]) + std::max(0.0f, combiner.work[id].registers.secondarycolor[2]);
4564 if (combiner.setup.final.color_sum_clamp != 0) {
4565 combiner.work[id].variables.sumclamp[0] = std::min(combiner.work[id].variables.sumclamp[0], 1.0f);
4566 combiner.work[id].variables.sumclamp[1] = std::min(combiner.work[id].variables.sumclamp[1], 1.0f);
4567 combiner.work[id].variables.sumclamp[2] = std::min(combiner.work[id].variables.sumclamp[2], 1.0f);
4568 }
4569 // A
4570 pv = combiner_map_input_select_array(id, combiner.setup.final.mapin_rgb.A_input);
4571 c = combiner.setup.final.mapin_rgb.A_component * 3;
4572 i = combiner.setup.final.mapin_rgb.A_component ^ 1;
4573 for (d = 0; d < 3; d++) {
4574 combiner.work[id].variables.A[d] = pv[c];
4575 c += i;
4576 }
4577 combiner_map_input_function_array(combiner.setup.final.mapin_rgb.A_mapping, combiner.work[id].variables.A);
4578 // B
4579 pv = combiner_map_input_select_array(id, combiner.setup.final.mapin_rgb.B_input);
4580 c = combiner.setup.final.mapin_rgb.B_component * 3;
4581 i = combiner.setup.final.mapin_rgb.B_component ^ 1;
4582 for (d = 0; d < 3; d++) {
4583 combiner.work[id].variables.B[d] = pv[c];
4584 c += i;
4585 }
4586 combiner_map_input_function_array(combiner.setup.final.mapin_rgb.B_mapping, combiner.work[id].variables.B);
4587 // C
4588 pv = combiner_map_input_select_array(id, combiner.setup.final.mapin_rgb.C_input);
4589 c = combiner.setup.final.mapin_rgb.C_component * 3;
4590 i = combiner.setup.final.mapin_rgb.C_component ^ 1;
4591 for (d = 0; d < 3; d++) {
4592 combiner.work[id].variables.C[d] = pv[c];
4593 c += i;
4594 }
4595 combiner_map_input_function_array(combiner.setup.final.mapin_rgb.C_mapping, combiner.work[id].variables.C);
4596 // D
4597 pv = combiner_map_input_select_array(id, combiner.setup.final.mapin_rgb.D_input);
4598 c = combiner.setup.final.mapin_rgb.D_component * 3;
4599 i = combiner.setup.final.mapin_rgb.D_component ^ 1;
4600 for (d = 0; d < 3; d++) {
4601 combiner.work[id].variables.D[d] = pv[c];
4602 c += i;
4603 }
4604 combiner_map_input_function_array(combiner.setup.final.mapin_rgb.D_mapping, combiner.work[id].variables.D);
4605 // G
4606 combiner.work[id].variables.G = combiner_map_input_select(id, combiner.setup.final.mapin_alpha.G_input, 2 + combiner.setup.final.mapin_alpha.G_component);
4607 }
4608
4609 void nv2a_renderer::combiner_final_output(int id)
4610 {
4611 // rgb
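// the final combiner computes out.rgb = A*B + (1 - A)*C + D componentwise, then clamps
// each channel to at most 2.0 below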
4612 combiner.work[id].output[0] = combiner.work[id].variables.A[0] * combiner.work[id].variables.B[0] + (1.0f - combiner.work[id].variables.A[0])*combiner.work[id].variables.C[0] + combiner.work[id].variables.D[0];
4613 combiner.work[id].output[1] = combiner.work[id].variables.A[1] * combiner.work[id].variables.B[1] + (1.0f - combiner.work[id].variables.A[1])*combiner.work[id].variables.C[1] + combiner.work[id].variables.D[1];
4614 combiner.work[id].output[2] = combiner.work[id].variables.A[2] * combiner.work[id].variables.B[2] + (1.0f - combiner.work[id].variables.A[2])*combiner.work[id].variables.C[2] + combiner.work[id].variables.D[2];
4615 combiner.work[id].output[0] = std::min(combiner.work[id].output[0], 2.0f);
4616 combiner.work[id].output[1] = std::min(combiner.work[id].output[1], 2.0f);
4617 combiner.work[id].output[2] = std::min(combiner.work[id].output[2], 2.0f);
4618 // a
4619 combiner.work[id].output[3] = combiner_map_input_function(combiner.setup.final.mapin_alpha.G_mapping, combiner.work[id].variables.G);
4620 }
4621
4622 void nv2a_renderer::combiner_function_AB(int id, float result[4])
4623 {
4624 result[0] = combiner.work[id].variables.A[0] * combiner.work[id].variables.B[0];
4625 result[1] = combiner.work[id].variables.A[1] * combiner.work[id].variables.B[1];
4626 result[2] = combiner.work[id].variables.A[2] * combiner.work[id].variables.B[2];
4627 }
4628
4629 void nv2a_renderer::combiner_function_AdotB(int id, float result[4])
4630 {
4631 result[0] = combiner.work[id].variables.A[0] * combiner.work[id].variables.B[0] + combiner.work[id].variables.A[1] * combiner.work[id].variables.B[1] + combiner.work[id].variables.A[2] * combiner.work[id].variables.B[2];
4632 result[1] = result[0];
4633 result[2] = result[0];
4634 }
4635
4636 void nv2a_renderer::combiner_function_CD(int id, float result[4])
4637 {
4638 result[0] = combiner.work[id].variables.C[0] * combiner.work[id].variables.D[0];
4639 result[1] = combiner.work[id].variables.C[1] * combiner.work[id].variables.D[1];
4640 result[2] = combiner.work[id].variables.C[2] * combiner.work[id].variables.D[2];
4641 }
4642
4643 void nv2a_renderer::combiner_function_CdotD(int id, float result[4])
4644 {
4645 result[0] = combiner.work[id].variables.C[0] * combiner.work[id].variables.D[0] + combiner.work[id].variables.C[1] * combiner.work[id].variables.D[1] + combiner.work[id].variables.C[2] * combiner.work[id].variables.D[2];
4646 result[1] = result[0];
4647 result[2] = result[0];
4648 }
4649
4650 void nv2a_renderer::combiner_function_ABmuxCD(int id, float result[4])
4651 {
4652 if (combiner.work[id].registers.spare0[3] >= 0.5f)
4653 combiner_function_AB(id, result);
4654 else
4655 combiner_function_CD(id, result);
4656 }
4657
4658 void nv2a_renderer::combiner_function_ABsumCD(int id, float result[4])
4659 {
4660 result[0] = combiner.work[id].variables.A[0] * combiner.work[id].variables.B[0] + combiner.work[id].variables.C[0] * combiner.work[id].variables.D[0];
4661 result[1] = combiner.work[id].variables.A[1] * combiner.work[id].variables.B[1] + combiner.work[id].variables.C[1] * combiner.work[id].variables.D[1];
4662 result[2] = combiner.work[id].variables.A[2] * combiner.work[id].variables.B[2] + combiner.work[id].variables.C[2] * combiner.work[id].variables.D[2];
4663 }
4664
4665 void nv2a_renderer::combiner_compute_rgb_outputs(int id, int stage_number)
4666 {
4667 int n = stage_number;
4668 int m;
4669 float bias, scale;
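// m records whether either half used a dot product; the third (sum/mux) result is only
// produced when neither AB nor CD was a dot product (m stays 0)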
4670
4671 // select bias and scale
4672 if (combiner.setup.stage[n].mapout_rgb.bias)
4673 bias = -0.5;
4674 else
4675 bias = 0;
4676 switch (combiner.setup.stage[n].mapout_rgb.scale) {
4677 case 0:
4678 default:
4679 scale = 1.0;
4680 break;
4681 case 1:
4682 scale = 2.0;
4683 break;
4684 case 2:
4685 scale = 4.0;
4686 break;
4687 case 3:
4688 scale = 0.5;
4689 break;
4690 }
4691 // first
4692 if (combiner.setup.stage[n].mapout_rgb.AB_dotproduct) {
4693 m = 1;
4694 combiner_function_AdotB(id, combiner.work[id].functions.RGBop1);
4695 }
4696 else {
4697 m = 0;
4698 combiner_function_AB(id, combiner.work[id].functions.RGBop1);
4699 }
4700 combiner.work[id].functions.RGBop1[0] = std::max(std::min((combiner.work[id].functions.RGBop1[0] + bias) * scale, 1.0f), -1.0f);
4701 combiner.work[id].functions.RGBop1[1] = std::max(std::min((combiner.work[id].functions.RGBop1[1] + bias) * scale, 1.0f), -1.0f);
4702 combiner.work[id].functions.RGBop1[2] = std::max(std::min((combiner.work[id].functions.RGBop1[2] + bias) * scale, 1.0f), -1.0f);
4703 // second
4704 if (combiner.setup.stage[n].mapout_rgb.CD_dotproduct) {
4705 m = m | 1;
4706 combiner_function_CdotD(id, combiner.work[id].functions.RGBop2);
4707 }
4708 else
4709 combiner_function_CD(id, combiner.work[id].functions.RGBop2);
4710 combiner.work[id].functions.RGBop2[0] = std::max(std::min((combiner.work[id].functions.RGBop2[0] + bias) * scale, 1.0f), -1.0f);
4711 combiner.work[id].functions.RGBop2[1] = std::max(std::min((combiner.work[id].functions.RGBop2[1] + bias) * scale, 1.0f), -1.0f);
4712 combiner.work[id].functions.RGBop2[2] = std::max(std::min((combiner.work[id].functions.RGBop2[2] + bias) * scale, 1.0f), -1.0f);
4713 // third
4714 if (m == 0) {
4715 if (combiner.setup.stage[n].mapout_rgb.muxsum)
4716 combiner_function_ABmuxCD(id, combiner.work[id].functions.RGBop3);
4717 else
4718 combiner_function_ABsumCD(id, combiner.work[id].functions.RGBop3);
4719 combiner.work[id].functions.RGBop3[0] = std::max(std::min((combiner.work[id].functions.RGBop3[0] + bias) * scale, 1.0f), -1.0f);
4720 combiner.work[id].functions.RGBop3[1] = std::max(std::min((combiner.work[id].functions.RGBop3[1] + bias) * scale, 1.0f), -1.0f);
4721 combiner.work[id].functions.RGBop3[2] = std::max(std::min((combiner.work[id].functions.RGBop3[2] + bias) * scale, 1.0f), -1.0f);
4722 }
4723 }
4724
4725 void nv2a_renderer::combiner_compute_alpha_outputs(int id, int stage_number)
4726 {
4727 int n = stage_number;
4728 float bias, scale;
4729
4730 // select bias and scale
4731 if (combiner.setup.stage[n].mapout_alpha.bias)
4732 bias = -0.5;
4733 else
4734 bias = 0;
4735 switch (combiner.setup.stage[n].mapout_alpha.scale) {
4736 case 0:
4737 default:
4738 scale = 1.0;
4739 break;
4740 case 1:
4741 scale = 2.0;
4742 break;
4743 case 2:
4744 scale = 4.0;
4745 break;
4746 case 3:
4747 scale = 0.5;
4748 break;
4749 }
4750 // first
4751 combiner.work[id].functions.Aop1 = combiner.work[id].variables.A[3] * combiner.work[id].variables.B[3];
4752 combiner.work[id].functions.Aop1 = std::max(std::min((combiner.work[id].functions.Aop1 + bias) * scale, 1.0f), -1.0f);
4753 // second
4754 combiner.work[id].functions.Aop2 = combiner.work[id].variables.C[3] * combiner.work[id].variables.D[3];
4755 combiner.work[id].functions.Aop2 = std::max(std::min((combiner.work[id].functions.Aop2 + bias) * scale, 1.0f), -1.0f);
4756 // third
4757 if (combiner.setup.stage[n].mapout_alpha.muxsum) {
4758 if (combiner.work[id].registers.spare0[3] >= 0.5f)
4759 combiner.work[id].functions.Aop3 = combiner.work[id].variables.A[3] * combiner.work[id].variables.B[3];
4760 else
4761 combiner.work[id].functions.Aop3 = combiner.work[id].variables.C[3] * combiner.work[id].variables.D[3];
4762 }
4763 else
4764 combiner.work[id].functions.Aop3 = combiner.work[id].variables.A[3] * combiner.work[id].variables.B[3] + combiner.work[id].variables.C[3] * combiner.work[id].variables.D[3];
4765 combiner.work[id].functions.Aop3 = std::max(std::min((combiner.work[id].functions.Aop3 + bias) * scale, 1.0f), -1.0f);
4766 }
4767
4768 WRITE_LINE_MEMBER(nv2a_renderer::vblank_callback)
4769 {
4770 /*#ifdef LOG_NV2A
4771 printf("vblank_callback\n\r");
4772 #endif*/
4773 if ((state != 0) && (puller_waiting == 1)) {
4774 puller_waiting = 0;
4775 puller_timer_work(nullptr, 0);
4776 }
4777 if (state != 0) {
4778 pcrtc[0x100 / 4] |= 1;
4779 pcrtc[0x808 / 4] |= 0x10000;
4780 }
4781 else {
4782 pcrtc[0x100 / 4] &= ~1;
4783 pcrtc[0x808 / 4] &= ~0x10000;
4784 }
4785 if (update_interrupts() == true)
4786 irq_callback(1); // IRQ 3
4787 else
4788 irq_callback(0); // IRQ 3
4789 }
4790
4791 bool nv2a_renderer::update_interrupts()
4792 {
4793 if (pcrtc[0x100 / 4] & pcrtc[0x140 / 4])
4794 pmc[0x100 / 4] |= 0x1000000;
4795 else
4796 pmc[0x100 / 4] &= ~0x1000000;
4797 if (pgraph[0x100 / 4] & pgraph[0x140 / 4])
4798 pmc[0x100 / 4] |= 0x1000;
4799 else
4800 pmc[0x100 / 4] &= ~0x1000;
4801 if (((pmc[0x100 / 4] & 0x7fffffff) && (pmc[0x140 / 4] & 1)) || ((pmc[0x100 / 4] & 0x80000000) && (pmc[0x140 / 4] & 2))) {
4802 // send interrupt
4803 return true;
4804 }
4805 else
4806 return false;
4807 }
4808
4809 uint32_t nv2a_renderer::screen_update_callback(screen_device &screen, bitmap_rgb32 &bitmap, const rectangle &cliprect)
4810 {
4811 if (displayedtarget != nullptr) {
4812 bitmap_rgb32 bm(displayedtarget, 640, 480, 640);
4813 uint32_t *dst = (uint32_t *)bitmap.raw_pixptr(0, 0);
4814
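		// Straight copy of the CRTC buffer into the screen bitmap; this
		// assumes a 640x480 surface with the same pixel format and pitch
		// as the destination, which is why cliprect is ignored here.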
4815 //printf("updatescreen %08X\n\r",pcrtc[0x800/4]);
4816 memcpy(dst, displayedtarget, bitmap.rowbytes()*bitmap.height());
4817 }
4818 return 0;
4819 }
4820
4821 void nv2a_renderer::geforce_assign_object(address_space &space, uint32_t chanel, uint32_t subchannel, uint32_t address)
4822 {
4823 uint32_t handle, offset, objclass, data;
4824
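	// The handle written by the guest is mapped to the object's context
	// entry in RAMIN (presumably via the RAMHT hash table); the low byte
	// of the first context dword holds the object's class.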
4825 handle = space.read_dword(address);
4826 offset = geforce_object_offset(handle);
4827 #ifdef LOG_NV2A
4828 machine().logerror(" assign to subchannel %d object at %d in ramin", subchannel, offset);
4829 #endif
4830 channel[chanel][subchannel].object.offset = offset;
4831 data = ramin[offset / 4];
4832 objclass = data & 0xff;
4833 #ifdef LOG_NV2A
4834 machine().logerror(" class %03X\n", objclass);
4835 #endif
4836 channel[chanel][subchannel].object.objclass = objclass;
4837 }
4838
4839 TIMER_CALLBACK_MEMBER(nv2a_renderer::puller_timer_work)
4840 {
4841 int chanel;
4842 int method, count;
4843 uint32_t *dmaput, *dmaget;
4844 uint32_t cmd;
4845 COMMAND cmdtype;
4846 int countlen;
4847 int ret;
4848 address_space *space = puller_space;
4849 uint32_t subch;
4850
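	// Drain every channel's command FIFO: DMA_PUT (register 0x40) is where
	// the CPU-side pusher stopped writing, DMA_GET (register 0x44) is how
	// far this puller has consumed.  For the method-type commands handled
	// below the command dword decodes as:
	//   bits 12-2  method (register offset within the bound object)
	//   bits 15-13 subchannel
	//   bits 28-18 dword count
	// e.g. 0x00081804 -> count 2, subchannel 0, method 0x1804.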
4851 for (chanel = 0; chanel < 32; chanel++) {
4852 dmaput = &channel[chanel][0].regs[0x40 / 4];
4853 dmaget = &channel[chanel][0].regs[0x44 / 4];
4854 while (*dmaget != *dmaput) {
4855 cmd = space->read_dword(*dmaget);
4856 *dmaget += 4;
4857 cmdtype = geforce_commandkind(cmd);
4858 switch (cmdtype)
4859 {
4860 case COMMAND::JUMP:
4861 #ifdef LOG_NV2A
4862 machine().logerror("jump dmaget %08X", *dmaget);
4863 #endif
4864 *dmaget = cmd & 0xfffffffc;
4865 #ifdef LOG_NV2A
4866 machine().logerror(" -> %08X\n\r", *dmaget);
4867 #endif
4868 break;
4869 case COMMAND::INCREASING:
4870 method = cmd & (2047 << 2); // if method >= 0x100 send it to assigned object
4871 subch = (cmd >> 13) & 7;
4872 count = (cmd >> 18) & 2047;
4873 if ((method == 0) && (count == 1)) { // OBJECT method, bind an engine object to a subchannel
4874 geforce_assign_object(*space, chanel, subch, *dmaget);
4875 *dmaget += 4;
4876 }
4877 else {
4878 #ifdef LOG_NV2A
4879 machine().logerror(" subch. %d method %04x count %d\n", subch, method, count);
4880 #endif
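					// Increasing mode: each parameter dword is handed to
					// the next method offset, one dword at a time.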
4881 ret = 0;
4882 while (count > 0) {
4883 countlen = 1;
4884 ret = execute_method(*space, chanel, subch, method, *dmaget, countlen);
4885 count--;
4886 method += 4;
4887 *dmaget += 4;
4888 if (ret != 0)
4889 break;
4890 }
4891 if (ret != 0) {
4892 puller_timer->enable(false);
4893 puller_waiting = ret;
4894 return;
4895 }
4896 }
4897 break;
4898 case COMMAND::NON_INCREASING:
4899 method = cmd & (2047 << 2);
4900 subch = (cmd >> 13) & 7;
4901 count = (cmd >> 18) & 2047;
4902 if ((method == 0) && (count == 1)) {
4903 geforce_assign_object(*space, chanel, subch, *dmaget);
4904 *dmaget += 4;
4905 }
4906 else {
4907 #ifdef LOG_NV2A
4908 machine().logerror(" subch. %d method %04x count %d\n", subch, method, count);
4909 #endif
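				// execute_method consumes parameters in place: countlen
				// comes back reduced by the number of dwords it used, so
				// advance DMA_GET by the difference and loop on the rest.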
4910 while (count > 0) {
4911 countlen = count;
4912 ret = execute_method(*space, chanel, subch, method, *dmaget, countlen);
4913 *dmaget += 4 * (count - countlen);
4914 count = countlen;
4915 }
4916 }
4917 break;
4918 case COMMAND::LONG_NON_INCREASING:
4919 method = cmd & (2047 << 2);
4920 subch = (cmd >> 13) & 7;
4921 count = space->read_dword(*dmaget);
4922 *dmaget += 4;
4923 if ((method == 0) && (count == 1)) {
4924 geforce_assign_object(*space, chanel, subch, *dmaget);
4925 *dmaget += 4;
4926 }
4927 else {
4928 #ifdef LOG_NV2A
4929 machine().logerror(" subch. %d method %04x count %d\n", subch, method, count);
4930 #endif
4931 while (count > 0) {
4932 countlen = count;
4933 ret = execute_method(*space, chanel, subch, method, *dmaget, countlen);
4934 *dmaget += 4 * (count - countlen);
4935 count = countlen;
4936 }
4937 }
4938 break;
4939 default:
4940 machine().logerror(" unimplemented command %08X\n", cmd);
4941 }
4942 }
4943 }
4944 }
4945
4946 uint32_t nv2a_renderer::geforce_r(offs_t offset, uint32_t mem_mask)
4947 {
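	// MMIO blocks decoded below (offsets relative to the 0xfd000000 base):
	//   0x000000 PMC, 0x002000 PFIFO, 0x100000 PFB, 0x101000 straps,
	//   0x400000 PGRAPH, 0x600000 PCRTC, 0x700000 PRAMIN and
	//   0x800000 USER (32 channels, 8 subchannels each, of FIFO registers).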
4948 static int x, ret;
4949
4950 ret = 0;
4951 if (offset == 0x1804f6) {
4952 x = x ^ 0x08080808;
4953 ret = x;
4954 }
4955 if ((offset >= 0x00100000 / 4) && (offset < 0x00101000 / 4)) {
4956 //machine().logerror("NV_2A: read PFB[%06X] mask %08X value %08X\n",offset*4-0x00100000,mem_mask,ret);
4957 if (offset == 0x100200 / 4)
4958 return 3;
4959 }
4960 else if ((offset >= 0x00101000 / 4) && (offset < 0x00102000 / 4)) {
4961 //machine().logerror("NV_2A: read STRAPS[%06X] mask %08X value %08X\n",offset*4-0x00101000,mem_mask,ret);
4962 }
4963 else if ((offset >= 0x00002000 / 4) && (offset < 0x00004000 / 4)) {
4964 ret = pfifo[offset - 0x00002000 / 4];
4965 // PFIFO.CACHE1.STATUS or PFIFO.RUNOUT_STATUS
4966 if ((offset == 0x3214 / 4) || (offset == 0x2400 / 4))
4967 ret = 0x10;
4968 //machine().logerror("NV_2A: read PFIFO[%06X] value %08X\n",offset*4-0x00002000,ret);
4969 }
4970 else if ((offset >= 0x00700000 / 4) && (offset < 0x00800000 / 4)) {
4971 ret = ramin[offset - 0x00700000 / 4];
4972 //machine().logerror("NV_2A: read PRAMIN[%06X] value %08X\n",offset*4-0x00700000,ret);
4973 }
4974 else if ((offset >= 0x00400000 / 4) && (offset < 0x00402000 / 4)) {
4975 ret = pgraph[offset - 0x00400000 / 4];
4976 //machine().logerror("NV_2A: read PGRAPH[%06X] value %08X\n",offset*4-0x00400000,ret);
4977 }
4978 else if ((offset >= 0x00600000 / 4) && (offset < 0x00601000 / 4)) {
4979 ret = pcrtc[offset - 0x00600000 / 4];
4980 //machine().logerror("NV_2A: read PCRTC[%06X] value %08X\n",offset*4-0x00600000,ret);
4981 }
4982 else if ((offset >= 0x00000000 / 4) && (offset < 0x00001000 / 4)) {
4983 ret = pmc[offset - 0x00000000 / 4];
4984 //machine().logerror("NV_2A: read PMC[%06X] value %08X\n",offset*4-0x00000000,ret);
4985 }
4986 else if ((offset >= 0x00800000 / 4) && (offset < 0x00900000 / 4)) {
4987 // 32 channels size 0x10000 each, 8 subchannels per channel size 0x2000 each
4988 int chanel, subchannel, suboffset;
4989
4990 suboffset = offset - 0x00800000 / 4;
4991 chanel = (suboffset >> (16 - 2)) & 31;
4992 subchannel = (suboffset >> (13 - 2)) & 7;
4993 suboffset = suboffset & 0x7ff;
4994 if (suboffset < 0x80 / 4)
4995 ret = channel[chanel][subchannel].regs[suboffset];
4996 //machine().logerror("NV_2A: read channel[%02X,%d,%04X]=%08X\n",chanel,subchannel,suboffset*4,ret);
4997 return ret;
4998 }
4999 //machine().logerror("NV_2A: read at %08X mask %08X value %08X\n",0xfd000000+offset*4,mem_mask,ret);
5000 return ret;
5001 }
5002
5003 void nv2a_renderer::geforce_w(address_space &space, offs_t offset, uint32_t data, uint32_t mem_mask)
5004 {
5005 uint32_t old;
5006 bool update_int;
5007
5008 update_int = false;
5009 if ((offset >= 0x00101000 / 4) && (offset < 0x00102000 / 4)) {
5010 //machine().logerror("NV_2A: write STRAPS[%06X] mask %08X value %08X\n",offset*4-0x00101000,mem_mask,data);
5011 }
5012 else if ((offset >= 0x00002000 / 4) && (offset < 0x00004000 / 4)) {
5013 int e = offset - 0x00002000 / 4;
5014 if (e >= (sizeof(pfifo) / sizeof(uint32_t)))
5015 return;
5016 COMBINE_DATA(pfifo + e);
5017 //machine().logerror("NV_2A: read PFIFO[%06X]=%08X\n",offset*4-0x00002000,data & mem_mask); // 2210 pfifo ramht & 1f0 << 12
5018 }
5019 else if ((offset >= 0x00700000 / 4) && (offset < 0x00800000 / 4)) {
5020 int e = offset - 0x00700000 / 4;
5021 if (e >= (sizeof(ramin) / sizeof(uint32_t)))
5022 return;
5023 COMBINE_DATA(ramin + e);
5024 //machine().logerror("NV_2A: write PRAMIN[%06X]=%08X\n",offset*4-0x00700000,data & mem_mask);
5025 }
5026 else if ((offset >= 0x00400000 / 4) && (offset < 0x00402000 / 4)) {
5027 int e = offset - 0x00400000 / 4;
5028 if (e >= (sizeof(pgraph) / sizeof(uint32_t)))
5029 return;
5030 old = pgraph[e];
5031 COMBINE_DATA(pgraph + e);
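		// The interrupt status register at 0x100 is write-1-to-clear;
		// clearing bit 0 also zeroes the register at 0x108 and forces a
		// re-evaluation of the interrupt line below.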
5032 if (e == 0x100 / 4) {
5033 pgraph[e] = old & ~data;
5034 if (data & 1)
5035 pgraph[0x108 / 4] = 0;
5036 update_int = true;
5037 }
5038 if (e == 0x140 / 4)
5039 update_int = true;
5040 if (e == 0x720 / 4) {
5041 if ((data & 1) && (puller_waiting == 2)) {
5042 puller_waiting = 0;
5043 puller_timer->enable();
5044 puller_timer->adjust(attotime::zero);
5045 }
5046 }
5047 if ((e >= 0x900 / 4) && (e < 0xa00 / 4))
5048 pgraph[e] = 0;
5049 //machine().logerror("NV_2A: write PGRAPH[%06X]=%08X\n",offset*4-0x00400000,data & mem_mask);
5050 }
5051 else if ((offset >= 0x00600000 / 4) && (offset < 0x00601000 / 4)) {
5052 int e = offset - 0x00600000 / 4;
5053 if (e >= (sizeof(pcrtc) / sizeof(uint32_t)))
5054 return;
5055 old = pcrtc[e];
5056 COMBINE_DATA(pcrtc + e);
5057 if (e == 0x100 / 4) {
5058 pcrtc[e] = old & ~data;
5059 update_int = true;
5060 }
5061 if (e == 0x140 / 4)
5062 update_int = true;
5063 if (e == 0x800 / 4) {
5064 displayedtarget = (uint32_t *)direct_access_ptr(pcrtc[e]);
5065 #ifdef LOG_NV2A
5066 printf("crtc buffer %08X\n\r", data);
5067 #endif
5068 }
5069 //machine().logerror("NV_2A: write PCRTC[%06X]=%08X\n",offset*4-0x00600000,data & mem_mask);
5070 }
5071 else if ((offset >= 0x00000000 / 4) && (offset < 0x00001000 / 4)) {
5072 int e = offset - 0x00000000 / 4;
5073 if (e >= (sizeof(pmc) / sizeof(uint32_t)))
5074 return;
5075 COMBINE_DATA(pmc + e);
5076 if (e == 0x200 / 4) // PMC.ENABLE register
5077 if (data & 0x1100) // either PFIFO or PGRAPH enabled
5078 for (int ch = 0; ch < 32; ch++) // zero dma_get in all the channels
5079 channel[ch][0].regs[0x44 / 4] = 0;
5080 //machine().logerror("NV_2A: write PMC[%06X]=%08X\n",offset*4-0x00000000,data & mem_mask);
5081 }
5082 else if ((offset >= 0x00800000 / 4) && (offset < 0x00900000 / 4)) {
5083 // 32 channels size 0x10000 each, 8 subchannels per channel size 0x2000 each
5084 int chanel, subchannel, suboffset;
5085 //int method, count, handle, objclass;
5086
5087 suboffset = offset - 0x00800000 / 4;
5088 chanel = (suboffset >> (16 - 2)) & 31;
5089 subchannel = (suboffset >> (13 - 2)) & 7;
5090 suboffset = suboffset & 0x7ff;
5091 //machine().logerror("NV_2A: write channel[%02X,%d,%04X]=%08X\n",chanel,subchannel,suboffset*4,data & mem_mask);
5092 COMBINE_DATA(&channel[chanel][subchannel].regs[suboffset]);
5093 if (suboffset >= 0x80 / 4)
5094 return;
5095 if ((suboffset == 0x40 / 4) || (suboffset == 0x44 / 4)) { // DMA_PUT or DMA_GET
5096 uint32_t *dmaput, *dmaget;
5097
5098 dmaput = &channel[chanel][0].regs[0x40 / 4];
5099 dmaget = &channel[chanel][0].regs[0x44 / 4];
5100 //printf("dmaget %08X dmaput %08X\n\r",*dmaget,*dmaput);
5101 if (*dmaget != *dmaput) {
5102 if (puller_waiting == 0) {
5103 puller_space = &space;
5104 puller_timer->enable();
5105 puller_timer->adjust(attotime::zero);
5106 }
5107 }
5108 }
5109 }
5110 //else
5111 // machine().logerror("NV_2A: write at %08X mask %08X value %08X\n",0xfd000000+offset*4,mem_mask,data);
5112 if (update_int == true) {
5113 if (update_interrupts() == true)
5114 irq_callback(1); // IRQ 3
5115 else
5116 irq_callback(0); // IRQ 3
5117 }
5118 }
5119
5120 void nv2a_renderer::savestate_items()
5121 {
5122 }
5123
5124 void nv2a_renderer::set_ram_base(void *base)
5125 {
5126 basemempointer = (uint8_t*)base;
5127 topmempointer = basemempointer + 512 * 1024 * 1024 - 1;
5128 }
5129
5130 void nv2a_renderer::start(address_space *cpu_space)
5131 {
5132 puller_timer = machine().scheduler().timer_alloc(timer_expired_delegate(FUNC(nv2a_renderer::puller_timer_work), this), (void *)"NV2A Puller Timer");
5133 puller_timer->enable(false);
5134 }
5135