1 // license:BSD-3-Clause
2 // copyright-holders:Samuele Zannoli
3 #include "emu.h"
4 #include "video/poly.h"
5 #include "bitmap.h"
6 #include "includes/xbox_nv2a.h"
7 #include <bitset>
8 #include <cfloat>
9 
10 //#define LOG_NV2A
11 #define DEBUG_CHECKS // enable for debugging
12 
13 char const *const vertex_program_disassembler::srctypes[] = { "??", "Rn", "Vn", "Cn" };
14 char const *const vertex_program_disassembler::scaops[] = { "NOP", "IMV", "RCP", "RCC", "RSQ", "EXP", "LOG", "LIT", "???", "???", "???", "???", "???", "???", "???", "???", "???" };
15 int const vertex_program_disassembler::scapar2[] = { 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
16 char const *const vertex_program_disassembler::vecops[] = { "NOP", "MOV", "MUL", "ADD", "MAD", "DP3", "DPH", "DP4", "DST", "MIN", "MAX", "SLT", "SGE", "ARL", "???", "???", "???" };
17 int const vertex_program_disassembler::vecpar2[] = { 0, 4, 6, 5, 7, 6, 6, 6, 6, 6, 6, 6, 6, 4, 0, 0, 0 };
18 char const *const vertex_program_disassembler::vecouts[] = { "oPos", "???", "???", "oD0", "oD1", "oFog", "oPts", "oB0", "oB1", "oT0", "oT1", "oT2", "oT3" };
19 char const vertex_program_disassembler::compchar[] = { 'x', 'y', 'z', 'w' };
20 
21 /*
22 Each vertex program instruction is a 128 bit word made of the fields:
23 d         f
24 w   b     i
25 o   i     e
26 r   t     l
27 d   s     d
28 +-+-----+-------
29 |0|31-0 |not used
30 +-+-----+-------
31 | |31-29|not used
32 | +-----+-------
33 | |28-25|scalar operation
34 | +-----+-------
35 | |24-21|vectorial operation
36 | +-----+-------
37 | |20-13|index for source constant C[]
38 | +-----+-------
39 | |12-9 |input vector index
40 | +-----+-------
41 |1|  8  |parameter A:sign
42 | +-----+-------
43 | | 7-6 |parameter A:swizzle x
44 | +-----+-------
45 | | 5-4 |parameter A:swizzle y
46 | +-----+-------
47 | | 3-2 |parameter A:swizzle z
48 | +-----+-------
49 | | 1-0 |parameter A:swizzle w
50 |-+-----+-------
51 | |31-28|parameter A:parameter Rn index
52 | +-----+-------
53 | |27-26|parameter A:input type 1:Rn 2:Vn 3:C[n]
54 | +-----+-------
55 | | 25  |parameter B:sign
56 | +-----+-------
57 | |24-23|parameter B:swizzle x
58 | +-----+-------
59 | |22-21|parameter B:swizzle y
60 | +-----+-------
61 | |20-19|parameter B:swizzle z
62 | +-----+-------
63 |2|18-17|parameter B:swizzle w
64 | +-----+-------
65 | |16-13|parameter B:parameter Rn index
66 | +-----+-------
67 | |12-11|parameter B:input type 1:Rn 2:Vn 3:C[n]
68 | +-----+-------
69 | | 10  |parameter C:sign
70 | +-----+-------
71 | | 9-8 |parameter C:swizzle x
72 | +-----+-------
73 | | 7-6 |parameter C:swizzle y
74 | +-----+-------
75 | | 5-4 |parameter C:swizzle z
76 | +-----+-------
77 | | 3-2 |parameter C:swizzle w
78 | +-----+-------
79 | | 1-0 |
80 |-+     |parameter C:parameter Rn index
81 | |31-30|
82 | +-----+-------
83 | |29-28|parameter C:input type 1:Rn 2:Vn 3:C[n]
84 | +-----+-------
85 | |27-24|output Rn mask from vectorial operation
86 | +-----+-------
87 | |23-20|output Rn index from vectorial operation
88 | +-----+-------
89 | |19-16|output Rn mask from scalar operation
90 | +-----+-------
91 |3|15-12|output vector write mask
92 | +-----+-------
93 | | 11  |1:output is output vector 0:output is constant C[]
94 | +-----+-------
95 | |10-3 |output vector/constant index
96 | +-----+-------
97 | |  2  |0:output Rn from vectorial operation 1:output Rn from scalar operation
98 | +-----+-------
99 | |  1  |1:add a0x to index for source constant C[]
100 | +-----+-------
101 | |  0  |1:end of program
102 +-+-----+-------
103 Each vertex program instruction can generate up to three destination values using up to three source values.
104 The first possible destination is to Rn from a vectorial operation.
105 The second possible destination is to a vertex shader output or C[n] from a vectorial or scalar operation.
106 The third possible destination is to Rn from a scalar operation.
107 */
decodefields(unsigned int * dwords,int offset,fields & decoded)108 void vertex_program_disassembler::decodefields(unsigned int *dwords, int offset, fields &decoded)
109 {
110 	unsigned int srcbits[3];
111 	int a;
112 
113 	srcbits[0] = ((dwords[1 + offset] & 0x1ff) << 6) | (dwords[2 + offset] >> 26);
114 	srcbits[1] = (dwords[2 + offset] >> 11) & 0x7fff;
115 	srcbits[2] = ((dwords[2 + offset] & 0x7ff) << 4) | (dwords[3 + offset] >> 28);
116 	decoded.ScaOperation = (int)(dwords[1 + offset] >> 25) & 0xf;
117 	decoded.VecOperation = (int)(dwords[1 + offset] >> 21) & 0xf;
118 	decoded.SourceConstantIndex = (int)(dwords[1 + offset] >> 13) & 0xff;
119 	decoded.InputIndex = (int)(dwords[1 + offset] >> 9) & 0xf;
120 	for (a = 0; a < 3; a++)
121 	{
122 		decoded.src[a].Sign = (int)(srcbits[a] >> 14) & 1;
123 		decoded.src[a].SwizzleX = (int)(srcbits[a] >> 12) & 3;
124 		decoded.src[a].SwizzleY = (int)(srcbits[a] >> 10) & 3;
125 		decoded.src[a].SwizzleZ = (int)(srcbits[a] >> 8) & 3;
126 		decoded.src[a].SwizzleW = (int)(srcbits[a] >> 6) & 3;
127 		decoded.src[a].TempIndex = (int)(srcbits[a] >> 2) & 0xf;
128 		decoded.src[a].ParameterType = (int)(srcbits[a] >> 0) & 3;
129 	}
130 
131 	decoded.VecTempWriteMask = (int)(dwords[3 + offset] >> 24) & 0xf;
132 	decoded.VecTempIndex = (int)(dwords[3 + offset] >> 20) & 0xf;
133 	decoded.ScaTempWriteMask = (int)(dwords[3 + offset] >> 16) & 0xf;
134 	decoded.OutputWriteMask = (int)(dwords[3 + offset] >> 12) & 0xf;
135 	decoded.OutputSelect = (int)(dwords[3 + offset] >> 11) & 0x1;
136 	decoded.OutputIndex = (int)(dwords[3 + offset] >> 3) & 0xff;
137 	decoded.MultiplexerControl = (int)(dwords[3 + offset] >> 2) & 0x1;
138 	decoded.Usea0x = (int)(dwords[3 + offset] >> 1) & 0x1;
139 	decoded.EndOfProgram = (int)(dwords[3 + offset] >> 0) & 0x1;
140 }
141 
disassemble_mask(int mask,char * s)142 int vertex_program_disassembler::disassemble_mask(int mask, char *s)
143 {
144 	int l;
145 
146 	*s = 0;
147 	if (mask == 15)
148 		return 0;
149 	s[0] = '.';
150 	l = 1;
151 	if ((mask & 8) != 0)
152 	{
153 		s[l] = 'x';
154 		l++;
155 	}
156 	if ((mask & 4) != 0)
157 	{
158 		s[l] = 'y';
159 		l++;
160 	}
161 	if ((mask & 2) != 0)
162 	{
163 		s[l] = 'z';
164 		l++;
165 	}
166 	if ((mask & 1) != 0)
167 	{
168 		s[l] = 'w';
169 		l++;
170 	}
171 	s[l] = 0;
172 	return l;
173 }
174 
disassemble_swizzle(sourcefields f,char * s)175 int vertex_program_disassembler::disassemble_swizzle(sourcefields f, char *s)
176 {
177 	int t, l;
178 
179 	t = 4;
180 	if (f.SwizzleW == 3)
181 	{
182 		t = t - 1;
183 		if (f.SwizzleZ == 2)
184 		{
185 			t = t - 1;
186 			if (f.SwizzleY == 1)
187 			{
188 				t = t - 1;
189 				if (f.SwizzleX == 0)
190 				{
191 					t = t - 1;
192 				}
193 			}
194 		}
195 	}
196 	*s = 0;
197 	if (t == 0)
198 		return 0;
199 	s[0] = '.';
200 	l = 1;
201 	if (t > 0)
202 	{
203 		s[l] = compchar[f.SwizzleX];
204 		l++;
205 	}
206 	if (t > 1)
207 	{
208 		s[l] = compchar[f.SwizzleY];
209 		l++;
210 	}
211 	if (t > 2)
212 	{
213 		s[l] = compchar[f.SwizzleZ];
214 		l++;
215 	}
216 	if (t > 3)
217 	{
218 		s[l] = compchar[f.SwizzleW];
219 		l++;
220 	}
221 	s[l] = 0;
222 	return l;
223 }
224 
disassemble_source(sourcefields f,fields fi,char * s)225 int vertex_program_disassembler::disassemble_source(sourcefields f, fields fi, char *s)
226 {
227 	int l;
228 
229 	if (f.ParameterType == 0) {
230 		strcpy(s, ",???");
231 		return 4;
232 	}
233 	l = 0;
234 	if (f.Sign != 0) {
235 		s[l] = '-';
236 		l++;
237 	}
238 	if (f.ParameterType == 1) {
239 		s[l] = 'r';
240 		l = l + 1 + sprintf(s + l + 1, "%d", f.TempIndex);
241 	}
242 	else if (f.ParameterType == 2){
243 		s[l] = 'v';
244 		l = l + 1 + sprintf(s + l + 1, "%d", fi.InputIndex);
245 	}
246 	else
247 	{
248 		if (fi.Usea0x != 0)
249 		{
250 			if (fi.SourceConstantIndex >= 96) {
251 				strcpy(s + l, "c[");
252 				l = l + 2;
253 				l = l + sprintf(s + l, "%d", fi.SourceConstantIndex - 96);
254 				strcpy(s + l, "+a0.x]");
255 				l = l + 6;
256 			}
257 			else {
258 				strcpy(s + l, "c[a0.x");
259 				l = l + 6;
260 				l = l + sprintf(s + l, "%d", fi.SourceConstantIndex - 96);
261 				s[l] = ']';
262 				l++;
263 			}
264 		}
265 		else {
266 			strcpy(s + l, "c[");
267 			l = l + 2;
268 			l = l + sprintf(s + l, "%d", fi.SourceConstantIndex - 96);
269 			s[l] = ']';
270 			l++;
271 		}
272 	}
273 	l = l + disassemble_swizzle(f, s + l);
274 	s[l] = 0;
275 	return l;
276 }
277 
disassemble_output(fields f,char * s)278 int vertex_program_disassembler::disassemble_output(fields f, char *s)
279 {
280 	int l;
281 
282 	if (f.OutputSelect == 1) {
283 		strcpy(s, vecouts[f.OutputIndex]);
284 		return strlen(s);
285 	}
286 	else {
287 		strcpy(s, "c[");
288 		l = 2;
289 		l = l + sprintf(s + l, "%d", f.OutputIndex - 96);
290 		s[l] = ']';
291 		l++;
292 	}
293 	s[l] = 0;
294 	return l;
295 }
296 
output_types(fields f,int * o)297 int vertex_program_disassembler::output_types(fields f, int *o)
298 {
299 	o[0] = o[1] = o[2] = o[3] = o[4] = o[5] = 0;
300 	if ((f.VecOperation > 0) && (f.VecTempWriteMask != 0))
301 		o[0] = 1;
302 	if ((f.VecOperation > 0) && (f.OutputWriteMask != 0) && (f.MultiplexerControl == 0))
303 		o[1] = 1;
304 	if ((f.ScaOperation > 0) && (f.OutputWriteMask != 0) && (f.MultiplexerControl == 1))
305 		o[2] = 1;
306 	if ((f.ScaOperation > 0) && (f.ScaTempWriteMask != 0))
307 		o[3] = 1;
308 	if (f.VecOperation == 13)
309 		o[4] = 1;
310 	if (f.EndOfProgram == 1)
311 		o[5] = 1;
312 	return o[0] + o[1] + o[2] + o[3] + o[4] + o[5];
313 }
314 
disassemble(unsigned int * instruction,char * line)315 int vertex_program_disassembler::disassemble(unsigned int *instruction, char *line)
316 {
317 	int b, p;
318 	char *c;
319 
320 	if (state == 0) {
321 		decodefields(instruction, 0, f);
322 		output_types(f, o);
323 		state = 1;
324 	}
325 	if (o[0] != 0)
326 	{
327 		o[0] = 0;
328 		c = line;
329 		strcpy(c, vecops[f.VecOperation]);
330 		c = c + strlen(c);
331 		strcpy(c, " r");
332 		c = c + 2;
333 		c = c + sprintf(c, "%d", f.VecTempIndex);
334 		c = c + disassemble_mask(f.VecTempWriteMask, c);
335 		b = 0;
336 		for (p = 4; p != 0; p = p >> 1)
337 		{
338 			if ((vecpar2[f.VecOperation] & p) != 0) {
339 				c[0] = ',';
340 				c++;
341 				c = c + disassemble_source(f.src[b], f, c);
342 			}
343 			b++;
344 		}
345 		*c = 0;
346 		return 1;
347 	}
348 	if (o[1] != 0)
349 	{
350 		o[1] = 0;
351 		c = line;
352 		strcpy(c, vecops[f.VecOperation]);
353 		c = c + strlen(c);
354 		*c = ' ';
355 		c++;
356 		c = c + disassemble_output(f, c);
357 		c = c + disassemble_mask(f.OutputWriteMask, c);
358 		b = 0;
359 		for (p = 4; p != 0; p = p >> 1)
360 		{
361 			if ((vecpar2[f.VecOperation] & p) != 0) {
362 				*c = ',';
363 				c++;
364 				c = c + disassemble_source(f.src[b], f, c);
365 			}
366 			b++;
367 		}
368 		*c = 0;
369 		return 1;
370 	}
371 	if (o[2] != 0)
372 	{
373 		o[2] = 0;
374 		c = line;
375 		strcpy(c, scaops[f.ScaOperation]);
376 		c = c + strlen(c);
377 		*c = ' ';
378 		c++;
379 		c = c + disassemble_output(f, c);
380 		c = c + disassemble_mask(f.OutputWriteMask, c);
381 		b = 0;
382 		for (p = 4; p != 0; p = p >> 1)
383 		{
384 			if ((scapar2[f.ScaOperation] & p) != 0) {
385 				*c = ',';
386 				c++;
387 				c = c + disassemble_source(f.src[b], f, c);
388 			}
389 			b++;
390 		}
391 		*c = 0;
392 		return 1;
393 	}
394 	if (o[3] != 0)
395 	{
396 		if (f.VecOperation > 0)
397 			b = 1;
398 		else
399 			b = f.VecTempIndex;
400 		o[3] = 0;
401 		c = line;
402 		strcpy(c, scaops[f.ScaOperation]);
403 		c = c + strlen(c);
404 		strcpy(c, " r");
405 		c = c + 2;
406 		c = c + sprintf(c, "%d", b);
407 		c = c + disassemble_mask(f.ScaTempWriteMask, c);
408 		b = 0;
409 		for (p = 4; p != 0; p = p >> 1)
410 		{
411 			if ((scapar2[f.ScaOperation] & p) != 0) {
412 				*c = ',';
413 				c++;
414 				c = c + disassemble_source(f.src[b], f, c);
415 			}
416 			b++;
417 		}
418 		*c = 0;
419 		return 1;
420 	}
421 	if (o[4] != 0)
422 	{
423 		o[4] = 0;
424 		c = line;
425 		c = c + sprintf(c, "MOV a0.x,");
426 		c = c + disassemble_source(f.src[0], f, c);
427 		*c = 0;
428 		return 1;
429 	}
430 	if (o[5] != 0)
431 	{
432 		o[5] = 0;
433 		strcpy(line, "END");
434 		return 1;
435 	}
436 	state = 0;
437 	return 0;
438 }
439 
vertex_program_simulator()440 vertex_program_simulator::vertex_program_simulator()
441 {
442 	for (auto & elem : op)
443 		elem.modified = 0;
444 	initialize_constants();
445 }
446 
set_data(vertex_nv * in,vertex_nv * out)447 void vertex_program_simulator::set_data(vertex_nv *in, vertex_nv *out)
448 {
449 	input = in;
450 	output = out;
451 }
452 
reset()453 void vertex_program_simulator::reset()
454 {
455 	ip = 0;
456 	a0x = 0;
457 	initialize_outputs();
458 	initialize_temps();
459 }
460 
decode_instruction(int address)461 void vertex_program_simulator::decode_instruction(int address)
462 {
463 	instruction *i;
464 
465 	i = &op[address];
466 	i->d.NegateA = i->i[1] & (1 << 8);
467 	i->d.ParameterTypeA = (i->i[2] >> 26) & 3;
468 	i->d.TempIndexA = (i->i[2] >> 28) & 15;
469 	i->d.SwizzleA[0] = (i->i[1] >> 6) & 3;
470 	i->d.SwizzleA[1] = (i->i[1] >> 4) & 3;
471 	i->d.SwizzleA[2] = (i->i[1] >> 2) & 3;
472 	i->d.SwizzleA[3] = (i->i[1] >> 0) & 3;
473 	i->d.NegateB = i->i[2] & (1 << 25);
474 	i->d.ParameterTypeB = (i->i[2] >> 11) & 3;
475 	i->d.TempIndexB = (i->i[2] >> 13) & 15;
476 	i->d.SwizzleB[0] = (i->i[2] >> 23) & 3;
477 	i->d.SwizzleB[1] = (i->i[2] >> 21) & 3;
478 	i->d.SwizzleB[2] = (i->i[2] >> 19) & 3;
479 	i->d.SwizzleB[3] = (i->i[2] >> 17) & 3;
480 	i->d.NegateC = i->i[2] & (1 << 10);
481 	i->d.ParameterTypeC = (i->i[3] >> 28) & 3;
482 	i->d.TempIndexC = ((i->i[2] & 3) << 2) + (i->i[3] >> 30);
483 	i->d.SwizzleC[0] = (i->i[2] >> 8) & 3;
484 	i->d.SwizzleC[1] = (i->i[2] >> 6) & 3;
485 	i->d.SwizzleC[2] = (i->i[2] >> 4) & 3;
486 	i->d.SwizzleC[3] = (i->i[2] >> 2) & 3;
487 	i->d.VecOperation = (VectorialOperation)((i->i[1] >> 21) & 15);
488 	i->d.ScaOperation = (ScalarOperation)((i->i[1] >> 25) & 15);
489 	i->d.OutputWriteMask = ((i->i[3] >> 12) & 15);
490 	i->d.MultiplexerControl = i->i[3] & 4; // 0 : output Rn from vectorial operation 4 : output Rn from scalar operation
491 	i->d.VecTempIndex = (i->i[3] >> 20) & 15;
492 	i->d.OutputIndex = (i->i[3] >> 3) & 255;
493 	i->d.OutputSelect = i->i[3] & 0x800;
494 	i->d.VecTempWriteMask = (i->i[3] >> 24) & 15;
495 	i->d.ScaTempWriteMask = (i->i[3] >> 16) & 15;
496 	i->d.InputIndex = (i->i[1] >> 9) & 15;
497 	i->d.SourceConstantIndex = (i->i[1] >> 13) & 255;
498 	i->d.Usea0x = i->i[3] & 2;
499 	i->d.EndOfProgram = i->i[3] & 1;
500 }
501 
step()502 int vertex_program_simulator::step()
503 {
504 	int p1, p2;
505 	float tmp[3 * 4] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
506 	float tmpv[4] = { 0, 0, 0, 0};
507 	float tmps[4] = { 0, 0, 0, 0};
508 	instruction::decoded *d;
509 
510 #if 0 // useful while debugging to see what instrucion is being executed
511 	static int debugvpi = 0;
512 	char disbuffer[256];
513 	if (debugvpi) {
514 		char *pp;
515 		vertex_program_disassembler vdis;
516 
517 		pp = disbuffer;
518 		while (vdis.disassemble(op[ip].i, pp) != 0) {
519 			pp = pp + strlen(pp);
520 			*pp = '\n';
521 			pp++;
522 			*pp = 0;
523 			printf("%s", disbuffer);
524 		}
525 	}
526 #endif
527 
528 	if (op[ip].modified)
529 		decode_instruction(ip);
530 	d = &(op[ip].d);
531 	// prepare inputs
532 	//  input A
533 	generate_input(&tmp[0], d->NegateA, d->ParameterTypeA, d->TempIndexA, d->SwizzleA);
534 	//  input B
535 	generate_input(&tmp[4], d->NegateB, d->ParameterTypeB, d->TempIndexB, d->SwizzleB);
536 	//  input C
537 	generate_input(&tmp[8], d->NegateC, d->ParameterTypeC, d->TempIndexC, d->SwizzleC);
538 	// compute 2 instructions
539 	//  vectorial
540 	compute_vectorial_operation(tmpv, d->VecOperation, tmp);
541 	//  scalar
542 	compute_scalar_operation(tmps, d->ScaOperation, tmp);
543 	// assign destinations
544 	if (d->VecOperation != VecNOP) {
545 		if (d->VecOperation == VecARL)
546 			//o[4] = 1;
547 			a0x = (int)tmpv[0];
548 		else {
549 			if (d->VecTempWriteMask != 0) { // assign to Rn
550 				//o[0] = 1;
551 				assign_register(d->VecTempIndex, tmpv, d->VecTempWriteMask);
552 			}
553 			if ((d->OutputWriteMask != 0) && (d->MultiplexerControl == 0)) {
554 				//o[1] = 1;
555 				if (d->OutputSelect) { // assign to output
556 					assign_output(d->OutputIndex, tmpv, d->OutputWriteMask);
557 					// remeber, output position == r12
558 					if (d->OutputIndex == 0)
559 						for (p1 = 0; p1 < 4; p1++) {
560 							r_register[12].fv[p1] = output->attribute[d->OutputIndex].fv[p1];
561 						}
562 				}
563 				else { // assign to constant
564 					assign_constant(d->OutputIndex, tmpv, d->OutputWriteMask);
565 				}
566 			}
567 		}
568 	}
569 	if (d->ScaOperation != ScaNOP) {
570 		if (d->ScaTempWriteMask != 0) { // assign to Rn
571 			//o[3] = 1;
572 			if (d->VecOperation != VecNOP)
573 				p2 = 1;
574 			else
575 				p2 = d->VecTempIndex;
576 			assign_register(p2, tmps, d->ScaTempWriteMask);
577 		}
578 		if ((d->OutputWriteMask != 0) && (d->MultiplexerControl != 0)) { // assign to output
579 			//o[2] = 1;
580 			assign_output(d->OutputIndex, tmps, d->OutputWriteMask);
581 			// remeber, output position == r12
582 			if (d->OutputIndex == 0) {
583 				for (p1 = 0; p1 < 4; p1++) {
584 					r_register[12].fv[p1] = output->attribute[d->OutputIndex].fv[p1];
585 				}
586 			}
587 		}
588 	}
589 	return d->EndOfProgram;
590 }
591 
execute()592 void vertex_program_simulator::execute()
593 {
594 	int c;
595 
596 	c = 0;
597 	do {
598 		c = step();
599 		ip++;
600 	} while (c == 0);
601 }
602 
jump(int address)603 void vertex_program_simulator::jump(int address)
604 {
605 	ip = address;
606 }
607 
process(int address,vertex_nv * in,vertex_nv * out,int count)608 void vertex_program_simulator::process(int address, vertex_nv *in, vertex_nv *out, int count)
609 {
610 #if 0 // useful while debugging to see what is being executed
611 	static int debugvps = 0;
612 	if (debugvps) {
613 		FILE *f;
614 		char *pp;
615 		vertex_program_disassembler vdis;
616 		char disbuffer[128];
617 
618 		debugvps--;
619 		if ((f = fopen("vertexshader_debug.txt", "wb")) != nullptr) {
620 			jump(address);
621 			fprintf(f, "SHADER:\n");
622 			for (int t = 0; t < 128; t++) {
623 				pp = disbuffer;
624 				while (vdis.disassemble(op[ip + t].i, pp) != 0) {
625 					pp = pp + strlen(pp);
626 					*pp = '\n';
627 					pp++;
628 					*pp = 0;
629 				}
630 				fprintf(f, "%08X %08X %08X %s", op[ip + t].i[1], op[ip + t].i[2], op[ip + t].i[3], disbuffer);
631 				if (op[ip + t].i[3] & 1)
632 					break;
633 			}
634 			fprintf(f, "INPUTS:\n");
635 			for (int t = 0; t < 16; t++)
636 				fprintf(f, "v%d %f %f %f %f\n", t, in->attribute[t].fv[0], in->attribute[t].fv[1], in->attribute[t].fv[2], in->attribute[t].fv[3]);
637 			fprintf(f, "CONSTANTS:\n");
638 			for (int t = 0; t < 192; t++)
639 				fprintf(f, "c[%d] %f %f %f %f\n", t - 96, c_constant[t].fv[0], c_constant[t].fv[1], c_constant[t].fv[2], c_constant[t].fv[3]);
640 			fclose(f);
641 		}
642 	}
643 #endif
644 	set_data(in, out);
645 	while (count > 0) {
646 		reset();
647 		jump(address);
648 		execute();
649 		input++;
650 		output++;
651 		count--;
652 	}
653 }
654 
status()655 int vertex_program_simulator::status()
656 {
657 	return ip;
658 }
659 
initialize_outputs()660 void vertex_program_simulator::initialize_outputs()
661 {
662 	for (int n = 0; n < 16; n++) {
663 		output->attribute[n].fv[0] = output->attribute[n].fv[1] = output->attribute[n].fv[2] = 0;
664 		output->attribute[n].fv[3] = 1;
665 	}
666 }
667 
initialize_temps()668 void vertex_program_simulator::initialize_temps()
669 {
670 	for (auto & elem : r_register) {
671 		for (int m = 0; m < 4; m++)
672 			elem.fv[m] = 0;
673 	}
674 }
675 
initialize_constants()676 void vertex_program_simulator::initialize_constants()
677 {
678 	for (auto & elem : c_constant) {
679 		for (int m = 0; m < 4;m++)
680 			elem.fv[m] = 0;
681 	}
682 }
683 
generate_input(float t[4],int sign,int type,int temp,int swizzle[4])684 void vertex_program_simulator::generate_input(float t[4], int sign, int type, int temp, int swizzle[4])
685 {
686 	float sgn = 1;
687 
688 	if (sign)
689 		sgn = -1;
690 	if (type == 1) {
691 		t[0] = sgn * r_register[temp].fv[swizzle[0]];
692 		t[1] = sgn * r_register[temp].fv[swizzle[1]];
693 		t[2] = sgn * r_register[temp].fv[swizzle[2]];
694 		t[3] = sgn * r_register[temp].fv[swizzle[3]];
695 	}
696 	else if (type == 2) {
697 		int InputIndex = op[ip].d.InputIndex;
698 		t[0] = sgn * input->attribute[InputIndex].fv[swizzle[0]];
699 		t[1] = sgn * input->attribute[InputIndex].fv[swizzle[1]];
700 		t[2] = sgn * input->attribute[InputIndex].fv[swizzle[2]];
701 		t[3] = sgn * input->attribute[InputIndex].fv[swizzle[3]];
702 	}
703 	else if (type == 3) {
704 		int SourceConstantIndex = op[ip].d.SourceConstantIndex;
705 		if (op[ip].d.Usea0x)
706 			SourceConstantIndex = SourceConstantIndex + a0x;
707 		t[0] = sgn * c_constant[SourceConstantIndex].fv[swizzle[0]];
708 		t[1] = sgn * c_constant[SourceConstantIndex].fv[swizzle[1]];
709 		t[2] = sgn * c_constant[SourceConstantIndex].fv[swizzle[2]];
710 		t[3] = sgn * c_constant[SourceConstantIndex].fv[swizzle[3]];
711 	}
712 }
713 
compute_vectorial_operation(float t_out[4],int instruction,float par_in[3* 4])714 void vertex_program_simulator::compute_vectorial_operation(float t_out[4], int instruction, float par_in[3 * 4])
715 {
716 	const int p1_A = 0;
717 	const int p2_B = 4;
718 	const int p3_C = 8;
719 
720 	// t_out <= instruction(par_in)
721 	switch (instruction) {
722 	case 0: // "NOP"
723 		break;
724 	case 1: // "MOV"
725 		t_out[0] = par_in[p1_A + 0];
726 		t_out[1] = par_in[p1_A + 1];
727 		t_out[2] = par_in[p1_A + 2];
728 		t_out[3] = par_in[p1_A + 3];
729 		break;
730 	case 2: // "MUL"
731 		t_out[0] = par_in[p1_A + 0] * par_in[p2_B + 0];
732 		t_out[1] = par_in[p1_A + 1] * par_in[p2_B + 1];
733 		t_out[2] = par_in[p1_A + 2] * par_in[p2_B + 2];
734 		t_out[3] = par_in[p1_A + 3] * par_in[p2_B + 3];
735 		break;
736 	case 3: // "ADD"
737 		t_out[0] = par_in[p1_A + 0] + par_in[p3_C + 0];
738 		t_out[1] = par_in[p1_A + 1] + par_in[p3_C + 1];
739 		t_out[2] = par_in[p1_A + 2] + par_in[p3_C + 2];
740 		t_out[3] = par_in[p1_A + 3] + par_in[p3_C + 3];
741 		break;
742 	case 4: // "MAD"
743 		t_out[0] = par_in[p1_A + 0] * par_in[p2_B + 0] + par_in[p3_C + 0];
744 		t_out[1] = par_in[p1_A + 1] * par_in[p2_B + 1] + par_in[p3_C + 1];
745 		t_out[2] = par_in[p1_A + 2] * par_in[p2_B + 2] + par_in[p3_C + 2];
746 		t_out[3] = par_in[p1_A + 3] * par_in[p2_B + 3] + par_in[p3_C + 3];
747 		break;
748 	case 5: // "DP3"
749 		t_out[0] = par_in[p1_A + 0] * par_in[p2_B + 0] + par_in[p1_A + 1] * par_in[p2_B + 1] + par_in[p1_A + 2] * par_in[p2_B + 2];
750 		t_out[1] = t_out[2] = t_out[3] = t_out[0];
751 		break;
752 	case 6: // "DPH"
753 		t_out[0] = par_in[p1_A + 0] * par_in[p2_B + 0] + par_in[p1_A + 1] * par_in[p2_B + 1] + par_in[p1_A + 2] * par_in[p2_B + 2] + par_in[p2_B + 3];
754 		t_out[1] = t_out[2] = t_out[3] = t_out[0];
755 		break;
756 	case 7: // "DP4"
757 		t_out[0] = par_in[p1_A + 0] * par_in[p2_B + 0] + par_in[p1_A + 1] * par_in[p2_B + 1] + par_in[p1_A + 2] * par_in[p2_B + 2] + par_in[p1_A + 3] * par_in[p2_B + 3];
758 		t_out[1] = t_out[2] = t_out[3] = t_out[0];
759 		break;
760 	case 8: // "DST"
761 		t_out[0] = 1.0;
762 		t_out[1] = par_in[p1_A + 1] * par_in[p2_B + 1];
763 		t_out[2] = par_in[p1_A + 2];
764 		t_out[3] = par_in[p2_B + 3];
765 		break;
766 	case 9: // "MIN"
767 		t_out[0] = fmin(par_in[p1_A + 0], par_in[p2_B + 0]);
768 		t_out[1] = fmin(par_in[p1_A + 1], par_in[p2_B + 1]);
769 		t_out[2] = fmin(par_in[p1_A + 2], par_in[p2_B + 2]);
770 		t_out[3] = fmin(par_in[p1_A + 3], par_in[p2_B + 3]);
771 		break;
772 	case 10: // "MAX"
773 		t_out[0] = fmax(par_in[p1_A + 0], par_in[p2_B + 0]);
774 		t_out[1] = fmax(par_in[p1_A + 1], par_in[p2_B + 1]);
775 		t_out[2] = fmax(par_in[p1_A + 2], par_in[p2_B + 2]);
776 		t_out[3] = fmax(par_in[p1_A + 3], par_in[p2_B + 3]);
777 		break;
778 	case 11: // "SLT"
779 		t_out[0] = (par_in[p1_A + 0] < par_in[p2_B + 0]) ? 1.0 : 0;
780 		t_out[1] = (par_in[p1_A + 1] < par_in[p2_B + 1]) ? 1.0 : 0;
781 		t_out[2] = (par_in[p1_A + 2] < par_in[p2_B + 2]) ? 1.0 : 0;
782 		t_out[3] = (par_in[p1_A + 3] < par_in[p2_B + 3]) ? 1.0 : 0;
783 		break;
784 	case 12: // "SGE"
785 		t_out[0] = (par_in[p1_A + 0] >= par_in[p2_B + 0]) ? 1.0 : 0;
786 		t_out[1] = (par_in[p1_A + 1] >= par_in[p2_B + 1]) ? 1.0 : 0;
787 		t_out[2] = (par_in[p1_A + 2] >= par_in[p2_B + 2]) ? 1.0 : 0;
788 		t_out[3] = (par_in[p1_A + 3] >= par_in[p2_B + 3]) ? 1.0 : 0;
789 		break;
790 	case 13: // "ARL"
791 		t_out[0] = par_in[p1_A + 0];
792 	}
793 }
794 
compute_scalar_operation(float t_out[4],int instruction,float par_in[3* 4])795 void vertex_program_simulator::compute_scalar_operation(float t_out[4], int instruction, float par_in[3 * 4])
796 {
797 	//const int p1_A = 0;
798 	//const int p2_B = 4;
799 	const int p3_C = 8;
800 	union {
801 		float f;
802 		unsigned int i;
803 	} t;
804 	int e;
805 
806 	// t_out <= instruction(par_in)
807 	switch (instruction) {
808 	case 0: // "NOP"
809 		break;
810 	case 1: // "IMV"
811 		t_out[0] = par_in[p3_C + 0];
812 		t_out[1] = par_in[p3_C + 1];
813 		t_out[2] = par_in[p3_C + 2];
814 		t_out[3] = par_in[p3_C + 3];
815 		break;
816 	case 2: // "RCP"
817 		if (par_in[p3_C + 0] == 0)
818 			t.f = std::numeric_limits<float>::infinity();
819 		else if (par_in[p3_C + 0] == 1.0f)
820 			t.f = 1.0f;
821 		else
822 			t.f = 1.0f / par_in[p3_C + 0];
823 		t_out[0] = t_out[1] = t_out[2] = t_out[3] = t.f;
824 		break;
825 	case 3: // "RCC"
826 		t.f = par_in[p3_C + 0];
827 		if ((t.f < 0) && (t.f > -5.42101e-20f))
828 			t.f = -5.42101e-20f;
829 		else if ((t.f >= 0) && (t.f < 5.42101e-20f))
830 			t.f = 5.42101e-20f;
831 		if (t.f != 1.0f)
832 			t.f = 1.0f / t.f;
833 		t_out[0] = t_out[1] = t_out[2] = t_out[3] = t.f;
834 		break;
835 	case 4: // "RSQ"
836 		t_out[0] = t_out[1] = t_out[2] = t_out[3] = 1.0f / sqrtf(fabsf(par_in[p3_C + 0]));
837 		break;
838 	case 5: // "EXP"
839 		t_out[0] = pow(2, floor(par_in[p3_C + 0]));
840 		t_out[1] = par_in[p3_C + 0] - floorf(par_in[p3_C + 0]);
841 		t.f = pow(2, par_in[p3_C + 0]);
842 		t.i = t.i & 0xffffff00;
843 		t_out[2] = t.f;
844 		t_out[3] = 1.0;
845 		break;
846 	case 6: // "LOG"
847 		t_out[1] = frexp(par_in[p3_C + 0], &e)*2.0; // frexp gives mantissa as 0.5....1
848 		t_out[0] = e - 1;
849 #ifndef __OS2__
850 		t.f = log2(fabsf(par_in[p3_C + 0]));
851 #else
852 		static double log_2 = 0.0;
853 		if (log_2 == 0.0)
854 			log_2 = log(2);
855 		t.f = log(abs(par_in[p3_C + 0])) / log_2;
856 #endif
857 		t.i = t.i & 0xffffff00;
858 		t_out[2] = t.f;
859 		t_out[3] = 1.0;
860 		break;
861 	case 7: // "LIT"
862 		t_out[0] = 1.0;
863 		t_out[1] = fmax(0, fmin(par_in[p3_C + 0], 1.0f));
864 		t_out[2] = par_in[p3_C + 0] > 0 ? pow(fmax(par_in[p3_C + 1], 0), par_in[p3_C + 3]) : 0;
865 		t_out[3] = 1.0;
866 		break;
867 	}
868 }
869 
assign_output(int index,float t[4],int mask)870 void vertex_program_simulator::assign_output(int index, float t[4], int mask)
871 {
872 	for (int p1 = 0; p1 < 4; p1++) {
873 		if (mask & 8)
874 			output->attribute[index].fv[p1] = t[p1];
875 		mask = mask << 1;
876 	}
877 }
878 
assign_register(int index,float t[4],int mask)879 void vertex_program_simulator::assign_register(int index, float t[4], int mask)
880 {
881 	for (int p1 = 0; p1 < 4; p1++) {
882 		if (mask & 8)
883 			r_register[index].fv[p1] = t[p1];
884 		mask = mask << 1;
885 	}
886 }
887 
assign_constant(int index,float t[4],int mask)888 void vertex_program_simulator::assign_constant(int index, float t[4], int mask)
889 {
890 	for (int p1 = 0; p1 < 4; p1++) {
891 		if (mask & 8)
892 			c_constant[index].fv[p1] = t[p1];
893 		mask = mask << 1;
894 	}
895 }
896 
897 /*
898  * Graphics
899  */
900 
dilate0(uint32_t value,int bits)901 uint32_t nv2a_renderer::dilate0(uint32_t value, int bits) // dilate first "bits" bits in "value"
902 {
903 	uint32_t x, m1, m2, m3;
904 	int a;
905 
906 	x = value;
907 	for (a = 0; a < bits; a++)
908 	{
909 		m2 = 1 << (a << 1);
910 		m1 = m2 - 1;
911 		m3 = (~m1) << 1;
912 		x = (x & m1) + (x & m2) + ((x & m3) << 1);
913 	}
914 	return x;
915 }
916 
dilate1(uint32_t value,int bits)917 uint32_t nv2a_renderer::dilate1(uint32_t value, int bits) // dilate first "bits" bits in "value"
918 {
919 	uint32_t x, m1, m2, m3;
920 	int a;
921 
922 	x = value;
923 	for (a = 0; a < bits; a++)
924 	{
925 		m2 = 1 << (a << 1);
926 		m1 = m2 - 1;
927 		m3 = (~m1) << 1;
928 		x = (x & m1) + ((x & m2) << 1) + ((x & m3) << 1);
929 	}
930 	return x;
931 }
932 
computedilated(void)933 void nv2a_renderer::computedilated(void)
934 {
935 	int a, b;
936 
937 	for (b = 0; b < 16; b++)
938 		for (a = 0; a < 2048; a++) {
939 			dilated0[b][a] = dilate0(a, b);
940 			dilated1[b][a] = dilate1(a, b);
941 		}
942 	for (b = 0; b < 16; b++)
943 		for (a = 0; a < 16; a++)
944 			dilatechose[(b << 4) + a] = (a < b ? a : b);
945 }
946 
direct_access_ptr(offs_t address)947 inline uint8_t *nv2a_renderer::direct_access_ptr(offs_t address)
948 {
949 #ifdef DEBUG_CHECKS
950 	if (address >= 512*1024*1024)
951 		machine().logerror("Bad address in direct_access_ptr !\n");
952 #endif
953 	return basemempointer + address;
954 }
955 
geforce_commandkind(uint32_t word)956 nv2a_renderer::COMMAND nv2a_renderer::geforce_commandkind(uint32_t word)
957 {
958 	if ((word & 0x00000003) == 0x00000002)
959 		return COMMAND::CALL;
960 	if ((word & 0x00000003) == 0x00000001)
961 		return COMMAND::JUMP;
962 	if ((word & 0xE0030003) == 0x40000000)
963 		return COMMAND::NON_INCREASING;
964 	if ((word & 0xE0000003) == 0x20000000)
965 		return COMMAND::OLD_JUMP;
966 	if ((word & 0xFFFF0003) == 0x00030000)
967 		return COMMAND::LONG_NON_INCREASING;
968 	if ((word & 0xFFFFFFFF) == 0x00020000)
969 		return COMMAND::RETURN;
970 	if ((word & 0xFFFF0003) == 0x00010000)
971 		return COMMAND::SLI_CONDITIONAL;
972 	if ((word & 0xE0030003) == 0x00000000)
973 		return COMMAND::INCREASING;
974 	return COMMAND::INVALID;
975 }
976 
geforce_object_offset(uint32_t handle)977 uint32_t nv2a_renderer::geforce_object_offset(uint32_t handle)
978 {
979 	uint32_t h = ((((handle >> 11) ^ handle) >> 11) ^ handle) & 0x7ff;
980 	uint32_t o = (pfifo[0x210 / 4] & 0x1ff) << 8; // 0x1ff is not certain
981 	uint32_t e = o + h * 8; // at 0xfd000000+0x00700000
982 	uint32_t w;
983 
984 	if (ramin[e / 4] != handle) {
985 		// this should never happen
986 		for (uint32_t aa = o / 4; aa < (sizeof(ramin) / 4); aa = aa + 2) {
987 			if (ramin[aa] == handle) {
988 				e = aa * 4;
989 			}
990 		}
991 	}
992 	w = ramin[e / 4 + 1];
993 	return (w & 0xffff) * 0x10; // 0xffff is not certain
994 }
995 
geforce_read_dma_object(uint32_t handle,uint32_t & offset,uint32_t & size)996 void nv2a_renderer::geforce_read_dma_object(uint32_t handle, uint32_t &offset, uint32_t &size)
997 {
998 	//uint32_t objclass,pt_present,pt_linear,access,target,rorw;
999 	uint32_t dma_adjust, dma_frame;
1000 	uint32_t o = geforce_object_offset(handle);
1001 
1002 	o = o / 4;
1003 	//objclass=ramin[o] & 0xfff;
1004 	//pt_present=(ramin[o] >> 12) & 1;
1005 	//pt_linear=(ramin[o] >> 13) & 1;
1006 	//access=(ramin[o] >> 14) & 3;
1007 	//target=(ramin[o] >> 16) & 3;
1008 	dma_adjust = (ramin[o] >> 20) & 0xfff;
1009 	size = ramin[o + 1];
1010 	//rorw=ramin[o+2] & 1;
1011 	dma_frame = ramin[o + 2] & 0xfffff000;
1012 	offset = dma_frame + dma_adjust;
1013 }
1014 
1015 /*void debug(uint32_t *bmp, int width, int eight, float x1, float y1, float x2, float y2, uint32_t color)
1016 {
1017 int xx1,yy1,xx2,yy2;
1018 
1019     xx1=x1;
1020     xx2=x2;
1021     yy1=y1;
1022     yy2=y2;
1023     if (xx1 == xx2) {
1024         if (yy1 > yy2) {
1025             int t=yy1;
1026             yy1=yy2;
1027             yy2=t;
1028         }
1029         for (int y=yy1;y <= yy2;y++) {
1030             *(bmp+y*width+xx1) = color;
1031         }
1032     } else if (yy1 == yy2) {
1033         if (xx1 > xx2) {
1034             int t=xx1;
1035             xx1=xx2;
1036             xx2=t;
1037         }
1038         for (int x=xx1;x <= xx2;x++)
1039             *(bmp+yy1*width+x) = color;
1040     }
1041 }*/
1042 
convert_a4r4g4b4_a8r8g8b8(uint32_t a4r4g4b4)1043 inline uint32_t convert_a4r4g4b4_a8r8g8b8(uint32_t a4r4g4b4)
1044 {
1045 	uint32_t a8r8g8b8;
1046 	int ca, cr, cg, cb;
1047 
1048 	cb = pal4bit(a4r4g4b4 & 0x000f);
1049 	cg = pal4bit((a4r4g4b4 & 0x00f0) >> 4);
1050 	cr = pal4bit((a4r4g4b4 & 0x0f00) >> 8);
1051 	ca = pal4bit((a4r4g4b4 & 0xf000) >> 12);
1052 	a8r8g8b8 = (ca << 24) | (cr << 16) | (cg << 8) | (cb); // color converted to 8 bits per component
1053 	return a8r8g8b8;
1054 }
1055 
convert_a1r5g5b5_a8r8g8b8(uint32_t a1r5g5b5)1056 inline uint32_t convert_a1r5g5b5_a8r8g8b8(uint32_t a1r5g5b5)
1057 {
1058 	uint32_t a8r8g8b8;
1059 	int ca, cr, cg, cb;
1060 
1061 	cb = pal5bit(a1r5g5b5 & 0x001f);
1062 	cg = pal5bit((a1r5g5b5 & 0x03e0) >> 5);
1063 	cr = pal5bit((a1r5g5b5 & 0x7c00) >> 10);
1064 	ca = a1r5g5b5 & 0x8000 ? 0xff : 0;
1065 	a8r8g8b8 = (ca << 24) | (cr << 16) | (cg << 8) | (cb); // color converted to 8 bits per component
1066 	return a8r8g8b8;
1067 }
1068 
convert_r5g6b5_r8g8b8(uint32_t r5g6b5)1069 inline uint32_t convert_r5g6b5_r8g8b8(uint32_t r5g6b5)
1070 {
1071 	uint32_t r8g8b8;
1072 	int cr, cg, cb;
1073 
1074 	cb = pal5bit(r5g6b5 & 0x001f);
1075 	cg = pal6bit((r5g6b5 & 0x07e0) >> 5);
1076 	cr = pal5bit((r5g6b5 & 0xf800) >> 11);
1077 	r8g8b8 = (cr << 16) | (cg << 8) | (cb); // color converted to 8 bits per component
1078 	return r8g8b8;
1079 }
1080 
texture_get_texel(int number,int x,int y)1081 uint32_t nv2a_renderer::texture_get_texel(int number, int x, int y)
1082 {
1083 	uint32_t to, s, c, sa, ca;
1084 	uint32_t a4r4g4b4, a1r5g5b5, r5g6b5;
1085 	int bx, by;
1086 	int color0, color1, color0m2, color1m2, alpha0, alpha1;
1087 	uint32_t codes;
1088 	uint64_t alphas;
1089 	int cr, cg, cb;
1090 	int sizeu, sizev;
1091 
1092 	if (texture[number].rectangle == false) {
1093 		sizeu = texture[number].sizes;
1094 		sizev = texture[number].sizet;
1095 	}
1096 	else {
1097 		sizeu = texture[number].rectwidth;
1098 		sizev = texture[number].rectheight;
1099 	}
1100 	switch (texture[number].addrmodes) {
1101 	default:
1102 	case 1: // wrap
1103 		x = x % sizeu;
1104 		if (x < 0)
1105 			x = sizeu + x;
1106 		break;
1107 	case 3: // clamp
1108 		if (x < 0)
1109 			x = 0;
1110 		if (x >= sizeu)
1111 			x = sizeu - 1;
1112 		break;
1113 	}
1114 	switch (texture[number].addrmodet) {
1115 	default:
1116 	case 1: // wrap
1117 		y = y % sizev;
1118 		if (y < 0)
1119 			y = sizev + y;
1120 		break;
1121 	case 3: // clamp
1122 		if (y < 0)
1123 			y = 0;
1124 		if (y >= sizev)
1125 			y = sizev - 1;
1126 		break;
1127 	}
1128 	switch (texture[number].format) {
1129 	case NV2A_TEX_FORMAT::A8R8G8B8:
1130 		to = dilated0[texture[number].dilate][x] + dilated1[texture[number].dilate][y]; // offset of texel in texture memory
1131 		return *(((uint32_t *)texture[number].buffer) + to); // get texel color
1132 	case NV2A_TEX_FORMAT::X8R8G8B8:
1133 		to = dilated0[texture[number].dilate][x] + dilated1[texture[number].dilate][y]; // offset of texel in texture memory
1134 		return 0xff000000 | (*(((uint32_t*)texture[number].buffer) + to) & 0xffffff); // get texel color
1135 	case NV2A_TEX_FORMAT::DXT1:
1136 		bx = x >> 2;
1137 		by = y >> 2;
1138 		x = x & 3;
1139 		y = y & 3;
1140 		to = bx + by*(sizeu >> 2);
1141 		color0 = *((uint16_t *)(((uint64_t *)texture[number].buffer) + to) + 0);
1142 		color1 = *((uint16_t *)(((uint64_t *)texture[number].buffer) + to) + 1);
1143 		codes = *((uint32_t *)(((uint64_t *)texture[number].buffer) + to) + 1);
1144 		s = (y << 3) + (x << 1);
1145 		c = (codes >> s) & 3;
1146 		c = c + (color0 > color1 ? 0 : 4);
1147 		color0m2 = color0 << 1;
1148 		color1m2 = color1 << 1;
1149 		switch (c) {
1150 		case 0:
1151 			return 0xff000000 + convert_r5g6b5_r8g8b8(color0);
1152 		case 1:
1153 			return 0xff000000 + convert_r5g6b5_r8g8b8(color1);
1154 		case 2:
1155 			cb = pal5bit(((color0m2 & 0x003e) + (color1 & 0x001f)) / 3);
1156 			cg = pal6bit(((color0m2 & 0x0fc0) + (color1 & 0x07e0)) / 3 >> 5);
1157 			cr = pal5bit(((color0m2 & 0x1f000) + color1) / 3 >> 11);
1158 			return 0xff000000 | (cr << 16) | (cg << 8) | (cb);
1159 		case 3:
1160 			cb = pal5bit(((color1m2 & 0x003e) + (color0 & 0x001f)) / 3);
1161 			cg = pal6bit(((color1m2 & 0x0fc0) + (color0 & 0x07e0)) / 3 >> 5);
1162 			cr = pal5bit(((color1m2 & 0x1f000) + color0) / 3 >> 11);
1163 			return 0xff000000 | (cr << 16) | (cg << 8) | (cb);
1164 		case 4:
1165 			return 0xff000000 + convert_r5g6b5_r8g8b8(color0);
1166 		case 5:
1167 			return 0xff000000 + convert_r5g6b5_r8g8b8(color1);
1168 		case 6:
1169 			cb = pal5bit(((color0 & 0x001f) + (color1 & 0x001f)) / 2);
1170 			cg = pal6bit(((color0 & 0x07e0) + (color1 & 0x07e0)) / 2 >> 5);
1171 			cr = pal5bit(((color0 & 0xf800) + (color1 & 0xf800)) / 2 >> 11);
1172 			return 0xff000000 | (cr << 16) | (cg << 8) | (cb);
1173 		default:
1174 			return 0xff000000;
1175 		}
1176 	case NV2A_TEX_FORMAT::DXT3:
1177 		bx = x >> 2;
1178 		by = y >> 2;
1179 		x = x & 3;
1180 		y = y & 3;
1181 		to = (bx + by*(sizeu >> 2)) << 1;
1182 		color0 = *((uint16_t *)(((uint64_t *)texture[number].buffer) + to) + 4);
1183 		color1 = *((uint16_t *)(((uint64_t *)texture[number].buffer) + to) + 5);
1184 		codes = *((uint32_t *)(((uint64_t *)texture[number].buffer) + to) + 3);
1185 		alphas = *(((uint64_t *)texture[number].buffer) + to);
1186 		s = (y << 3) + (x << 1);
1187 		sa = ((y << 2) + x) << 2;
1188 		c = (codes >> s) & 3;
1189 		ca = (alphas >> sa) & 15;
1190 		switch (c) {
1191 		case 0:
1192 			return ((ca + (ca << 4)) << 24) + convert_r5g6b5_r8g8b8(color0);
1193 		case 1:
1194 			return ((ca + (ca << 4)) << 24) + convert_r5g6b5_r8g8b8(color1);
1195 		case 2:
1196 			cb = pal5bit((2 * (color0 & 0x001f) + (color1 & 0x001f)) / 3);
1197 			cg = pal6bit((2 * (color0 & 0x07e0) + (color1 & 0x07e0)) / 3 >> 5);
1198 			cr = pal5bit((2 * (color0 & 0xf800) + (color1 & 0xf800)) / 3 >> 11);
1199 			return ((ca + (ca << 4)) << 24) | (cr << 16) | (cg << 8) | (cb);
1200 		default:
1201 			cb = pal5bit(((color0 & 0x001f) + 2 * (color1 & 0x001f)) / 3);
1202 			cg = pal6bit(((color0 & 0x07e0) + 2 * (color1 & 0x07e0)) / 3 >> 5);
1203 			cr = pal5bit(((color0 & 0xf800) + 2 * (color1 & 0xf800)) / 3 >> 11);
1204 			return ((ca + (ca << 4)) << 24) | (cr << 16) | (cg << 8) | (cb);
1205 		}
1206 	case NV2A_TEX_FORMAT::A4R4G4B4:
1207 		to = dilated0[texture[number].dilate][x] + dilated1[texture[number].dilate][y]; // offset of texel in texture memory
1208 		a4r4g4b4 = *(((uint16_t *)texture[number].buffer) + to); // get texel color
1209 		return convert_a4r4g4b4_a8r8g8b8(a4r4g4b4);
1210 	case NV2A_TEX_FORMAT::A8:
1211 		to = dilated0[texture[number].dilate][x] + dilated1[texture[number].dilate][y]; // offset of texel in texture memory
1212 		c = *(((uint8_t*)texture[number].buffer) + to); // get texel color
1213 		return c << 24;
1214 	case NV2A_TEX_FORMAT::A1R5G5B5:
1215 		to = dilated0[texture[number].dilate][x] + dilated1[texture[number].dilate][y]; // offset of texel in texture memory
1216 		a1r5g5b5 = *(((uint16_t *)texture[number].buffer) + to); // get texel color
1217 		return convert_a1r5g5b5_a8r8g8b8(a1r5g5b5);
1218 	case NV2A_TEX_FORMAT::R5G6B5:
1219 		to = dilated0[texture[number].dilate][x] + dilated1[texture[number].dilate][y]; // offset of texel in texture memory
1220 		r5g6b5 = *(((uint16_t *)texture[number].buffer) + to); // get texel color
1221 		return 0xff000000 + convert_r5g6b5_r8g8b8(r5g6b5);
1222 	case NV2A_TEX_FORMAT::R8G8B8_RECT:
1223 		to = texture[number].rectangle_pitch*y + (x << 2);
1224 		return *((uint32_t *)(((uint8_t *)texture[number].buffer) + to));
1225 	case NV2A_TEX_FORMAT::A8R8G8B8_RECT:
1226 		to = texture[number].rectangle_pitch*y + (x << 2);
1227 		return *((uint32_t *)(((uint8_t *)texture[number].buffer) + to));
1228 	case NV2A_TEX_FORMAT::DXT5:
1229 		bx = x >> 2;
1230 		by = y >> 2;
1231 		x = x & 3;
1232 		y = y & 3;
1233 		to = (bx + by*(sizeu >> 2)) << 1;
1234 		color0 = *((uint16_t *)(((uint64_t *)texture[number].buffer) + to) + 4);
1235 		color1 = *((uint16_t *)(((uint64_t *)texture[number].buffer) + to) + 5);
1236 		codes = *((uint32_t *)(((uint64_t *)texture[number].buffer) + to) + 3);
1237 		alpha0 = *((uint8_t *)(((uint64_t *)texture[number].buffer) + to) + 0);
1238 		alpha1 = *((uint8_t *)(((uint64_t *)texture[number].buffer) + to) + 1);
1239 		alphas = *(((uint64_t *)texture[number].buffer) + to);
1240 		s = (y << 3) + (x << 1);
1241 		sa = ((y << 2) + x) * 3;
1242 		c = (codes >> s) & 3;
1243 		ca = (alphas >> sa) & 7;
1244 		ca = ca + (alpha0 > alpha1 ? 0 : 8);
1245 		switch (ca) {
1246 		case 0:
1247 			ca = alpha0;
1248 			break;
1249 		case 1:
1250 			ca = alpha1;
1251 			break;
1252 		case 2:
1253 			ca = (6 * alpha0 + 1 * alpha1) / 7;
1254 			break;
1255 		case 3:
1256 			ca = (5 * alpha0 + 2 * alpha1) / 7;
1257 			break;
1258 		case 4:
1259 			ca = (4 * alpha0 + 3 * alpha1) / 7;
1260 			break;
1261 		case 5:
1262 			ca = (3 * alpha0 + 4 * alpha1) / 7;
1263 			break;
1264 		case 6:
1265 			ca = (2 * alpha0 + 5 * alpha1) / 7;
1266 			break;
1267 		case 7:
1268 			ca = (1 * alpha0 + 6 * alpha1) / 7;
1269 			break;
1270 		case 8:
1271 			ca = alpha0;
1272 			break;
1273 		case 9:
1274 			ca = alpha1;
1275 			break;
1276 		case 10:
1277 			ca = (4 * alpha0 + 1 * alpha1) / 5;
1278 			break;
1279 		case 11:
1280 			ca = (3 * alpha0 + 2 * alpha1) / 5;
1281 			break;
1282 		case 12:
1283 			ca = (2 * alpha0 + 3 * alpha1) / 5;
1284 			break;
1285 		case 13:
1286 			ca = (1 * alpha0 + 4 * alpha1) / 5;
1287 			break;
1288 		case 14:
1289 			ca = 0;
1290 			break;
1291 		case 15:
1292 			ca = 255;
1293 			break;
1294 		}
1295 		switch (c) {
1296 		case 0:
1297 			return (ca << 24) + convert_r5g6b5_r8g8b8(color0);
1298 		case 1:
1299 			return (ca << 24) + convert_r5g6b5_r8g8b8(color1);
1300 		case 2:
1301 			cb = pal5bit((2 * (color0 & 0x001f) + (color1 & 0x001f)) / 3);
1302 			cg = pal6bit((2 * (color0 & 0x07e0) + (color1 & 0x07e0)) / 3 >> 5);
1303 			cr = pal5bit((2 * (color0 & 0xf800) + (color1 & 0xf800)) / 3 >> 11);
1304 			return (ca << 24) | (cr << 16) | (cg << 8) | (cb);
1305 		default:
1306 			cb = pal5bit(((color0 & 0x001f) + 2 * (color1 & 0x001f)) / 3);
1307 			cg = pal6bit(((color0 & 0x07e0) + 2 * (color1 & 0x07e0)) / 3 >> 5);
1308 			cr = pal5bit(((color0 & 0xf800) + 2 * (color1 & 0xf800)) / 3 >> 11);
1309 			return (ca << 24) | (cr << 16) | (cg << 8) | (cb);
1310 		}
1311 	default:
1312 		return 0xff00ff00;
1313 	}
1314 }
1315 
read_pixel(int x,int y,int32_t c[4])1316 inline uint8_t *nv2a_renderer::read_pixel(int x, int y, int32_t c[4])
1317 {
1318 	uint32_t offset;
1319 	uint32_t color;
1320 	uint32_t *addr;
1321 	uint16_t *addr16;
1322 	uint8_t *addr8;
1323 
1324 	if (type_rendertarget == NV2A_RT_TYPE::SWIZZLED)
1325 		offset = (dilated0[dilate_rendertarget][x] + dilated1[dilate_rendertarget][y]) * bytespixel_rendertarget;
1326 	else // type_rendertarget == LINEAR
1327 		offset = pitch_rendertarget * y + x * bytespixel_rendertarget;
1328 #ifdef DEBUG_CHECKS
1329 	if (offset >= size_rendertarget)
1330 	{
1331 		machine().logerror("Bad offset computed in read_pixel !\n");
1332 		offset = 0;
1333 	}
1334 #endif
1335 	switch (colorformat_rendertarget) {
1336 	case NV2A_COLOR_FORMAT::R5G6B5:
1337 		addr16 = (uint16_t *)((uint8_t *)rendertarget + offset);
1338 		color = *addr16;
1339 		c[3] = 0xff;
1340 		c[2] = pal5bit((color & 0xf800) >> 11);
1341 		c[1] = pal6bit((color & 0x07e0) >> 5);
1342 		c[0] = pal5bit(color & 0x1f);
1343 		return (uint8_t *)addr16;
1344 	case NV2A_COLOR_FORMAT::X8R8G8B8_Z8R8G8B8:
1345 	case NV2A_COLOR_FORMAT::X8R8G8B8_X8R8G8B8:
1346 		addr = (uint32_t *)((uint8_t *)rendertarget + offset);
1347 		color = *addr;
1348 
1349 		c[3] = 0xff;
1350 		c[2] = (color >> 16) & 255;
1351 		c[1] = (color >> 8) & 255;
1352 		c[0] = color & 255;
1353 		return (uint8_t *)addr;
1354 	case NV2A_COLOR_FORMAT::A8R8G8B8:
1355 		addr = (uint32_t *)((uint8_t *)rendertarget + offset);
1356 		color = *addr;
1357 		c[3] = color >> 24;
1358 		c[2] = (color >> 16) & 255;
1359 		c[1] = (color >> 8) & 255;
1360 		c[0] = color & 255;
1361 		return (uint8_t *)addr;
1362 	case NV2A_COLOR_FORMAT::B8:
1363 		addr8 = (uint8_t *)rendertarget + offset;
1364 		c[0] = *addr8;
1365 		c[1] = c[2] = 0;
1366 		c[3] = 0xff;
1367 		return addr8;
1368 	default:
1369 		return nullptr;
1370 	}
1371 	return nullptr;
1372 }
1373 
write_pixel(int x,int y,uint32_t color,int z)1374 void nv2a_renderer::write_pixel(int x, int y, uint32_t color, int z)
1375 {
1376 	uint8_t *addr;
1377 	uint32_t *daddr32;
1378 	uint16_t *daddr16;
1379 	uint32_t depthandstencil;
1380 	int32_t c[4], fb[4], s[4], d[4], cc[4];
1381 	uint32_t depth, stencil, stenc, stenv;
1382 	uint32_t udepth;
1383 	bool stencil_passed;
1384 	bool depth_passed;
1385 
1386 	if ((z > 0xffffff) || (z < 0) || (x < 0))
1387 		return;
1388 	udepth = (uint32_t)z;
1389 	fb[3] = fb[2] = fb[1] = fb[0] = 0;
1390 	addr = nullptr;
1391 	if (color_mask != 0)
1392 		addr = read_pixel(x, y, fb);
1393 	if (depthformat_rendertarget == NV2A_RT_DEPTH_FORMAT::Z24S8) {
1394 #ifdef DEBUG_CHECKS
1395 		if (((pitch_depthbuffer / 4) * y + x) >= size_depthbuffer)
1396 		{
1397 			machine().logerror("Bad depthbuffer offset computed in write_pixel !\n");
1398 			return;
1399 		}
1400 #endif
1401 		daddr32 = depthbuffer + (pitch_depthbuffer / 4) * y + x;
1402 		depthandstencil = *daddr32;
1403 		depth = depthandstencil >> 8;
1404 		stencil = depthandstencil & 255;
1405 		daddr16 = nullptr;
1406 	}
1407 	else if (depthformat_rendertarget == NV2A_RT_DEPTH_FORMAT::Z16) {
1408 #ifdef DEBUG_CHECKS
1409 		if (((pitch_depthbuffer / 2) * y + x) >= size_depthbuffer)
1410 		{
1411 			machine().logerror("Bad depthbuffer offset computed in write_pixel !\n");
1412 			return;
1413 		}
1414 #endif
1415 		daddr16 = (uint16_t *)depthbuffer + (pitch_depthbuffer / 2) * y + x;
1416 		depthandstencil = *daddr16;
1417 		depth = (depthandstencil << 8) | 0xff;
1418 		stencil = 0;
1419 		daddr32 = nullptr;
1420 	}
1421 	else {
1422 		daddr32 = nullptr;
1423 		daddr16 = nullptr;
1424 		depth = 0xffffff;
1425 		stencil = 0;
1426 	}
1427 	c[3] = color >> 24;
1428 	c[2] = (color >> 16) & 255;
1429 	c[1] = (color >> 8) & 255;
1430 	c[0] = color & 255;
1431 	cc[3] = blend_color >> 24;
1432 	cc[2] = (blend_color >> 16) & 255;
1433 	cc[1] = (blend_color >> 8) & 255;
1434 	cc[0] = blend_color & 255;
1435 	// ownership test and scissor test not done
1436 	// alpha test
1437 	if (alpha_test_enabled) {
1438 		switch (alpha_func) {
1439 			case NV2A_COMPARISON_OP::NEVER:
1440 				return;
1441 			case NV2A_COMPARISON_OP::ALWAYS:
1442 			default:
1443 				break;
1444 			case NV2A_COMPARISON_OP::LESS:
1445 				if (c[3] >= alpha_reference)
1446 					return;
1447 				break;
1448 			case NV2A_COMPARISON_OP::LEQUAL:
1449 				if (c[3] > alpha_reference)
1450 					return;
1451 				break;
1452 			case NV2A_COMPARISON_OP::EQUAL:
1453 				if (c[3] != alpha_reference)
1454 					return;
1455 				break;
1456 			case NV2A_COMPARISON_OP::GEQUAL:
1457 				if (c[3] < alpha_reference)
1458 					return;
1459 				break;
1460 			case NV2A_COMPARISON_OP::GREATER:
1461 				if (c[3] <= alpha_reference)
1462 					return;
1463 				break;
1464 			case NV2A_COMPARISON_OP::NOTEQUAL:
1465 				if (c[3] == alpha_reference)
1466 					return;
1467 				break;
1468 		}
1469 	}
1470 	// stencil test
1471 	stencil_passed = true;
1472 	if (stencil_test_enabled) {
1473 		stenc=stencil_mask & stencil_ref;
1474 		stenv=stencil_mask & stencil;
1475 		switch (stencil_func) {
1476 		case NV2A_COMPARISON_OP::NEVER:
1477 			stencil_passed = false;
1478 			break;
1479 		case NV2A_COMPARISON_OP::LESS:
1480 			if (stenc >= stenv)
1481 				stencil_passed = false;
1482 			break;
1483 		case NV2A_COMPARISON_OP::EQUAL:
1484 			if (stenc != stenv)
1485 				stencil_passed = false;
1486 			break;
1487 		case NV2A_COMPARISON_OP::LEQUAL:
1488 			if (stenc > stenv)
1489 				stencil_passed = false;
1490 			break;
1491 		case NV2A_COMPARISON_OP::GREATER:
1492 			if (stenc <= stenv)
1493 				stencil_passed = false;
1494 			break;
1495 		case NV2A_COMPARISON_OP::NOTEQUAL:
1496 			if (stenc == stenv)
1497 				stencil_passed = false;
1498 			break;
1499 		case NV2A_COMPARISON_OP::GEQUAL:
1500 			if (stenc < stenv)
1501 				stencil_passed = false;
1502 			break;
1503 		case NV2A_COMPARISON_OP::ALWAYS:
1504 		default:
1505 			break;
1506 		}
1507 		if (stencil_passed == false) {
1508 			switch (stencil_op_fail) {
1509 			case NV2A_STENCIL_OP::ZEROOP:
1510 				stencil = 0;
1511 				break;
1512 			case NV2A_STENCIL_OP::INVERTOP:
1513 				stencil = stencil ^ 255;
1514 				break;
1515 			case NV2A_STENCIL_OP::KEEP:
1516 			default:
1517 				break;
1518 			case NV2A_STENCIL_OP::REPLACE:
1519 				stencil = stencil_ref;
1520 				break;
1521 			case NV2A_STENCIL_OP::INCR:
1522 				if (stencil < 255)
1523 					stencil++;
1524 				break;
1525 			case NV2A_STENCIL_OP::DECR:
1526 				if (stencil > 0)
1527 					stencil--;
1528 				break;
1529 			case NV2A_STENCIL_OP::INCR_WRAP:
1530 				if (stencil < 255)
1531 					stencil++;
1532 				else
1533 					stencil = 0;
1534 				break;
1535 			case NV2A_STENCIL_OP::DECR_WRAP:
1536 				if (stencil > 0)
1537 					stencil--;
1538 				else
1539 					stencil = 255;
1540 				break;
1541 			}
1542 			if (depthformat_rendertarget == NV2A_RT_DEPTH_FORMAT::Z24S8) {
1543 				depthandstencil = (depth << 8) | stencil;
1544 				*daddr32 = depthandstencil;
1545 			}
1546 			else if (depthformat_rendertarget == NV2A_RT_DEPTH_FORMAT::Z16) {
1547 				depthandstencil = depth >> 8;
1548 				*daddr16 = (uint16_t)depthandstencil;
1549 			}
1550 			return;
1551 		}
1552 	}
1553 	// depth buffer test
1554 	depth_passed = true;
1555 	if (depth_test_enabled) {
1556 		switch (depth_function) {
1557 			case NV2A_COMPARISON_OP::NEVER:
1558 				depth_passed = false;
1559 				break;
1560 			case NV2A_COMPARISON_OP::LESS:
1561 				if (udepth >= depth)
1562 					depth_passed = false;
1563 				break;
1564 			case NV2A_COMPARISON_OP::EQUAL:
1565 				if (udepth != depth)
1566 					depth_passed = false;
1567 				break;
1568 			case NV2A_COMPARISON_OP::LEQUAL:
1569 				if (udepth > depth)
1570 					depth_passed = false;
1571 				break;
1572 			case NV2A_COMPARISON_OP::GREATER:
1573 				if (udepth <= depth)
1574 					depth_passed = false;
1575 				break;
1576 			case NV2A_COMPARISON_OP::NOTEQUAL:
1577 				if (udepth == depth)
1578 					depth_passed = false;
1579 				break;
1580 			case NV2A_COMPARISON_OP::GEQUAL:
1581 				if (udepth < depth)
1582 					depth_passed = false;
1583 				break;
1584 			case NV2A_COMPARISON_OP::ALWAYS:
1585 			default:
1586 				break;
1587 		}
1588 		if (depth_passed == false) {
1589 			switch (stencil_op_zfail) {
1590 			case NV2A_STENCIL_OP::ZEROOP:
1591 				stencil = 0;
1592 				break;
1593 			case NV2A_STENCIL_OP::INVERTOP:
1594 				stencil = stencil ^ 255;
1595 				break;
1596 			case NV2A_STENCIL_OP::KEEP:
1597 			default:
1598 				break;
1599 			case NV2A_STENCIL_OP::REPLACE:
1600 				stencil = stencil_ref;
1601 				break;
1602 			case NV2A_STENCIL_OP::INCR:
1603 				if (stencil < 255)
1604 					stencil++;
1605 				break;
1606 			case NV2A_STENCIL_OP::DECR:
1607 				if (stencil > 0)
1608 					stencil--;
1609 				break;
1610 			case NV2A_STENCIL_OP::INCR_WRAP:
1611 				if (stencil < 255)
1612 					stencil++;
1613 				else
1614 					stencil = 0;
1615 				break;
1616 			case NV2A_STENCIL_OP::DECR_WRAP:
1617 				if (stencil > 0)
1618 					stencil--;
1619 				else
1620 					stencil = 255;
1621 				break;
1622 			}
1623 			if (depthformat_rendertarget == NV2A_RT_DEPTH_FORMAT::Z24S8) {
1624 				depthandstencil = (depth << 8) | stencil;
1625 				*daddr32 = depthandstencil;
1626 			}
1627 			else if (depthformat_rendertarget == NV2A_RT_DEPTH_FORMAT::Z16) {
1628 				depthandstencil = depth >> 8;
1629 				*daddr16 = (uint16_t)depthandstencil;
1630 			}
1631 			return;
1632 		}
1633 		switch (stencil_op_zpass) {
1634 		case NV2A_STENCIL_OP::ZEROOP:
1635 			stencil = 0;
1636 			break;
1637 		case NV2A_STENCIL_OP::INVERTOP:
1638 			stencil = stencil ^ 255;
1639 			break;
1640 		case NV2A_STENCIL_OP::KEEP:
1641 		default:
1642 			break;
1643 		case NV2A_STENCIL_OP::REPLACE:
1644 			stencil = stencil_ref;
1645 			break;
1646 		case NV2A_STENCIL_OP::INCR:
1647 			if (stencil < 255)
1648 				stencil++;
1649 			break;
1650 		case NV2A_STENCIL_OP::DECR:
1651 			if (stencil > 0)
1652 				stencil--;
1653 			break;
1654 		case NV2A_STENCIL_OP::INCR_WRAP:
1655 			if (stencil < 255)
1656 				stencil++;
1657 			else
1658 				stencil = 0;
1659 			break;
1660 		case NV2A_STENCIL_OP::DECR_WRAP:
1661 			if (stencil > 0)
1662 				stencil--;
1663 			else
1664 				stencil = 255;
1665 			break;
1666 		}
1667 	}
1668 	// blending
1669 	if (blending_enabled) {
1670 		switch (blend_function_source) {
1671 			case NV2A_BLEND_FACTOR::ZERO:
1672 				s[3] = s[2] = s[1] = s[0] = 0;
1673 				break;
1674 			case NV2A_BLEND_FACTOR::ONE:
1675 			default:
1676 				s[3] = s[2] = s[1] = s[0] = 255;
1677 				break;
1678 			case NV2A_BLEND_FACTOR::DST_COLOR:
1679 				s[3] = fb[3];
1680 				s[2] = fb[2];
1681 				s[1] = fb[1];
1682 				s[0] = fb[0];
1683 				break;
1684 			case NV2A_BLEND_FACTOR::ONE_MINUS_DST_COLOR:
1685 				s[3] = fb[3] ^ 255;
1686 				s[2] = fb[2] ^ 255;
1687 				s[1] = fb[1] ^ 255;
1688 				s[0] = fb[0] ^ 255;
1689 				break;
1690 			case NV2A_BLEND_FACTOR::SRC_ALPHA:
1691 				s[3] = s[2] = s[1] = s[0] = c[3];
1692 				break;
1693 			case NV2A_BLEND_FACTOR::ONE_MINUS_SRC_ALPHA:
1694 				s[3] = s[2] = s[1] = s[0] = c[3] ^ 255;
1695 				break;
1696 			case NV2A_BLEND_FACTOR::DST_ALPHA:
1697 				s[3] = s[2] = s[1] = s[0] = fb[3];
1698 				break;
1699 			case NV2A_BLEND_FACTOR::ONE_MINUS_DST_ALPHA:
1700 				s[3] = s[2] = s[1] = s[0] = fb[3] ^ 255;
1701 				break;
1702 			case NV2A_BLEND_FACTOR::CONSTANT_COLOR:
1703 				s[3] = cc[3];
1704 				s[2] = cc[2];
1705 				s[1] = cc[1];
1706 				s[0] = cc[0];
1707 				break;
1708 			case NV2A_BLEND_FACTOR::ONE_MINUS_CONSTANT_COLOR:
1709 				s[3] = cc[3] ^ 255;
1710 				s[2] = cc[2] ^ 255;
1711 				s[1] = cc[1] ^ 255;
1712 				s[0] = cc[0] ^ 255;
1713 				break;
1714 			case NV2A_BLEND_FACTOR::CONSTANT_ALPHA:
1715 				s[3] = s[2] = s[1] = s[0] = cc[3];
1716 				break;
1717 			case NV2A_BLEND_FACTOR::ONE_MINUS_CONSTANT_ALPHA:
1718 				s[3] = s[2] = s[1] = s[0] = cc[3] ^ 255;
1719 				break;
1720 			case NV2A_BLEND_FACTOR::SRC_ALPHA_SATURATE:
1721 				s[3] = 255;
1722 				if (c[3] < (fb[3] ^ 255))
1723 					s[2] = c[3];
1724 				else
1725 					s[2] = fb[3];
1726 				s[1] = s[0] = s[2];
1727 				break;
1728 		}
1729 		switch (blend_function_destination) {
1730 			case NV2A_BLEND_FACTOR::ZERO:
1731 			default:
1732 				d[3] = d[2] = d[1] = d[0] = 0;
1733 				break;
1734 			case NV2A_BLEND_FACTOR::ONE:
1735 				d[3] = d[2] = d[1] = d[0] = 255;
1736 				break;
1737 			case NV2A_BLEND_FACTOR::SRC_COLOR:
1738 				d[3] = c[3];
1739 				d[2] = c[2];
1740 				d[1] = c[1];
1741 				d[0] = c[0];
1742 				break;
1743 			case NV2A_BLEND_FACTOR::ONE_MINUS_SRC_COLOR:
1744 				d[3] = c[3] ^ 255;
1745 				d[2] = c[2] ^ 255;
1746 				d[1] = c[1] ^ 255;
1747 				d[0] = c[0] ^ 255;
1748 				break;
1749 			case NV2A_BLEND_FACTOR::SRC_ALPHA:
1750 				d[3] = d[2] = d[1] = d[0] = c[3];
1751 				break;
1752 			case NV2A_BLEND_FACTOR::ONE_MINUS_SRC_ALPHA:
1753 				d[3] = d[2] = d[1] = d[0] = c[3] ^ 255;
1754 				break;
1755 			case NV2A_BLEND_FACTOR::DST_ALPHA:
1756 				d[3] = d[2] = d[1] = d[0] = fb[3];
1757 				break;
1758 			case NV2A_BLEND_FACTOR::ONE_MINUS_DST_ALPHA:
1759 				d[3] = d[2] = d[1] = d[0] = fb[3] ^ 255;
1760 				break;
1761 			case NV2A_BLEND_FACTOR::CONSTANT_COLOR:
1762 				d[3] = cc[3];
1763 				d[2] = cc[2];
1764 				d[1] = cc[1];
1765 				d[0] = cc[0];
1766 				break;
1767 			case NV2A_BLEND_FACTOR::ONE_MINUS_CONSTANT_COLOR:
1768 				d[3] = cc[3] ^ 255;
1769 				d[2] = cc[2] ^ 255;
1770 				d[1] = cc[1] ^ 255;
1771 				d[0] = cc[0] ^ 255;
1772 				break;
1773 			case NV2A_BLEND_FACTOR::CONSTANT_ALPHA:
1774 				d[3] = d[2] = d[1] = d[0] = cc[3];
1775 				break;
1776 			case NV2A_BLEND_FACTOR::ONE_MINUS_CONSTANT_ALPHA:
1777 				d[3] = d[2] = d[1] = d[0] = cc[3] ^ 255;
1778 				break;
1779 		}
1780 		switch (blend_equation) {
1781 			case NV2A_BLEND_EQUATION::FUNC_ADD:
1782 				c[3] = (c[3] * s[3] + fb[3] * d[3]) / 255;
1783 				if (c[3] > 255)
1784 					c[3] = 255;
1785 				c[2] = (c[2] * s[2] + fb[2] * d[2]) / 255;
1786 				if (c[2] > 255)
1787 					c[2] = 255;
1788 				c[1] = (c[1] * s[1] + fb[1] * d[1]) / 255;
1789 				if (c[1] > 255)
1790 					c[1] = 255;
1791 				c[0] = (c[0] * s[0] + fb[0] * d[0]) / 255;
1792 				if (c[0] > 255)
1793 					c[0] = 255;
1794 				break;
1795 			case NV2A_BLEND_EQUATION::FUNC_SUBTRACT:
1796 				c[3] = (c[3] * s[3] - fb[3] * d[3]) / 255;
1797 				if (c[3] < 0)
1798 					c[3] = 255;
1799 				c[2] = (c[2] * s[2] - fb[2] * d[2]) / 255;
1800 				if (c[2] < 0)
1801 					c[2] = 255;
1802 				c[1] = (c[1] * s[1] - fb[1] * d[1]) / 255;
1803 				if (c[1] < 0)
1804 					c[1] = 255;
1805 				c[0] = (c[0] * s[0] - fb[0] * d[0]) / 255;
1806 				if (c[0] < 0)
1807 					c[0] = 255;
1808 				break;
1809 			case NV2A_BLEND_EQUATION::FUNC_REVERSE_SUBTRACT:
1810 				c[3] = (fb[3] * d[3] - c[3] * s[3]) / 255;
1811 				if (c[3] < 0)
1812 					c[3] = 255;
1813 				c[2] = (fb[2] * d[2] - c[2] * s[2]) / 255;
1814 				if (c[2] < 0)
1815 					c[2] = 255;
1816 				c[1] = (fb[1] * d[1] - c[1] * s[1]) / 255;
1817 				if (c[1] < 0)
1818 					c[1] = 255;
1819 				c[0] = (fb[0] * d[0] - c[0] * s[0]) / 255;
1820 				if (c[0] < 0)
1821 					c[0] = 255;
1822 				break;
1823 			case NV2A_BLEND_EQUATION::MIN:
1824 				c[3] = s[3];
1825 				if (d[3] < c[3])
1826 					c[3] = d[3];
1827 				c[2] = s[2];
1828 				if (d[2] < c[2])
1829 					c[2] = d[2];
1830 				c[1] = s[1];
1831 				if (d[1] < c[1])
1832 					c[1] = d[1];
1833 				c[0] = s[0];
1834 				if (d[0] < c[0])
1835 					c[0] = d[0];
1836 				break;
1837 			case NV2A_BLEND_EQUATION::MAX:
1838 				c[3] = s[3];
1839 				if (d[3] > c[3])
1840 					c[3] = d[3];
1841 				c[2] = s[2];
1842 				if (d[2] > c[2])
1843 					c[2] = d[2];
1844 				c[1] = s[1];
1845 				if (d[1] > c[1])
1846 					c[1] = d[1];
1847 				c[0] = s[0];
1848 				if (d[0] > c[0])
1849 					c[0] = d[0];
1850 				break;
1851 		}
1852 	}
1853 	// dithering not done
1854 	// logical operation
1855 	if (logical_operation_enabled) {
1856 		switch (logical_operation) {
1857 			case  NV2A_LOGIC_OP::CLEAR:
1858 				c[3] = 0;
1859 				c[2] = 0;
1860 				c[1] = 0;
1861 				c[0] = 0;
1862 				break;
1863 			case  NV2A_LOGIC_OP::AND:
1864 				c[3] = c[3] & fb[3];
1865 				c[2] = c[2] & fb[2];
1866 				c[1] = c[1] & fb[1];
1867 				c[0] = c[0] & fb[0];
1868 				break;
1869 			case  NV2A_LOGIC_OP::AND_REVERSE:
1870 				c[3] = c[3] & (fb[3] ^ 255);
1871 				c[2] = c[2] & (fb[2] ^ 255);
1872 				c[1] = c[1] & (fb[1] ^ 255);
1873 				c[0] = c[0] & (fb[0] ^ 255);
1874 				break;
1875 			case  NV2A_LOGIC_OP::COPY:
1876 			default:
1877 				break;
1878 			case  NV2A_LOGIC_OP::AND_INVERTED:
1879 				c[3] = (c[3] ^ 255) & fb[3];
1880 				c[2] = (c[2] ^ 255) & fb[2];
1881 				c[1] = (c[1] ^ 255) & fb[1];
1882 				c[0] = (c[0] ^ 255) & fb[0];
1883 				break;
1884 			case  NV2A_LOGIC_OP::NOOP:
1885 				c[3] = fb[3];
1886 				c[2] = fb[2];
1887 				c[1] = fb[1];
1888 				c[0] = fb[0];
1889 				break;
1890 			case  NV2A_LOGIC_OP::XOR:
1891 				c[3] = c[3] ^ fb[3];
1892 				c[2] = c[2] ^ fb[2];
1893 				c[1] = c[1] ^ fb[1];
1894 				c[0] = c[0] ^ fb[0];
1895 				break;
1896 			case  NV2A_LOGIC_OP::OR:
1897 				c[3] = c[3] | fb[3];
1898 				c[2] = c[2] | fb[2];
1899 				c[1] = c[1] | fb[1];
1900 				c[0] = c[0] | fb[0];
1901 				break;
1902 			case  NV2A_LOGIC_OP::NOR:
1903 				c[3] = (c[3] | fb[3]) ^ 255;
1904 				c[2] = (c[2] | fb[2]) ^ 255;
1905 				c[1] = (c[1] | fb[1]) ^ 255;
1906 				c[0] = (c[0] | fb[0]) ^ 255;
1907 				break;
1908 			case  NV2A_LOGIC_OP::EQUIV:
1909 				c[3] = (c[3] ^ fb[3]) ^ 255;
1910 				c[2] = (c[2] ^ fb[2]) ^ 255;
1911 				c[1] = (c[1] ^ fb[1]) ^ 255;
1912 				c[0] = (c[0] ^ fb[0]) ^ 255;
1913 				break;
1914 			case  NV2A_LOGIC_OP::INVERT:
1915 				c[3] = fb[3] ^ 255;
1916 				c[2] = fb[2] ^ 255;
1917 				c[1] = fb[1] ^ 255;
1918 				c[0] = fb[0] ^ 255;
1919 				break;
1920 			case  NV2A_LOGIC_OP::OR_REVERSE:
1921 				c[3] = c[3] | (fb[3] ^ 255);
1922 				c[2] = c[2] | (fb[2] ^ 255);
1923 				c[1] = c[1] | (fb[1] ^ 255);
1924 				c[0] = c[0] | (fb[0] ^ 255);
1925 				break;
1926 			case  NV2A_LOGIC_OP::COPY_INVERTED:
1927 				c[3] = c[3] ^ 255;
1928 				c[2] = c[2] ^ 255;
1929 				c[1] = c[1] ^ 255;
1930 				c[0] = c[0] ^ 255;
1931 				break;
1932 			case  NV2A_LOGIC_OP::OR_INVERTED:
1933 				c[3] = (c[3] ^ 255) | fb[3];
1934 				c[2] = (c[2] ^ 255) | fb[2];
1935 				c[1] = (c[1] ^ 255) | fb[1];
1936 				c[0] = (c[0] ^ 255) | fb[0];
1937 				break;
1938 			case  NV2A_LOGIC_OP::NAND:
1939 				c[3] = (c[3] & fb[3]) ^ 255;
1940 				c[2] = (c[2] & fb[2]) ^ 255;
1941 				c[1] = (c[1] & fb[1]) ^ 255;
1942 				c[0] = (c[0] & fb[0]) ^ 255;
1943 				break;
1944 			case  NV2A_LOGIC_OP::SET:
1945 				c[3] = 255;
1946 				c[2] = 255;
1947 				c[1] = 255;
1948 				c[0] = 255;
1949 				break;
1950 		}
1951 	}
1952 	if (color_mask != 0) {
1953 		uint32_t ct,ft,w;
1954 
1955 		ct = ((uint32_t)c[3] << 24) | ((uint32_t)c[2] << 16) | ((uint32_t)c[1] << 8) | (uint32_t)c[0];
1956 		ft = ((uint32_t)fb[3] << 24) | ((uint32_t)fb[2] << 16) | ((uint32_t)fb[1] << 8) | (uint32_t)fb[0];
1957 		w = (ft & ~color_mask) | (ct & color_mask);
1958 
1959 /* for debugging
1960         if (w == 0x94737d7b)
1961             x++;
1962 */
1963 		switch (colorformat_rendertarget) {
1964 		case NV2A_COLOR_FORMAT::R5G6B5:
1965 			w = ((w >> 8) & 0xf800) + ((w >> 5) & 0x7e0) + ((w >> 3) & 0x1f);
1966 			*((uint16_t *)addr) = (uint16_t)w;
1967 			break;
1968 		case NV2A_COLOR_FORMAT::X8R8G8B8_Z8R8G8B8:
1969 		case NV2A_COLOR_FORMAT::X8R8G8B8_X8R8G8B8:
1970 			*((uint32_t *)addr) = w;
1971 			break;
1972 		case NV2A_COLOR_FORMAT::A8R8G8B8:
1973 			*((uint32_t *)addr) = w;
1974 			break;
1975 		case NV2A_COLOR_FORMAT::B8:
1976 			*addr = (uint8_t)w;
1977 			break;
1978 		default:
1979 			return;
1980 		}
1981 	}
1982 	if (depth_write_enabled)
1983 		depth = udepth;
1984 	if (depthformat_rendertarget == NV2A_RT_DEPTH_FORMAT::Z24S8) {
1985 		depthandstencil = (depth << 8) | stencil;
1986 		*daddr32 = depthandstencil;
1987 	}
1988 	else if (depthformat_rendertarget == NV2A_RT_DEPTH_FORMAT::Z16) {
1989 		depthandstencil = depth >> 8;
1990 		*daddr16 = (uint16_t)depthandstencil;
1991 	}
1992 }
1993 
render_color(int32_t scanline,const nv2a_rasterizer::extent_t & extent,const nvidia_object_data & objectdata,int threadid)1994 void nv2a_renderer::render_color(int32_t scanline, const nv2a_rasterizer::extent_t &extent, const nvidia_object_data &objectdata, int threadid)
1995 {
1996 	int x, lx;
1997 
1998 	lx = limits_rendertarget.right();
1999 	if ((extent.startx < 0) && (extent.stopx <= 0))
2000 		return;
2001 	if ((extent.startx > lx) && (extent.stopx > lx))
2002 		return;
2003 	x = extent.stopx - extent.startx; // number of pixels to draw (start inclusive, end exclusive)
2004 	if (extent.stopx > lx)
2005 		x = x - (extent.stopx - lx - 1);
2006 	x--;
2007 	while (x >= 0) {
2008 		double zf;
2009 		uint32_t a8r8g8b8;
2010 		int z;
2011 		int ca, cr, cg, cb;
2012 		int xp = extent.startx + x; // x coordinate of current pixel
2013 
2014 		z = (extent.param[(int)VERTEX_PARAMETER::PARAM_Z].start + (double)x * extent.param[(int)VERTEX_PARAMETER::PARAM_Z].dpdx);
2015 		zf = (extent.param[(int)VERTEX_PARAMETER::PARAM_1W].start + (double)x * extent.param[(int)VERTEX_PARAMETER::PARAM_1W].dpdx);
2016 		zf = 1.0f / zf;
2017 		cb = ((extent.param[(int)VERTEX_PARAMETER::PARAM_COLOR_B].start + (double)x * extent.param[(int)VERTEX_PARAMETER::PARAM_COLOR_B].dpdx)) * zf * 255.0f;
2018 		cg = ((extent.param[(int)VERTEX_PARAMETER::PARAM_COLOR_G].start + (double)x * extent.param[(int)VERTEX_PARAMETER::PARAM_COLOR_G].dpdx)) * zf * 255.0f;
2019 		cr = ((extent.param[(int)VERTEX_PARAMETER::PARAM_COLOR_R].start + (double)x * extent.param[(int)VERTEX_PARAMETER::PARAM_COLOR_R].dpdx)) * zf * 255.0f;
2020 		ca = ((extent.param[(int)VERTEX_PARAMETER::PARAM_COLOR_A].start + (double)x * extent.param[(int)VERTEX_PARAMETER::PARAM_COLOR_A].dpdx)) * zf * 255.0f;
2021 		if (cb > 255)
2022 			cb = 255;
2023 		if (cb < 0)
2024 			cb = 0;
2025 		if (cg > 255)
2026 			cg = 255;
2027 		if (cg < 0)
2028 			cg = 0;
2029 		if (cr > 255)
2030 			cr = 255;
2031 		if (cr < 0)
2032 			cr = 0;
2033 		if (ca > 255)
2034 			ca = 255;
2035 		if (ca < 0)
2036 			ca = 0;
2037 		a8r8g8b8 = (ca << 24) | (cr << 16) | (cg << 8) | cb; // pixel color obtained by interpolating the colors of the vertices
2038 		write_pixel(xp, scanline, a8r8g8b8, z);
2039 		x--;
2040 	}
2041 }
2042 
render_texture_simple(int32_t scanline,const nv2a_rasterizer::extent_t & extent,const nvidia_object_data & objectdata,int threadid)2043 void nv2a_renderer::render_texture_simple(int32_t scanline, const nv2a_rasterizer::extent_t &extent, const nvidia_object_data &objectdata, int threadid)
2044 {
2045 	int x, lx;
2046 	uint32_t a8r8g8b8;
2047 
2048 	if (!objectdata.data->texture[0].enabled) {
2049 		return;
2050 	}
2051 	lx = limits_rendertarget.right();
2052 	if ((extent.startx < 0) && (extent.stopx <= 0))
2053 		return;
2054 	if ((extent.startx > lx) && (extent.stopx > lx))
2055 		return;
2056 	x = extent.stopx - extent.startx; // number of pixels to draw (start inclusive, end exclusive)
2057 	if (extent.stopx > lx)
2058 		x = x - (extent.stopx - lx - 1);
2059 	x--;
2060 	while (x >= 0) {
2061 		float zf;
2062 		double spf, tpf;
2063 		//double rpf, qpf; // disabled to remove "set but not used" warning
2064 		int sp, tp;
2065 		int z;
2066 		int xp = extent.startx + x; // x coordinate of current pixel
2067 
2068 		z = (extent.param[(int)VERTEX_PARAMETER::PARAM_Z].start + (double)x * extent.param[(int)VERTEX_PARAMETER::PARAM_Z].dpdx);
2069 		zf = (extent.param[(int)VERTEX_PARAMETER::PARAM_1W].start + (double)x * extent.param[(int)VERTEX_PARAMETER::PARAM_1W].dpdx);
2070 		zf = 1.0f / zf;
2071 		spf = (extent.param[(int)VERTEX_PARAMETER::PARAM_TEXTURE0_S].start + (double)x * extent.param[(int)VERTEX_PARAMETER::PARAM_TEXTURE0_S].dpdx) * zf;
2072 		tpf = (extent.param[(int)VERTEX_PARAMETER::PARAM_TEXTURE0_T].start + (double)x * extent.param[(int)VERTEX_PARAMETER::PARAM_TEXTURE0_T].dpdx) * zf;
2073 		//rpf = (extent.param[(int)VERTEX_PARAMETER::PARAM_TEXTURE0_R].start + (double)x * extent.param[(int)VERTEX_PARAMETER::PARAM_TEXTURE0_R].dpdx) * zf;
2074 		//qpf = (extent.param[(int)VERTEX_PARAMETER::PARAM_TEXTURE0_Q].start + (double)x * extent.param[(int)VERTEX_PARAMETER::PARAM_TEXTURE0_Q].dpdx) * zf;
2075 		if (objectdata.data->texture[0].rectangle == false) {
2076 			sp = spf * (double)(objectdata.data->texture[0].sizes - 1); // x coordinate of texel in texture
2077 			tp = tpf * (double)(objectdata.data->texture[0].sizet - 1); // y coordinate of texel in texture
2078 		}
2079 		else {
2080 			sp = spf;
2081 			tp = tpf;
2082 		}
2083 		a8r8g8b8 = texture_get_texel(0, sp, tp);
2084 		write_pixel(xp, scanline, a8r8g8b8, z);
2085 		x--;
2086 	}
2087 }
2088 
render_register_combiners(int32_t scanline,const nv2a_rasterizer::extent_t & extent,const nvidia_object_data & objectdata,int threadid)2089 void nv2a_renderer::render_register_combiners(int32_t scanline, const nv2a_rasterizer::extent_t &extent, const nvidia_object_data &objectdata, int threadid)
2090 {
2091 	int x, lx, xp;
2092 	int tc[4];
2093 	float colorf[7][4];
2094 	uint32_t color[6];
2095 	uint32_t a8r8g8b8;
2096 	int z;
2097 	int n;
2098 
2099 	color[0] = color[1] = color[2] = color[3] = color[4] = color[5] = 0;
2100 
2101 	lx = limits_rendertarget.right();
2102 	if ((extent.startx < 0) && (extent.stopx <= 0))
2103 		return;
2104 	if ((extent.startx > lx) && (extent.stopx > lx))
2105 		return;
2106 	x = extent.stopx - extent.startx; // number of pixels to draw (start inclusive, end exclusive)
2107 	if (extent.stopx > lx)
2108 		x = x - (extent.stopx - lx - 1);
2109 	x--;
2110 	while (x >= 0) {
2111 		float zf;
2112 
2113 		xp = extent.startx + x;
2114 		// 1: fetch data
2115 		// 1.1: interpolated color from vertices
2116 		z = (extent.param[(int)VERTEX_PARAMETER::PARAM_Z].start + (double)x * extent.param[(int)VERTEX_PARAMETER::PARAM_Z].dpdx);
2117 		zf = (extent.param[(int)VERTEX_PARAMETER::PARAM_1W].start + (double)x * extent.param[(int)VERTEX_PARAMETER::PARAM_1W].dpdx);
2118 		zf = 1.0f / zf;
2119 		colorf[0][0] = (extent.param[(int)VERTEX_PARAMETER::PARAM_COLOR_R].start + (double)x * extent.param[(int)VERTEX_PARAMETER::PARAM_COLOR_R].dpdx) * zf;
2120 		colorf[0][1] = (extent.param[(int)VERTEX_PARAMETER::PARAM_COLOR_G].start + (double)x * extent.param[(int)VERTEX_PARAMETER::PARAM_COLOR_G].dpdx) * zf;
2121 		colorf[0][2] = (extent.param[(int)VERTEX_PARAMETER::PARAM_COLOR_B].start + (double)x * extent.param[(int)VERTEX_PARAMETER::PARAM_COLOR_B].dpdx) * zf;
2122 		colorf[0][3] = (extent.param[(int)VERTEX_PARAMETER::PARAM_COLOR_A].start + (double)x * extent.param[(int)VERTEX_PARAMETER::PARAM_COLOR_A].dpdx) * zf;
2123 		colorf[1][0] = (extent.param[(int)VERTEX_PARAMETER::PARAM_SECONDARY_COLOR_R].start + (double)x * extent.param[(int)VERTEX_PARAMETER::PARAM_SECONDARY_COLOR_R].dpdx) * zf;
2124 		colorf[1][1] = (extent.param[(int)VERTEX_PARAMETER::PARAM_SECONDARY_COLOR_G].start + (double)x * extent.param[(int)VERTEX_PARAMETER::PARAM_SECONDARY_COLOR_G].dpdx) * zf;
2125 		colorf[1][2] = (extent.param[(int)VERTEX_PARAMETER::PARAM_SECONDARY_COLOR_B].start + (double)x * extent.param[(int)VERTEX_PARAMETER::PARAM_SECONDARY_COLOR_B].dpdx) * zf;
2126 		colorf[1][3] = (extent.param[(int)VERTEX_PARAMETER::PARAM_SECONDARY_COLOR_A].start + (double)x * extent.param[(int)VERTEX_PARAMETER::PARAM_SECONDARY_COLOR_A].dpdx) * zf;
2127 		// 1.2: coordinates for each of the 4 possible textures
2128 		for (n = 0; n < 4; n++) {
2129 			colorf[n + 2][0] = (extent.param[(int)VERTEX_PARAMETER::PARAM_TEXTURE0_S + n * 4].start + (double)x * extent.param[(int)VERTEX_PARAMETER::PARAM_TEXTURE0_S + n * 4].dpdx) * zf;
2130 			colorf[n + 2][1] = (extent.param[(int)VERTEX_PARAMETER::PARAM_TEXTURE0_T + n * 4].start + (double)x * extent.param[(int)VERTEX_PARAMETER::PARAM_TEXTURE0_T + n * 4].dpdx) * zf;
2131 			colorf[n + 2][2] = (extent.param[(int)VERTEX_PARAMETER::PARAM_TEXTURE0_R + n * 4].start + (double)x * extent.param[(int)VERTEX_PARAMETER::PARAM_TEXTURE0_R + n * 4].dpdx) * zf;
2132 			colorf[n + 2][3] = (extent.param[(int)VERTEX_PARAMETER::PARAM_TEXTURE0_Q + n * 4].start + (double)x * extent.param[(int)VERTEX_PARAMETER::PARAM_TEXTURE0_Q + n * 4].dpdx) * zf;
2133 		}
2134 		// 1.3: fog
2135 		combiner_argb8_float(fog_color, colorf[6]);
2136 		colorf[6][3] = 1.0f; // should it be from the ofog output of the vertex shader ?
2137 		// 1.4: colors from textures
2138 		for (n = 0; n < 4; n++) {
2139 			if (texture[n].mode == 1) {
2140 				if (texture[n].rectangle == false) {
2141 					tc[0] = colorf[n + 2][0] * (float)(objectdata.data->texture[n].sizes - 1);
2142 					tc[1] = colorf[n + 2][1] * (float)(objectdata.data->texture[n].sizet - 1);
2143 				}
2144 				else {
2145 					tc[0] = colorf[n + 2][0];
2146 					tc[1] = colorf[n + 2][1];
2147 				}
2148 				a8r8g8b8 = texture_get_texel(n, tc[0], tc[1]);
2149 				combiner_argb8_float(a8r8g8b8, colorf[n + 2]);
2150 			}
2151 			else if (texture[n].mode == 4)
2152 				; // nothing
2153 			else
2154 				combiner_argb8_float(0xff000000, colorf[n + 2]);
2155 		}
2156 		// 2: compute
2157 		// 2.1: initialize
2158 		combiner_initialize_registers(threadid, colorf);
2159 		// 2.2: general cmbiner stages
2160 		for (n = 0; n < combiner.setup.stages; n++) {
2161 			// 2.2.1 initialize
2162 			combiner_initialize_stage(threadid, n);
2163 			// 2.2.2 map inputs
2164 			combiner_map_stage_input(threadid, n);
2165 			// 2.2.3 compute possible outputs
2166 			combiner_compute_rgb_outputs(threadid, n);
2167 			combiner_compute_alpha_outputs(threadid, n);
2168 			// 2.2.4 map outputs to registers
2169 			combiner_map_stage_output(threadid, n);
2170 		}
2171 		// 2.3: final cmbiner stage
2172 		combiner_initialize_final(threadid);
2173 		combiner_map_final_input(threadid);
2174 		combiner_final_output(threadid);
2175 		a8r8g8b8 = combiner_float_argb8(combiner.work[threadid].output);
2176 		// 3: write pixel
2177 		write_pixel(xp, scanline, a8r8g8b8, z);
2178 		x--;
2179 	}
2180 }
2181 
2182 #if 0
// Names used by dumpcombiners to describe the fields of the register combiners registers.
// Each table is indexed directly with the value of the corresponding bit field.

// input mapping (3 bit field)
const char *rc_mapping_str[] = {
	"UNSIGNED_IDENTITY", "UNSIGNED_INVERT",  // 0, 1
	"EXPAND_NORMAL",     "EXPAND_NEGATE",    // 2, 3
	"HALF_BIAS_NORMAL",  "HALF_BIAS_NEGATE", // 4, 5
	"SIGNED_IDENTITY",   "SIGNED_NEGATE"     // 6, 7
};

// component usage of an input of the rgb portion (1 bit field)
const char *rc_usage_rgb_str[] = { "RGB", "ALPHA" };

// component usage of an input of the alpha portion (1 bit field)
const char *rc_usage_alpha_str[] = { "BLUE", "ALPHA" };

// register selected as input or output (4 bit field)
const char *rc_variable_str[] = {
	"ZERO", "CONSTANT_COLOR0", "CONSTANT_COLOR1", "FOG",       // 0 - 3
	"PRIMARY_COLOR", "SECONDARY_COLOR", "???", "???",          // 4 - 7
	"TEXTURE0", "TEXTURE1", "TEXTURE2", "TEXTURE3",            // 8 - 11
	"SPARE0", "SPARE1", "SPARE0_PLUS_SECONDARY_COLOR", "E_TIMES_F" // 12 - 15
};

// output bias (1 bit field)
const char *rc_bias_str[] = { "NONE", "BIAS_BY_NEGATIVE_ONE_HALF" };

// output scale (2 bit field)
const char *rc_scale_str[] = {
	"NONE", "SCALE_BY_TWO", "SCALE_BY_FOUR", "SCALE_BY_ONE_HALF"
};
2234 
2235 /* Dump the current setup of the register combiners */
2236 void dumpcombiners(uint32_t *m)
2237 {
2238 	int a, b, n, v;
2239 
2240 	n = m[0x1e60 / 4] & 0xf;
2241 	printf("Combiners active: %d\n\r", n);
2242 	for (a = 0; a < n; a++) {
2243 		printf("Combiner %d\n\r", a + 1);
2244 		printf(" RC_IN_ALPHA %08X\n\r", m[0x0260 / 4 + a]);
2245 		for (b = 24; b >= 0; b = b - 8) {
2246 			v = (m[0x0260 / 4 + a] >> b) & 0xf;
2247 			printf("  %c_INPUT %s\n\r", 'A' + 3 - b / 8, rc_variable_str[v]);
2248 			v = (m[0x0260 / 4 + a] >> (b + 4)) & 1;
2249 			printf("  %c_COMPONENT_USAGE %s\n\r", 'A' + 3 - b / 8, rc_usage_alpha_str[v]);
2250 			v = (m[0x0260 / 4 + a] >> (b + 5)) & 7;
2251 			printf("  %c_MAPPING %s\n\r", 'A' + 3 - b / 8, rc_mapping_str[v]);
2252 		}
2253 		printf(" RC_IN_RGB %08X\n\r", m[0x0ac0 / 4 + a]);
2254 		for (b = 24; b >= 0; b = b - 8) {
2255 			v = (m[0x0ac0 / 4 + a] >> b) & 0xf;
2256 			printf("  %c_INPUT %s\n\r", 'A' + 3 - b / 8, rc_variable_str[v]);
2257 			v = (m[0x0ac0 / 4 + a] >> (b + 4)) & 1;
2258 			printf("  %c_COMPONENT_USAGE %s\n\r", 'A' + 3 - b / 8, rc_usage_rgb_str[v]);
2259 			v = (m[0x0ac0 / 4 + a] >> (b + 5)) & 7;
2260 			printf("  %c_MAPPING %s\n\r", 'A' + 3 - b / 8, rc_mapping_str[v]);
2261 		}
2262 		printf(" RC_OUT_ALPHA %08X\n\r", m[0x0aa0 / 4 + a]);
2263 		v = m[0x0aa0 / 4 + a] & 0xf;
2264 		printf("  CD_OUTPUT %s\n\r", rc_variable_str[v]);
2265 		v = (m[0x0aa0 / 4 + a] >> 4) & 0xf;
2266 		printf("  AB_OUTPUT %s\n\r", rc_variable_str[v]);
2267 		v = (m[0x0aa0 / 4 + a] >> 8) & 0xf;
2268 		printf("  SUM_OUTPUT %s\n\r", rc_variable_str[v]);
2269 		v = (m[0x0aa0 / 4 + a] >> 12) & 1;
2270 		printf("  CD_DOT_PRODUCT %d\n\r", v);
2271 		v = (m[0x0aa0 / 4 + a] >> 13) & 1;
2272 		printf("  AB_DOT_PRODUCT %d\n\r", v);
2273 		v = (m[0x0aa0 / 4 + a] >> 14) & 1;
2274 		printf("  MUX_SUM %d\n\r", v);
2275 		v = (m[0x0aa0 / 4 + a] >> 15) & 1;
2276 		printf("  BIAS %s\n\r", rc_bias_str[v]);
2277 		v = (m[0x0aa0 / 4 + a] >> 16) & 3;
2278 		printf("  SCALE %s\n\r", rc_scale_str[v]);
2279 		//v=(m[0x0aa0/4+a] >> 27) & 7;
2280 		printf(" RC_OUT_RGB %08X\n\r", m[0x1e40 / 4 + a]);
2281 		v = m[0x1e40 / 4 + a] & 0xf;
2282 		printf("  CD_OUTPUT %s\n\r", rc_variable_str[v]);
2283 		v = (m[0x1e40 / 4 + a] >> 4) & 0xf;
2284 		printf("  AB_OUTPUT %s\n\r", rc_variable_str[v]);
2285 		v = (m[0x1e40 / 4 + a] >> 8) & 0xf;
2286 		printf("  SUM_OUTPUT %s\n\r", rc_variable_str[v]);
2287 		v = (m[0x1e40 / 4 + a] >> 12) & 1;
2288 		printf("  CD_DOT_PRODUCT %d\n\r", v);
2289 		v = (m[0x1e40 / 4 + a] >> 13) & 1;
2290 		printf("  AB_DOT_PRODUCT %d\n\r", v);
2291 		v = (m[0x1e40 / 4 + a] >> 14) & 1;
2292 		printf("  MUX_SUM %d\n\r", v);
2293 		v = (m[0x1e40 / 4 + a] >> 15) & 1;
2294 		printf("  BIAS %s\n\r", rc_bias_str[v]);
2295 		v = (m[0x1e40 / 4 + a] >> 16) & 3;
2296 		printf("  SCALE %s\n\r", rc_scale_str[v]);
2297 		//v=(m[0x1e40/4+a] >> 27) & 7;
2298 		printf("\n\r");
2299 	}
2300 	printf("Combiner final %08X %08X\n\r", m[0x0288 / 4], m[0x028c / 4]);
2301 	for (a = 24; a >= 0; a = a - 8) {
2302 		n = (m[0x0288 / 4] >> a) & 0xf;
2303 		printf("  %c_INPUT %s\n\r", 'A' + 3 - a / 8, rc_variable_str[n]);
2304 		n = (m[0x0288 / 4] >> (a + 4)) & 1;
2305 		printf("  %c_COMPONENT_USAGE %s\n\r", 'A' + 3 - a / 8, rc_usage_rgb_str[n]);
2306 		n = (m[0x0288 / 4] >> (a + 5)) & 7;
2307 		printf("  %c_MAPPING %s\n\r", 'A' + 3 - a / 8, rc_mapping_str[n]);
2308 	}
2309 	for (a = 24; a >= 8; a = a - 8) {
2310 		n = (m[0x028c / 4] >> a) & 0xf;
2311 		printf("  %c_INPUT %s\n\r", 'E' + 3 - a / 8, rc_variable_str[n]);
2312 		n = (m[0x028c / 4] >> (a + 4)) & 1;
2313 		printf("  %c_COMPONENT_USAGE %s\n\r", 'E' + 3 - a / 8, rc_usage_rgb_str[n]);
2314 		n = (m[0x028c / 4] >> (a + 5)) & 7;
2315 		printf("  %c_MAPPING %s\n\r", 'E' + 3 - a / 8, rc_mapping_str[n]);
2316 	}
2317 	n = (m[0x028c / 4] >> 7) & 1;
2318 	printf(" color sum clamp: %d\n\r", n);
2319 }
2320 #endif
2321 
extract_packed_float(uint32_t data,float & first,float & second,float & third)2322 void nv2a_renderer::extract_packed_float(uint32_t data, float &first, float &second, float &third)
2323 {
2324 	float f1, f2, f3;
2325 	int p1, p2, p3;
2326 
2327 	p1 = data & 0x7ff;
2328 	if (p1 & 0x400)
2329 		f1 = (float)(p1 - 0x800) / 1023.0;
2330 	else
2331 		f1 = (float)p1 / 1023.0;
2332 	p2 = (data >> 11) & 0x7ff;
2333 	if (p2 & 0x400)
2334 		f2 = (float)(p2 - 0x800) / 1023.0;
2335 	else
2336 		f2 = (float)p2 / 1023.0;
2337 	p3 = (data >> 22) & 0x3ff;
2338 	if (p3 & 0x200)
2339 		f3 = (float)(p3 - 0x400) / 511.0;
2340 	else
2341 		f3 = (float)p3 / 511.0;
2342 	first = f1;
2343 	second = f2;
2344 	third = f3;
2345 }
2346 
read_vertex(address_space & space,offs_t address,vertex_nv & vertex,int attrib)2347 void nv2a_renderer::read_vertex(address_space &space, offs_t address, vertex_nv &vertex, int attrib)
2348 {
2349 	uint32_t u;
2350 
2351 	switch (vertexbuffer.type[attrib])
2352 	{
2353 	case 0x02: // none
2354 		return;
2355 	case 0x12: // float1
2356 		vertex.attribute[attrib].iv[0] = space.read_dword(address + 0);
2357 		vertex.attribute[attrib].fv[1] = 0;
2358 		vertex.attribute[attrib].fv[2] = 0;
2359 		vertex.attribute[attrib].fv[3] = 1.0;
2360 		break;
2361 	case 0x16: // normpacked3
2362 		u = space.read_dword(address + 0);
2363 		extract_packed_float(u, vertex.attribute[attrib].fv[0], vertex.attribute[attrib].fv[1], vertex.attribute[attrib].fv[2]);
2364 		vertex.attribute[attrib].fv[3] = 1.0;
2365 		break;
2366 	case 0x22: // float2
2367 		vertex.attribute[attrib].iv[0] = space.read_dword(address + 0);
2368 		vertex.attribute[attrib].iv[1] = space.read_dword(address + 4);
2369 		vertex.attribute[attrib].fv[2] = 0;
2370 		vertex.attribute[attrib].fv[3] = 1.0;
2371 		break;
2372 	case 0x32: // float3
2373 		vertex.attribute[attrib].iv[0] = space.read_dword(address + 0);
2374 		vertex.attribute[attrib].iv[1] = space.read_dword(address + 4);
2375 		vertex.attribute[attrib].iv[2] = space.read_dword(address + 8);
2376 		vertex.attribute[attrib].fv[3] = 1.0;
2377 		break;
2378 	case 0x40: // d3dcolor
2379 		u = space.read_dword(address + 0);
2380 		// aarrggbb -> (rr, gg, bb, aa)
2381 		vertex.attribute[attrib].fv[2] = (u & 0xff) / 255.0;
2382 		u = u >> 8;
2383 		vertex.attribute[attrib].fv[1] = (u & 0xff) / 255.0;
2384 		u = u >> 8;
2385 		vertex.attribute[attrib].fv[0] = (u & 0xff) / 255.0;
2386 		u = u >> 8;
2387 		vertex.attribute[attrib].fv[3] = (u & 0xff) / 255.0;
2388 		break;
2389 	case 0x42: // float4
2390 		vertex.attribute[attrib].iv[0] = space.read_dword(address + 0);
2391 		vertex.attribute[attrib].iv[1] = space.read_dword(address + 4);
2392 		vertex.attribute[attrib].iv[2] = space.read_dword(address + 8);
2393 		vertex.attribute[attrib].iv[3] = space.read_dword(address + 12);
2394 		break;
2395 	default:
2396 		machine().logerror("Yet unsupported vertex data type %x\n\r", vertexbuffer.type[attrib]);
2397 		return;
2398 	}
2399 }
2400 
2401 /* Read vertices data from system memory. Method 0x1800 and 0x1808 */
read_vertices_0x180x(address_space & space,int destination,uint32_t address,int limit)2402 int nv2a_renderer::read_vertices_0x180x(address_space &space, int destination, uint32_t address, int limit)
2403 {
2404 	uint32_t m, n;
2405 	int a, b;
2406 
2407 	n = destination;
2408 	for (m = 0; m < limit; m++) {
2409 		memcpy(&vertex_software[n], &persistvertexattr, sizeof(persistvertexattr));
2410 		b = vertexbuffer.enabled;
2411 		for (a = 0; a < 16; a++) {
2412 			if (b & 1) {
2413 				read_vertex(space, vertexbuffer.address[a] + vertex_indexes[indexesleft_first] * vertexbuffer.stride[a], vertex_software[n], a);
2414 			}
2415 			b = b >> 1;
2416 		}
2417 		n = (n + 1) & 1023;
2418 		indexesleft_first = (indexesleft_first + 1) & 1023;
2419 		indexesleft_count--;
2420 	}
2421 	return limit;
2422 }
2423 
2424 /* Read vertices data from system memory. Method 0x1810 */
read_vertices_0x1810(address_space & space,int destination,int offset,int limit)2425 int nv2a_renderer::read_vertices_0x1810(address_space &space, int destination, int offset, int limit)
2426 {
2427 	uint32_t m, n;
2428 	int a, b;
2429 
2430 	n = destination;
2431 	for (m = 0; m < limit; m++) {
2432 		memcpy(&vertex_software[n], &persistvertexattr, sizeof(persistvertexattr));
2433 		b = vertexbuffer.enabled;
2434 		for (a = 0; a < 16; a++) {
2435 			if (b & 1) {
2436 				read_vertex(space, vertexbuffer.address[a] + (m + offset) * vertexbuffer.stride[a], vertex_software[n], a);
2437 			}
2438 			b = b >> 1;
2439 		}
2440 		n = (n + 1) & 1023;
2441 	}
2442 	return m;
2443 }
2444 
2445 /* Read vertices data from system memory. Method 0x1818 */
read_vertices_0x1818(address_space & space,int destination,uint32_t address,int limit)2446 int nv2a_renderer::read_vertices_0x1818(address_space &space, int destination, uint32_t address, int limit)
2447 {
2448 	uint32_t m, n, vwords;
2449 	int a, b;
2450 
2451 	n = destination;
2452 	vwords = vertexbuffer.offset[16];
2453 	for (m = 0; m < limit; m++) {
2454 		memcpy(&vertex_software[n], &persistvertexattr, sizeof(persistvertexattr));
2455 		b = vertexbuffer.enabled;
2456 		for (a = 0; a < 16; a++) {
2457 			if (b & 1) {
2458 				read_vertex(space, address + vertexbuffer.offset[a] * 4, vertex_software[n], a);
2459 			}
2460 			b = b >> 1;
2461 		}
2462 		n = (n + 1) & 1023;
2463 		address = address + vwords * 4;
2464 	}
2465 	return (int)(m*vwords);
2466 }
2467 
compute_supersample_factors(float & horizontal,float & vertical)2468 void nv2a_renderer::compute_supersample_factors(float &horizontal, float &vertical)
2469 {
2470 	float mx, my;
2471 
2472 	mx = 1;
2473 	my = 1;
2474 	switch (((antialias_control & 1) << 2) | antialiasing_rendertarget)
2475 	{
2476 	case 0:
2477 		mx = my = 1;
2478 		break;
2479 	case 1:
2480 		mx = 2; my = 1;
2481 		break;
2482 	case 2:
2483 		mx = my = 2;
2484 		break;
2485 	case 4:
2486 		mx = my = 1;
2487 		break;
2488 	case 5:
2489 		mx = 2;
2490 		my = 1;
2491 		break;
2492 	case 6:
2493 		mx = 2;
2494 		my = 2;
2495 		break;
2496 	default:
2497 		mx = my = 1;
2498 	}
2499 	horizontal = mx;
2500 	vertical = my;
2501 }
2502 
convert_vertices(vertex_nv * source,nv2avertex_t * destination)2503 void nv2a_renderer::convert_vertices(vertex_nv *source, nv2avertex_t *destination)
2504 {
2505 	vertex_nv vert;
2506 	int u;
2507 	float v[4];
2508 	double c;
2509 
2510 	// take each vertex with its attributes and obtain data for drawing
2511 	// should use either the vertex program or transformation matrices
2512 	if (vertex_pipeline == 4) {
2513 		// transformation matrices
2514 		// this part needs more testing
2515 		for (int i = 0; i < 4; i++) {
2516 			v[i] = 0;
2517 			for (int j = 0; j < 4; j++)
2518 				v[i] += matrix.composite[i][j] * source->attribute[0].fv[j];
2519 		};
2520 		destination->w = v[3];
2521 		destination->x = (v[0] / v[3]) * supersample_factor_x; // source->attribute[0].fv[0];
2522 		destination->y = (v[1] / v[3]) * supersample_factor_y; // source->attribute[0].fv[1];
2523 		c = v[3];
2524 		if (c == 0)
2525 			c = FLT_MIN;
2526 		c = 1.0f / c;
2527 		destination->p[(int)VERTEX_PARAMETER::PARAM_1W] = c;
2528 		destination->p[(int)VERTEX_PARAMETER::PARAM_Z] = v[2] * c;
2529 		destination->p[(int)VERTEX_PARAMETER::PARAM_COLOR_R] = source->attribute[3].fv[0] * c;
2530 		destination->p[(int)VERTEX_PARAMETER::PARAM_COLOR_G] = source->attribute[3].fv[1] * c;
2531 		destination->p[(int)VERTEX_PARAMETER::PARAM_COLOR_B] = source->attribute[3].fv[2] * c;
2532 		destination->p[(int)VERTEX_PARAMETER::PARAM_COLOR_A] = source->attribute[3].fv[3] * c;
2533 		destination->p[(int)VERTEX_PARAMETER::PARAM_SECONDARY_COLOR_R] = source->attribute[4].fv[0] * c;
2534 		destination->p[(int)VERTEX_PARAMETER::PARAM_SECONDARY_COLOR_G] = source->attribute[4].fv[1] * c;
2535 		destination->p[(int)VERTEX_PARAMETER::PARAM_SECONDARY_COLOR_B] = source->attribute[4].fv[2] * c;
2536 		destination->p[(int)VERTEX_PARAMETER::PARAM_SECONDARY_COLOR_A] = source->attribute[4].fv[3] * c;
2537 		for (u = 0; u < 4; u++) {
2538 			destination->p[(int)VERTEX_PARAMETER::PARAM_TEXTURE0_S + u * 4] = source->attribute[9 + u].fv[0] * c;
2539 			destination->p[(int)VERTEX_PARAMETER::PARAM_TEXTURE0_T + u * 4] = source->attribute[9 + u].fv[1] * c;
2540 			destination->p[(int)VERTEX_PARAMETER::PARAM_TEXTURE0_R + u * 4] = source->attribute[9 + u].fv[2] * c;
2541 			destination->p[(int)VERTEX_PARAMETER::PARAM_TEXTURE0_Q + u * 4] = source->attribute[9 + u].fv[3] * c;
2542 		}
2543 	}
2544 	else {
2545 		// vertex program
2546 		// run vertex program
2547 		vertexprogram.exec.process(vertexprogram.start_instruction, source, &vert, 1);
2548 		// the output of the vertex program has the perspective divide, viewport scale and offset already applied
2549 		// copy data for poly.h
2550 		destination->w = vert.attribute[0].fv[3];
2551 		destination->x = (vert.attribute[0].fv[0] - 0.53125f) * supersample_factor_x;
2552 		destination->y = (vert.attribute[0].fv[1] - 0.53125f) * supersample_factor_y;
2553 		c = destination->w;
2554 		if (c == 0)
2555 			c = FLT_MIN;
2556 		c = 1.0f / c;
2557 		destination->p[(int)VERTEX_PARAMETER::PARAM_1W] = c;
2558 		destination->p[(int)VERTEX_PARAMETER::PARAM_Z] = vert.attribute[0].fv[2]; // already divided by w
2559 		destination->p[(int)VERTEX_PARAMETER::PARAM_COLOR_R] = vert.attribute[3].fv[0] * c;
2560 		destination->p[(int)VERTEX_PARAMETER::PARAM_COLOR_G] = vert.attribute[3].fv[1] * c;
2561 		destination->p[(int)VERTEX_PARAMETER::PARAM_COLOR_B] = vert.attribute[3].fv[2] * c;
2562 		destination->p[(int)VERTEX_PARAMETER::PARAM_COLOR_A] = vert.attribute[3].fv[3] * c;
2563 		destination->p[(int)VERTEX_PARAMETER::PARAM_SECONDARY_COLOR_R] = vert.attribute[4].fv[0] * c;
2564 		destination->p[(int)VERTEX_PARAMETER::PARAM_SECONDARY_COLOR_G] = vert.attribute[4].fv[1] * c;
2565 		destination->p[(int)VERTEX_PARAMETER::PARAM_SECONDARY_COLOR_B] = vert.attribute[4].fv[2] * c;
2566 		destination->p[(int)VERTEX_PARAMETER::PARAM_SECONDARY_COLOR_A] = vert.attribute[4].fv[3] * c;
2567 		for (u = 0; u < 4; u++) {
2568 			destination->p[(int)VERTEX_PARAMETER::PARAM_TEXTURE0_S + u * 4] = vert.attribute[9 + u].fv[0] * c;
2569 			destination->p[(int)VERTEX_PARAMETER::PARAM_TEXTURE0_T + u * 4] = vert.attribute[9 + u].fv[1] * c;
2570 			destination->p[(int)VERTEX_PARAMETER::PARAM_TEXTURE0_R + u * 4] = vert.attribute[9 + u].fv[2] * c;
2571 			destination->p[(int)VERTEX_PARAMETER::PARAM_TEXTURE0_Q + u * 4] = vert.attribute[9 + u].fv[3] * c;
2572 		}
2573 	}
2574 }
2575 
clear_render_target(int what,uint32_t value)2576 void nv2a_renderer::clear_render_target(int what, uint32_t value)
2577 {
2578 	int xi, yi, xf, yf;
2579 	int x, y;
2580 	uint32_t color;
2581 	uint8_t *addr;
2582 	uint32_t mask;
2583 	uint32_t offset;
2584 
2585 	if (what == 0)
2586 		return;
2587 	mask = 0;
2588 	switch (colorformat_rendertarget) {
2589 	case NV2A_COLOR_FORMAT::R5G6B5:
2590 		if (what & 1)
2591 			mask = 0x1f;
2592 		if (what & 2)
2593 			mask = mask | 0x07e0;
2594 		if (what & 4)
2595 			mask = mask | 0xf800;
2596 		break;
2597 	case NV2A_COLOR_FORMAT::X8R8G8B8_Z8R8G8B8:
2598 	case NV2A_COLOR_FORMAT::X8R8G8B8_X8R8G8B8:
2599 		for (x = 3; x >= 0; x--) {
2600 			if (what & 8)
2601 				mask = (mask << 8) | 255;
2602 			what = what << 1;
2603 		}
2604 		break;
2605 	case NV2A_COLOR_FORMAT::A8R8G8B8:
2606 		for (x = 3; x >= 0; x--) {
2607 			if (what & 8)
2608 				mask = (mask << 8) | 255;
2609 			what = what << 1;
2610 		}
2611 		break;
2612 	case NV2A_COLOR_FORMAT::B8:
2613 		if (what & 1)
2614 			mask = 255;
2615 		break;
2616 	default:
2617 		return;
2618 	}
2619 	xi = clear_rendertarget.left()*supersample_factor_x;
2620 	yi = clear_rendertarget.top()*supersample_factor_y;
2621 	xf = clear_rendertarget.right()*supersample_factor_x;
2622 	yf = clear_rendertarget.bottom()*supersample_factor_y;
2623 	if ((xi < limits_rendertarget.left()) && (xf < limits_rendertarget.left()))
2624 		return;
2625 	if ((xi > limits_rendertarget.right()) && (xf > limits_rendertarget.right()))
2626 		return;
2627 	if ((yi < limits_rendertarget.top()) && (yf < limits_rendertarget.top()))
2628 		return;
2629 	if ((yi > limits_rendertarget.bottom()) && (yf > limits_rendertarget.bottom()))
2630 		return;
2631 	if (xi < limits_rendertarget.left())
2632 		xi = limits_rendertarget.left();
2633 	if (xf > limits_rendertarget.right())
2634 		xf = limits_rendertarget.right();
2635 	if (yi < limits_rendertarget.top())
2636 		yi = limits_rendertarget.top();
2637 	if (yf > limits_rendertarget.bottom())
2638 		yf = limits_rendertarget.bottom();
2639 	if (type_rendertarget == NV2A_RT_TYPE::SWIZZLED)
2640 		offset = (dilated0[dilate_rendertarget][xf] + dilated1[dilate_rendertarget][yf]) * bytespixel_rendertarget;
2641 	else // type_rendertarget == LINEAR
2642 		offset = pitch_rendertarget * yf + xf * bytespixel_rendertarget;
2643 	addr = (uint8_t *)rendertarget + offset;
2644 	if ((addr < basemempointer) || (addr > topmempointer))
2645 	{
2646 		machine().logerror("Bad memory pointer computed in clear_render_target !\n");
2647 		return;
2648 	}
2649 
2650 	for (y = yi; y <= yf; y++)
2651 		for (x = xi; x <= xf; x++) {
2652 			if (type_rendertarget == NV2A_RT_TYPE::SWIZZLED)
2653 				offset = (dilated0[dilate_rendertarget][x] + dilated1[dilate_rendertarget][y]) * bytespixel_rendertarget;
2654 			else // type_rendertarget == LINEAR
2655 				offset = pitch_rendertarget * y + x * bytespixel_rendertarget;
2656 			switch (colorformat_rendertarget) {
2657 			case NV2A_COLOR_FORMAT::R5G6B5:
2658 				addr = (uint8_t *)rendertarget + offset;
2659 				color = *((uint16_t *)addr);
2660 				break;
2661 			case NV2A_COLOR_FORMAT::X8R8G8B8_Z8R8G8B8:
2662 			case NV2A_COLOR_FORMAT::X8R8G8B8_X8R8G8B8:
2663 				addr = (uint8_t *)rendertarget + offset;
2664 				color = *((uint32_t *)addr);
2665 				break;
2666 			case NV2A_COLOR_FORMAT::A8R8G8B8:
2667 				addr = (uint8_t *)rendertarget + offset;
2668 				color = *((uint32_t *)addr);
2669 				break;
2670 			case NV2A_COLOR_FORMAT::B8:
2671 				addr = (uint8_t *)rendertarget + offset;
2672 				color = *addr;
2673 				break;
2674 			default:
2675 				return;
2676 			}
2677 			color = (color & ~mask) | (value & mask);
2678 			switch (colorformat_rendertarget) {
2679 			case NV2A_COLOR_FORMAT::R5G6B5:
2680 				*((uint16_t *)addr) = color;
2681 				break;
2682 			case NV2A_COLOR_FORMAT::X8R8G8B8_Z8R8G8B8:
2683 			case NV2A_COLOR_FORMAT::X8R8G8B8_X8R8G8B8:
2684 				*((uint32_t *)addr) = color;
2685 				break;
2686 			case NV2A_COLOR_FORMAT::A8R8G8B8:
2687 				*((uint32_t *)addr) = color;
2688 				break;
2689 			case NV2A_COLOR_FORMAT::B8:
2690 				*addr = color;
2691 				break;
2692 			default:
2693 				return;
2694 			}
2695 		}
2696 #ifdef LOG_NV2A
2697 	printf("clearscreen\n\r");
2698 #endif
2699 }
2700 
clear_depth_buffer(int what,uint32_t value)2701 void nv2a_renderer::clear_depth_buffer(int what, uint32_t value)
2702 {
2703 	int xi, yi, xf, yf;
2704 	int x, y;
2705 	uint32_t color;
2706 	uint8_t *addr;
2707 	uint32_t mask;
2708 	uint32_t offset;
2709 	uint32_t bpp;
2710 
2711 	if (what == 0)
2712 		return;
2713 	mask = 0;
2714 	switch (depthformat_rendertarget) {
2715 	case NV2A_RT_DEPTH_FORMAT::Z24S8:
2716 		if (what & 1)
2717 			mask = 0xffffff00;
2718 		if (what & 2)
2719 			mask = mask | 0xff;
2720 		bpp = 4;
2721 		break;
2722 	case NV2A_RT_DEPTH_FORMAT::Z16:
2723 		if (what & 1)
2724 			mask = 0xffff;
2725 		bpp = 2;
2726 		break;
2727 	default:
2728 		return;
2729 	}
2730 	xi = clear_rendertarget.left()*supersample_factor_x;
2731 	yi = clear_rendertarget.top()*supersample_factor_y;
2732 	xf = clear_rendertarget.right()*supersample_factor_x;
2733 	yf = clear_rendertarget.bottom()*supersample_factor_y;
2734 	if ((xi < limits_rendertarget.left()) && (xf < limits_rendertarget.left()))
2735 		return;
2736 	if ((xi > limits_rendertarget.right()) && (xf > limits_rendertarget.right()))
2737 		return;
2738 	if ((yi < limits_rendertarget.top()) && (yf < limits_rendertarget.top()))
2739 		return;
2740 	if ((yi > limits_rendertarget.bottom()) && (yf > limits_rendertarget.bottom()))
2741 		return;
2742 	if (xi < limits_rendertarget.left())
2743 		xi = limits_rendertarget.left();
2744 	if (xf > limits_rendertarget.right())
2745 		xf = limits_rendertarget.right();
2746 	if (yi < limits_rendertarget.top())
2747 		yi = limits_rendertarget.top();
2748 	if (yf > limits_rendertarget.bottom())
2749 		yf = limits_rendertarget.bottom();
2750 	offset = pitch_depthbuffer * yf + xf * bpp;
2751 	addr = (uint8_t *)depthbuffer + offset;
2752 	if ((addr < basemempointer) || (addr > topmempointer))
2753 	{
2754 		machine().logerror("Bad memory pointer computed in clear_depth_buffer !\n");
2755 		return;
2756 	}
2757 
2758 	for (y = yi; y <= yf; y++)
2759 		for (x = xi; x <= xf; x++) {
2760 			offset = pitch_depthbuffer * y + x * bpp;
2761 			switch (depthformat_rendertarget) {
2762 			case NV2A_RT_DEPTH_FORMAT::Z16:
2763 				addr = (uint8_t *)depthbuffer + offset;
2764 				color = *((uint16_t *)addr);
2765 				break;
2766 			case NV2A_RT_DEPTH_FORMAT::Z24S8:
2767 				addr = (uint8_t *)depthbuffer + offset;
2768 				color = *((uint32_t *)addr);
2769 				break;
2770 			default:
2771 				return;
2772 			}
2773 			color = (color & ~mask) | (value & mask);
2774 			switch (depthformat_rendertarget) {
2775 			case NV2A_RT_DEPTH_FORMAT::Z16:
2776 				addr = (uint8_t *)depthbuffer + offset;
2777 				*((uint16_t *)addr) = color;
2778 				break;
2779 			case NV2A_RT_DEPTH_FORMAT::Z24S8:
2780 				addr = (uint8_t *)depthbuffer + offset;
2781 				*((uint32_t *)addr) = color;
2782 				break;
2783 			default:
2784 				return;
2785 			}
2786 		}
2787 }
2788 
render_triangle_culling(const rectangle & cliprect,nv2avertex_t & _v1,nv2avertex_t & _v2,nv2avertex_t & _v3)2789 uint32_t nv2a_renderer::render_triangle_culling(const rectangle &cliprect, nv2avertex_t &_v1, nv2avertex_t &_v2, nv2avertex_t &_v3)
2790 {
2791 	float areax2;
2792 	NV2A_GL_CULL_FACE face = NV2A_GL_CULL_FACE::FRONT;
2793 
2794 	if (backface_culling_enabled == false)
2795 		return rasterizer.render_triangle(cliprect, render_spans_callback, (int)VERTEX_PARAMETER::ALL, _v1, _v2, _v3);
2796 	if (backface_culling_culled == NV2A_GL_CULL_FACE::FRONT_AND_BACK)
2797 	{
2798 		triangles_bfculled++;
2799 		return 0;
2800 	}
2801 	areax2 = _v1.x*(_v2.y - _v3.y) + _v2.x*(_v3.y - _v1.y) + _v3.x*(_v1.y - _v2.y);
2802 	if (areax2 == 0.0f) {
2803 		triangles_bfculled++;
2804 		return 0;
2805 	}
2806 	if (backface_culling_winding == NV2A_GL_FRONT_FACE::CCW)
2807 	{
2808 		if (-areax2 <= 0)
2809 			face = NV2A_GL_CULL_FACE::BACK;
2810 		else
2811 			face = NV2A_GL_CULL_FACE::FRONT;
2812 	} else
2813 	{
2814 		if (areax2 <= 0)
2815 			face = NV2A_GL_CULL_FACE::BACK;
2816 		else
2817 			face = NV2A_GL_CULL_FACE::FRONT;
2818 	}
2819 	if (face == NV2A_GL_CULL_FACE::FRONT)
2820 		if (backface_culling_culled == NV2A_GL_CULL_FACE::BACK)
2821 			return rasterizer.render_triangle(cliprect, render_spans_callback, (int)VERTEX_PARAMETER::ALL, _v1, _v2, _v3);
2822 	if (face == NV2A_GL_CULL_FACE::BACK)
2823 		if (backface_culling_culled == NV2A_GL_CULL_FACE::FRONT)
2824 			return rasterizer.render_triangle(cliprect, render_spans_callback, (int)VERTEX_PARAMETER::ALL, _v1, _v2, _v3);
2825 	triangles_bfculled++;
2826 	return 0;
2827 }
2828 
clip_triangle_w(nv2avertex_t vi[3],nv2avertex_t * vo)2829 int nv2a_renderer::clip_triangle_w(nv2avertex_t vi[3], nv2avertex_t *vo)
2830 {
2831 	int idx_prev, idx_curr;
2832 	int neg_prev, neg_curr;
2833 	double tfactor;
2834 	int idx;
2835 	const double wthreshold = 0.000001;
2836 
2837 	idx_prev = 2;
2838 	idx_curr = 0;
2839 	idx = 0;
2840 	neg_prev = vi[idx_prev].w < wthreshold ? 1 : 0;
2841 	while (idx_curr < 3)
2842 	{
2843 		neg_curr = vi[idx_curr].w < wthreshold ? 1 : 0;
2844 		if (neg_curr ^ neg_prev)
2845 		{
2846 			tfactor = (wthreshold - vi[idx_prev].w) / (vi[idx_curr].w - vi[idx_prev].w);
2847 			// compute values for the new intermediate point
2848 			vo[idx].x = ((vi[idx_curr].x - vi[idx_prev].x) * tfactor) + vi[idx_prev].x;
2849 			vo[idx].y = ((vi[idx_curr].y - vi[idx_prev].y) * tfactor) + vi[idx_prev].y;
2850 			vo[idx].w = ((vi[idx_curr].w - vi[idx_prev].w) * tfactor) + vi[idx_prev].w;
2851 			for (int n = 0; n < (int)VERTEX_PARAMETER::PARAM_Z; n++)
2852 				vo[idx].p[n] = ((vi[idx_curr].p[n] - vi[idx_prev].p[n]) * tfactor) + vi[idx_prev].p[n];
2853 			vo[idx].p[(int)VERTEX_PARAMETER::PARAM_Z] = ((vi[idx_curr].p[(int)VERTEX_PARAMETER::PARAM_Z] - vi[idx_prev].p[(int)VERTEX_PARAMETER::PARAM_Z]) * tfactor) + vi[idx_prev].p[(int)VERTEX_PARAMETER::PARAM_Z];
2854 			vo[idx].p[(int)VERTEX_PARAMETER::PARAM_1W] = 1.0f / vo[idx].w;
2855 			idx++;
2856 		}
2857 		if (neg_curr == 0)
2858 		{
2859 			vo[idx].x = vi[idx_curr].x;
2860 			vo[idx].y = vi[idx_curr].y;
2861 			vo[idx].w = vi[idx_curr].w;
2862 			for (int n = 0; n < (int)VERTEX_PARAMETER::PARAM_Z; n++)
2863 				vo[idx].p[n] = vi[idx_curr].p[n];
2864 			vo[idx].p[(int)VERTEX_PARAMETER::PARAM_Z] = vi[idx_curr].p[(int)VERTEX_PARAMETER::PARAM_Z];
2865 			vo[idx].p[(int)VERTEX_PARAMETER::PARAM_1W] = 1.0f / vo[idx].w;
2866 			idx++;
2867 		}
2868 		neg_prev = neg_curr;
2869 		idx_prev = idx_curr;
2870 		idx_curr++;
2871 	}
2872 	return idx;
2873 }
2874 
// Renders a triangle, clipping it against w first when needed.
// - all three vertices with w > 0: forwarded unchanged to render_triangle_culling and its
//   result is returned
// - enable_clipping_w false, or all three vertices with w <= 0: nothing is drawn, returns 0
// - otherwise the vertices are brought back to the state before the perspective divide,
//   clipped by clip_triangle_w, transformed to screen coordinates again and the resulting
//   polygon is drawn as a fan of triangles through render_triangle_culling
// NOTE(review): in the clipped case the values returned by render_triangle_culling are
// discarded and the function always returns 0 - confirm this is intended.
uint32_t nv2a_renderer::render_triangle_clipping(const rectangle &cliprect, nv2avertex_t &_v1, nv2avertex_t &_v2, nv2avertex_t &_v3)
{
	nv2avertex_t *vp[3]; // pointers to the three input vertices
	nv2avertex_t vi[3]; // input of clip_triangle_w
	nv2avertex_t vo[8]; // output of clip_triangle_w
	int nv; // number of vertices produced by the clipping
	double c;

	// nothing to clip, render directly
	if ((_v1.w > 0) && (_v2.w > 0) && (_v3.w > 0))
		return render_triangle_culling(cliprect, _v1, _v2, _v3);
	// at least one vertex has w <= 0, without clipping the triangle is dropped
	if (enable_clipping_w == false)
		return 0;
	// no vertex has w > 0, the triangle is dropped
	if ((_v1.w <= 0) && (_v2.w <= 0) && (_v3.w <= 0))
		return 0;
	// assign the elements of the pointer array
	vp[0] = &_v1;
	vp[1] = &_v2;
	vp[2] = &_v3;
	// go back to the state before perspective divide
	// (two variants; the meaning of vertex_pipeline == 4 is defined elsewhere)
	if (vertex_pipeline == 4)
	{
		for (int n = 0; n < 3; n++)
		{
			c = vp[n]->w;
			vi[n].w = c;
			// remove supersample and perspective divide
			vi[n].x = (vp[n]->x / (double)supersample_factor_x) * c;
			vi[n].y = (vp[n]->y / (double)supersample_factor_y) * c;
			// all the parameters up to and including Z are multiplied by w
			for (int nn = 0; nn <= (int)VERTEX_PARAMETER::PARAM_Z; nn++)
				vi[n].p[nn] = vp[n]->p[nn] * c;
		}
	} else
	{
		for (int n = 0; n < 3; n++)
		{
			c = vp[n]->w;
			vi[n].w = c;
			// remove perspective correct interpolate
			for (int nn = 0; nn < (int)VERTEX_PARAMETER::PARAM_Z; nn++)
				vi[n].p[nn] = vp[n]->p[nn] * c;
			// remove supersample
			vi[n].x = (vp[n]->x / supersample_factor_x) + 0.53125f;
			vi[n].y = (vp[n]->y / supersample_factor_y) + 0.53125f;
			// remove translate
			vi[n].x = vi[n].x - matrix.translate[0];
			vi[n].y = vi[n].y - matrix.translate[1];
			vi[n].p[(int)VERTEX_PARAMETER::PARAM_Z] = vp[n]->p[(int)VERTEX_PARAMETER::PARAM_Z] - matrix.translate[2];
			// remove perspective divide
			vi[n].x = vi[n].x * c;
			vi[n].y = vi[n].y * c;
			vi[n].p[(int)VERTEX_PARAMETER::PARAM_Z] = vi[n].p[(int)VERTEX_PARAMETER::PARAM_Z] * c;
		}
	}
	// do the clipping
	nv = clip_triangle_w(vi, vo);
	// screen coordinates for the new points
	// (each branch applies the inverse of the operations done in the matching branch above)
	if (vertex_pipeline == 4)
	{
		for (int n = 0; n < nv; n++)
		{
			c = 1 / vo[n].w;
			vo[n].x = vo[n].x * (double)supersample_factor_x * c;
			vo[n].y = vo[n].y * (double)supersample_factor_y * c;
			for (int nn = 0; nn <= (int)VERTEX_PARAMETER::PARAM_Z; nn++)
				vo[n].p[nn] = vo[n].p[nn] * c;
		}
	} else
	{
		for (int n = 0; n < nv; n++)
		{
			c = 1 / vo[n].w;
			// apply perspective divide
			vo[n].x = vo[n].x * c;
			vo[n].y = vo[n].y * c;
			vo[n].p[(int)VERTEX_PARAMETER::PARAM_Z] = vo[n].p[(int)VERTEX_PARAMETER::PARAM_Z] * c;
			// apply translate
			vo[n].x = vo[n].x + matrix.translate[0];
			vo[n].y = vo[n].y + matrix.translate[1];
			vo[n].p[(int)VERTEX_PARAMETER::PARAM_Z] = vo[n].p[(int)VERTEX_PARAMETER::PARAM_Z] + matrix.translate[2];
			// apply supersample
			vo[n].x = (vo[n].x - 0.53125f) * supersample_factor_x;
			vo[n].y = (vo[n].y - 0.53125f) * supersample_factor_y;
			// apply perspective correct interpolate
			for (int nn = 0; nn < (int)VERTEX_PARAMETER::PARAM_Z; nn++)
				vo[n].p[nn] = vo[n].p[nn] * c;
		}
	}
	// draw the clipped polygon as a fan of triangles that share vertex 0
	for (int n = 1; n <= (nv - 2); n++)
		render_triangle_culling(cliprect, vo[0], vo[n], vo[n + 1]);
	return 0;
}
2965 
assemble_primitive(int source,int count)2966 void nv2a_renderer::assemble_primitive(int source, int count)
2967 {
2968 	uint32_t pc = primitives_count;
2969 	vertex_nv *v;
2970 
2971 	for (; count > 0; count--) {
2972 		v = &vertex_software[source];
2973 		if (primitive_type == NV2A_BEGIN_END::QUADS) {
2974 			convert_vertices(v, vertex_xy + ((vertex_count + vertex_accumulated) & 1023));
2975 			vertex_accumulated++;
2976 			if (vertex_accumulated == 4) {
2977 				primitives_count++;
2978 				vertex_accumulated = 0;
2979 				render_triangle_clipping(limits_rendertarget, vertex_xy[vertex_count], vertex_xy[vertex_count + 1], vertex_xy[vertex_count + 2]);
2980 				render_triangle_clipping(limits_rendertarget, vertex_xy[vertex_count], vertex_xy[vertex_count + 2], vertex_xy[vertex_count + 3]);
2981 				vertex_count = (vertex_count + 4) & 1023;
2982 				rasterizer.wait();
2983 			}
2984 		}
2985 		else if (primitive_type == NV2A_BEGIN_END::TRIANGLES) {
2986 			convert_vertices(v, vertex_xy + ((vertex_count + vertex_accumulated) & 1023));
2987 			vertex_accumulated++;
2988 			if (vertex_accumulated == 3) {
2989 				primitives_count++;
2990 				vertex_accumulated = 0;
2991 				render_triangle_clipping(limits_rendertarget, vertex_xy[vertex_count], vertex_xy[(vertex_count + 1) & 1023], vertex_xy[(vertex_count + 2) & 1023]); // 4 rgba, 4 texture units 2 uv
2992 				vertex_count = (vertex_count + 3) & 1023;
2993 				rasterizer.wait();
2994 			}
2995 		}
2996 		else if (primitive_type == NV2A_BEGIN_END::TRIANGLE_FAN) {
2997 			if (vertex_accumulated == 0)
2998 			{
2999 				convert_vertices(v, vertex_xy + 1024);
3000 				vertex_accumulated = 1;
3001 			}
3002 			else if (vertex_accumulated == 1)
3003 			{
3004 				convert_vertices(v, vertex_xy);
3005 				vertex_accumulated = 2;
3006 				vertex_count = 1;
3007 			}
3008 			else
3009 			{
3010 				primitives_count++;
3011 				// if software sends the vertices 0 1 2 3 4 5 6
3012 				// hardware will draw triangles made by (0,1,2) (0,2,3) (0,3,4) (0,4,5) (0,5,6)
3013 				convert_vertices(v, vertex_xy + vertex_count);
3014 				render_triangle_clipping(limits_rendertarget, vertex_xy[1024], vertex_xy[(vertex_count - 1) & 1023], vertex_xy[vertex_count]);
3015 				vertex_count = (vertex_count + 1) & 1023;
3016 				rasterizer.wait();
3017 			}
3018 		}
3019 		else if (primitive_type == NV2A_BEGIN_END::TRIANGLE_STRIP) {
3020 			if (vertex_accumulated == 0)
3021 			{
3022 				convert_vertices(v, vertex_xy);
3023 				vertex_accumulated = 1;
3024 			}
3025 			else if (vertex_accumulated == 1)
3026 			{
3027 				convert_vertices(v, vertex_xy + 1);
3028 				vertex_accumulated = 2;
3029 				vertex_count = 2;
3030 			}
3031 			else
3032 			{
3033 				primitives_count++;
3034 				// if software sends the vertices 0 1 2 3 4 5 6
3035 				// hardware will draw triangles made by (0,1,2) (1,3,2) (2,3,4) (3,5,4) (4,5,6)
3036 				convert_vertices(v, vertex_xy + vertex_count);
3037 				if ((vertex_count & 1) == 0)
3038 					render_triangle_clipping(limits_rendertarget, vertex_xy[(vertex_count - 2) & 1023], vertex_xy[(vertex_count - 1) & 1023], vertex_xy[vertex_count]);
3039 				else
3040 					render_triangle_clipping(limits_rendertarget, vertex_xy[(vertex_count - 2) & 1023], vertex_xy[vertex_count], vertex_xy[(vertex_count - 1) & 1023]);
3041 				vertex_count = (vertex_count + 1) & 1023;
3042 				rasterizer.wait();
3043 			}
3044 		}
3045 		else if (primitive_type == NV2A_BEGIN_END::QUAD_STRIP) {
3046 			if (vertex_accumulated == 0)
3047 			{
3048 				convert_vertices(v, vertex_xy);
3049 				vertex_accumulated = 1;
3050 			}
3051 			else if (vertex_accumulated == 1)
3052 			{
3053 				convert_vertices(v, vertex_xy + 1);
3054 				vertex_accumulated = 2;
3055 				vertex_count = 0;
3056 			}
3057 			else
3058 			{
3059 				convert_vertices(v, vertex_xy + ((vertex_count + vertex_accumulated) & 1023));
3060 				vertex_accumulated++;
3061 				if (vertex_accumulated == 4)
3062 				{
3063 					primitives_count++;
3064 					// if software sends the vertices 0 1 2 3 4 5 6 7
3065 					// hardware will draw triangles made by (0,1,2) (2,1,3) (2,3,4) (4,3,5) (4,5,6) (6,5,7)
3066 					render_triangle_clipping(limits_rendertarget, vertex_xy[vertex_count], vertex_xy[(vertex_count + 1) & 1023], vertex_xy[(vertex_count + 2) & 1023]);
3067 					render_triangle_clipping(limits_rendertarget, vertex_xy[(vertex_count + 2) & 1023], vertex_xy[(vertex_count + 1) & 1023], vertex_xy[(vertex_count + 3) & 1023]);
3068 					vertex_accumulated = 2;
3069 					vertex_count = (vertex_count + 2) & 1023;
3070 					rasterizer.wait();
3071 				}
3072 			}
3073 		}
3074 		else {
3075 			if (vertex_count == 0)
3076 				machine().logerror("Unsupported primitive %d\n", int(primitive_type));
3077 			vertex_count++;
3078 		}
3079 		source = (source + 1) & 1023;
3080 	}
3081 	primitives_total_count += primitives_count - pc;
3082 }
3083 
process_persistent_vertex()3084 void nv2a_renderer::process_persistent_vertex()
3085 {
3086 	memcpy(&vertex_software[1025], &persistvertexattr, sizeof(persistvertexattr));
3087 	assemble_primitive(1025, 1);
3088 }
3089 
compute_limits_rendertarget(uint32_t chanel,uint32_t subchannel)3090 void nv2a_renderer::compute_limits_rendertarget(uint32_t chanel, uint32_t subchannel)
3091 {
3092 	uint32_t data;
3093 	int x, w;
3094 	int y, h;
3095 
3096 	data = channel[chanel][subchannel].object.method[0x0200 / 4];
3097 	x = data & 0xffff;
3098 	w = (data >> 16) & 0xffff;
3099 	x = x*supersample_factor_x;
3100 	w = w*supersample_factor_x;
3101 	limits_rendertarget.setx(x, x + w - 1);
3102 	data = channel[chanel][subchannel].object.method[0x0204 / 4];
3103 	y = data & 0xffff;
3104 	h = (data >> 16) & 0xffff;
3105 	y = y*supersample_factor_y;
3106 	h = h*supersample_factor_y;
3107 	limits_rendertarget.sety(y, y + h - 1);
3108 }
3109 
compute_size_rendertarget(uint32_t chanel,uint32_t subchannel)3110 void nv2a_renderer::compute_size_rendertarget(uint32_t chanel, uint32_t subchannel)
3111 {
3112 	size_rendertarget = pitch_rendertarget*(limits_rendertarget.bottom() + 1);
3113 	size_depthbuffer = pitch_depthbuffer*(limits_rendertarget.bottom() + 1);
3114 }
3115 
execute_method(address_space & space,uint32_t chanel,uint32_t subchannel,uint32_t method,uint32_t address,int & countlen)3116 int nv2a_renderer::execute_method(address_space &space, uint32_t chanel, uint32_t subchannel, uint32_t method, uint32_t address, int &countlen)
3117 {
3118 	uint32_t data;
3119 
3120 	data = space.read_dword(address);
3121 	channel[chanel][subchannel].object.method[method / 4] = data;
3122 #ifdef LOG_NV2A
3123 	//printf("A:%08X CH=%02d SCH=%02d MTHD:%08X D:%08X\n\r",address,chanel,subchannel,maddress,data);
3124 #endif
3125 	if (channel[chanel][subchannel].object.objclass == 0x97)
3126 		return execute_method_3d(space, chanel, subchannel, method, address, data, countlen);
3127 	if (channel[chanel][subchannel].object.objclass == 0x39) // 0180
3128 		return execute_method_m2mf(space, chanel, subchannel, method, address, data, countlen);
3129 	if (channel[chanel][subchannel].object.objclass == 0x62) // 0184 0188
3130 		return execute_method_surf2d(space, chanel, subchannel, method, address, data, countlen);
3131 	if (channel[chanel][subchannel].object.objclass == 0x9f) // 019c 02fc
3132 		return execute_method_blit(space, chanel, subchannel, method, address, data, countlen);
3133 	return 0;
3134 }
3135 
execute_method_3d(address_space & space,uint32_t chanel,uint32_t subchannel,uint32_t maddress,uint32_t address,uint32_t data,int & countlen)3136 int nv2a_renderer::execute_method_3d(address_space& space, uint32_t chanel, uint32_t subchannel, uint32_t maddress, uint32_t address, uint32_t data, int &countlen)
3137 {
3138 	if ((chanel != 0) || (subchannel != 0))
3139 		return 0;
3140 	if (maddress == 0x17fc) {
3141 #if 0 // useful while debugging to see what coordinates have been used
3142 		static int debugvc = 0;
3143 		if (debugvc)
3144 			if (data == 0)
3145 			{
3146 				printf("%d %d\n\r", (int)primitive_type, vertex_first);
3147 				for (int n = 0; n < vertex_first; n++)
3148 				{
3149 					if (indexesleft_count > 0)
3150 						printf("%d i:%d ", n, vertex_indexes[n]);
3151 					else
3152 						printf("%d ", n);
3153 					printf("X:%f Y:%f Z:%f W:%f x:%f y:%f\n\r", vertex_software[n].attribute[0].fv[0], vertex_software[n].attribute[0].fv[1], vertex_software[n].attribute[0].fv[2], vertex_software[n].attribute[0].fv[3], vertex_xy[n].x, vertex_xy[n].y);
3154 				}
3155 			}
3156 #endif
3157 		vertex_count = 0;
3158 		vertex_first = 0;
3159 		vertex_accumulated = 0;
3160 		indexesleft_count = 0;
3161 		indexesleft_first = 0;
3162 		primitives_count = 0;
3163 		primitive_type = (NV2A_BEGIN_END)data;
3164 		if (data == 0)
3165 			primitives_batches_count++;
3166 		else
3167 		{
3168 			if (((channel[chanel][subchannel].object.method[0x1e60 / 4] & 7) > 0) && (combiner.used != 0))
3169 				render_spans_callback = nv2a_rasterizer::render_delegate(&nv2a_renderer::render_register_combiners, this);
3170 			else if (texture[0].enabled)
3171 				render_spans_callback = nv2a_rasterizer::render_delegate(&nv2a_renderer::render_texture_simple, this);
3172 			else
3173 				render_spans_callback = nv2a_rasterizer::render_delegate(&nv2a_renderer::render_color, this);
3174 		}
3175 		countlen--;
3176 	}
3177 	if (maddress == 0x1810) {
3178 		// draw vertices
3179 		int offset, count;
3180 		uint32_t n;
3181 
3182 		offset = data & 0xffffff;
3183 		count = (data >> 24) & 0xff;
3184 #ifdef LOG_NV2A
3185 		printf("vertex %d %d\n\r", offset, count);
3186 #endif
3187 		for (n = 0; n <= count; n++) {
3188 			read_vertices_0x1810(space, vertex_first, n + offset, 1);
3189 			assemble_primitive(vertex_first, 1);
3190 			vertex_first = (vertex_first + 1) & 1023;
3191 		}
3192 		countlen--;
3193 	}
3194 	if ((maddress == 0x1800) || (maddress == 0x1808)) {
3195 		int mult;
3196 
3197 		if (maddress == 0x1800)
3198 			mult = 2;
3199 		else
3200 			mult = 1;
3201 		// vertices are selected from the vertex buffer using an array of indexes
3202 		// each dword after 1800 contains two 16 bit index values to select the vartices
3203 		// each dword after 1808 contains a 32 bit index value to select the vartices
3204 		while (countlen > 0) {
3205 			int n;
3206 
3207 			data = space.read_dword(address);
3208 			n = indexesleft_first + indexesleft_count;
3209 			if (mult == 2) {
3210 				vertex_indexes[n & 1023] = data & 0xffff;
3211 				vertex_indexes[(n + 1) & 1023] = (data >> 16) & 0xffff;
3212 				indexesleft_count = indexesleft_count + 2;
3213 			}
3214 			else {
3215 				vertex_indexes[n & 1023] = data;
3216 				indexesleft_count = indexesleft_count + 1;
3217 			}
3218 			address += 4;
3219 			countlen--;
3220 			read_vertices_0x180x(space, vertex_first, address, mult);
3221 			assemble_primitive(vertex_first, mult);
3222 			vertex_first = (vertex_first + mult) & 1023;
3223 		}
3224 	}
3225 	if (maddress == 0x1818) {
3226 		if (countlen == 0)
3227 			machine().logerror("Method 0x1818 with 0 vertices\n");
3228 		// vertices are taken from the next words, not from a vertex buffer
3229 		// first send primitive type with 17fc
3230 		// then countlen number of dwords with 1818
3231 		// end with 17fc primitive type 0
3232 		// at 1760 16 words specify the vertex format:for each possible vertex attribute the number of components (0=not present) and type of each
3233 		while (countlen > 0) {
3234 			int c;
3235 
3236 			c = read_vertices_0x1818(space, vertex_first, address, 1);
3237 			countlen = countlen - c;
3238 			if (countlen < 0) {
3239 				machine().logerror("Method 0x1818 missing %d words\n", -countlen);
3240 				countlen = 0;
3241 				break;
3242 			}
3243 			address = address + c * 4;
3244 			assemble_primitive(vertex_first, 1);
3245 			vertex_first = (vertex_first + 1) & 1023;
3246 		}
3247 	}
3248 	if ((maddress >= 0x1880) && (maddress < 0x1900))
3249 	{
3250 		int v = maddress - 0x1880; // 16 couples,2 float per couple,16*2*4=128
3251 		int attr = v >> 3;
3252 		int comp = (v >> 2) & 1;
3253 
3254 		persistvertexattr.attribute[attr].iv[comp] = data;
3255 		if (comp == 1)
3256 		{
3257 			persistvertexattr.attribute[attr].fv[2] = 0;
3258 			persistvertexattr.attribute[attr].fv[3] = 1;
3259 			if (attr == 0)
3260 				process_persistent_vertex();
3261 		}
3262 	}
3263 	if ((maddress >= 0x1900) && (maddress < 0x1940))
3264 	{
3265 		int v = maddress - 0x1900; // 16 dwords,2 values per dword
3266 		int attr = v >> 2;
3267 		uint16_t d1 = data & 0xffff;
3268 		uint16_t d2 = data >> 16;
3269 
3270 		persistvertexattr.attribute[attr].fv[0] = (float)((int16_t)d1);
3271 		persistvertexattr.attribute[attr].fv[1] = (float)((int16_t)d2);
3272 		persistvertexattr.attribute[attr].fv[2] = 0;
3273 		persistvertexattr.attribute[attr].fv[3] = 1;
3274 		if (attr == 0)
3275 			process_persistent_vertex();
3276 	}
3277 	if ((maddress >= 0x1940) && (maddress < 0x1980))
3278 	{
3279 		int v = maddress - 0x1940; // 16 dwords,4 values per dword
3280 		int attr = v >> 2;
3281 		uint8_t d1 = data & 255;
3282 		uint8_t d2 = (data >> 8) & 255;
3283 		uint8_t d3 = (data >> 16) & 255;
3284 		uint8_t d4 = data >> 24;
3285 
3286 		// if sending color dword is aabbggrr
3287 		persistvertexattr.attribute[attr].fv[0] = (float)d1 / 255.0;
3288 		persistvertexattr.attribute[attr].fv[1] = (float)d2 / 255.0;
3289 		persistvertexattr.attribute[attr].fv[2] = (float)d3 / 255.0;
3290 		persistvertexattr.attribute[attr].fv[3] = (float)d4 / 255.0;
3291 		if (attr == 0)
3292 			process_persistent_vertex();
3293 	}
3294 	if ((maddress >= 0x1980) && (maddress < 0x1a00))
3295 	{
3296 		int v = maddress - 0x1980; // 16 couples,4 values per couple,16*2*4=128
3297 		int attr = v >> 3;
3298 		int comp = (v >> 1) & 2;
3299 		uint16_t d1 = data & 0xffff;
3300 		uint16_t d2 = data >> 16;
3301 
3302 		persistvertexattr.attribute[attr].fv[comp] = (float)((int16_t)d1);
3303 		persistvertexattr.attribute[attr].fv[comp+1] = (float)((int16_t)d2);
3304 		if (comp == 2)
3305 			if (attr == 0)
3306 				process_persistent_vertex();
3307 	}
3308 	if ((maddress >= 0x1a00) && (maddress < 0x1b00))
3309 	{
3310 		int v = maddress - 0x1a00; // 16 groups,4 float per group
3311 		int attr = v >> 4;
3312 		int comp = (v >> 2) & 3;
3313 
3314 		persistvertexattr.attribute[attr].iv[comp] = data;
3315 		if (comp == 3)
3316 			if (attr == 0)
3317 				process_persistent_vertex();
3318 	}
3319 	if ((maddress >= 0x1518) && (maddress < 0x1528))
3320 	{
3321 		int v = maddress - 0x1518;
3322 		int comp = v >> 2;
3323 
3324 		persistvertexattr.attribute[(int)NV2A_VERTEX_ATTR::POS].iv[comp] = data;
3325 		if (comp == 3)
3326 			process_persistent_vertex();
3327 	}
3328 	else if ((maddress >= 0x1500) && (maddress < 0x1590))
3329 	{
3330 		machine().logerror("Yet unsupported method %x\n\r", maddress);
3331 	}
3332 	if ((maddress >= 0x1720) && (maddress < 0x1760)) {
3333 		int bit = maddress / 4 - 0x1720 / 4;
3334 
3335 		if (data & 0x80000000)
3336 			vertexbuffer.address[bit] = (data & 0x0fffffff) + dma_offset[7];
3337 		else
3338 			vertexbuffer.address[bit] = (data & 0x0fffffff) + dma_offset[6];
3339 	}
3340 	if ((maddress >= 0x1760) && (maddress < 0x17A0)) {
3341 		int bit = maddress / 4 - 0x1760 / 4;
3342 
3343 		vertexbuffer.type[bit] = data & 255;
3344 		vertexbuffer.stride[bit] = (data >> 8) & 255;
3345 		switch (vertexbuffer.type[bit])
3346 		{
3347 		case 0x02: // none
3348 			vertexbuffer.words[bit] = 0;
3349 			break;
3350 		case 0x12: // float1
3351 			vertexbuffer.words[bit] = 1;
3352 			break;
3353 		case 0x16: // normpacked3
3354 			vertexbuffer.words[bit] = 1;
3355 			break;
3356 		case 0x22: // float2
3357 			vertexbuffer.words[bit] = 2;
3358 			break;
3359 		case 0x32: // float3
3360 			vertexbuffer.words[bit] = 3;
3361 			break;
3362 		case 0x40: // d3dcolor
3363 			vertexbuffer.words[bit] = 1;
3364 			break;
3365 		case 0x42: // float4
3366 			vertexbuffer.words[bit] = 4;
3367 			break;
3368 		default:
3369 			machine().logerror("Yet unsupported vertex data type %x\n\r", vertexbuffer.type[bit]);
3370 			vertexbuffer.words[bit] = 0;
3371 		}
3372 		if (vertexbuffer.words[bit] > 0)
3373 			vertexbuffer.enabled |= (1 << bit);
3374 		else
3375 			vertexbuffer.enabled &= ~(1 << bit);
3376 		vertexbuffer.offset[0] = 0;
3377 		for (int n = bit + 1; n <= 16; n++) {
3378 			if ((vertexbuffer.enabled & (1 << (n - 1))) != 0)
3379 				vertexbuffer.offset[n] = vertexbuffer.offset[n - 1] + vertexbuffer.words[n - 1];
3380 			else
3381 				vertexbuffer.offset[n] = vertexbuffer.offset[n - 1];
3382 		}
3383 		countlen--;
3384 	}
3385 	if ((maddress == 0x1d6c) || (maddress == 0x1a4))
3386 		countlen--;
3387 	if (maddress == 0x0308) {
3388 		backface_culling_enabled = data != 0 ? true : false;
3389 	}
3390 	if (maddress == 0x03a0) {
3391 		backface_culling_winding = (NV2A_GL_FRONT_FACE)data;
3392 	}
3393 	if (maddress == 0x039c) {
3394 		backface_culling_culled = (NV2A_GL_CULL_FACE)data;
3395 	}
3396 	if (maddress == 0x0180) {
3397 		geforce_read_dma_object(data, dma_offset[0], dma_size[0]);
3398 	}
3399 	if (maddress == 0x0184) {
3400 		geforce_read_dma_object(data, dma_offset[1], dma_size[1]);
3401 	}
3402 	if (maddress == 0x0188) {
3403 		geforce_read_dma_object(data, dma_offset[2], dma_size[2]);
3404 	}
3405 	if (maddress == 0x0190) {
3406 		geforce_read_dma_object(data, dma_offset[3], dma_size[3]);
3407 	}
3408 	if (maddress == 0x0194) {
3409 		geforce_read_dma_object(data, dma_offset[4], dma_size[4]);
3410 	}
3411 	if (maddress == 0x0198) {
3412 		geforce_read_dma_object(data, dma_offset[5], dma_size[5]);
3413 	}
3414 	if (maddress == 0x019c) {
3415 		geforce_read_dma_object(data, dma_offset[6], dma_size[6]);
3416 	}
3417 	if (maddress == 0x01a0) {
3418 		geforce_read_dma_object(data, dma_offset[7], dma_size[7]);
3419 	}
3420 	if (maddress == 0x01a4) {
3421 		geforce_read_dma_object(data, dma_offset[8], dma_size[8]);
3422 	}
3423 	if (maddress == 0x01a8) {
3424 		geforce_read_dma_object(data, dma_offset[9], dma_size[9]);
3425 	}
3426 	if (maddress == 0x1d70) {
3427 		// with 1d70 write the value at offest [1d6c] inside dma object [1a4]
3428 		uint32_t offset, base;
3429 		uint32_t dmahand, dmaoff, smasiz;
3430 
3431 		offset = channel[chanel][subchannel].object.method[0x1d6c / 4];
3432 		dmahand = channel[chanel][subchannel].object.method[0x1a4 / 4];
3433 		geforce_read_dma_object(dmahand, dmaoff, smasiz);
3434 		base = dmaoff;
3435 		space.write_dword(base + offset, data);
3436 		// software expects to find the parameter of this method at pgraph offset b10
3437 		pgraph[0xb10 / 4] = data << 2;
3438 		countlen--;
3439 	}
3440 	if (maddress == 0x1d7c) {
3441 		antialias_control = data;
3442 		compute_supersample_factors(supersample_factor_x, supersample_factor_y);
3443 		compute_limits_rendertarget(chanel, subchannel);
3444 		countlen--;
3445 	}
3446 	if (maddress == 0x1d98) {
3447 		int x, w;
3448 
3449 		x = data & 0xffff;
3450 		w = (data >> 16) & 0xffff;
3451 		clear_rendertarget.setx(x, w);
3452 		countlen--;
3453 	}
3454 	if (maddress == 0x1d9c) {
3455 		int y, h;
3456 
3457 		y = data & 0xffff;
3458 		h = (data >> 16) & 0xffff;
3459 		clear_rendertarget.sety(y, h);
3460 		countlen--;
3461 	}
3462 	if (maddress == 0x1d94) {
3463 		// possible buffers: color, depth, stencil
3464 		// clear framebuffer
3465 		clear_render_target((data >> 4) & 15, channel[chanel][subchannel].object.method[0x1d90 / 4]);
3466 		clear_depth_buffer(data & 3, channel[chanel][subchannel].object.method[0x1d8c / 4]);
3467 		countlen--;
3468 	}
3469 	if ((maddress >= 0x02c0) && (maddress < 0x2e0)) {
3470 		int x, w, i;
3471 
3472 		i = (maddress - 0x2c0) / 4;
3473 		x = data & 0xffff;
3474 		w = (data >> 16) & 0xffff;
3475 		clippingwindows[i].setx(x, x + w - 1);
3476 	}
3477 	if ((maddress >= 0x02e0) && (maddress < 0x300)) {
3478 		int y, h, i;
3479 
3480 		i = (maddress - 0x2e0) / 4;
3481 		y = data & 0xffff;
3482 		h = (data >> 16) & 0xffff;
3483 		clippingwindows[i].sety(y, y + h - 1);
3484 	}
3485 	if (maddress == 0x0200) {
3486 		compute_limits_rendertarget(chanel, subchannel);
3487 		compute_size_rendertarget(chanel, subchannel);
3488 	}
3489 	if (maddress == 0x0204) {
3490 		compute_limits_rendertarget(chanel, subchannel);
3491 		compute_size_rendertarget(chanel, subchannel);
3492 	}
3493 	if (maddress == 0x0208) {
3494 		log2height_rendertarget = (data >> 24) & 255;
3495 		log2width_rendertarget = (data >> 16) & 255;
3496 		antialiasing_rendertarget = (data >> 12) & 15;
3497 		type_rendertarget = (NV2A_RT_TYPE)((data >> 8) & 15);
3498 		depthformat_rendertarget = (NV2A_RT_DEPTH_FORMAT)((data >> 4) & 15);
3499 		colorformat_rendertarget = (NV2A_COLOR_FORMAT)((data >> 0) & 15);
3500 		compute_supersample_factors(supersample_factor_x, supersample_factor_y);
3501 		compute_limits_rendertarget(chanel, subchannel);
3502 		compute_size_rendertarget(chanel, subchannel);
3503 /* for debugging
3504         if (limits_rendertarget.max_x == 1023)
3505             type_rendertarget = NV2A_RT_TYPE::LINEAR;
3506 */
3507 		switch (colorformat_rendertarget) {
3508 		case NV2A_COLOR_FORMAT::R5G6B5:
3509 			bytespixel_rendertarget = 2;
3510 			break;
3511 		case NV2A_COLOR_FORMAT::X8R8G8B8_Z8R8G8B8:
3512 		case NV2A_COLOR_FORMAT::X8R8G8B8_X8R8G8B8:
3513 		case NV2A_COLOR_FORMAT::A8R8G8B8:
3514 			bytespixel_rendertarget = 4;
3515 			break;
3516 		case NV2A_COLOR_FORMAT::B8:
3517 			bytespixel_rendertarget = 1;
3518 			break;
3519 		default:
3520 			machine().logerror("Unknown render target color format %d\n\r", int(colorformat_rendertarget));
3521 			bytespixel_rendertarget = 4;
3522 			break;
3523 		}
3524 		dilate_rendertarget = dilatechose[(log2width_rendertarget << 4) + log2height_rendertarget];
3525 	}
3526 	if (maddress == 0x020c) {
3527 		pitch_rendertarget=data & 0xffff;
3528 		pitch_depthbuffer=(data >> 16) & 0xffff;
3529 		compute_size_rendertarget(chanel, subchannel);
3530 #ifdef LOG_NV2A
3531 		printf("Pitch color %04X zbuffer %04X\n\r", pitch_rendertarget, pitch_depthbuffer);
3532 #endif
3533 		countlen--;
3534 	}
3535 	if (maddress == 0x0100) {
3536 		countlen--;
3537 		if (data != 0) {
3538 #ifdef LOG_NV2A
3539 			machine().logerror("Software method %04x\n", data);
3540 #endif
3541 			pgraph[0x704 / 4] = 0x100 | (chanel << 20) | (subchannel << 16);
3542 			pgraph[0x708 / 4] = data;
3543 			pgraph[0x100 / 4] |= 1;
3544 			pgraph[0x108 / 4] |= 1;
3545 			if (update_interrupts() == true)
3546 				irq_callback(1); // IRQ 3
3547 			else
3548 				irq_callback(0); // IRQ 3
3549 			return 2;
3550 		}
3551 		else
3552 			return 0;
3553 	}
3554 	if (maddress == 0x0130) {
3555 		countlen--;
3556 		if (enable_waitvblank == true)
3557 			return 1; // block until next vblank
3558 		else
3559 			return 0;
3560 	}
3561 	if (maddress == 0x1d8c) {
3562 		countlen--;
3563 		// it is used to specify the clear value for the depth buffer (zbuffer)
3564 		// but also as a parameter for interrupt routines
3565 		pgraph[0x1a88 / 4] = data;
3566 	}
3567 	if (maddress == 0x1d90) {
3568 		countlen--;
3569 		// it is used to specify the clear value for the color buffer
3570 		// but also as a parameter for interrupt routines
3571 		pgraph[0x186c / 4] = data;
3572 	}
3573 	if (maddress == 0x0210) {
3574 		// framebuffer offset
3575 		old_rendertarget = rendertarget;
3576 		// To see it with the image watch extension: @mem(0x000002d2263af060, UINT8, 4, 640, 480, 2560)
3577 		rendertarget = (uint32_t *)direct_access_ptr(data);
3578 #ifdef LOG_NV2A
3579 		printf("Render target at %08X\n\r", data);
3580 #endif
3581 		countlen--;
3582 	}
3583 	if (maddress == 0x0214) {
3584 		// zbuffer offset ?
3585 		depthbuffer = (uint32_t *)direct_access_ptr(data);
3586 #ifdef LOG_NV2A
3587 		printf("Depth buffer at %08X\n\r",data);
3588 #endif
3589 		if ((data == 0) || (data > 0x7ffffffc))
3590 			depth_write_enabled = false;
3591 		else if (channel[chanel][subchannel].object.method[0x035c / 4] != 0)
3592 			depth_write_enabled = true;
3593 		else
3594 			depth_write_enabled = false;
3595 		countlen--;
3596 	}
3597 	if (maddress == 0x0300) {
3598 		alpha_test_enabled = data != 0;
3599 	}
3600 	if (maddress == 0x033c) {
3601 		alpha_func = (NV2A_COMPARISON_OP)data;
3602 	}
3603 	if (maddress == 0x0340) {
3604 		alpha_reference = data;
3605 	}
3606 	if (maddress == 0x0304) {
3607 		if (logical_operation_enabled)
3608 			blending_enabled = false;
3609 		else
3610 			blending_enabled = data != 0;
3611 	}
3612 	if (maddress == 0x030c) {
3613 		depth_test_enabled = data != 0;
3614 	}
3615 	if (maddress == 0x0354) {
3616 		depth_function = (NV2A_COMPARISON_OP)data;
3617 	}
3618 	if (maddress == 0x0358) {
3619 		//color_mask = data;
3620 		if (data & 0x000000ff)
3621 			data |= 0x000000ff;
3622 		if (data & 0x0000ff00)
3623 			data |= 0x0000ff00;
3624 		if (data & 0x00ff0000)
3625 			data |= 0x00ff0000;
3626 		if (data & 0xff000000)
3627 			data |= 0xff000000;
3628 		color_mask = data;
3629 	}
3630 	if (maddress == 0x035c) {
3631 		uint32_t g = channel[chanel][subchannel].object.method[0x0214 / 4];
3632 		depth_write_enabled = data != 0;
3633 		if ((g == 0) || (g > 0x7ffffffc))
3634 			depth_write_enabled = false;
3635 	}
3636 	if (maddress == 0x032c) {
3637 		stencil_test_enabled = data != 0;
3638 	}
3639 	if (maddress == 0x0364) {
3640 		stencil_func = (NV2A_COMPARISON_OP)data;
3641 	}
3642 	if (maddress == 0x0368) {
3643 		if (data > 255)
3644 			data = 255;
3645 		stencil_ref = data;
3646 	}
3647 	if (maddress == 0x036c) {
3648 		stencil_mask = data;
3649 	}
3650 	if (maddress == 0x0370) {
3651 		stencil_op_fail = (NV2A_STENCIL_OP)data;
3652 	}
3653 	if (maddress == 0x0374) {
3654 		stencil_op_zfail = (NV2A_STENCIL_OP)data;
3655 	}
3656 	if (maddress == 0x0378) {
3657 		stencil_op_zpass = (NV2A_STENCIL_OP)data;
3658 	}
3659 	if (maddress == 0x0344) {
3660 		blend_function_source = (NV2A_BLEND_FACTOR)data;
3661 	}
3662 	if (maddress == 0x0348) {
3663 		blend_function_destination = (NV2A_BLEND_FACTOR)data;
3664 	}
3665 	if (maddress == 0x034c) {
3666 		blend_color = data;
3667 	}
3668 	if (maddress == 0x0350) {
3669 		blend_equation = (NV2A_BLEND_EQUATION)data;
3670 	}
3671 	if (maddress == 0x0d40) {
3672 		if (data != 0)
3673 			blending_enabled = false;
3674 		else
3675 			blending_enabled = channel[chanel][subchannel].object.method[0x0304 / 4] != 0;
3676 		logical_operation_enabled = data != 0;
3677 	}
3678 	if (maddress == 0x0d44) {
3679 		logical_operation = (NV2A_LOGIC_OP)data;
3680 	}
3681 	// Texture Units
3682 	if ((maddress >= 0x1b00) && (maddress < 0x1c00)) {
3683 		int unit;//,off;
3684 
3685 		unit = (maddress >> 6) & 3;
3686 		//off=maddress & 0xc0;
3687 		maddress = maddress & ~0xc0;
3688 		if (maddress == 0x1b00) {
3689 			uint32_t offset;//,base;
3690 			//uint32_t dmahand,dmaoff,dmasiz;
3691 
3692 			offset = data;
3693 			texture[unit].buffer = direct_access_ptr(offset);
3694 			/*if (dma0 != 0) {
3695 			    dmahand=channel[channel][subchannel].object.method[0x184/4];
3696 			    geforce_read_dma_object(dmahand,dmaoff,dmasiz);
3697 			} else if (dma1 != 0) {
3698 			    dmahand=channel[channel][subchannel].object.method[0x188/4];
3699 			    geforce_read_dma_object(dmahand,dmaoff,dmasiz);
3700 			}*/
3701 		}
3702 		if (maddress == 0x1b04) {
3703 			int basesizeu, basesizev, basesizew, format;
3704 			bool rectangle;
3705 
3706 			texture[unit].dma0 = (data >> 0) & 1;
3707 			texture[unit].dma1 = (data >> 1) & 1;
3708 			texture[unit].cubic = (data >> 2) & 1;
3709 			texture[unit].noborder = (data >> 3) & 1;
3710 			texture[unit].dims = (data >> 4) & 15;
3711 			texture[unit].mipmap = (data >> 19) & 1;
3712 			format = (data >> 8) & 255;
3713 			basesizeu = (data >> 20) & 15;
3714 			basesizev = (data >> 24) & 15;
3715 			basesizew = (data >> 28) & 15;
3716 			texture[unit].sizes = 1 << basesizeu;
3717 			texture[unit].sizet = 1 << basesizev;
3718 			texture[unit].sizer = 1 << basesizew;
3719 			texture[unit].dilate = dilatechose[(basesizeu << 4) + basesizev];
3720 			texture[unit].format = (NV2A_TEX_FORMAT)format;
3721 			switch (texture[unit].format)
3722 			{
3723 			case NV2A_TEX_FORMAT::A1R5G5B5_RECT:
3724 			case NV2A_TEX_FORMAT::R5G6B5_RECT:
3725 			case NV2A_TEX_FORMAT::A8R8G8B8_RECT:
3726 			case NV2A_TEX_FORMAT::DSDT8_RECT:
3727 			case NV2A_TEX_FORMAT::A4R4G4B4_RECT:
3728 			case NV2A_TEX_FORMAT::R8G8B8_RECT:
3729 			case NV2A_TEX_FORMAT::A8L8_RECT:
3730 			case NV2A_TEX_FORMAT::Z24_RECT:
3731 			case NV2A_TEX_FORMAT::Z16_RECT:
3732 			case NV2A_TEX_FORMAT::HILO16_RECT:
3733 			case NV2A_TEX_FORMAT::SIGNED_HILO8_RECT:
3734 				rectangle = true;
3735 				break;
3736 			default:
3737 				rectangle = false;
3738 			}
3739 			texture[unit].rectangle = rectangle;
3740 			if (debug_grab_texttype == format) {
3741 				FILE *f;
3742 				int written;
3743 
3744 				debug_grab_texttype = -1;
3745 				f = fopen(debug_grab_textfile, "wb");
3746 				if (f) {
3747 					written = (int)fwrite(texture[unit].buffer, texture[unit].sizes * texture[unit].sizet * 4, 1, f);
3748 					fclose(f);
3749 					machine().logerror("Written %d bytes of texture to specified file\n", written);
3750 				}
3751 				else
3752 					machine().logerror("Unable to save texture to specified file\n");
3753 			}
3754 		}
3755 		if (maddress == 0x1b08) {
3756 			texture[unit].addrmodes = (data >> 0) & 15;
3757 			texture[unit].addrmodet = (data >> 8) & 15;
3758 			texture[unit].addrmoder = (data >> 16) & 15;
3759 		}
3760 		if (maddress == 0x1b0c) {
3761 			texture[unit].colorkey = (data >> 0) & 3;
3762 			texture[unit].imagefield = (data >> 3) & 1;
3763 			texture[unit].aniso = (data >> 4) & 3;
3764 			texture[unit].mipmapmaxlod = (data >> 6) & 0xfff;
3765 			texture[unit].mipmapminlod = (data >> 18) & 0xfff;
3766 			// enable texture ?
3767 			texture[unit].enabled = (data >> 30) & 3;
3768 		}
3769 		if (maddress == 0x1b10) {
3770 			texture[unit].rectangle_pitch = data >> 16;
3771 		}
3772 		if (maddress == 0x1b1c) {
3773 			texture[unit].rectheight = data & 0xffff;
3774 			texture[unit].rectwidth = data >> 16;
3775 		}
3776 		countlen--;
3777 	}
3778 	if (maddress == 0x1e70) {
3779 		texture[0].mode = data & 31;
3780 		texture[1].mode = (data >> 5) & 31;
3781 		texture[2].mode = (data >> 10) & 31;
3782 		texture[3].mode = (data >> 15) & 31;
3783 	}
3784 	// projection matrix
3785 	if ((maddress >= 0x0440) && (maddress < 0x0480)) {
3786 		maddress = (maddress - 0x0440) / 4;
3787 		*(uint32_t *)(&matrix.projection[maddress >> 2][maddress & 3]) = data;
3788 		countlen--;
3789 	}
3790 	// modelview matrix
3791 	if ((maddress >= 0x0480) && (maddress < 0x04c0)) {
3792 		maddress = (maddress - 0x0480) / 4;
3793 		/* the modelview matrix is obtained by direct3d by multiplying the world matrix and the view matrix
3794 		    modelview = world * view
3795 		   given a point in 3d space with coordinates x y and z, to find te transformed coordinates
3796 		   first create a row vector with components (x,y,z,1) then multiply the vector by the matrix
3797 		    transformed = rowvector * matrix
3798 		   in direct3d the matrix is stored as the sequence (first digit row, second digit column)
3799 		    11 12 13 14
3800 		    21 22 23 24
3801 		    31 32 33 34
3802 		    41 42 43 44
3803 		   but it is sent transposed as the sequence
3804 		    11 21 31 41 12 22 32 42 13 23 33 43 14 24 34 44
3805 		   so in matrix.modelview[x][y] x is the column and y is the row of the direct3d matrix
3806 		*/
3807 		*(uint32_t *)(&matrix.modelview[maddress >> 2][maddress & 3]) = data;
3808 		countlen--;
3809 	}
3810 	// inverse modelview matrix
3811 	if ((maddress >= 0x0580) && (maddress < 0x05c0)) {
3812 		maddress = (maddress - 0x0580) / 4;
3813 		*(uint32_t *)(&matrix.modelview_inverse[maddress >> 2][maddress & 3]) = data;
3814 		countlen--;
3815 	}
3816 	// composite matrix
3817 	if ((maddress >= 0x0680) && (maddress < 0x06c0)) {
3818 		maddress = (maddress - 0x0680) / 4;
3819 		/* the composite matrix is computed by direct3d by multiplying the
3820 		   world, view, projection and viewport matrices
3821 		    composite = world * view * projection * viewport
3822 		   the viewport matrix applies the viewport scale and offset
3823 		 */
3824 		*(uint32_t *)(&matrix.composite[maddress >> 2][maddress & 3]) = data;
3825 		countlen--;
3826 	}
3827 	// viewport translate
3828 	if ((maddress >= 0x0a20) && (maddress < 0x0a30)) {
3829 		maddress = (maddress - 0x0a20) / 4;
3830 		*(uint32_t *)(&matrix.translate[maddress]) = data;
3831 		// set corresponding vertex shader constant too
3832 		vertexprogram.exec.c_constant[59].iv(maddress, data); // constant -37
3833 #ifdef LOG_NV2A
3834 		if (maddress == 3)
3835 			machine().logerror("viewport translate = {%f %f %f %f}\n", matrix.translate[0], matrix.translate[1], matrix.translate[2], matrix.translate[3]);
3836 #endif
3837 		countlen--;
3838 	}
3839 	// viewport scale
3840 	if ((maddress >= 0x0af0) && (maddress < 0x0b00)) {
3841 		maddress = (maddress - 0x0af0) / 4;
3842 		*(uint32_t *)(&matrix.scale[maddress]) = data;
3843 		// set corresponding vertex shader constant too
3844 		vertexprogram.exec.c_constant[58].iv(maddress, data); // constant -38
3845 #ifdef LOG_NV2A
3846 		if (maddress == 3)
3847 			machine().logerror("viewport scale = {%f %f %f %f}\n", matrix.scale[0], matrix.scale[1], matrix.scale[2], matrix.scale[3]);
3848 #endif
3849 		countlen--;
3850 	}
3851 	// Vertex program (shader)
3852 	if (maddress == 0x1e94) {
3853 		/*if (data == 2)
3854 		machine().logerror("Enabled vertex program\n");
3855 		else if (data == 4)
3856 		machine().logerror("Enabled fixed function pipeline\n");
3857 		else if (data == 6)
3858 		machine().logerror("Enabled both fixed function pipeline and vertex program ?\n");
3859 		else
3860 		machine().logerror("Unknown value %d to method 0x1e94\n",data);*/
3861 		vertex_pipeline = data & 6;
3862 		countlen--;
3863 	}
3864 	if (maddress == 0x1e9c) {
3865 		//machine().logerror("VP_UPLOAD_FROM_ID %d\n",data);
3866 		vertexprogram.upload_instruction_index = data;
3867 		vertexprogram.upload_instruction_component = 0;
3868 		countlen--;
3869 	}
3870 	if (maddress == 0x1ea0) {
3871 		//machine().logerror("VP_START_FROM_ID %d\n",data);
3872 		vertexprogram.instructions = vertexprogram.upload_instruction_index;
3873 		vertexprogram.start_instruction = data;
3874 		countlen--;
3875 	}
3876 	if (maddress == 0x1ea4) {
3877 		//machine().logerror("VP_UPLOAD_CONST_ID %d\n",data);
3878 		vertexprogram.upload_parameter_index = data;
3879 		vertexprogram.upload_parameter_component = 0;
3880 		countlen--;
3881 	}
3882 	if ((maddress >= 0x0b00) && (maddress < 0x0b80)) {
3883 		//machine().logerror("VP_UPLOAD_INST\n");
3884 		if (vertexprogram.upload_instruction_index < 256) {
3885 			vertexprogram.exec.op[vertexprogram.upload_instruction_index].i[vertexprogram.upload_instruction_component] = data;
3886 			vertexprogram.exec.op[vertexprogram.upload_instruction_index].modified |= (1 << vertexprogram.upload_instruction_component);
3887 		}
3888 		else
3889 			machine().logerror("Need to increase size of vertexprogram.instruction to %d\n\r", vertexprogram.upload_instruction_index);
3890 		if (vertexprogram.exec.op[vertexprogram.upload_instruction_index].modified == 15) {
3891 			vertexprogram.exec.op[vertexprogram.upload_instruction_index].modified = 0;
3892 			vertexprogram.exec.decode_instruction(vertexprogram.upload_instruction_index);
3893 		}
3894 		vertexprogram.upload_instruction_component++;
3895 		if (vertexprogram.upload_instruction_component >= 4) {
3896 			vertexprogram.upload_instruction_component = 0;
3897 			vertexprogram.upload_instruction_index++;
3898 		}
3899 	}
3900 	if ((maddress >= 0x0b80) && (maddress < 0x0c00)) {
3901 		//machine().logerror("VP_UPLOAD_CONST\n");
3902 		if (vertexprogram.upload_parameter_index < 192) {
3903 			vertexprogram.exec.c_constant[vertexprogram.upload_parameter_index].iv(vertexprogram.upload_parameter_component, data);
3904 		}
3905 		else
3906 			machine().logerror("Need to increase size of vertexprogram.parameter to %d\n\r", vertexprogram.upload_parameter_index);
3907 		vertexprogram.upload_parameter_component++;
3908 		if (vertexprogram.upload_parameter_component >= 4) {
3909 #ifdef LOG_NV2A
3910 			if ((vertexprogram.upload_parameter_index == 58) || (vertexprogram.upload_parameter_index == 59))
3911 				machine().logerror("vp constant %d (%s) = {%f %f %f %f}\n", vertexprogram.upload_parameter_index,
3912 					vertexprogram.upload_parameter_index == 58 ? "viewport scale" : "viewport translate",
3913 					vertexprogram.exec.c_constant[vertexprogram.upload_parameter_index].fv[0],
3914 					vertexprogram.exec.c_constant[vertexprogram.upload_parameter_index].fv[1],
3915 					vertexprogram.exec.c_constant[vertexprogram.upload_parameter_index].fv[2],
3916 					vertexprogram.exec.c_constant[vertexprogram.upload_parameter_index].fv[3]);
3917 #endif
3918 			vertexprogram.upload_parameter_component = 0;
3919 			vertexprogram.upload_parameter_index++;
3920 		}
3921 	}
3922 	if ((maddress >= 0x1e80) && (maddress < 0x1e90)) {
3923 		machine().logerror("Setting v0 vertex program input component %d to %f\n", (maddress - 0x1e80) / 4, *((float *)&data));
3924 	}
3925 	if (maddress == 0x1e90) {
3926 		machine().logerror("Received explicit method to run vertex program\n");
3927 	}
3928 	if (maddress == 0x02a8) {
3929 		fog_color = data;
3930 	}
3931 	// Register combiners
3932 	if (maddress == 0x0288) {
3933 		combiner.setup.final.mapin_rgb.D_input = (Combiner::InputRegister)(data & 15);
3934 		combiner.setup.final.mapin_rgb.D_component = (data >> 4) & 1;
3935 		combiner.setup.final.mapin_rgb.D_mapping = (Combiner::MapFunction)((data >> 5) & 7);
3936 		combiner.setup.final.mapin_rgb.C_input = (Combiner::InputRegister)((data >> 8) & 15);
3937 		combiner.setup.final.mapin_rgb.C_component = (data >> 12) & 1;
3938 		combiner.setup.final.mapin_rgb.C_mapping = (Combiner::MapFunction)((data >> 13) & 7);
3939 		combiner.setup.final.mapin_rgb.B_input = (Combiner::InputRegister)((data >> 16) & 15);
3940 		combiner.setup.final.mapin_rgb.B_component = (data >> 20) & 1;
3941 		combiner.setup.final.mapin_rgb.B_mapping = (Combiner::MapFunction)((data >> 21) & 7);
3942 		combiner.setup.final.mapin_rgb.A_input = (Combiner::InputRegister)((data >> 24) & 15);
3943 		combiner.setup.final.mapin_rgb.A_component = (data >> 28) & 1;
3944 		combiner.setup.final.mapin_rgb.A_mapping = (Combiner::MapFunction)((data >> 29) & 7);
3945 		countlen--;
3946 	}
3947 	if (maddress == 0x028c) {
3948 		combiner.setup.final.color_sum_clamp = (data >> 7) & 1;
3949 		combiner.setup.final.mapin_alpha.G_input = (Combiner::InputRegister)((data >> 8) & 15);
3950 		combiner.setup.final.mapin_alpha.G_component = (data >> 12) & 1;
3951 		combiner.setup.final.mapin_alpha.G_mapping = (Combiner::MapFunction)((data >> 13) & 7);
3952 		combiner.setup.final.mapin_rgb.F_input = (Combiner::InputRegister)((data >> 16) & 15);
3953 		combiner.setup.final.mapin_rgb.F_component = (data >> 20) & 1;
3954 		combiner.setup.final.mapin_rgb.F_mapping = (Combiner::MapFunction)((data >> 21) & 7);
3955 		combiner.setup.final.mapin_rgb.E_input = (Combiner::InputRegister)((data >> 24) & 15);
3956 		combiner.setup.final.mapin_rgb.E_component = (data >> 28) & 1;
3957 		combiner.setup.final.mapin_rgb.E_mapping = (Combiner::MapFunction)((data >> 29) & 7);
3958 		countlen--;
3959 	}
3960 	if ((maddress >= 0x0260) && (maddress < 0x0280)) {
3961 		int n;
3962 
3963 		n = (maddress - 0x0260) >> 2;
3964 		combiner.setup.stage[n].mapin_alpha.D_input = (Combiner::InputRegister)(data & 15);
3965 		combiner.setup.stage[n].mapin_alpha.D_component = (data >> 4) & 1;
3966 		combiner.setup.stage[n].mapin_alpha.D_mapping = (Combiner::MapFunction)((data >> 5) & 7);
3967 		combiner.setup.stage[n].mapin_alpha.C_input = (Combiner::InputRegister)((data >> 8) & 15);
3968 		combiner.setup.stage[n].mapin_alpha.C_component = (data >> 12) & 1;
3969 		combiner.setup.stage[n].mapin_alpha.C_mapping = (Combiner::MapFunction)((data >> 13) & 7);
3970 		combiner.setup.stage[n].mapin_alpha.B_input = (Combiner::InputRegister)((data >> 16) & 15);
3971 		combiner.setup.stage[n].mapin_alpha.B_component = (data >> 20) & 1;
3972 		combiner.setup.stage[n].mapin_alpha.B_mapping = (Combiner::MapFunction)((data >> 21) & 7);
3973 		combiner.setup.stage[n].mapin_alpha.A_input = (Combiner::InputRegister)((data >> 24) & 15);
3974 		combiner.setup.stage[n].mapin_alpha.A_component = (data >> 28) & 1;
3975 		combiner.setup.stage[n].mapin_alpha.A_mapping = (Combiner::MapFunction)((data >> 29) & 7);
3976 		countlen--;
3977 	}
3978 	if ((maddress >= 0x0ac0) && (maddress < 0x0ae0)) {
3979 		int n;
3980 
3981 		n = (maddress - 0x0ac0) >> 2;
3982 		combiner.setup.stage[n].mapin_rgb.D_input = (Combiner::InputRegister)(data & 15);
3983 		combiner.setup.stage[n].mapin_rgb.D_component = (data >> 4) & 1;
3984 		combiner.setup.stage[n].mapin_rgb.D_mapping = (Combiner::MapFunction)((data >> 5) & 7);
3985 		combiner.setup.stage[n].mapin_rgb.C_input = (Combiner::InputRegister)((data >> 8) & 15);
3986 		combiner.setup.stage[n].mapin_rgb.C_component = (data >> 12) & 1;
3987 		combiner.setup.stage[n].mapin_rgb.C_mapping = (Combiner::MapFunction)((data >> 13) & 7);
3988 		combiner.setup.stage[n].mapin_rgb.B_input = (Combiner::InputRegister)((data >> 16) & 15);
3989 		combiner.setup.stage[n].mapin_rgb.B_component = (data >> 20) & 1;
3990 		combiner.setup.stage[n].mapin_rgb.B_mapping = (Combiner::MapFunction)((data >> 21) & 7);
3991 		combiner.setup.stage[n].mapin_rgb.A_input = (Combiner::InputRegister)((data >> 24) & 15);
3992 		combiner.setup.stage[n].mapin_rgb.A_component = (data >> 28) & 1;
3993 		combiner.setup.stage[n].mapin_rgb.A_mapping = (Combiner::MapFunction)((data >> 29) & 7);
3994 		countlen--;
3995 	}
3996 	if ((maddress >= 0x0a60) && (maddress < 0x0a80)) {
3997 		int n;
3998 
3999 		n = (maddress - 0x0a60) >> 2;
4000 		combiner_argb8_float(data, combiner.setup.stage[n].constantcolor0);
4001 		countlen--;
4002 	}
4003 	if ((maddress >= 0x0a80) && (maddress < 0x0aa0)) {
4004 		int n;
4005 
4006 		n = (maddress - 0x0a80) >> 2;
4007 		combiner_argb8_float(data, combiner.setup.stage[n].constantcolor1);
4008 		countlen--;
4009 	}
4010 	if ((maddress >= 0x0aa0) && (maddress < 0x0ac0)) {
4011 		int n;
4012 
4013 		n = (maddress - 0x0aa0) >> 2;
4014 		combiner.setup.stage[n].mapout_alpha.CD_output = (Combiner::InputRegister)(data & 15);
4015 		combiner.setup.stage[n].mapout_alpha.AB_output = (Combiner::InputRegister)((data >> 4) & 15);
4016 		combiner.setup.stage[n].mapout_alpha.SUM_output = (Combiner::InputRegister)((data >> 8) & 15);
4017 		combiner.setup.stage[n].mapout_alpha.CD_dotproduct = (data >> 12) & 1;
4018 		combiner.setup.stage[n].mapout_alpha.AB_dotproduct = (data >> 13) & 1;
4019 		combiner.setup.stage[n].mapout_alpha.muxsum = (data >> 14) & 1;
4020 		combiner.setup.stage[n].mapout_alpha.bias = (data >> 15) & 1;
4021 		combiner.setup.stage[n].mapout_alpha.scale = (data >> 16) & 3;
4022 		//combiner.=(data >> 27) & 7;
4023 		countlen--;
4024 	}
4025 	if (maddress == 0x1e20) {
4026 		combiner_argb8_float(data, combiner.setup.final.constantcolor0);
4027 		countlen--;
4028 	}
4029 	if (maddress == 0x1e24) {
4030 		combiner_argb8_float(data, combiner.setup.final.constantcolor1);
4031 		countlen--;
4032 	}
4033 	if ((maddress >= 0x1e40) && (maddress < 0x1e60)) {
4034 		int n;
4035 
4036 		n = (maddress - 0x1e40) >> 2;
4037 		combiner.setup.stage[n].mapout_rgb.CD_output = (Combiner::InputRegister)(data & 15);
4038 		combiner.setup.stage[n].mapout_rgb.AB_output = (Combiner::InputRegister)((data >> 4) & 15);
4039 		combiner.setup.stage[n].mapout_rgb.SUM_output = (Combiner::InputRegister)((data >> 8) & 15);
4040 		combiner.setup.stage[n].mapout_rgb.CD_dotproduct = (data >> 12) & 1;
4041 		combiner.setup.stage[n].mapout_rgb.AB_dotproduct = (data >> 13) & 1;
4042 		combiner.setup.stage[n].mapout_rgb.muxsum = (data >> 14) & 1;
4043 		combiner.setup.stage[n].mapout_rgb.bias = (data >> 15) & 1;
4044 		combiner.setup.stage[n].mapout_rgb.scale = (data >> 16) & 3;
4045 		//combiner.=(data >> 27) & 7;
4046 		countlen--;
4047 	}
4048 	if (maddress == 0x1e60) {
4049 		combiner.setup.stages = data & 15;
4050 		countlen--;
4051 	}
4052 	return 0;
4053 }
4054 
execute_method_m2mf(address_space & space,uint32_t chanel,uint32_t subchannel,uint32_t method,uint32_t address,uint32_t data,int & countlen)4055 int nv2a_renderer::execute_method_m2mf(address_space &space, uint32_t chanel, uint32_t subchannel, uint32_t method, uint32_t address, uint32_t data, int &countlen)
4056 {
4057 	if (method == 0x0180) {
4058 #ifdef LOG_NV2A
4059 		machine().logerror("m2mf method 0180 notify\n");
4060 #endif
4061 		geforce_read_dma_object(data, dma_offset[10], dma_size[10]);
4062 	}
4063 	return 0;
4064 }
4065 
execute_method_surf2d(address_space & space,uint32_t chanel,uint32_t subchannel,uint32_t method,uint32_t address,uint32_t data,int & countlen)4066 int nv2a_renderer::execute_method_surf2d(address_space &space, uint32_t chanel, uint32_t subchannel, uint32_t method, uint32_t address, uint32_t data, int &countlen)
4067 {
4068 	if (method == 0x0184) {
4069 #ifdef LOG_NV2A
4070 		machine().logerror("surf2d method 0184 source\n");
4071 #endif
4072 		geforce_read_dma_object(data, dma_offset[11], dma_size[11]);
4073 	}
4074 	if (method == 0x0188) {
4075 #ifdef LOG_NV2A
4076 		machine().logerror("surf2d method 0188 destination\n");
4077 #endif
4078 		geforce_read_dma_object(data, dma_offset[12], dma_size[12]);
4079 	}
4080 	if (method == 0x0300) {
4081 		bitblit.format = data; // 0xa is a8r8g8b8
4082 	}
4083 	if (method == 0x0304) {
4084 		bitblit.pitch_source = data & 0xffff;
4085 		bitblit.pitch_destination = data >> 16;
4086 	}
4087 	if (method == 0x0308) {
4088 		bitblit.source_address = dma_offset[11] + data;
4089 	}
4090 	if (method == 0x030c) {
4091 		bitblit.destination_address = dma_offset[12] + data;
4092 	}
4093 	return 0;
4094 }
4095 
execute_method_blit(address_space & space,uint32_t chanel,uint32_t subchannel,uint32_t method,uint32_t address,uint32_t data,int & countlen)4096 int nv2a_renderer::execute_method_blit(address_space &space, uint32_t chanel, uint32_t subchannel, uint32_t method, uint32_t address, uint32_t data, int &countlen)
4097 {
4098 	if (method == 0x019c) {
4099 #ifdef LOG_NV2A
4100 		machine().logerror("blit method 019c surface objecct handle %d\n", data); // set to 0x11
4101 #endif
4102 	}
4103 	if (method == 0x02fc) {
4104 #ifdef LOG_NV2A
4105 		machine().logerror("blit method 02fc operation %d\n", data); // 3 is copy from source to destination
4106 #endif
4107 		bitblit.op = data;
4108 	}
4109 	if (method == 0x0300) {
4110 		bitblit.sourcex = data & 0xffff;
4111 		bitblit.sourcey = data >> 16;
4112 	}
4113 	if (method == 0x0304) {
4114 		bitblit.destinationx = data & 0xffff;
4115 		bitblit.destinationy = data >> 16;
4116 	}
4117 	if (method == 0x0308) {
4118 		bitblit.width = data & 0xffff;
4119 		bitblit.heigth = data >> 16;
4120 		surface_2d_blit();
4121 	}
4122 	return 0;
4123 }
4124 
surface_2d_blit()4125 void nv2a_renderer::surface_2d_blit()
4126 {
4127 	int x, y;
4128 	uint32_t *src, *dest;
4129 	uint32_t *srcrow, *destrow;
4130 
4131 	if (bitblit.format != 0xa) {
4132 		machine().logerror("Unsupported format %d in surface_2d_blit\n", bitblit.format);
4133 		return;
4134 	}
4135 	srcrow = (uint32_t *)direct_access_ptr(bitblit.source_address + bitblit.pitch_source * bitblit.sourcey + bitblit.sourcex * 4);
4136 	destrow = (uint32_t *)direct_access_ptr(bitblit.destination_address + bitblit.pitch_destination * bitblit.destinationy + bitblit.destinationx * 4);
4137 	for (y = 0; y < bitblit.heigth; y++) {
4138 		src = srcrow;
4139 		dest = destrow;
4140 		for (x = 0; x < bitblit.width; x++) {
4141 			*dest = *src;
4142 			dest++;
4143 			src++;
4144 		}
4145 		srcrow += bitblit.pitch_source >> 2;
4146 		destrow += bitblit.pitch_destination >> 2;
4147 	}
4148 }
4149 
toggle_register_combiners_usage()4150 bool nv2a_renderer::toggle_register_combiners_usage()
4151 {
4152 	combiner.used = 1 - combiner.used;
4153 	return combiner.used != 0;
4154 }
4155 
toggle_wait_vblank_support()4156 bool nv2a_renderer::toggle_wait_vblank_support()
4157 {
4158 	enable_waitvblank = !enable_waitvblank;
4159 	return enable_waitvblank;
4160 }
4161 
toggle_clipping_w_support()4162 bool nv2a_renderer::toggle_clipping_w_support()
4163 {
4164 	enable_clipping_w = !enable_clipping_w;
4165 	return enable_clipping_w;
4166 }
4167 
debug_grab_texture(int type,const char * filename)4168 void nv2a_renderer::debug_grab_texture(int type, const char *filename)
4169 {
4170 	debug_grab_texttype = type;
4171 	if (debug_grab_textfile == nullptr)
4172 		debug_grab_textfile = (char *)malloc(128);
4173 	strncpy(debug_grab_textfile, filename, 127);
4174 }
4175 
debug_grab_vertex_program_slot(int slot,uint32_t * instruction)4176 void nv2a_renderer::debug_grab_vertex_program_slot(int slot, uint32_t *instruction)
4177 {
4178 	if (slot >= 1024 / 4)
4179 		return;
4180 	instruction[0] = vertexprogram.exec.op[slot].i[0];
4181 	instruction[1] = vertexprogram.exec.op[slot].i[1];
4182 	instruction[2] = vertexprogram.exec.op[slot].i[2];
4183 	instruction[3] = vertexprogram.exec.op[slot].i[3];
4184 }
4185 
combiner_argb8_float(uint32_t color,float reg[4])4186 void nv2a_renderer::combiner_argb8_float(uint32_t color, float reg[4])
4187 {
4188 	reg[2] = (float)(color & 0xff) / 255.0f;
4189 	reg[1] = (float)((color >> 8) & 0xff) / 255.0f;
4190 	reg[0] = (float)((color >> 16) & 0xff) / 255.0f;
4191 	reg[3] = (float)((color >> 24) & 0xff) / 255.0f;
4192 }
4193 
combiner_float_argb8(float reg[4])4194 uint32_t nv2a_renderer::combiner_float_argb8(float reg[4])
4195 {
4196 	uint32_t r, g, b, a;
4197 
4198 	a = reg[3] * 255.0f;
4199 	b = reg[2] * 255.0f;
4200 	g = reg[1] * 255.0f;
4201 	r = reg[0] * 255.0f;
4202 	return (a << 24) | (r << 16) | (g << 8) | b;
4203 }
4204 
combiner_map_input_select(int id,Combiner::InputRegister code,int index)4205 float nv2a_renderer::combiner_map_input_select(int id, Combiner::InputRegister code, int index)
4206 {
4207 	switch ((int)code) {
4208 	case 0:
4209 	default:
4210 		return combiner.work[id].registers.zero[index];
4211 	case 1:
4212 		return combiner.work[id].registers.color0[index];
4213 	case 2:
4214 		return combiner.work[id].registers.color1[index];
4215 	case 3:
4216 		return combiner.work[id].registers.fogcolor[index];
4217 	case 4:
4218 		return combiner.work[id].registers.primarycolor[index];
4219 	case 5:
4220 		return combiner.work[id].registers.secondarycolor[index];
4221 	case 8:
4222 		return combiner.work[id].registers.texture0color[index];
4223 	case 9:
4224 		return combiner.work[id].registers.texture1color[index];
4225 	case 10:
4226 		return combiner.work[id].registers.texture2color[index];
4227 	case 11:
4228 		return combiner.work[id].registers.texture3color[index];
4229 	case 12:
4230 		return combiner.work[id].registers.spare0[index];
4231 	case 13:
4232 		return combiner.work[id].registers.spare1[index];
4233 	case 14:
4234 		return combiner.work[id].variables.sumclamp[index];
4235 	case 15:
4236 		return combiner.work[id].variables.EF[index];
4237 	}
4238 
4239 	// never executed
4240 	//return 0;
4241 }
4242 
combiner_map_input_select_array(int id,Combiner::InputRegister code)4243 float *nv2a_renderer::combiner_map_input_select_array(int id, Combiner::InputRegister code)
4244 {
4245 	switch ((int)code) {
4246 	case 0:
4247 	default:
4248 		return combiner.work[id].registers.zero;
4249 	case 1:
4250 		return combiner.work[id].registers.color0;
4251 	case 2:
4252 		return combiner.work[id].registers.color1;
4253 	case 3:
4254 		return combiner.work[id].registers.fogcolor;
4255 	case 4:
4256 		return combiner.work[id].registers.primarycolor;
4257 	case 5:
4258 		return combiner.work[id].registers.secondarycolor;
4259 	case 8:
4260 		return combiner.work[id].registers.texture0color;
4261 	case 9:
4262 		return combiner.work[id].registers.texture1color;
4263 	case 10:
4264 		return combiner.work[id].registers.texture2color;
4265 	case 11:
4266 		return combiner.work[id].registers.texture3color;
4267 	case 12:
4268 		return combiner.work[id].registers.spare0;
4269 	case 13:
4270 		return combiner.work[id].registers.spare1;
4271 	case 14:
4272 		return combiner.work[id].variables.sumclamp;
4273 	case 15:
4274 		return combiner.work[id].variables.EF;
4275 	}
4276 
4277 	// never executed
4278 	//return 0;
4279 }
4280 
combiner_map_output_select_array(int id,Combiner::InputRegister code)4281 float *nv2a_renderer::combiner_map_output_select_array(int id, Combiner::InputRegister code)
4282 {
4283 	switch ((int)code) {
4284 	case 0:
4285 		return nullptr;
4286 	case 1:
4287 		return nullptr;
4288 	case 2:
4289 		return nullptr;
4290 	case 3:
4291 		return nullptr;
4292 	case 4:
4293 		return combiner.work[id].registers.primarycolor;
4294 	case 5:
4295 		return combiner.work[id].registers.secondarycolor;
4296 	case 8:
4297 		return combiner.work[id].registers.texture0color;
4298 	case 9:
4299 		return combiner.work[id].registers.texture1color;
4300 	case 10:
4301 		return combiner.work[id].registers.texture2color;
4302 	case 11:
4303 		return combiner.work[id].registers.texture3color;
4304 	case 12:
4305 		return combiner.work[id].registers.spare0;
4306 	case 13:
4307 		return combiner.work[id].registers.spare1;
4308 	case 14:
4309 		return nullptr;
4310 	case 15:
4311 	default:
4312 		return nullptr;
4313 	}
4314 }
4315 
combiner_map_input_function(Combiner::MapFunction code,float value)4316 float nv2a_renderer::combiner_map_input_function(Combiner::MapFunction code, float value)
4317 {
4318 	switch ((int)code) {
4319 	case 0: // unsigned identity
4320 		return std::max(0.0f, value);
4321 	case 1: // unsigned invert
4322 		return 1.0f - std::min(std::max(value, 0.0f), 1.0f);
4323 	case 2: // expand normal
4324 		return 2.0f * std::max(0.0f, value) - 1.0f;
4325 	case 3: // expand negate
4326 		return -2.0f * std::max(0.0f, value) + 1.0f;
4327 	case 4: // half bias normal
4328 		return std::max(0.0f, value) - 0.5f;
4329 	case 5: // half bias negate
4330 		return -std::max(0.0f, value) + 0.5f;
4331 	case 6: // signed identyty
4332 		return value;
4333 	case 7: // signed negate
4334 	default:
4335 		return -value;
4336 	}
4337 
4338 	// never executed
4339 	//return 0;
4340 }
4341 
combiner_map_input_function_array(Combiner::MapFunction code,float * data)4342 void nv2a_renderer::combiner_map_input_function_array(Combiner::MapFunction code, float *data)
4343 {
4344 	switch ((int)code) {
4345 	case 0:
4346 		data[0] = std::max(0.0f, data[0]);
4347 		data[1] = std::max(0.0f, data[1]);
4348 		data[2] = std::max(0.0f, data[2]);
4349 		break;
4350 	case 1:
4351 		data[0] = 1.0f - std::min(std::max(data[0], 0.0f), 1.0f);
4352 		data[1] = 1.0f - std::min(std::max(data[1], 0.0f), 1.0f);
4353 		data[2] = 1.0f - std::min(std::max(data[2], 0.0f), 1.0f);
4354 		break;
4355 	case 2:
4356 		data[0] = 2.0f * std::max(0.0f, data[0]) - 1.0f;
4357 		data[1] = 2.0f * std::max(0.0f, data[1]) - 1.0f;
4358 		data[2] = 2.0f * std::max(0.0f, data[2]) - 1.0f;
4359 		break;
4360 	case 3:
4361 		data[0] = -2.0f * std::max(0.0f, data[0]) + 1.0f;
4362 		data[1] = -2.0f * std::max(0.0f, data[1]) + 1.0f;
4363 		data[2] = -2.0f * std::max(0.0f, data[2]) + 1.0f;
4364 		break;
4365 	case 4:
4366 		data[0] = std::max(0.0f, data[0]) - 0.5f;
4367 		data[1] = std::max(0.0f, data[1]) - 0.5f;
4368 		data[2] = std::max(0.0f, data[2]) - 0.5f;
4369 		break;
4370 	case 5:
4371 		data[0] = -std::max(0.0f, data[0]) + 0.5f;
4372 		data[1] = -std::max(0.0f, data[1]) + 0.5f;
4373 		data[2] = -std::max(0.0f, data[2]) + 0.5f;
4374 		break;
4375 	case 6:
4376 		return;
4377 	case 7:
4378 	default:
4379 		data[0] = -data[0];
4380 		data[1] = -data[1];
4381 		data[2] = -data[2];
4382 		break;
4383 	}
4384 }
4385 
combiner_initialize_registers(int id,float rgba[6][4])4386 void nv2a_renderer::combiner_initialize_registers(int id, float rgba[6][4])
4387 {
4388 	for (int n = 0; n < 4; n++) {
4389 		combiner.work[id].registers.primarycolor[n] = rgba[0][n];
4390 		combiner.work[id].registers.secondarycolor[n] = rgba[1][n];
4391 		combiner.work[id].registers.texture0color[n] = rgba[2][n];
4392 		combiner.work[id].registers.texture1color[n] = rgba[3][n];
4393 		combiner.work[id].registers.texture2color[n] = rgba[4][n];
4394 		combiner.work[id].registers.texture3color[n] = rgba[5][n];
4395 		combiner.work[id].registers.fogcolor[n] = rgba[6][n];
4396 	}
4397 	combiner.work[id].registers.spare0[3] = combiner.work[id].registers.texture0color[3]; // alpha of spare 0 must be the alpha of the pixel from texture 0
4398 	combiner.work[id].registers.zero[0] = combiner.work[id].registers.zero[1] = combiner.work[id].registers.zero[2] = combiner.work[id].registers.zero[3] = 0;
4399 }
4400 
combiner_initialize_stage(int id,int stage_number)4401 void nv2a_renderer::combiner_initialize_stage(int id, int stage_number)
4402 {
4403 	int n = stage_number;
4404 
4405 	// put register_constantcolor0 in register_color0
4406 	combiner.work[id].registers.color0[0] = combiner.setup.stage[n].constantcolor0[0];
4407 	combiner.work[id].registers.color0[1] = combiner.setup.stage[n].constantcolor0[1];
4408 	combiner.work[id].registers.color0[2] = combiner.setup.stage[n].constantcolor0[2];
4409 	combiner.work[id].registers.color0[3] = combiner.setup.stage[n].constantcolor0[3];
4410 	// put register_constantcolor1 in register_color1
4411 	combiner.work[id].registers.color1[0] = combiner.setup.stage[n].constantcolor1[0];
4412 	combiner.work[id].registers.color1[1] = combiner.setup.stage[n].constantcolor1[1];
4413 	combiner.work[id].registers.color1[2] = combiner.setup.stage[n].constantcolor1[2];
4414 	combiner.work[id].registers.color1[3] = combiner.setup.stage[n].constantcolor1[3];
4415 }
4416 
combiner_initialize_final(int id)4417 void nv2a_renderer::combiner_initialize_final(int id)
4418 {
4419 	// put register_constantcolor0 in register_color0
4420 	combiner.work[id].registers.color0[0] = combiner.setup.final.constantcolor0[0];
4421 	combiner.work[id].registers.color0[1] = combiner.setup.final.constantcolor0[1];
4422 	combiner.work[id].registers.color0[2] = combiner.setup.final.constantcolor0[2];
4423 	combiner.work[id].registers.color0[3] = combiner.setup.final.constantcolor0[3];
4424 	// put register_constantcolor1 in register_color1
4425 	combiner.work[id].registers.color1[0] = combiner.setup.final.constantcolor1[0];
4426 	combiner.work[id].registers.color1[1] = combiner.setup.final.constantcolor1[1];
4427 	combiner.work[id].registers.color1[2] = combiner.setup.final.constantcolor1[2];
4428 	combiner.work[id].registers.color1[3] = combiner.setup.final.constantcolor1[3];
4429 }
4430 
combiner_map_stage_input(int id,int stage_number)4431 void nv2a_renderer::combiner_map_stage_input(int id, int stage_number)
4432 {
4433 	int n = stage_number;
4434 	int c, d, i;
4435 	float v, *pv;
4436 
4437 	// rgb portion
4438 	// A
4439 	// get pointer to rgb components of selected input register
4440 	pv = combiner_map_input_select_array(id, combiner.setup.stage[n].mapin_rgb.A_input);
4441 	c = combiner.setup.stage[n].mapin_rgb.A_component * 3;
4442 	i = combiner.setup.stage[n].mapin_rgb.A_component ^ 1;
4443 	// copy components to A
4444 	for (d = 0; d < 3; d++) {
4445 		combiner.work[id].variables.A[d] = pv[c];
4446 		c += i;
4447 	}
4448 	// apply mapping function
4449 	combiner_map_input_function_array(combiner.setup.stage[n].mapin_rgb.A_mapping, combiner.work[id].variables.A);
4450 	// B
4451 	pv = combiner_map_input_select_array(id, combiner.setup.stage[n].mapin_rgb.B_input);
4452 	c = combiner.setup.stage[n].mapin_rgb.B_component * 3;
4453 	i = combiner.setup.stage[n].mapin_rgb.B_component ^ 1;
4454 	for (d = 0; d < 3; d++) {
4455 		combiner.work[id].variables.B[d] = pv[c];
4456 		c += i;
4457 	}
4458 	combiner_map_input_function_array(combiner.setup.stage[n].mapin_rgb.B_mapping, combiner.work[id].variables.B);
4459 	// C
4460 	pv = combiner_map_input_select_array(id, combiner.setup.stage[n].mapin_rgb.C_input);
4461 	c = combiner.setup.stage[n].mapin_rgb.C_component * 3;
4462 	i = combiner.setup.stage[n].mapin_rgb.C_component ^ 1;
4463 	for (d = 0; d < 3; d++) {
4464 		combiner.work[id].variables.C[d] = pv[c];
4465 		c += i;
4466 	}
4467 	combiner_map_input_function_array(combiner.setup.stage[n].mapin_rgb.C_mapping, combiner.work[id].variables.C);
4468 	// D
4469 	pv = combiner_map_input_select_array(id, combiner.setup.stage[n].mapin_rgb.D_input);
4470 	c = combiner.setup.stage[n].mapin_rgb.D_component * 3;
4471 	i = combiner.setup.stage[n].mapin_rgb.D_component ^ 1;
4472 	for (d = 0; d < 3; d++) {
4473 		combiner.work[id].variables.D[d] = pv[c];
4474 		c += i;
4475 	}
4476 	combiner_map_input_function_array(combiner.setup.stage[n].mapin_rgb.D_mapping, combiner.work[id].variables.D);
4477 
4478 	// alpha portion
4479 	// A
4480 	// get component (blue or alpha) from selected input
4481 	v = combiner_map_input_select(id, combiner.setup.stage[n].mapin_alpha.A_input, 2 + combiner.setup.stage[n].mapin_alpha.A_component);
4482 	// copy component to A
4483 	combiner.work[id].variables.A[3] = combiner_map_input_function(combiner.setup.stage[n].mapin_alpha.A_mapping, v);
4484 	// B
4485 	v = combiner_map_input_select(id, combiner.setup.stage[n].mapin_alpha.B_input, 2 + combiner.setup.stage[n].mapin_alpha.B_component);
4486 	combiner.work[id].variables.B[3] = combiner_map_input_function(combiner.setup.stage[n].mapin_alpha.B_mapping, v);
4487 	// C
4488 	v = combiner_map_input_select(id, combiner.setup.stage[n].mapin_alpha.C_input, 2 + combiner.setup.stage[n].mapin_alpha.C_component);
4489 	combiner.work[id].variables.C[3] = combiner_map_input_function(combiner.setup.stage[n].mapin_alpha.C_mapping, v);
4490 	// D
4491 	v = combiner_map_input_select(id, combiner.setup.stage[n].mapin_alpha.D_input, 2 + combiner.setup.stage[n].mapin_alpha.D_component);
4492 	combiner.work[id].variables.D[3] = combiner_map_input_function(combiner.setup.stage[n].mapin_alpha.D_mapping, v);
4493 }
4494 
combiner_map_stage_output(int id,int stage_number)4495 void nv2a_renderer::combiner_map_stage_output(int id, int stage_number)
4496 {
4497 	int n = stage_number;
4498 	float *f;
4499 
4500 	// rgb
4501 	f = combiner_map_output_select_array(id, combiner.setup.stage[n].mapout_rgb.AB_output);
4502 	if (f) {
4503 		f[0] = combiner.work[id].functions.RGBop1[0];
4504 		f[1] = combiner.work[id].functions.RGBop1[1];
4505 		f[2] = combiner.work[id].functions.RGBop1[2];
4506 	}
4507 	f = combiner_map_output_select_array(id, combiner.setup.stage[n].mapout_rgb.CD_output);
4508 	if (f) {
4509 		f[0] = combiner.work[id].functions.RGBop2[0];
4510 		f[1] = combiner.work[id].functions.RGBop2[1];
4511 		f[2] = combiner.work[id].functions.RGBop2[2];
4512 	}
4513 	if ((combiner.setup.stage[n].mapout_rgb.AB_dotproduct | combiner.setup.stage[n].mapout_rgb.CD_dotproduct) == 0) {
4514 		f = combiner_map_output_select_array(id, combiner.setup.stage[n].mapout_rgb.SUM_output);
4515 		if (f) {
4516 			f[0] = combiner.work[id].functions.RGBop3[0];
4517 			f[1] = combiner.work[id].functions.RGBop3[1];
4518 			f[2] = combiner.work[id].functions.RGBop3[2];
4519 		}
4520 	}
4521 	// alpha
4522 	f = combiner_map_output_select_array(id, combiner.setup.stage[n].mapout_alpha.AB_output);
4523 	if (f)
4524 		f[3] = combiner.work[id].functions.Aop1;
4525 	f = combiner_map_output_select_array(id, combiner.setup.stage[n].mapout_alpha.CD_output);
4526 	if (f)
4527 		f[3] = combiner.work[id].functions.Aop2;
4528 	f = combiner_map_output_select_array(id, combiner.setup.stage[n].mapout_alpha.SUM_output);
4529 	if (f)
4530 		f[3] = combiner.work[id].functions.Aop3;
4531 }
4532 
combiner_map_final_input(int id)4533 void nv2a_renderer::combiner_map_final_input(int id)
4534 {
4535 	int c, d, i;
4536 	float *pv;
4537 
4538 	// E
4539 	pv = combiner_map_input_select_array(id, combiner.setup.final.mapin_rgb.E_input);
4540 	c = combiner.setup.final.mapin_rgb.E_component * 3;
4541 	i = combiner.setup.final.mapin_rgb.E_component ^ 1;
4542 	for (d = 0; d < 3; d++) {
4543 		combiner.work[id].variables.E[d] = pv[c];
4544 		c += i;
4545 	}
4546 	combiner_map_input_function_array(combiner.setup.final.mapin_rgb.E_mapping, combiner.work[id].variables.E);
4547 	// F
4548 	pv = combiner_map_input_select_array(id, combiner.setup.final.mapin_rgb.F_input);
4549 	c = combiner.setup.final.mapin_rgb.F_component * 3;
4550 	i = combiner.setup.final.mapin_rgb.F_component ^ 1;
4551 	for (d = 0; d < 3; d++) {
4552 		combiner.work[id].variables.F[d] = pv[c];
4553 		c += i;
4554 	}
4555 	combiner_map_input_function_array(combiner.setup.final.mapin_rgb.F_mapping, combiner.work[id].variables.F);
4556 	// EF
4557 	combiner.work[id].variables.EF[0] = combiner.work[id].variables.E[0] * combiner.work[id].variables.F[0];
4558 	combiner.work[id].variables.EF[1] = combiner.work[id].variables.E[1] * combiner.work[id].variables.F[1];
4559 	combiner.work[id].variables.EF[2] = combiner.work[id].variables.E[2] * combiner.work[id].variables.F[2];
4560 	// sumclamp
4561 	combiner.work[id].variables.sumclamp[0] = std::max(0.0f, combiner.work[id].registers.spare0[0]) + std::max(0.0f, combiner.work[id].registers.secondarycolor[0]);
4562 	combiner.work[id].variables.sumclamp[1] = std::max(0.0f, combiner.work[id].registers.spare0[1]) + std::max(0.0f, combiner.work[id].registers.secondarycolor[1]);
4563 	combiner.work[id].variables.sumclamp[2] = std::max(0.0f, combiner.work[id].registers.spare0[2]) + std::max(0.0f, combiner.work[id].registers.secondarycolor[2]);
4564 	if (combiner.setup.final.color_sum_clamp != 0) {
4565 		combiner.work[id].variables.sumclamp[0] = std::min(combiner.work[id].variables.sumclamp[0], 1.0f);
4566 		combiner.work[id].variables.sumclamp[1] = std::min(combiner.work[id].variables.sumclamp[1], 1.0f);
4567 		combiner.work[id].variables.sumclamp[2] = std::min(combiner.work[id].variables.sumclamp[2], 1.0f);
4568 	}
4569 	// A
4570 	pv = combiner_map_input_select_array(id, combiner.setup.final.mapin_rgb.A_input);
4571 	c = combiner.setup.final.mapin_rgb.A_component * 3;
4572 	i = combiner.setup.final.mapin_rgb.A_component ^ 1;
4573 	for (d = 0; d < 3; d++) {
4574 		combiner.work[id].variables.A[d] = pv[c];
4575 		c += i;
4576 	}
4577 	combiner_map_input_function_array(combiner.setup.final.mapin_rgb.A_mapping, combiner.work[id].variables.A);
4578 	// B
4579 	pv = combiner_map_input_select_array(id, combiner.setup.final.mapin_rgb.B_input);
4580 	c = combiner.setup.final.mapin_rgb.B_component * 3;
4581 	i = combiner.setup.final.mapin_rgb.B_component ^ 1;
4582 	for (d = 0; d < 3; d++) {
4583 		combiner.work[id].variables.B[d] = pv[c];
4584 		c += i;
4585 	}
4586 	combiner_map_input_function_array(combiner.setup.final.mapin_rgb.B_mapping, combiner.work[id].variables.B);
4587 	// C
4588 	pv = combiner_map_input_select_array(id, combiner.setup.final.mapin_rgb.C_input);
4589 	c = combiner.setup.final.mapin_rgb.C_component * 3;
4590 	i = combiner.setup.final.mapin_rgb.C_component ^ 1;
4591 	for (d = 0; d < 3; d++) {
4592 		combiner.work[id].variables.C[d] = pv[c];
4593 		c += i;
4594 	}
4595 	combiner_map_input_function_array(combiner.setup.final.mapin_rgb.C_mapping, combiner.work[id].variables.C);
4596 	// D
4597 	pv = combiner_map_input_select_array(id, combiner.setup.final.mapin_rgb.D_input);
4598 	c = combiner.setup.final.mapin_rgb.D_component * 3;
4599 	i = combiner.setup.final.mapin_rgb.D_component ^ 1;
4600 	for (d = 0; d < 3; d++) {
4601 		combiner.work[id].variables.D[d] = pv[c];
4602 		c += i;
4603 	}
4604 	combiner_map_input_function_array(combiner.setup.final.mapin_rgb.D_mapping, combiner.work[id].variables.D);
4605 	// G
4606 	combiner.work[id].variables.G = combiner_map_input_select(id, combiner.setup.final.mapin_alpha.G_input, 2 + combiner.setup.final.mapin_alpha.G_component);
4607 }
4608 
combiner_final_output(int id)4609 void nv2a_renderer::combiner_final_output(int id)
4610 {
4611 	// rgb
4612 	combiner.work[id].output[0] = combiner.work[id].variables.A[0] * combiner.work[id].variables.B[0] + (1.0f - combiner.work[id].variables.A[0])*combiner.work[id].variables.C[0] + combiner.work[id].variables.D[0];
4613 	combiner.work[id].output[1] = combiner.work[id].variables.A[1] * combiner.work[id].variables.B[1] + (1.0f - combiner.work[id].variables.A[1])*combiner.work[id].variables.C[1] + combiner.work[id].variables.D[1];
4614 	combiner.work[id].output[2] = combiner.work[id].variables.A[2] * combiner.work[id].variables.B[2] + (1.0f - combiner.work[id].variables.A[2])*combiner.work[id].variables.C[2] + combiner.work[id].variables.D[2];
4615 	combiner.work[id].output[0] = std::min(combiner.work[id].output[0], 2.0f);
4616 	combiner.work[id].output[1] = std::min(combiner.work[id].output[1], 2.0f);
4617 	combiner.work[id].output[2] = std::min(combiner.work[id].output[2], 2.0f);
4618 	// a
4619 	combiner.work[id].output[3] = combiner_map_input_function(combiner.setup.final.mapin_alpha.G_mapping, combiner.work[id].variables.G);
4620 }
4621 
combiner_function_AB(int id,float result[4])4622 void nv2a_renderer::combiner_function_AB(int id, float result[4])
4623 {
4624 	result[0] = combiner.work[id].variables.A[0] * combiner.work[id].variables.B[0];
4625 	result[1] = combiner.work[id].variables.A[1] * combiner.work[id].variables.B[1];
4626 	result[2] = combiner.work[id].variables.A[2] * combiner.work[id].variables.B[2];
4627 }
4628 
combiner_function_AdotB(int id,float result[4])4629 void nv2a_renderer::combiner_function_AdotB(int id, float result[4])
4630 {
4631 	result[0] = combiner.work[id].variables.A[0] * combiner.work[id].variables.B[0] + combiner.work[id].variables.A[1] * combiner.work[id].variables.B[1] + combiner.work[id].variables.A[2] * combiner.work[id].variables.B[2];
4632 	result[1] = result[0];
4633 	result[2] = result[0];
4634 }
4635 
combiner_function_CD(int id,float result[4])4636 void nv2a_renderer::combiner_function_CD(int id, float result[4])
4637 {
4638 	result[0] = combiner.work[id].variables.C[0] * combiner.work[id].variables.D[0];
4639 	result[1] = combiner.work[id].variables.C[1] * combiner.work[id].variables.D[1];
4640 	result[2] = combiner.work[id].variables.C[2] * combiner.work[id].variables.D[2];
4641 }
4642 
combiner_function_CdotD(int id,float result[4])4643 void nv2a_renderer::combiner_function_CdotD(int id, float result[4])
4644 {
4645 	result[0] = combiner.work[id].variables.C[0] * combiner.work[id].variables.D[0] + combiner.work[id].variables.C[1] * combiner.work[id].variables.D[1] + combiner.work[id].variables.C[2] * combiner.work[id].variables.D[2];
4646 	result[1] = result[0];
4647 	result[2] = result[0];
4648 }
4649 
combiner_function_ABmuxCD(int id,float result[4])4650 void nv2a_renderer::combiner_function_ABmuxCD(int id, float result[4])
4651 {
4652 	if (combiner.work[id].registers.spare0[3] >= 0.5f)
4653 		combiner_function_AB(id, result);
4654 	else
4655 		combiner_function_CD(id, result);
4656 }
4657 
combiner_function_ABsumCD(int id,float result[4])4658 void nv2a_renderer::combiner_function_ABsumCD(int id, float result[4])
4659 {
4660 	result[0] = combiner.work[id].variables.A[0] * combiner.work[id].variables.B[0] + combiner.work[id].variables.C[0] * combiner.work[id].variables.D[0];
4661 	result[1] = combiner.work[id].variables.A[1] * combiner.work[id].variables.B[1] + combiner.work[id].variables.C[1] * combiner.work[id].variables.D[1];
4662 	result[2] = combiner.work[id].variables.A[2] * combiner.work[id].variables.B[2] + combiner.work[id].variables.C[2] * combiner.work[id].variables.D[2];
4663 }
4664 
combiner_compute_rgb_outputs(int id,int stage_number)4665 void nv2a_renderer::combiner_compute_rgb_outputs(int id, int stage_number)
4666 {
4667 	int n = stage_number;
4668 	int m;
4669 	float bias, scale;
4670 
4671 	// select bias and scale
4672 	if (combiner.setup.stage[n].mapout_rgb.bias)
4673 		bias = -0.5;
4674 	else
4675 		bias = 0;
4676 	switch (combiner.setup.stage[n].mapout_rgb.scale) {
4677 	case 0:
4678 	default:
4679 		scale = 1.0;
4680 		break;
4681 	case 1:
4682 		scale = 2.0;
4683 		break;
4684 	case 2:
4685 		scale = 4.0;
4686 		break;
4687 	case 3:
4688 		scale = 0.5;
4689 		break;
4690 	}
4691 	// first
4692 	if (combiner.setup.stage[n].mapout_rgb.AB_dotproduct) {
4693 		m = 1;
4694 		combiner_function_AdotB(id, combiner.work[id].functions.RGBop1);
4695 	}
4696 	else {
4697 		m = 0;
4698 		combiner_function_AB(id, combiner.work[id].functions.RGBop1);
4699 	}
4700 	combiner.work[id].functions.RGBop1[0] = std::max(std::min((combiner.work[id].functions.RGBop1[0] + bias) * scale, 1.0f), -1.0f);
4701 	combiner.work[id].functions.RGBop1[1] = std::max(std::min((combiner.work[id].functions.RGBop1[1] + bias) * scale, 1.0f), -1.0f);
4702 	combiner.work[id].functions.RGBop1[2] = std::max(std::min((combiner.work[id].functions.RGBop1[2] + bias) * scale, 1.0f), -1.0f);
4703 	// second
4704 	if (combiner.setup.stage[n].mapout_rgb.CD_dotproduct) {
4705 		m = m | 1;
4706 		combiner_function_CdotD(id, combiner.work[id].functions.RGBop2);
4707 	}
4708 	else
4709 		combiner_function_CD(id, combiner.work[id].functions.RGBop2);
4710 	combiner.work[id].functions.RGBop2[0] = std::max(std::min((combiner.work[id].functions.RGBop2[0] + bias) * scale, 1.0f), -1.0f);
4711 	combiner.work[id].functions.RGBop2[1] = std::max(std::min((combiner.work[id].functions.RGBop2[1] + bias) * scale, 1.0f), -1.0f);
4712 	combiner.work[id].functions.RGBop2[2] = std::max(std::min((combiner.work[id].functions.RGBop2[2] + bias) * scale, 1.0f), -1.0f);
4713 	// third
4714 	if (m == 0) {
4715 		if (combiner.setup.stage[n].mapout_rgb.muxsum)
4716 			combiner_function_ABmuxCD(id, combiner.work[id].functions.RGBop3);
4717 		else
4718 			combiner_function_ABsumCD(id, combiner.work[id].functions.RGBop3);
4719 		combiner.work[id].functions.RGBop3[0] = std::max(std::min((combiner.work[id].functions.RGBop3[0] + bias) * scale, 1.0f), -1.0f);
4720 		combiner.work[id].functions.RGBop3[1] = std::max(std::min((combiner.work[id].functions.RGBop3[1] + bias) * scale, 1.0f), -1.0f);
4721 		combiner.work[id].functions.RGBop3[2] = std::max(std::min((combiner.work[id].functions.RGBop3[2] + bias) * scale, 1.0f), -1.0f);
4722 	}
4723 }
4724 
combiner_compute_alpha_outputs(int id,int stage_number)4725 void nv2a_renderer::combiner_compute_alpha_outputs(int id, int stage_number)
4726 {
4727 	int n = stage_number;
4728 	float bias, scale;
4729 
4730 	// select bias and scale
4731 	if (combiner.setup.stage[n].mapout_alpha.bias)
4732 		bias = -0.5;
4733 	else
4734 		bias = 0;
4735 	switch (combiner.setup.stage[n].mapout_alpha.scale) {
4736 	case 0:
4737 	default:
4738 		scale = 1.0;
4739 		break;
4740 	case 1:
4741 		scale = 2.0;
4742 		break;
4743 	case 2:
4744 		scale = 4.0;
4745 		break;
4746 	case 3:
4747 		scale = 0.5;
4748 		break;
4749 	}
4750 	// first
4751 	combiner.work[id].functions.Aop1 = combiner.work[id].variables.A[3] * combiner.work[id].variables.B[3];
4752 	combiner.work[id].functions.Aop1 = std::max(std::min((combiner.work[id].functions.Aop1 + bias) * scale, 1.0f), -1.0f);
4753 	// second
4754 	combiner.work[id].functions.Aop2 = combiner.work[id].variables.C[3] * combiner.work[id].variables.D[3];
4755 	combiner.work[id].functions.Aop2 = std::max(std::min((combiner.work[id].functions.Aop2 + bias) * scale, 1.0f), -1.0f);
4756 	// third
4757 	if (combiner.setup.stage[n].mapout_alpha.muxsum) {
4758 		if (combiner.work[id].registers.spare0[3] >= 0.5f)
4759 			combiner.work[id].functions.Aop3 = combiner.work[id].variables.A[3] * combiner.work[id].variables.B[3];
4760 		else
4761 			combiner.work[id].functions.Aop3 = combiner.work[id].variables.C[3] * combiner.work[id].variables.D[3];
4762 	}
4763 	else
4764 		combiner.work[id].functions.Aop3 = combiner.work[id].variables.A[3] * combiner.work[id].variables.B[3] + combiner.work[id].variables.C[3] * combiner.work[id].variables.D[3];
4765 	combiner.work[id].functions.Aop3 = std::max(std::min((combiner.work[id].functions.Aop3 + bias) * scale, 1.0f), -1.0f);
4766 }
4767 
// Line callback driven with the vblank state.
// While the line is asserted, bit 0 of pcrtc[0x100/4] and bit 16 of pcrtc[0x808/4] are set,
// otherwise they are cleared; afterwards the interrupt line is re-evaluated.
WRITE_LINE_MEMBER(nv2a_renderer::vblank_callback)
{
	const bool asserted = (state != 0);

	// a puller stopped with wait code 1 is restarted when the line becomes asserted
	if (asserted && (puller_waiting == 1)) {
		puller_waiting = 0;
		puller_timer_work(nullptr, 0);
	}
	if (asserted) {
		pcrtc[0x100 / 4] |= 1;
		pcrtc[0x808 / 4] |= 0x10000;
	}
	else {
		pcrtc[0x100 / 4] &= ~1;
		pcrtc[0x808 / 4] &= ~0x10000;
	}
	irq_callback(update_interrupts() ? 1 : 0); // IRQ 3
}
4790 
update_interrupts()4791 bool nv2a_renderer::update_interrupts()
4792 {
4793 	if (pcrtc[0x100 / 4] & pcrtc[0x140 / 4])
4794 		pmc[0x100 / 4] |= 0x1000000;
4795 	else
4796 		pmc[0x100 / 4] &= ~0x1000000;
4797 	if (pgraph[0x100 / 4] & pgraph[0x140 / 4])
4798 		pmc[0x100 / 4] |= 0x1000;
4799 	else
4800 		pmc[0x100 / 4] &= ~0x1000;
4801 	if (((pmc[0x100 / 4] & 0x7fffffff) && (pmc[0x140 / 4] & 1)) || ((pmc[0x100 / 4] & 0x80000000) && (pmc[0x140 / 4] & 2))) {
4802 		// send interrupt
4803 		return true;
4804 	}
4805 	else
4806 		return false;
4807 }
4808 
screen_update_callback(screen_device & screen,bitmap_rgb32 & bitmap,const rectangle & cliprect)4809 uint32_t nv2a_renderer::screen_update_callback(screen_device &screen, bitmap_rgb32 &bitmap, const rectangle &cliprect)
4810 {
4811 	if (displayedtarget != nullptr) {
4812 		bitmap_rgb32 bm(displayedtarget, 640, 480, 640);
4813 		uint32_t *dst = (uint32_t *)bitmap.raw_pixptr(0, 0);
4814 
4815 		//printf("updatescreen %08X\n\r",pcrtc[0x800/4]);
4816 		memcpy(dst, displayedtarget, bitmap.rowbytes()*bitmap.height());
4817 	}
4818 	return 0;
4819 }
4820 
geforce_assign_object(address_space & space,uint32_t chanel,uint32_t subchannel,uint32_t address)4821 void nv2a_renderer::geforce_assign_object(address_space &space, uint32_t chanel, uint32_t subchannel, uint32_t address)
4822 {
4823 	uint32_t handle, offset, objclass, data;
4824 
4825 	handle = space.read_dword(address);
4826 	offset = geforce_object_offset(handle);
4827 #ifdef LOG_NV2A
4828 	machine().logerror("  assign to subchannel %d object at %d in ramin", subchannel, offset);
4829 #endif
4830 	channel[chanel][subchannel].object.offset = offset;
4831 	data = ramin[offset / 4];
4832 	objclass = data & 0xff;
4833 #ifdef LOG_NV2A
4834 	machine().logerror(" class %03X\n", objclass);
4835 #endif
4836 	channel[chanel][subchannel].object.objclass = objclass;
4837 }
4838 
// Timer callback that drains the command buffers of all 32 channels.
// For every channel the commands between the get pointer (regs[0x44/4] of subchannel 0) and the
// put pointer (regs[0x40/4] of subchannel 0) are read from puller_space and executed.
// If a method in an INCREASING command returns a non-zero code, the timer is disabled, the code
// is stored in puller_waiting and processing stops until something restarts the puller.
TIMER_CALLBACK_MEMBER(nv2a_renderer::puller_timer_work)
{
	int chanel;
	int method, count;
	uint32_t *dmaput, *dmaget;
	uint32_t cmd;
	COMMAND cmdtype;
	int countlen;
	int ret;
	address_space *space = puller_space; // address space recorded by geforce_w when the puller was kicked
	uint32_t subch;

	for (chanel = 0; chanel < 32; chanel++) {
		dmaput = &channel[chanel][0].regs[0x40 / 4];
		dmaget = &channel[chanel][0].regs[0x44 / 4];
		// process commands until the get pointer catches up with the put pointer
		while (*dmaget != *dmaput) {
			// fetch the command dword and step over it
			cmd = space->read_dword(*dmaget);
			*dmaget += 4;
			cmdtype = geforce_commandkind(cmd);
			switch (cmdtype)
			{
			case COMMAND::JUMP:
				// continue fetching from the dword-aligned address held in the command
	#ifdef LOG_NV2A
				machine().logerror("jump dmaget %08X", *dmaget);
	#endif
				*dmaget = cmd & 0xfffffffc;
	#ifdef LOG_NV2A
				machine().logerror(" -> %08X\n\r", *dmaget);
	#endif
				break;
			case COMMAND::INCREASING:
				// bits 2-12: method (byte offset), bits 13-15: subchannel, bits 18-28: parameter count
				method = cmd & (2047 << 2); // if method >= 0x100 send it to assigned object
				subch = (cmd >> 13) & 7;
				count = (cmd >> 18) & 2047;
				if ((method == 0) && (count == 1)) { // OBJECT method, bind an engine object to a subchannel
					geforce_assign_object(*space, chanel, subch, *dmaget);
					*dmaget += 4;
				}
				else {
	#ifdef LOG_NV2A
					machine().logerror("  subch. %d method %04x count %d\n", subch, method, count);
	#endif
					ret = 0;
					// one parameter per method; the method number advances by 4 after each one
					while (count > 0) {
						countlen = 1;
						ret = execute_method(*space, chanel, subch, method, *dmaget, countlen);
						count--;
						method += 4;
						*dmaget += 4;
						if (ret != 0)
							break;
					}
					// a non-zero result suspends the puller; the code says what it is waiting for
					if (ret != 0) {
						puller_timer->enable(false);
						puller_waiting = ret;
						return;
					}
				}
				break;
			case COMMAND::NON_INCREASING:
				// same fields as INCREASING, but all parameters go to the same method
				method = cmd & (2047 << 2);
				subch = (cmd >> 13) & 7;
				count = (cmd >> 18) & 2047;
				if ((method == 0) && (count == 1)) {
					geforce_assign_object(*space, chanel, subch, *dmaget);
					*dmaget += 4;
				}
				else {
	#ifdef LOG_NV2A
					machine().logerror("  subch. %d method %04x count %d\n", subch, method, count);
	#endif
					// execute_method is expected to lower countlen by the number of parameters it consumed;
					// the get pointer is advanced by that amount.
					// NOTE(review): the result code is ignored here, and the loop relies on
					// execute_method always consuming at least one parameter - confirm.
					while (count > 0) {
						countlen = count;
						ret = execute_method(*space, chanel, subch, method, *dmaget, countlen);
						*dmaget += 4 * (count - countlen);
						count = countlen;
					}
				}
				break;
			case COMMAND::LONG_NON_INCREASING:
				// like NON_INCREASING, but the parameter count is taken from the following dword
				method = cmd & (2047 << 2);
				subch = (cmd >> 13) & 7;
				count = space->read_dword(*dmaget);
				*dmaget += 4;
				if ((method == 0) && (count == 1)) {
					geforce_assign_object(*space, chanel, subch, *dmaget);
					*dmaget += 4;
				}
				else {
	#ifdef LOG_NV2A
					machine().logerror("  subch. %d method %04x count %d\n", subch, method, count);
	#endif
					// same consumption loop as NON_INCREASING (result code ignored as well)
					while (count > 0) {
						countlen = count;
						ret = execute_method(*space, chanel, subch, method, *dmaget, countlen);
						*dmaget += 4 * (count - countlen);
						count = countlen;
					}
				}
				break;
			default:
				// any other command kind is only logged; the command dword has already been skipped
				machine().logerror("  unimplemented command %08X\n", cmd);
			}
		}
	}
}
4945 
geforce_r(offs_t offset,uint32_t mem_mask)4946 uint32_t nv2a_renderer::geforce_r(offs_t offset, uint32_t mem_mask)
4947 {
4948 	static int x, ret;
4949 
4950 	ret = 0;
4951 	if (offset == 0x1804f6) {
4952 		x = x ^ 0x08080808;
4953 		ret = x;
4954 	}
4955 	if ((offset >= 0x00100000 / 4) && (offset < 0x00101000 / 4)) {
4956 		//machine().logerror("NV_2A: read PFB[%06X] mask %08X value %08X\n",offset*4-0x00100000,mem_mask,ret);
4957 		if (offset == 0x100200 / 4)
4958 			return 3;
4959 	}
4960 	else if ((offset >= 0x00101000 / 4) && (offset < 0x00102000 / 4)) {
4961 		//machine().logerror("NV_2A: read STRAPS[%06X] mask %08X value %08X\n",offset*4-0x00101000,mem_mask,ret);
4962 	}
4963 	else if ((offset >= 0x00002000 / 4) && (offset < 0x00004000 / 4)) {
4964 		ret = pfifo[offset - 0x00002000 / 4];
4965 		// PFIFO.CACHE1.STATUS or PFIFO.RUNOUT_STATUS
4966 		if ((offset == 0x3214 / 4) || (offset == 0x2400 / 4))
4967 			ret = 0x10;
4968 		//machine().logerror("NV_2A: read PFIFO[%06X] value %08X\n",offset*4-0x00002000,ret);
4969 	}
4970 	else if ((offset >= 0x00700000 / 4) && (offset < 0x00800000 / 4)) {
4971 		ret = ramin[offset - 0x00700000 / 4];
4972 		//machine().logerror("NV_2A: read PRAMIN[%06X] value %08X\n",offset*4-0x00700000,ret);
4973 	}
4974 	else if ((offset >= 0x00400000 / 4) && (offset < 0x00402000 / 4)) {
4975 		ret = pgraph[offset - 0x00400000 / 4];
4976 		//machine().logerror("NV_2A: read PGRAPH[%06X] value %08X\n",offset*4-0x00400000,ret);
4977 	}
4978 	else if ((offset >= 0x00600000 / 4) && (offset < 0x00601000 / 4)) {
4979 		ret = pcrtc[offset - 0x00600000 / 4];
4980 		//machine().logerror("NV_2A: read PCRTC[%06X] value %08X\n",offset*4-0x00600000,ret);
4981 	}
4982 	else if ((offset >= 0x00000000 / 4) && (offset < 0x00001000 / 4)) {
4983 		ret = pmc[offset - 0x00000000 / 4];
4984 		//machine().logerror("NV_2A: read PMC[%06X] value %08X\n",offset*4-0x00000000,ret);
4985 	}
4986 	else if ((offset >= 0x00800000 / 4) && (offset < 0x00900000 / 4)) {
4987 		// 32 channels size 0x10000 each, 8 subchannels per channel size 0x2000 each
4988 		int chanel, subchannel, suboffset;
4989 
4990 		suboffset = offset - 0x00800000 / 4;
4991 		chanel = (suboffset >> (16 - 2)) & 31;
4992 		subchannel = (suboffset >> (13 - 2)) & 7;
4993 		suboffset = suboffset & 0x7ff;
4994 		if (suboffset < 0x80 / 4)
4995 			ret = channel[chanel][subchannel].regs[suboffset];
4996 		//machine().logerror("NV_2A: read channel[%02X,%d,%04X]=%08X\n",chanel,subchannel,suboffset*4,ret);
4997 		return ret;
4998 	}
4999 	//machine().logerror("NV_2A: read at %08X mask %08X value %08X\n",0xfd000000+offset*4,mem_mask,ret);
5000 	return ret;
5001 }
5002 
geforce_w(address_space & space,offs_t offset,uint32_t data,uint32_t mem_mask)5003 void nv2a_renderer::geforce_w(address_space &space, offs_t offset, uint32_t data, uint32_t mem_mask)
5004 {
5005 	uint32_t old;
5006 	bool update_int;
5007 
5008 	update_int = false;
5009 	if ((offset >= 0x00101000 / 4) && (offset < 0x00102000 / 4)) {
5010 		//machine().logerror("NV_2A: write STRAPS[%06X] mask %08X value %08X\n",offset*4-0x00101000,mem_mask,data);
5011 	}
5012 	else if ((offset >= 0x00002000 / 4) && (offset < 0x00004000 / 4)) {
5013 		int e = offset - 0x00002000 / 4;
5014 		if (e >= (sizeof(pfifo) / sizeof(uint32_t)))
5015 			return;
5016 		COMBINE_DATA(pfifo + e);
5017 		//machine().logerror("NV_2A: read PFIFO[%06X]=%08X\n",offset*4-0x00002000,data & mem_mask); // 2210 pfifo ramht & 1f0 << 12
5018 	}
5019 	else if ((offset >= 0x00700000 / 4) && (offset < 0x00800000 / 4)) {
5020 		int e = offset - 0x00700000 / 4;
5021 		if (e >= (sizeof(ramin) / sizeof(uint32_t)))
5022 			return;
5023 		COMBINE_DATA(ramin + e);
5024 		//machine().logerror("NV_2A: write PRAMIN[%06X]=%08X\n",offset*4-0x00700000,data & mem_mask);
5025 	}
5026 	else if ((offset >= 0x00400000 / 4) && (offset < 0x00402000 / 4)) {
5027 		int e = offset - 0x00400000 / 4;
5028 		if (e >= (sizeof(pgraph) / sizeof(uint32_t)))
5029 			return;
5030 		old = pgraph[e];
5031 		COMBINE_DATA(pgraph + e);
5032 		if (e == 0x100 / 4) {
5033 			pgraph[e] = old & ~data;
5034 			if (data & 1)
5035 				pgraph[0x108 / 4] = 0;
5036 			update_int = true;
5037 		}
5038 		if (e == 0x140 / 4)
5039 			update_int = true;
5040 		if (e == 0x720 / 4) {
5041 			if ((data & 1) && (puller_waiting == 2)) {
5042 				puller_waiting = 0;
5043 				puller_timer->enable();
5044 				puller_timer->adjust(attotime::zero);
5045 			}
5046 		}
5047 		if ((e >= 0x900 / 4) && (e < 0xa00 / 4))
5048 			pgraph[e] = 0;
5049 		//machine().logerror("NV_2A: write PGRAPH[%06X]=%08X\n",offset*4-0x00400000,data & mem_mask);
5050 	}
5051 	else if ((offset >= 0x00600000 / 4) && (offset < 0x00601000 / 4)) {
5052 		int e = offset - 0x00600000 / 4;
5053 		if (e >= (sizeof(pcrtc) / sizeof(uint32_t)))
5054 			return;
5055 		old = pcrtc[e];
5056 		COMBINE_DATA(pcrtc + e);
5057 		if (e == 0x100 / 4) {
5058 			pcrtc[e] = old & ~data;
5059 			update_int = true;
5060 		}
5061 		if (e == 0x140 / 4)
5062 			update_int = true;
5063 		if (e == 0x800 / 4) {
5064 			displayedtarget = (uint32_t *)direct_access_ptr(pcrtc[e]);
5065 #ifdef LOG_NV2A
5066 			printf("crtc buffer %08X\n\r", data);
5067 #endif
5068 		}
5069 		//machine().logerror("NV_2A: write PCRTC[%06X]=%08X\n",offset*4-0x00600000,data & mem_mask);
5070 	}
5071 	else if ((offset >= 0x00000000 / 4) && (offset < 0x00001000 / 4)) {
5072 		int e = offset - 0x00000000 / 4;
5073 		if (e >= (sizeof(pmc) / sizeof(uint32_t)))
5074 			return;
5075 		COMBINE_DATA(pmc + e);
5076 		if (e == 0x200 / 4) // PMC.ENABLE register
5077 			if (data & 0x1100) // either PFIFO or PGRAPH enabled
5078 				for (int ch = 0; ch < 32; ch++) // zero dma_get in all the channels
5079 					channel[ch][0].regs[0x44 / 4] = 0;
5080 		//machine().logerror("NV_2A: write PMC[%06X]=%08X\n",offset*4-0x00000000,data & mem_mask);
5081 	}
5082 	else if ((offset >= 0x00800000 / 4) && (offset < 0x00900000 / 4)) {
5083 		// 32 channels size 0x10000 each, 8 subchannels per channel size 0x2000 each
5084 		int chanel, subchannel, suboffset;
5085 		//int method, count, handle, objclass;
5086 
5087 		suboffset = offset - 0x00800000 / 4;
5088 		chanel = (suboffset >> (16 - 2)) & 31;
5089 		subchannel = (suboffset >> (13 - 2)) & 7;
5090 		suboffset = suboffset & 0x7ff;
5091 		//machine().logerror("NV_2A: write channel[%02X,%d,%04X]=%08X\n",chanel,subchannel,suboffset*4,data & mem_mask);
5092 		COMBINE_DATA(&channel[chanel][subchannel].regs[suboffset]);
5093 		if (suboffset >= 0x80 / 4)
5094 			return;
5095 		if ((suboffset == 0x40 / 4) || (suboffset == 0x44 / 4)) { // DMA_PUT or DMA_GET
5096 			uint32_t *dmaput, *dmaget;
5097 
5098 			dmaput = &channel[chanel][0].regs[0x40 / 4];
5099 			dmaget = &channel[chanel][0].regs[0x44 / 4];
5100 			//printf("dmaget %08X dmaput %08X\n\r",*dmaget,*dmaput);
5101 			if (*dmaget != *dmaput) {
5102 				if (puller_waiting == 0) {
5103 					puller_space = &space;
5104 					puller_timer->enable();
5105 					puller_timer->adjust(attotime::zero);
5106 				}
5107 			}
5108 		}
5109 	}
5110 	//else
5111 	//      machine().logerror("NV_2A: write at %08X mask %08X value %08X\n",0xfd000000+offset*4,mem_mask,data);
5112 	if (update_int == true) {
5113 		if (update_interrupts() == true)
5114 			irq_callback(1); // IRQ 3
5115 		else
5116 			irq_callback(0); // IRQ 3
5117 	}
5118 }
5119 
// Currently a stub: the body is empty, so nothing is registered here.
void nv2a_renderer::savestate_items()
{
}
5123 
set_ram_base(void * base)5124 void nv2a_renderer::set_ram_base(void *base)
5125 {
5126 	basemempointer = (uint8_t*)base;
5127 	topmempointer = basemempointer + 512 * 1024 * 1024 - 1;
5128 }
5129 
start(address_space * cpu_space)5130 void nv2a_renderer::start(address_space *cpu_space)
5131 {
5132 	puller_timer = machine().scheduler().timer_alloc(timer_expired_delegate(FUNC(nv2a_renderer::puller_timer_work), this), (void *)"NV2A Puller Timer");
5133 	puller_timer->enable(false);
5134 }
5135