tcmask(uint32_t wid,int32_t * S,int32_t * T,int32_t num)1 static STRICTINLINE void tcmask(uint32_t wid, int32_t* S, int32_t* T, int32_t num)
2 {
3 int32_t wrap;
4
5
6
7 if (state[wid].tile[num].mask_s)
8 {
9 if (state[wid].tile[num].ms)
10 {
11 wrap = *S >> state[wid].tile[num].f.masksclamped;
12 wrap &= 1;
13 *S ^= (-wrap);
14 }
15 *S &= maskbits_table[state[wid].tile[num].mask_s];
16 }
17
18 if (state[wid].tile[num].mask_t)
19 {
20 if (state[wid].tile[num].mt)
21 {
22 wrap = *T >> state[wid].tile[num].f.masktclamped;
23 wrap &= 1;
24 *T ^= (-wrap);
25 }
26
27 *T &= maskbits_table[state[wid].tile[num].mask_t];
28 }
29 }
30
31
tcmask_coupled(uint32_t wid,int32_t * S,int32_t * sdiff,int32_t * T,int32_t * tdiff,int32_t num)32 static STRICTINLINE void tcmask_coupled(uint32_t wid, int32_t* S, int32_t* sdiff, int32_t* T, int32_t* tdiff, int32_t num)
33 {
34 int32_t wrap;
35 int32_t maskbits;
36 int32_t wrapthreshold;
37
38
39 if (state[wid].tile[num].mask_s)
40 {
41 maskbits = maskbits_table[state[wid].tile[num].mask_s];
42
43 if (state[wid].tile[num].ms)
44 {
45 wrapthreshold = state[wid].tile[num].f.masksclamped;
46
47 wrap = (*S >> wrapthreshold) & 1;
48 *S ^= (-wrap);
49 *S &= maskbits;
50
51
52 if (((*S - wrap) & maskbits) == maskbits)
53 *sdiff = 0;
54 else
55 *sdiff = 1 - (wrap << 1);
56 }
57 else
58 {
59 *S &= maskbits;
60 if (*S == maskbits)
61 *sdiff = -(*S);
62 else
63 *sdiff = 1;
64 }
65 }
66 else
67 *sdiff = 1;
68
69 if (state[wid].tile[num].mask_t)
70 {
71 maskbits = maskbits_table[state[wid].tile[num].mask_t];
72
73 if (state[wid].tile[num].mt)
74 {
75 wrapthreshold = state[wid].tile[num].f.masktclamped;
76
77 wrap = (*T >> wrapthreshold) & 1;
78 *T ^= (-wrap);
79 *T &= maskbits;
80
81 if (((*T - wrap) & maskbits) == maskbits)
82 *tdiff = 0;
83 else
84 *tdiff = 1 - (wrap << 1);
85 }
86 else
87 {
88 *T &= maskbits;
89 if (*T == maskbits)
90 *tdiff = -(*T & 0xff);
91 else
92 *tdiff = 1;
93 }
94 }
95 else
96 *tdiff = 1;
97 }
98
99
calculate_clamp_diffs(struct tile * t)100 static INLINE void calculate_clamp_diffs(struct tile* t)
101 {
102 t->f.clampdiffs = ((t->sh >> 2) - (t->sl >> 2)) & 0x3ff;
103 t->f.clampdifft = ((t->th >> 2) - (t->tl >> 2)) & 0x3ff;
104 }
105
106
calculate_tile_derivs(struct tile * t)107 static INLINE void calculate_tile_derivs(struct tile* t)
108 {
109 t->f.clampens = t->cs || !t->mask_s;
110 t->f.clampent = t->ct || !t->mask_t;
111 t->f.masksclamped = t->mask_s <= 10 ? t->mask_s : 10;
112 t->f.masktclamped = t->mask_t <= 10 ? t->mask_t : 10;
113 t->f.notlutswitch = (t->format << 2) | t->size;
114 t->f.tlutswitch = (t->size << 2) | ((t->format + 2) & 3);
115
116 if (t->format < 5)
117 {
118 t->f.notlutswitch = (t->format << 2) | t->size;
119 t->f.tlutswitch = (t->size << 2) | ((t->format + 2) & 3);
120 }
121 else
122 {
123 t->f.notlutswitch = 0x10 | t->size;
124 t->f.tlutswitch = (t->size << 2) | 2;
125 }
126 }
127
get_texel1_1cycle(uint32_t wid,int32_t * s1,int32_t * t1,int32_t s,int32_t t,int32_t w,int32_t dsinc,int32_t dtinc,int32_t dwinc,int32_t scanline,struct spansigs * sigs)128 static STRICTINLINE void get_texel1_1cycle(uint32_t wid, int32_t* s1, int32_t* t1, int32_t s, int32_t t, int32_t w, int32_t dsinc, int32_t dtinc, int32_t dwinc, int32_t scanline, struct spansigs* sigs)
129 {
130 int32_t nexts, nextt, nextsw;
131
132 if (!sigs->endspan || !sigs->longspan || !state[wid].span[scanline + 1].validline)
133 {
134
135
136 nextsw = (w + dwinc) >> 16;
137 nexts = (s + dsinc) >> 16;
138 nextt = (t + dtinc) >> 16;
139 }
140 else
141 {
142
143
144
145
146
147
148
149 int32_t nextscan = scanline + 1;
150 nextt = state[wid].span[nextscan].t >> 16;
151 nexts = state[wid].span[nextscan].s >> 16;
152 nextsw = state[wid].span[nextscan].w >> 16;
153 }
154
155 state[wid].tcdiv_ptr(nexts, nextt, nextsw, s1, t1);
156 }
157
/*
 * Produce one texture sample in *TEX for the coordinates (SSS, SST) on tile
 * `tilenum`.
 *
 * cycle selects which bilerp flag is consulted (bi_lerp1 when non-zero,
 * bi_lerp0 otherwise); the convert_one mode is honoured only when cycle is
 * non-zero. *prev is read only on the "convert" paths.
 *
 * Two top-level paths:
 *   - sample_type or en_tlut set: coordinates are clamped (with fractions) and
 *     masked with tcmask_coupled(), four texels t0..t3 are fetched, and the
 *     result is either interpolated (bilerp) or run through the k0_tf..k3_tf
 *     formulas. All four output channels are masked to 9 bits.
 *   - otherwise: coordinates go through tcclamp_cycle_light() and tcmask(),
 *     and a single texel is used.
 */
texture_pipeline_cycle(uint32_t wid,struct color * TEX,struct color * prev,int32_t SSS,int32_t SST,uint32_t tilenum,uint32_t cycle)158 static STRICTINLINE void texture_pipeline_cycle(uint32_t wid, struct color* TEX, struct color* prev, int32_t SSS, int32_t SST, uint32_t tilenum, uint32_t cycle)
159 {
160 int32_t maxs, maxt, invt3r, invt3g, invt3b, invt3a;
161 int32_t sfrac, tfrac, invsf, invtf, sfracrg, invsfrg;
162 int upper, upperrg, center, centerrg;
163
164
/* Per-cycle bilerp flag; convert is active only for a non-zero cycle. */
165 int bilerp = cycle ? state[wid].other_modes.bi_lerp1 : state[wid].other_modes.bi_lerp0;
166 int convert = state[wid].other_modes.convert_one && cycle;
167 struct color t0, t1, t2, t3;
168 int sss1, sst1, sdiff, tdiff;
169
/* Work on local copies so the caller's SSS/SST are not modified. */
170 sss1 = SSS;
171 sst1 = SST;
172
/* maxs/maxt are handed to tcshift_cycle by address and later passed to the clamp helpers. */
173 tcshift_cycle(wid, &sss1, &sst1, &maxs, &maxt, tilenum);
174
/* Combine the shifted coordinates with the tile's sl/tl via TRELATIVE. */
175 sss1 = TRELATIVE(sss1, state[wid].tile[tilenum].sl);
176 sst1 = TRELATIVE(sst1, state[wid].tile[tilenum].tl);
177
/* Four-texel path. */
178 if (state[wid].other_modes.sample_type || state[wid].other_modes.en_tlut)
179 {
/* The low five bits of each coordinate are kept as the fractional part. */
180 sfrac = sss1 & 0x1f;
181 tfrac = sst1 & 0x1f;
182
183
184
185
186 tcclamp_cycle(wid, &sss1, &sst1, &sfrac, &tfrac, maxs, maxt, tilenum);
187
188
189
190
191
192
/* Mask both coordinates and obtain the neighbour offsets sdiff/tdiff. */
193 tcmask_coupled(wid, &sss1, &sdiff, &sst1, &tdiff, tilenum);
194
195
196
197
198
199
200
/* Bit 5 of sfrac + tfrac: selects between the t3-based and t0-based formulas below. */
201 upper = (sfrac + tfrac) & 0x20;
202
203
204
205
/* YUV tiles use a separate S fraction (and selector) for the r/g channels. */
206 if (state[wid].tile[tilenum].format == FORMAT_YUV)
207 {
208 sfracrg = (sfrac >> 1) | ((sss1 & 1) << 4);
209
210
211
212 upperrg = (sfracrg + tfrac) & 0x20;
213 }
214 else
215 {
216 upperrg = upper;
217 sfracrg = sfrac;
218 }
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
/* Interpolating branch. */
236 if (bilerp)
237 {
238
/* Choose the fetch routine from sample_type / en_tlut. */
239 if (!state[wid].other_modes.sample_type)
240 fetch_texel_entlut_quadro_nearest(wid, &t0, &t1, &t2, &t3, sss1, sst1, tilenum, upper, upperrg);
241 else if (state[wid].other_modes.en_tlut)
242 fetch_texel_entlut_quadro(wid, &t0, &t1, &t2, &t3, sss1, sdiff, sst1, tdiff, tilenum, upper, upperrg);
243 else
244 fetch_texel_quadro(wid, &t0, &t1, &t2, &t3, sss1, sdiff, sst1, tdiff, tilenum, upper - upperrg);
245
/* The "center" special case applies only with mid_texel set and both fractions exactly 0x10. */
246 if (!state[wid].other_modes.mid_texel)
247 center = centerrg = 0;
248 else
249 {
250
251 center = (sfrac == 0x10 && tfrac == 0x10);
252 centerrg = (sfracrg == 0x10 && tfrac == 0x10);
253 }
254
/* Regular interpolation: weights come from the coordinate fractions. */
255 if (!convert)
256 {
257 invtf = 0x20 - tfrac;
258
/* r and g use the rg-specific fraction and selectors. */
259 if (!centerrg)
260 {
261
262
263 if (upperrg)
264 {
265
266 invsfrg = 0x20 - sfracrg;
267
268 TEX->r = t3.r + ((invsfrg * (t2.r - t3.r) + invtf * (t1.r - t3.r) + 0x10) >> 5);
269 TEX->g = t3.g + ((invsfrg * (t2.g - t3.g) + invtf * (t1.g - t3.g) + 0x10) >> 5);
270 }
271 else
272 {
273 TEX->r = t0.r + ((sfracrg * (t1.r - t0.r) + tfrac * (t2.r - t0.r) + 0x10) >> 5);
274 TEX->g = t0.g + ((sfracrg * (t1.g - t0.g) + tfrac * (t2.g - t0.g) + 0x10) >> 5);
275 }
276 }
277 else
278 {
279
/* Center case: all four texels contribute, using the bitwise complement of t3. */
280 invt3r = ~t3.r;
281 invt3g = ~t3.g;
282
283
284 TEX->r = t3.r + ((((t1.r + t2.r) << 6) - (t3.r << 7) + ((invt3r + t0.r) << 6) + 0xc0) >> 8);
285 TEX->g = t3.g + ((((t1.g + t2.g) << 6) - (t3.g << 7) + ((invt3g + t0.g) << 6) + 0xc0) >> 8);
286 }
287
/* b and a use the unmodified S fraction and selectors. */
288 if (!center)
289 {
290 if (upper)
291 {
292 invsf = 0x20 - sfrac;
293
294 TEX->b = t3.b + ((invsf * (t2.b - t3.b) + invtf * (t1.b - t3.b) + 0x10) >> 5);
295 TEX->a = t3.a + ((invsf * (t2.a - t3.a) + invtf * (t1.a - t3.a) + 0x10) >> 5);
296 }
297 else
298 {
299 TEX->b = t0.b + ((sfrac * (t1.b - t0.b) + tfrac * (t2.b - t0.b) + 0x10) >> 5);
300 TEX->a = t0.a + ((sfrac * (t1.a - t0.a) + tfrac * (t2.a - t0.a) + 0x10) >> 5);
301 }
302 }
303 else
304 {
305 invt3b = ~t3.b;
306 invt3a = ~t3.a;
307
308 TEX->b = t3.b + ((((t1.b + t2.b) << 6) - (t3.b << 7) + ((invt3b + t0.b) << 6) + 0xc0) >> 8);
309 TEX->a = t3.a + ((((t1.a + t2.a) << 6) - (t3.a << 7) + ((invt3a + t0.a) << 6) + 0xc0) >> 8);
310 }
311 }
312 else
313 {
/* Convert mode: prev r/g (via SIGN(x, 9)) act as the weights and prev b as the base value. */
314 int32_t prevr, prevg, prevb;
315 prevr = SIGN(prev->r, 9);
316 prevg = SIGN(prev->g, 9);
317 prevb = SIGN(prev->b, 9);
318
319 if (!centerrg)
320 {
321 if (upperrg)
322 {
323 TEX->r = prevb + ((prevr * (t2.r - t3.r) + prevg * (t1.r - t3.r) + 0x80) >> 8);
324 TEX->g = prevb + ((prevr * (t2.g - t3.g) + prevg * (t1.g - t3.g) + 0x80) >> 8);
325 }
326 else
327 {
328 TEX->r = prevb + ((prevr * (t1.r - t0.r) + prevg * (t2.r - t0.r) + 0x80) >> 8);
329 TEX->g = prevb + ((prevr * (t1.g - t0.g) + prevg * (t2.g - t0.g) + 0x80) >> 8);
330 }
331 }
332 else
333 {
334 invt3r = ~t3.r;
335 invt3g = ~t3.g;
336
337 TEX->r = prevb + ((prevr * (t2.r - t3.r) + prevg * (t1.r - t3.r) + ((invt3r + t0.r) << 6) + 0xc0) >> 8);
338 TEX->g = prevb + ((prevr * (t2.g - t3.g) + prevg * (t1.g - t3.g) + ((invt3g + t0.g) << 6) + 0xc0) >> 8);
339 }
340
341 if (!center)
342 {
343 if (upper)
344 {
345 TEX->b = prevb + ((prevr * (t2.b - t3.b) + prevg * (t1.b - t3.b) + 0x80) >> 8);
346 TEX->a = prevb + ((prevr * (t2.a - t3.a) + prevg * (t1.a - t3.a) + 0x80) >> 8);
347 }
348 else
349 {
350 TEX->b = prevb + ((prevr * (t1.b - t0.b) + prevg * (t2.b - t0.b) + 0x80) >> 8);
351 TEX->a = prevb + ((prevr * (t1.a - t0.a) + prevg * (t2.a - t0.a) + 0x80) >> 8);
352 }
353 }
354 else
355 {
356 invt3b = ~t3.b;
357 invt3a = ~t3.a;
358
359 TEX->b = prevb + ((prevr * (t2.b - t3.b) + prevg * (t1.b - t3.b) + ((invt3b + t0.b) << 6) + 0xc0) >> 8);
360 TEX->a = prevb + ((prevr * (t2.a - t3.a) + prevg * (t1.a - t3.a) + ((invt3a + t0.a) << 6) + 0xc0) >> 8);
361 }
362 }
363 }
364 else
365 {
366
367
368
/* Non-interpolating branch: the k0_tf..k3_tf formulas are applied to t0/t3. */
369 if (convert)
370 {
/* In convert mode no fetch happens; t0 and t3 are both taken from *prev. */
371 t0 = t3 = *prev;
372 t0.r = SIGN(t0.r, 9);
373 t0.g = SIGN(t0.g, 9);
374 t0.b = SIGN(t0.b, 9);
375 t3.r = SIGN(t3.r, 9);
376 t3.g = SIGN(t3.g, 9);
377 t3.b = SIGN(t3.b, 9);
378 }
379 else
380 {
381 if (!state[wid].other_modes.sample_type)
382 fetch_texel_entlut_quadro_nearest(wid, &t0, &t1, &t2, &t3, sss1, sst1, tilenum, upper, upperrg);
383 else if (state[wid].other_modes.en_tlut)
384 fetch_texel_entlut_quadro(wid, &t0, &t1, &t2, &t3, sss1, sdiff, sst1, tdiff, tilenum, upper, upperrg);
385 else
386 fetch_texel_quadro(wid, &t0, &t1, &t2, &t3, sss1, sdiff, sst1, tdiff, tilenum, upper - upperrg);
387 }
388
389
/* upperrg picks the texel supplying r/g (t3 or t0); upper picks the texel supplying b (t3 or t0). */
390 if (upperrg)
391 {
392 if (upper)
393 {
394 TEX->r = t3.b + ((state[wid].k0_tf * t3.g + 0x80) >> 8);
395 TEX->g = t3.b + ((state[wid].k1_tf * t3.r + state[wid].k2_tf * t3.g + 0x80) >> 8);
396 TEX->b = t3.b + ((state[wid].k3_tf * t3.r + 0x80) >> 8);
397 TEX->a = t3.b;
398 }
399 else
400 {
401 TEX->r = t0.b + ((state[wid].k0_tf * t3.g + 0x80) >> 8);
402 TEX->g = t0.b + ((state[wid].k1_tf * t3.r + state[wid].k2_tf * t3.g + 0x80) >> 8);
403 TEX->b = t0.b + ((state[wid].k3_tf * t3.r + 0x80) >> 8);
404 TEX->a = t0.b;
405 }
406 }
407 else
408 {
409 if (upper)
410 {
411 TEX->r = t3.b + ((state[wid].k0_tf * t0.g + 0x80) >> 8);
412 TEX->g = t3.b + ((state[wid].k1_tf * t0.r + state[wid].k2_tf * t0.g + 0x80) >> 8);
413 TEX->b = t3.b + ((state[wid].k3_tf * t0.r + 0x80) >> 8);
414 TEX->a = t3.b;
415 }
416 else
417 {
418 TEX->r = t0.b + ((state[wid].k0_tf * t0.g + 0x80) >> 8);
419 TEX->g = t0.b + ((state[wid].k1_tf * t0.r + state[wid].k2_tf * t0.g + 0x80) >> 8);
420 TEX->b = t0.b + ((state[wid].k3_tf * t0.r + 0x80) >> 8);
421 TEX->a = t0.b;
422 }
423 }
424 }
425
/* Every channel produced on the four-texel path is reduced to 9 bits. */
426 TEX->r &= 0x1ff;
427 TEX->g &= 0x1ff;
428 TEX->b &= 0x1ff;
429 TEX->a &= 0x1ff;
430
431
432 }
433 else
434 {
435
436
437
438
/* Single-texel path: lighter clamp (no fractions) and the plain mask. */
439 tcclamp_cycle_light(wid, &sss1, &sst1, maxs, maxt, tilenum);
440
441 tcmask(wid, &sss1, &sst1, tilenum);
442
443
444
445
446 if (bilerp)
447 {
448 if (!convert)
449 {
450
451 fetch_texel(wid, &t0, sss1, sst1, tilenum);
452
/* Only r and g are masked to 9 bits here; b and a are copied unmasked. */
453 TEX->r = t0.r & 0x1ff;
454 TEX->g = t0.g & 0x1ff;
455 TEX->b = t0.b;
456 TEX->a = t0.a;
457 }
458 else
/* Convert with bilerp on: every channel is set to prev->b, unmasked. */
459 TEX->r = TEX->g = TEX->b = TEX->a = prev->b;
460 }
461 else
462 {
463 if (convert)
464 {
465 t0 = *prev;
466 t0.r = SIGN(t0.r, 9);
467 t0.g = SIGN(t0.g, 9);
468 t0.b = SIGN(t0.b, 9);
469 }
470 else
471 fetch_texel(wid, &t0, sss1, sst1, tilenum);
472
/* Same k0_tf..k3_tf formulas as the four-texel path, with t0 supplying every term. */
473 TEX->r = t0.b + ((state[wid].k0_tf * t0.g + 0x80) >> 8);
474 TEX->g = t0.b + ((state[wid].k1_tf * t0.r + state[wid].k2_tf * t0.g + 0x80) >> 8);
475 TEX->b = t0.b + ((state[wid].k3_tf * t0.r + 0x80) >> 8);
476 TEX->a = t0.b & 0x1ff;
477 TEX->r &= 0x1ff;
478 TEX->g &= 0x1ff;
479 TEX->b &= 0x1ff;
480 }
481 }
482
483 }
484
/*
 * Copy texture-image data into tmem16 for the spans start..end (inclusive).
 *
 * For every span, the function steps along the span, assembles a 64-bit
 * quantity (loadqword) from up to four consecutive 32-bit reads around the
 * current byte address tiptr, and stores it as four 16-bit words into tmem16
 * at the indices produced by get_tmem_idx(). The store layout depends on the
 * tile's format and size.
 *
 * The function sets rdp_pipeline_crashed and returns without loading when
 * more than one span is requested together with ltlut, or when the texture
 * image size is PIXEL_SIZE_4BIT.
 *
 * wid        index into the global state array
 * start,end  first and last span index to process
 * tilenum    tile whose format/size select the TMEM layout
 * coord_quad forwarded unchanged to tc_pipeline_load()
 * ltlut      non-zero selects the TLUT variant (different advance steps, and a
 *            single 16-bit value replicated four times at even byte offsets)
 */
loading_pipeline(uint32_t wid,int start,int end,int tilenum,int coord_quad,int ltlut)485 static void loading_pipeline(uint32_t wid, int start, int end, int tilenum, int coord_quad, int ltlut)
486 {
487
488
/* localdebugmode and cnt are not read anywhere in this function. */
489 int localdebugmode = 0, cnt = 0;
490 int i, j;
491
492 int dsinc, dtinc;
493 dsinc = state[wid].spans_ds;
494 dtinc = state[wid].spans_dt;
495
496 int s, t;
497 int ss, st;
498 int xstart, xend, xendsc;
499 int sss = 0, sst = 0;
500 int ti_index, length;
501
502 uint32_t tmemidx0 = 0, tmemidx1 = 0, tmemidx2 = 0, tmemidx3 = 0;
503 int dswap = 0;
504 uint32_t readval0, readval1, readval2, readval3;
505 uint32_t readidx32;
506 uint64_t loadqword;
507 uint16_t tempshort;
508 int tmem_formatting = 0;
509 uint32_t bit3fl = 0, hibit = 0;
510
/* A TLUT load spanning more than one line is treated as a pipeline crash. */
511 if (end > start && ltlut)
512 {
513 rdp_pipeline_crashed = 1;
514 return;
515 }
516
/* TMEM layout selector: 0 = YUV, 1 = 32-bit RGBA, 2 = everything else. */
517 if (state[wid].tile[tilenum].format == FORMAT_YUV)
518 tmem_formatting = 0;
519 else if (state[wid].tile[tilenum].format == FORMAT_RGBA && state[wid].tile[tilenum].size == PIXEL_SIZE_32BIT)
520 tmem_formatting = 1;
521 else
522 tmem_formatting = 2;
523
/* tiadvance is added to tiptr per iteration; spanadvance is the step of the inner loop counter. */
524 int tiadvance = 0, spanadvance = 0;
525 int tiptr = 0;
526 switch (state[wid].ti_size)
527 {
528 case PIXEL_SIZE_4BIT:
529 rdp_pipeline_crashed = 1;
530 return;
531 break;
532 case PIXEL_SIZE_8BIT:
533 tiadvance = 8;
534 spanadvance = 8;
535 break;
536 case PIXEL_SIZE_16BIT:
537 if (!ltlut)
538 {
539 tiadvance = 8;
540 spanadvance = 4;
541 }
542 else
543 {
544 tiadvance = 2;
545 spanadvance = 1;
546 }
547 break;
548 case PIXEL_SIZE_32BIT:
549 tiadvance = 8;
550 spanadvance = 2;
551 break;
552 }
553
554 for (i = start; i <= end; i++)
555 {
556 xstart = state[wid].span[i].lx;
557 xend = state[wid].span[i].unscrx;
/* xendsc is assigned but not read afterwards. */
558 xendsc = state[wid].span[i].rx;
559 s = state[wid].span[i].s;
560 t = state[wid].span[i].t;
561
/* Byte address of the first source pixel of this span inside the texture image. */
562 ti_index = state[wid].ti_width * i + xend;
563 tiptr = state[wid].ti_address + PIXELS_TO_BYTES(ti_index, state[wid].ti_size);
564
565 length = (xstart - xend + 1) & 0xfff;
566
567 for (j = 0; j < length; j+= spanadvance)
568 {
569 ss = s >> 16;
570 st = t >> 16;
571
572
573
574
575
576
577
578 sss = ss & 0xffff;
579 sst = st & 0xffff;
580
581 tc_pipeline_load(wid, &sss, &sst, tilenum, coord_quad);
582
/* Odd T rows swap the two halves of the quadword in the generic layout (case 2 below). */
583 dswap = sst & 1;
584
585
586 get_tmem_idx(wid, sss, sst, tilenum, &tmemidx0, &tmemidx1, &tmemidx2, &tmemidx3, &bit3fl, &hibit);
587
/* Read four consecutive 32-bit words starting at the even word index at or below tiptr / 4. */
588 readidx32 = (tiptr >> 2) & ~1;
589 RREADIDX32(readval0, readidx32);
590 readidx32++;
591 RREADIDX32(readval1, readidx32);
592 readidx32++;
593 RREADIDX32(readval2, readidx32);
594 readidx32++;
595 RREADIDX32(readval3, readidx32);
596
597
/* Assemble loadqword from the words read, according to tiptr's offset within the 8-byte group. */
/* ltlut is only consulted at even offsets, where one 16-bit value is replicated four times. */
598 switch(tiptr & 7)
599 {
600 case 0:
601 if (!ltlut)
602 loadqword = ((uint64_t)readval0 << 32) | readval1;
603 else
604 {
605 tempshort = readval0 >> 16;
606 loadqword = ((uint64_t)tempshort << 48) | ((uint64_t) tempshort << 32) | ((uint64_t) tempshort << 16) | tempshort;
607 }
608 break;
609 case 1:
610 loadqword = ((uint64_t)readval0 << 40) | ((uint64_t)readval1 << 8) | (readval2 >> 24);
611 break;
612 case 2:
613 if (!ltlut)
614 loadqword = ((uint64_t)readval0 << 48) | ((uint64_t)readval1 << 16) | (readval2 >> 16);
615 else
616 {
617 tempshort = readval0 & 0xffff;
618 loadqword = ((uint64_t)tempshort << 48) | ((uint64_t) tempshort << 32) | ((uint64_t) tempshort << 16) | tempshort;
619 }
620 break;
621 case 3:
622 loadqword = ((uint64_t)readval0 << 56) | ((uint64_t)readval1 << 24) | (readval2 >> 8);
623 break;
624 case 4:
625 if (!ltlut)
626 loadqword = ((uint64_t)readval1 << 32) | readval2;
627 else
628 {
629 tempshort = readval1 >> 16;
630 loadqword = ((uint64_t)tempshort << 48) | ((uint64_t) tempshort << 32) | ((uint64_t) tempshort << 16) | tempshort;
631 }
632 break;
633 case 5:
634 loadqword = ((uint64_t)readval1 << 40) | ((uint64_t)readval2 << 8) | (readval3 >> 24);
635 break;
636 case 6:
637 if (!ltlut)
638 loadqword = ((uint64_t)readval1 << 48) | ((uint64_t)readval2 << 16) | (readval3 >> 16);
639 else
640 {
641 tempshort = readval1 & 0xffff;
642 loadqword = ((uint64_t)tempshort << 48) | ((uint64_t) tempshort << 32) | ((uint64_t) tempshort << 16) | tempshort;
643 }
644 break;
645 case 7:
646 loadqword = ((uint64_t)readval1 << 56) | ((uint64_t)readval2 << 24) | (readval3 >> 8);
647 break;
648 }
649
650
/* Store the quadword into tmem16 using the layout chosen above. */
651 switch(tmem_formatting)
652 {
653 case 0:
/* YUV: bytes 7,5,3,1 of the quadword go to the entries below 0x400, bytes 6,4,2,0 to the entries with bit 0x400 set. */
654 readval0 = (uint32_t)((((loadqword >> 56) & 0xff) << 24) | (((loadqword >> 40) & 0xff) << 16) | (((loadqword >> 24) & 0xff) << 8) | (((loadqword >> 8) & 0xff) << 0));
655 readval1 = (uint32_t)((((loadqword >> 48) & 0xff) << 24) | (((loadqword >> 32) & 0xff) << 16) | (((loadqword >> 16) & 0xff) << 8) | (((loadqword >> 0) & 0xff) << 0));
/* bit3fl selects the index pair: tmemidx2/3 when set, tmemidx0/1 otherwise. */
656 if (bit3fl)
657 {
658 tmem16[tmemidx2 ^ WORD_ADDR_XOR] = (uint16_t)(readval0 >> 16);
659 tmem16[tmemidx3 ^ WORD_ADDR_XOR] = (uint16_t)(readval0 & 0xffff);
660 tmem16[(tmemidx2 | 0x400) ^ WORD_ADDR_XOR] = (uint16_t)(readval1 >> 16);
661 tmem16[(tmemidx3 | 0x400) ^ WORD_ADDR_XOR] = (uint16_t)(readval1 & 0xffff);
662 }
663 else
664 {
665 tmem16[tmemidx0 ^ WORD_ADDR_XOR] = (uint16_t)(readval0 >> 16);
666 tmem16[tmemidx1 ^ WORD_ADDR_XOR] = (uint16_t)(readval0 & 0xffff);
667 tmem16[(tmemidx0 | 0x400) ^ WORD_ADDR_XOR] = (uint16_t)(readval1 >> 16);
668 tmem16[(tmemidx1 | 0x400) ^ WORD_ADDR_XOR] = (uint16_t)(readval1 & 0xffff);
669 }
670 break;
671 case 1:
/* 32-bit RGBA: 16-bit words 3 and 1 of the quadword go below 0x400, words 2 and 0 to the entries with bit 0x400 set. */
672 readval0 = (uint32_t)(((loadqword >> 48) << 16) | ((loadqword >> 16) & 0xffff));
673 readval1 = (uint32_t)((((loadqword >> 32) & 0xffff) << 16) | (loadqword & 0xffff));
674
675 if (bit3fl)
676 {
677 tmem16[tmemidx2 ^ WORD_ADDR_XOR] = (uint16_t)(readval0 >> 16);
678 tmem16[tmemidx3 ^ WORD_ADDR_XOR] = (uint16_t)(readval0 & 0xffff);
679 tmem16[(tmemidx2 | 0x400) ^ WORD_ADDR_XOR] = (uint16_t)(readval1 >> 16);
680 tmem16[(tmemidx3 | 0x400) ^ WORD_ADDR_XOR] = (uint16_t)(readval1 & 0xffff);
681 }
682 else
683 {
684 tmem16[tmemidx0 ^ WORD_ADDR_XOR] = (uint16_t)(readval0 >> 16);
685 tmem16[tmemidx1 ^ WORD_ADDR_XOR] = (uint16_t)(readval0 & 0xffff);
686 tmem16[(tmemidx0 | 0x400) ^ WORD_ADDR_XOR] = (uint16_t)(readval1 >> 16);
687 tmem16[(tmemidx1 | 0x400) ^ WORD_ADDR_XOR] = (uint16_t)(readval1 & 0xffff);
688 }
689 break;
690 case 2:
/* Generic layout: four consecutive 16-bit words; hibit ORs 0x400 into every index, dswap exchanges the upper and lower word pairs. */
691 if (!dswap)
692 {
693 if (!hibit)
694 {
695 tmem16[tmemidx0 ^ WORD_ADDR_XOR] = (uint16_t)(loadqword >> 48);
696 tmem16[tmemidx1 ^ WORD_ADDR_XOR] = (uint16_t)(loadqword >> 32);
697 tmem16[tmemidx2 ^ WORD_ADDR_XOR] = (uint16_t)(loadqword >> 16);
698 tmem16[tmemidx3 ^ WORD_ADDR_XOR] = (uint16_t)(loadqword & 0xffff);
699 }
700 else
701 {
702 tmem16[(tmemidx0 | 0x400) ^ WORD_ADDR_XOR] = (uint16_t)(loadqword >> 48);
703 tmem16[(tmemidx1 | 0x400) ^ WORD_ADDR_XOR] = (uint16_t)(loadqword >> 32);
704 tmem16[(tmemidx2 | 0x400) ^ WORD_ADDR_XOR] = (uint16_t)(loadqword >> 16);
705 tmem16[(tmemidx3 | 0x400) ^ WORD_ADDR_XOR] = (uint16_t)(loadqword & 0xffff);
706 }
707 }
708 else
709 {
710 if (!hibit)
711 {
712 tmem16[tmemidx0 ^ WORD_ADDR_XOR] = (uint16_t)(loadqword >> 16);
713 tmem16[tmemidx1 ^ WORD_ADDR_XOR] = (uint16_t)(loadqword & 0xffff);
714 tmem16[tmemidx2 ^ WORD_ADDR_XOR] = (uint16_t)(loadqword >> 48);
715 tmem16[tmemidx3 ^ WORD_ADDR_XOR] = (uint16_t)(loadqword >> 32);
716 }
717 else
718 {
719 tmem16[(tmemidx0 | 0x400) ^ WORD_ADDR_XOR] = (uint16_t)(loadqword >> 16);
720 tmem16[(tmemidx1 | 0x400) ^ WORD_ADDR_XOR] = (uint16_t)(loadqword & 0xffff);
721 tmem16[(tmemidx2 | 0x400) ^ WORD_ADDR_XOR] = (uint16_t)(loadqword >> 48);
722 tmem16[(tmemidx3 | 0x400) ^ WORD_ADDR_XOR] = (uint16_t)(loadqword >> 32);
723 }
724 }
725 break;
726 }
727
728
/* Step the texture coordinates (low five bits cleared after each step) and the source address. */
729 s = (s + dsinc) & ~0x1f;
730 t = (t + dtinc) & ~0x1f;
731 tiptr += tiadvance;
732 }
733 }
734 }
735
/*
 * Edge walker used by the load commands.
 *
 * Decodes the ten-word lewdata array into y bounds, x edges and s/t start
 * values and steps, fills state[wid].span[] (unscrx, s, t, lx, rx) for the
 * covered scanlines, and then calls loading_pipeline() over the span range
 * yh >> 2 .. yl >> 2.
 *
 * All edge slopes are zero here, so the x edges do not move while walking;
 * the only attribute that advances per scanline is t (by dtde).
 *
 * Several locals (w, flip, dsdy, dtdy, dsde, dxldy, dxhdy, dxmdy, sign_dxhdy,
 * do_offset, xfrac, xstart, length, stickybit) are assigned but never read
 * in this function.
 */
edgewalker_for_loads(uint32_t wid,int32_t * lewdata)736 static void edgewalker_for_loads(uint32_t wid, int32_t* lewdata)
737 {
738 int j = 0;
739 int xleft = 0, xright = 0;
740 int xstart = 0, xend = 0;
741 int s = 0, t = 0, w = 0;
742 int dsdx = 0, dtdx = 0;
743 int dsdy = 0, dtdy = 0;
744 int dsde = 0, dtde = 0;
745 int tilenum = 0, flip = 0;
746 int32_t yl = 0, ym = 0, yh = 0;
747 int32_t xl = 0, xm = 0, xh = 0;
748 int32_t dxldy = 0, dxhdy = 0, dxmdy = 0;
749
/* The command id decides whether this is a TLUT load and whether coord_quad is set (TLUT or block load). */
750 int cmd_id = CMD_ID(lewdata);
751 int ltlut = (cmd_id == CMD_ID_LOAD_TLUT);
752 int coord_quad = ltlut || (cmd_id == CMD_ID_LOAD_BLOCK);
753 flip = 1;
754 state[wid].max_level = 0;
755 tilenum = (lewdata[0] >> 16) & 7;
756
757
/* Y bounds pass through SIGN(x, 14); X edges through SIGN(x, 28). */
758 yl = SIGN(lewdata[0], 14);
759 ym = lewdata[1] >> 16;
760 ym = SIGN(ym, 14);
761 yh = SIGN(lewdata[1], 14);
762
763 xl = SIGN(lewdata[2], 28);
764 xh = SIGN(lewdata[3], 28);
765 xm = SIGN(lewdata[4], 28);
766
767 dxldy = 0;
768 dxhdy = 0;
769 dxmdy = 0;
770
771
/* s comes from the upper half and t from the lower half of lewdata[5]; both end up in the upper 16 bits. */
772 s = lewdata[5] & 0xffff0000;
773 t = (lewdata[5] & 0xffff) << 16;
774 w = 0;
/* dsdx/dtdx combine the halves of lewdata[7] (upper 16 bits of the result) and lewdata[6] (lower 16 bits). */
775 dsdx = (lewdata[7] & 0xffff0000) | ((lewdata[6] >> 16) & 0xffff);
776 dtdx = ((lewdata[7] << 16) & 0xffff0000) | (lewdata[6] & 0xffff);
777 dsde = 0;
778 dtde = (lewdata[9] & 0xffff) << 16;
779 dsdy = 0;
780 dtdy = (lewdata[8] & 0xffff) << 16;
781
/* Per-pixel steps consumed by loading_pipeline(); the low five bits are cleared. */
782 state[wid].spans_ds = dsdx & ~0x1f;
783 state[wid].spans_dt = dtdx & ~0x1f;
784 state[wid].spans_dw = 0;
785
786
787
788
789
790
791 xright = xh & ~0x1;
792 xleft = xm & ~0x1;
793
794 int k = 0;
795
796 int sign_dxhdy = 0;
797
798 int do_offset = 0;
799
800 int xfrac = 0;
801
802
803
804
805
806
/* Stores the current s/t into span[j] with the low ten bits cleared. */
807 #define ADJUST_ATTR_LOAD() \
808 { \
809 state[wid].span[j].s = s & ~0x3ff; \
810 state[wid].span[j].t = t & ~0x3ff; \
811 }
812
813
/* Advances the attributes by one scanline; only t changes. */
814 #define ADDVALUES_LOAD() { \
815 t += dtde; \
816 }
817
818 int32_t maxxmx, minxhx;
819
820 int spix = 0;
/* Walk from the first sub-scanline of yh's scanline to the last sub-scanline of yl's scanline. */
821 int ycur = yh & ~3;
822 int ylfar = yl | 3;
823
824 int valid_y = 1;
825 int length = 0;
826 int32_t xrsc = 0, xlsc = 0, stickybit = 0;
827 int32_t yllimit = yl;
828 int32_t yhlimit = yh;
829
830 xfrac = 0;
831 xend = xright >> 16;
832
833
834 for (k = ycur; k <= ylfar; k++)
835 {
/* From ym onwards the left edge is taken from xl instead of xm. */
836 if (k == ym)
837 xleft = xl & ~1;
838
/* spix is the sub-scanline index (0..3) within the current scanline. */
839 spix = k & 3;
840
/* Only y values in the range 0..0xfff update the span table. */
841 if (!(k & ~0xfff))
842 {
843 j = k >> 2;
844 valid_y = !(k < yhlimit || k >= yllimit);
845
/* Reset the running extremes at the first sub-scanline. */
846 if (spix == 0)
847 {
848 maxxmx = 0;
849 minxhx = 0xfff;
850 }
851
852 xrsc = (xright >> 13) & 0x7ffe;
853
854
855
856 xlsc = (xleft >> 13) & 0x7ffe;
857
/* Track the largest left value and smallest right value over the valid sub-scanlines. */
858 if (valid_y)
859 {
860 maxxmx = (((xlsc >> 3) & 0xfff) > maxxmx) ? (xlsc >> 3) & 0xfff : maxxmx;
861 minxhx = (((xrsc >> 3) & 0xfff) < minxhx) ? (xrsc >> 3) & 0xfff : minxhx;
862 }
863
864 if (spix == 0)
865 {
866 state[wid].span[j].unscrx = xend;
867 ADJUST_ATTR_LOAD();
868 }
869
/* The span's lx/rx are committed on the last sub-scanline. */
870 if (spix == 3)
871 {
872 state[wid].span[j].lx = maxxmx;
873 state[wid].span[j].rx = minxhx;
874
875
876 }
877
878
879 }
880
/* Attributes advance once per scanline, even when k is outside the 0..0xfff range. */
881 if (spix == 3)
882 {
883 ADDVALUES_LOAD();
884 }
885
886
887
888 }
889
890 loading_pipeline(wid, yhlimit >> 2, yllimit >> 2, tilenum, coord_quad, ltlut);
891 }
892
rdp_set_tile_size(uint32_t wid,const uint32_t * args)893 void rdp_set_tile_size(uint32_t wid, const uint32_t* args)
894 {
895 int tilenum = (args[1] >> 24) & 0x7;
896 state[wid].tile[tilenum].sl = (args[0] >> 12) & 0xfff;
897 state[wid].tile[tilenum].tl = (args[0] >> 0) & 0xfff;
898 state[wid].tile[tilenum].sh = (args[1] >> 12) & 0xfff;
899 state[wid].tile[tilenum].th = (args[1] >> 0) & 0xfff;
900
901 calculate_clamp_diffs(&state[wid].tile[tilenum]);
902 }
903
rdp_load_block(uint32_t wid,const uint32_t * args)904 void rdp_load_block(uint32_t wid, const uint32_t* args)
905 {
906 int tilenum = (args[1] >> 24) & 0x7;
907 int sl, sh, tl, dxt;
908
909
910 state[wid].tile[tilenum].sl = sl = ((args[0] >> 12) & 0xfff);
911 state[wid].tile[tilenum].tl = tl = ((args[0] >> 0) & 0xfff);
912 state[wid].tile[tilenum].sh = sh = ((args[1] >> 12) & 0xfff);
913 state[wid].tile[tilenum].th = dxt = ((args[1] >> 0) & 0xfff);
914
915 calculate_clamp_diffs(&state[wid].tile[tilenum]);
916
917 int tlclamped = tl & 0x3ff;
918
919 int32_t lewdata[10];
920
921 lewdata[0] = (args[0] & 0xff000000) | (0x10 << 19) | (tilenum << 16) | ((tlclamped << 2) | 3);
922 lewdata[1] = (((tlclamped << 2) | 3) << 16) | (tlclamped << 2);
923 lewdata[2] = sh << 16;
924 lewdata[3] = sl << 16;
925 lewdata[4] = sh << 16;
926 lewdata[5] = ((sl << 3) << 16) | (tl << 3);
927 lewdata[6] = (dxt & 0xff) << 8;
928 lewdata[7] = ((0x80 >> state[wid].ti_size) << 16) | (dxt >> 8);
929 lewdata[8] = 0x20;
930 lewdata[9] = 0x20;
931
932 edgewalker_for_loads(wid, lewdata);
933
934 }
935
tile_tlut_common_cs_decoder(uint32_t wid,const uint32_t * args)936 static void tile_tlut_common_cs_decoder(uint32_t wid, const uint32_t* args)
937 {
938 int tilenum = (args[1] >> 24) & 0x7;
939 int sl, tl, sh, th;
940
941
942 state[wid].tile[tilenum].sl = sl = ((args[0] >> 12) & 0xfff);
943 state[wid].tile[tilenum].tl = tl = ((args[0] >> 0) & 0xfff);
944 state[wid].tile[tilenum].sh = sh = ((args[1] >> 12) & 0xfff);
945 state[wid].tile[tilenum].th = th = ((args[1] >> 0) & 0xfff);
946
947 calculate_clamp_diffs(&state[wid].tile[tilenum]);
948
949
950 int32_t lewdata[10];
951
952 lewdata[0] = (args[0] & 0xff000000) | (0x10 << 19) | (tilenum << 16) | (th | 3);
953 lewdata[1] = ((th | 3) << 16) | (tl);
954 lewdata[2] = ((sh >> 2) << 16) | ((sh & 3) << 14);
955 lewdata[3] = ((sl >> 2) << 16) | ((sl & 3) << 14);
956 lewdata[4] = ((sh >> 2) << 16) | ((sh & 3) << 14);
957 lewdata[5] = ((sl << 3) << 16) | (tl << 3);
958 lewdata[6] = 0;
959 lewdata[7] = (0x200 >> state[wid].ti_size) << 16;
960 lewdata[8] = 0x20;
961 lewdata[9] = 0x20;
962
963 edgewalker_for_loads(wid, lewdata);
964 }
965
/*
 * Handle a "load TLUT" command. All decoding is shared with "load tile";
 * the edge walker tells the two apart from the command id inside args[0].
 */
void rdp_load_tlut(uint32_t wid, const uint32_t* args)
{
    tile_tlut_common_cs_decoder(wid, args);
}
970
/*
 * Handle a "load tile" command by delegating to the decoder shared with
 * "load TLUT".
 */
void rdp_load_tile(uint32_t wid, const uint32_t* args)
{
    tile_tlut_common_cs_decoder(wid, args);
}
975
rdp_set_tile(uint32_t wid,const uint32_t * args)976 void rdp_set_tile(uint32_t wid, const uint32_t* args)
977 {
978 int tilenum = (args[1] >> 24) & 0x7;
979
980 state[wid].tile[tilenum].format = (args[0] >> 21) & 0x7;
981 state[wid].tile[tilenum].size = (args[0] >> 19) & 0x3;
982 state[wid].tile[tilenum].line = (args[0] >> 9) & 0x1ff;
983 state[wid].tile[tilenum].tmem = (args[0] >> 0) & 0x1ff;
984 state[wid].tile[tilenum].palette = (args[1] >> 20) & 0xf;
985 state[wid].tile[tilenum].ct = (args[1] >> 19) & 0x1;
986 state[wid].tile[tilenum].mt = (args[1] >> 18) & 0x1;
987 state[wid].tile[tilenum].mask_t = (args[1] >> 14) & 0xf;
988 state[wid].tile[tilenum].shift_t = (args[1] >> 10) & 0xf;
989 state[wid].tile[tilenum].cs = (args[1] >> 9) & 0x1;
990 state[wid].tile[tilenum].ms = (args[1] >> 8) & 0x1;
991 state[wid].tile[tilenum].mask_s = (args[1] >> 4) & 0xf;
992 state[wid].tile[tilenum].shift_s = (args[1] >> 0) & 0xf;
993
994 calculate_tile_derivs(&state[wid].tile[tilenum]);
995 }
996
rdp_set_texture_image(uint32_t wid,const uint32_t * args)997 void rdp_set_texture_image(uint32_t wid, const uint32_t* args)
998 {
999 state[wid].ti_format = (args[0] >> 21) & 0x7;
1000 state[wid].ti_size = (args[0] >> 19) & 0x3;
1001 state[wid].ti_width = (args[0] & 0x3ff) + 1;
1002 state[wid].ti_address = args[1] & 0x0ffffff;
1003
1004
1005
1006 }
1007
rdp_set_convert(uint32_t wid,const uint32_t * args)1008 void rdp_set_convert(uint32_t wid, const uint32_t* args)
1009 {
1010 int32_t k0 = (args[0] >> 13) & 0x1ff;
1011 int32_t k1 = (args[0] >> 4) & 0x1ff;
1012 int32_t k2 = ((args[0] & 0xf) << 5) | ((args[1] >> 27) & 0x1f);
1013 int32_t k3 = (args[1] >> 18) & 0x1ff;
1014 state[wid].k0_tf = (SIGN(k0, 9) << 1) + 1;
1015 state[wid].k1_tf = (SIGN(k1, 9) << 1) + 1;
1016 state[wid].k2_tf = (SIGN(k2, 9) << 1) + 1;
1017 state[wid].k3_tf = (SIGN(k3, 9) << 1) + 1;
1018 state[wid].k4 = (args[1] >> 9) & 0x1ff;
1019 state[wid].k5 = args[1] & 0x1ff;
1020 }
1021
/*
 * Initialise the texture unit's lookup tables by running the TMEM and
 * texture-coordinate table initialisers, in that order.
 */
static void tex_init_lut(void)
{
    tmem_init_lut();
    tcoord_init_lut();
}
1027
tex_init(uint32_t wid)1028 static void tex_init(uint32_t wid)
1029 {
1030 int i;
1031 tcoord_init(wid);
1032
1033 for (i = 0; i < 8; i++)
1034 {
1035 calculate_tile_derivs(&state[wid].tile[i]);
1036 calculate_clamp_diffs(&state[wid].tile[i]);
1037 }
1038 }
1039