tcmask(uint32_t wid,int32_t * S,int32_t * T,int32_t num)1 static STRICTINLINE void tcmask(uint32_t wid, int32_t* S, int32_t* T, int32_t num)
2 {
3     int32_t wrap;
4 
5 
6 
7     if (state[wid].tile[num].mask_s)
8     {
9         if (state[wid].tile[num].ms)
10         {
11             wrap = *S >> state[wid].tile[num].f.masksclamped;
12             wrap &= 1;
13             *S ^= (-wrap);
14         }
15         *S &= maskbits_table[state[wid].tile[num].mask_s];
16     }
17 
18     if (state[wid].tile[num].mask_t)
19     {
20         if (state[wid].tile[num].mt)
21         {
22             wrap = *T >> state[wid].tile[num].f.masktclamped;
23             wrap &= 1;
24             *T ^= (-wrap);
25         }
26 
27         *T &= maskbits_table[state[wid].tile[num].mask_t];
28     }
29 }
30 
31 
tcmask_coupled(uint32_t wid,int32_t * S,int32_t * sdiff,int32_t * T,int32_t * tdiff,int32_t num)32 static STRICTINLINE void tcmask_coupled(uint32_t wid, int32_t* S, int32_t* sdiff, int32_t* T, int32_t* tdiff, int32_t num)
33 {
34     int32_t wrap;
35     int32_t maskbits;
36     int32_t wrapthreshold;
37 
38 
39     if (state[wid].tile[num].mask_s)
40     {
41         maskbits = maskbits_table[state[wid].tile[num].mask_s];
42 
43         if (state[wid].tile[num].ms)
44         {
45             wrapthreshold = state[wid].tile[num].f.masksclamped;
46 
47             wrap = (*S >> wrapthreshold) & 1;
48             *S ^= (-wrap);
49             *S &= maskbits;
50 
51 
52             if (((*S - wrap) & maskbits) == maskbits)
53                 *sdiff = 0;
54             else
55                 *sdiff = 1 - (wrap << 1);
56         }
57         else
58         {
59             *S &= maskbits;
60             if (*S == maskbits)
61                 *sdiff = -(*S);
62             else
63                 *sdiff = 1;
64         }
65     }
66     else
67         *sdiff = 1;
68 
69     if (state[wid].tile[num].mask_t)
70     {
71         maskbits = maskbits_table[state[wid].tile[num].mask_t];
72 
73         if (state[wid].tile[num].mt)
74         {
75             wrapthreshold = state[wid].tile[num].f.masktclamped;
76 
77             wrap = (*T >> wrapthreshold) & 1;
78             *T ^= (-wrap);
79             *T &= maskbits;
80 
81             if (((*T - wrap) & maskbits) == maskbits)
82                 *tdiff = 0;
83             else
84                 *tdiff = 1 - (wrap << 1);
85         }
86         else
87         {
88             *T &= maskbits;
89             if (*T == maskbits)
90                 *tdiff = -(*T & 0xff);
91             else
92                 *tdiff = 1;
93         }
94     }
95     else
96         *tdiff = 1;
97 }
98 
99 
calculate_clamp_diffs(struct tile * t)100 static INLINE void calculate_clamp_diffs(struct tile* t)
101 {
102     t->f.clampdiffs = ((t->sh >> 2) - (t->sl >> 2)) & 0x3ff;
103     t->f.clampdifft = ((t->th >> 2) - (t->tl >> 2)) & 0x3ff;
104 }
105 
106 
calculate_tile_derivs(struct tile * t)107 static INLINE void calculate_tile_derivs(struct tile* t)
108 {
109     t->f.clampens = t->cs || !t->mask_s;
110     t->f.clampent = t->ct || !t->mask_t;
111     t->f.masksclamped = t->mask_s <= 10 ? t->mask_s : 10;
112     t->f.masktclamped = t->mask_t <= 10 ? t->mask_t : 10;
113     t->f.notlutswitch = (t->format << 2) | t->size;
114     t->f.tlutswitch = (t->size << 2) | ((t->format + 2) & 3);
115 
116     if (t->format < 5)
117     {
118         t->f.notlutswitch = (t->format << 2) | t->size;
119         t->f.tlutswitch = (t->size << 2) | ((t->format + 2) & 3);
120     }
121     else
122     {
123         t->f.notlutswitch = 0x10 | t->size;
124         t->f.tlutswitch = (t->size << 2) | 2;
125     }
126 }
127 
get_texel1_1cycle(uint32_t wid,int32_t * s1,int32_t * t1,int32_t s,int32_t t,int32_t w,int32_t dsinc,int32_t dtinc,int32_t dwinc,int32_t scanline,struct spansigs * sigs)128 static STRICTINLINE void get_texel1_1cycle(uint32_t wid, int32_t* s1, int32_t* t1, int32_t s, int32_t t, int32_t w, int32_t dsinc, int32_t dtinc, int32_t dwinc, int32_t scanline, struct spansigs* sigs)
129 {
130     int32_t nexts, nextt, nextsw;
131 
132     if (!sigs->endspan || !sigs->longspan || !state[wid].span[scanline + 1].validline)
133     {
134 
135 
136         nextsw = (w + dwinc) >> 16;
137         nexts = (s + dsinc) >> 16;
138         nextt = (t + dtinc) >> 16;
139     }
140     else
141     {
142 
143 
144 
145 
146 
147 
148 
149         int32_t nextscan = scanline + 1;
150         nextt = state[wid].span[nextscan].t >> 16;
151         nexts = state[wid].span[nextscan].s >> 16;
152         nextsw = state[wid].span[nextscan].w >> 16;
153     }
154 
155     state[wid].tcdiv_ptr(nexts, nextt, nextsw, s1, t1);
156 }
157 
/*
 * Run one texture-pipeline cycle and write the resulting texel colour to TEX.
 *
 *   wid      - index into the global `state` array
 *   TEX      - output colour; every channel is written
 *   prev     - colour from the previous cycle; read only when `convert` is
 *              active (other_modes.convert_one set and cycle != 0)
 *   SSS, SST - input texture coordinates
 *   tilenum  - tile index
 *   cycle    - zero selects bi_lerp0, non-zero selects bi_lerp1
 *
 * Two main paths exist:
 *   - sample_type or en_tlut set: four texels (t0..t3) are fetched and either
 *     interpolated (bilerp) or run through the k0_tf..k3_tf conversion.
 *   - otherwise: a single texel is fetched with fetch_texel.
 */
static STRICTINLINE void texture_pipeline_cycle(uint32_t wid, struct color* TEX, struct color* prev, int32_t SSS, int32_t SST, uint32_t tilenum, uint32_t cycle)
{
    int32_t maxs, maxt, invt3r, invt3g, invt3b, invt3a;
    int32_t sfrac, tfrac, invsf, invtf, sfracrg, invsfrg;
    int upper, upperrg, center, centerrg;

    /* Per-cycle mode selection. */
    int bilerp = cycle ? state[wid].other_modes.bi_lerp1 : state[wid].other_modes.bi_lerp0;
    int convert = state[wid].other_modes.convert_one && cycle;
    struct color t0, t1, t2, t3;
    int sss1, sst1, sdiff, tdiff;

    sss1 = SSS;
    sst1 = SST;

    /* maxs/maxt are passed by pointer here and consumed by the clamp calls
     * below - presumably filled in by tcshift_cycle (defined elsewhere). */
    tcshift_cycle(wid, &sss1, &sst1, &maxs, &maxt, tilenum);

    /* Rebase the coordinates against the tile's sl / tl. */
    sss1 = TRELATIVE(sss1, state[wid].tile[tilenum].sl);
    sst1 = TRELATIVE(sst1, state[wid].tile[tilenum].tl);

    if (state[wid].other_modes.sample_type || state[wid].other_modes.en_tlut)
    {
        /* Low 5 bits of each coordinate are the fractional part. */
        sfrac = sss1 & 0x1f;
        tfrac = sst1 & 0x1f;

        tcclamp_cycle(wid, &sss1, &sst1, &sfrac, &tfrac, maxs, maxt, tilenum);

        tcmask_coupled(wid, &sss1, &sdiff, &sst1, &tdiff, tilenum);

        /* Bit 5 of the fraction sum: selects between the t3-anchored and the
         * t0-anchored formulas below. */
        upper = (sfrac + tfrac) & 0x20;

        if (state[wid].tile[tilenum].format == FORMAT_YUV)
        {
            /* YUV tiles use a separate S fraction for the R/G channels:
             * sfrac halved, with bit 0 of the S coordinate moved to bit 4. */
            sfracrg = (sfrac >> 1) | ((sss1 & 1) << 4);

            upperrg = (sfracrg + tfrac) & 0x20;
        }
        else
        {
            upperrg = upper;
            sfracrg = sfrac;
        }

        if (bilerp)
        {
            /* Pick the four-texel fetch variant from sample_type / en_tlut. */
            if (!state[wid].other_modes.sample_type)
                fetch_texel_entlut_quadro_nearest(wid, &t0, &t1, &t2, &t3, sss1, sst1, tilenum, upper, upperrg);
            else if (state[wid].other_modes.en_tlut)
                fetch_texel_entlut_quadro(wid, &t0, &t1, &t2, &t3, sss1, sdiff, sst1, tdiff, tilenum, upper, upperrg);
            else
                fetch_texel_quadro(wid, &t0, &t1, &t2, &t3, sss1, sdiff, sst1, tdiff, tilenum, upper - upperrg);

            /* The "center" formulas apply only with mid_texel enabled and
             * both fractions exactly 0x10. */
            if (!state[wid].other_modes.mid_texel)
                center = centerrg = 0;
            else
            {

                center = (sfrac == 0x10 && tfrac == 0x10);
                centerrg = (sfracrg == 0x10 && tfrac == 0x10);
            }

            if (!convert)
            {
                invtf = 0x20 - tfrac;

                /* R and G use the "rg" fraction/selectors. */
                if (!centerrg)
                {
                    if (upperrg)
                    {
                        /* Anchor on t3, weights are the inverted fractions. */
                        invsfrg = 0x20 - sfracrg;

                        TEX->r = t3.r + ((invsfrg * (t2.r - t3.r) + invtf * (t1.r - t3.r) + 0x10) >> 5);
                        TEX->g = t3.g + ((invsfrg * (t2.g - t3.g) + invtf * (t1.g - t3.g) + 0x10) >> 5);
                    }
                    else
                    {
                        /* Anchor on t0, weights are the fractions. */
                        TEX->r = t0.r + ((sfracrg * (t1.r - t0.r) + tfrac * (t2.r - t0.r) + 0x10) >> 5);
                        TEX->g = t0.g + ((sfracrg * (t1.g - t0.g) + tfrac * (t2.g - t0.g) + 0x10) >> 5);
                    }
                }
                else
                {
                    /* Center case: all four texels contribute. */
                    invt3r = ~t3.r;
                    invt3g = ~t3.g;


                    TEX->r = t3.r + ((((t1.r + t2.r) << 6) - (t3.r << 7) + ((invt3r + t0.r) << 6) + 0xc0) >> 8);
                    TEX->g = t3.g + ((((t1.g + t2.g) << 6) - (t3.g << 7) + ((invt3g + t0.g) << 6) + 0xc0) >> 8);
                }

                /* B and A use the plain fraction/selectors. */
                if (!center)
                {
                    if (upper)
                    {
                        invsf = 0x20 - sfrac;

                        TEX->b = t3.b + ((invsf * (t2.b - t3.b) + invtf * (t1.b - t3.b) + 0x10) >> 5);
                        TEX->a = t3.a + ((invsf * (t2.a - t3.a) + invtf * (t1.a - t3.a) + 0x10) >> 5);
                    }
                    else
                    {
                        TEX->b = t0.b + ((sfrac * (t1.b - t0.b) + tfrac * (t2.b - t0.b) + 0x10) >> 5);
                        TEX->a = t0.a + ((sfrac * (t1.a - t0.a) + tfrac * (t2.a - t0.a) + 0x10) >> 5);
                    }
                }
                else
                {
                    invt3b = ~t3.b;
                    invt3a = ~t3.a;

                    TEX->b = t3.b + ((((t1.b + t2.b) << 6) - (t3.b << 7) + ((invt3b + t0.b) << 6) + 0xc0) >> 8);
                    TEX->a = t3.a + ((((t1.a + t2.a) << 6) - (t3.a << 7) + ((invt3a + t0.a) << 6) + 0xc0) >> 8);
                }
            }
            else
            {
                /* Convert mode: the previous cycle's r and g (through
                 * SIGN(x, 9)) replace the fractions as weights, and its b is
                 * the base value of every channel. */
                int32_t prevr, prevg, prevb;
                prevr = SIGN(prev->r, 9);
                prevg = SIGN(prev->g, 9);
                prevb = SIGN(prev->b, 9);

                if (!centerrg)
                {
                    if (upperrg)
                    {
                        TEX->r = prevb + ((prevr * (t2.r - t3.r) + prevg * (t1.r - t3.r) + 0x80) >> 8);
                        TEX->g = prevb + ((prevr * (t2.g - t3.g) + prevg * (t1.g - t3.g) + 0x80) >> 8);
                    }
                    else
                    {
                        TEX->r = prevb + ((prevr * (t1.r - t0.r) + prevg * (t2.r - t0.r) + 0x80) >> 8);
                        TEX->g = prevb + ((prevr * (t1.g - t0.g) + prevg * (t2.g - t0.g) + 0x80) >> 8);
                    }
                }
                else
                {
                    invt3r = ~t3.r;
                    invt3g = ~t3.g;

                    TEX->r = prevb + ((prevr * (t2.r - t3.r) + prevg * (t1.r - t3.r) + ((invt3r + t0.r) << 6) + 0xc0) >> 8);
                    TEX->g = prevb + ((prevr * (t2.g - t3.g) + prevg * (t1.g - t3.g) + ((invt3g + t0.g) << 6) + 0xc0) >> 8);
                }

                if (!center)
                {
                    if (upper)
                    {
                        TEX->b = prevb + ((prevr * (t2.b - t3.b) + prevg * (t1.b - t3.b) + 0x80) >> 8);
                        TEX->a = prevb + ((prevr * (t2.a - t3.a) + prevg * (t1.a - t3.a) + 0x80) >> 8);
                    }
                    else
                    {
                        TEX->b = prevb + ((prevr * (t1.b - t0.b) + prevg * (t2.b - t0.b) + 0x80) >> 8);
                        TEX->a = prevb + ((prevr * (t1.a - t0.a) + prevg * (t2.a - t0.a) + 0x80) >> 8);
                    }
                }
                else
                {
                    invt3b = ~t3.b;
                    invt3a = ~t3.a;

                    TEX->b = prevb + ((prevr * (t2.b - t3.b) + prevg * (t1.b - t3.b) + ((invt3b + t0.b) << 6) + 0xc0) >> 8);
                    TEX->a = prevb + ((prevr * (t2.a - t3.a) + prevg * (t1.a - t3.a) + ((invt3a + t0.a) << 6) + 0xc0) >> 8);
                }
            }
        }
        else
        {
            /* No interpolation: combine texel channels with k0_tf..k3_tf. */
            if (convert)
            {
                /* The source texels are the previous cycle's colour with
                 * r/g/b passed through SIGN(x, 9); no fetch is done. */
                t0 = t3 = *prev;
                t0.r = SIGN(t0.r, 9);
                t0.g = SIGN(t0.g, 9);
                t0.b = SIGN(t0.b, 9);
                t3.r = SIGN(t3.r, 9);
                t3.g = SIGN(t3.g, 9);
                t3.b = SIGN(t3.b, 9);
            }
            else
            {
                if (!state[wid].other_modes.sample_type)
                    fetch_texel_entlut_quadro_nearest(wid, &t0, &t1, &t2, &t3, sss1, sst1, tilenum, upper, upperrg);
                else if (state[wid].other_modes.en_tlut)
                    fetch_texel_entlut_quadro(wid, &t0, &t1, &t2, &t3, sss1, sdiff, sst1, tdiff, tilenum, upper, upperrg);
                else
                    fetch_texel_quadro(wid, &t0, &t1, &t2, &t3, sss1, sdiff, sst1, tdiff, tilenum, upper - upperrg);
            }

            /* upperrg picks which texel (t3 or t0) supplies the r/g inputs;
             * upper picks which texel supplies the b base value. */
            if (upperrg)
            {
                if (upper)
                {
                    TEX->r = t3.b + ((state[wid].k0_tf * t3.g + 0x80) >> 8);
                    TEX->g = t3.b + ((state[wid].k1_tf * t3.r + state[wid].k2_tf * t3.g + 0x80) >> 8);
                    TEX->b = t3.b + ((state[wid].k3_tf * t3.r + 0x80) >> 8);
                    TEX->a = t3.b;
                }
                else
                {
                    TEX->r = t0.b + ((state[wid].k0_tf * t3.g + 0x80) >> 8);
                    TEX->g = t0.b + ((state[wid].k1_tf * t3.r + state[wid].k2_tf * t3.g + 0x80) >> 8);
                    TEX->b = t0.b + ((state[wid].k3_tf * t3.r + 0x80) >> 8);
                    TEX->a = t0.b;
                }
            }
            else
            {
                if (upper)
                {
                    TEX->r = t3.b + ((state[wid].k0_tf * t0.g + 0x80) >> 8);
                    TEX->g = t3.b + ((state[wid].k1_tf * t0.r + state[wid].k2_tf * t0.g + 0x80) >> 8);
                    TEX->b = t3.b + ((state[wid].k3_tf * t0.r + 0x80) >> 8);
                    TEX->a = t3.b;
                }
                else
                {
                    TEX->r = t0.b + ((state[wid].k0_tf * t0.g + 0x80) >> 8);
                    TEX->g = t0.b + ((state[wid].k1_tf * t0.r + state[wid].k2_tf * t0.g + 0x80) >> 8);
                    TEX->b = t0.b + ((state[wid].k3_tf * t0.r + 0x80) >> 8);
                    TEX->a = t0.b;
                }
            }
        }

        /* Every channel of this path is truncated to 9 bits. */
        TEX->r &= 0x1ff;
        TEX->g &= 0x1ff;
        TEX->b &= 0x1ff;
        TEX->a &= 0x1ff;


    }
    else
    {
        /* Single-texel path: no fractions, no neighbour offsets. */
        tcclamp_cycle_light(wid, &sss1, &sst1, maxs, maxt, tilenum);

        tcmask(wid, &sss1, &sst1, tilenum);

        if (bilerp)
        {
            if (!convert)
            {

                fetch_texel(wid, &t0, sss1, sst1, tilenum);

                /* Only r and g are masked to 9 bits here; b and a are
                 * stored as fetched. */
                TEX->r = t0.r & 0x1ff;
                TEX->g = t0.g & 0x1ff;
                TEX->b = t0.b;
                TEX->a = t0.a;
            }
            else
                /* All four channels take the previous cycle's b, unmasked. */
                TEX->r = TEX->g = TEX->b = TEX->a = prev->b;
        }
        else
        {
            if (convert)
            {
                t0 = *prev;
                t0.r = SIGN(t0.r, 9);
                t0.g = SIGN(t0.g, 9);
                t0.b = SIGN(t0.b, 9);
            }
            else
                fetch_texel(wid, &t0, sss1, sst1, tilenum);

            /* Same k0_tf..k3_tf combination as above, using t0 only. */
            TEX->r = t0.b + ((state[wid].k0_tf * t0.g + 0x80) >> 8);
            TEX->g = t0.b + ((state[wid].k1_tf * t0.r + state[wid].k2_tf * t0.g + 0x80) >> 8);
            TEX->b = t0.b + ((state[wid].k3_tf * t0.r + 0x80) >> 8);
            TEX->a = t0.b & 0x1ff;
            TEX->r &= 0x1ff;
            TEX->g &= 0x1ff;
            TEX->b &= 0x1ff;
        }
    }

}
484 
/*
 * Copy texture data for spans [start, end] from the texture image (read via
 * RREADIDX32) into tmem16.
 *
 *   wid        - index into the global `state` array
 *   start, end - inclusive range of span indices to process
 *   tilenum    - destination tile
 *   coord_quad - forwarded unchanged to tc_pipeline_load
 *   ltlut      - non-zero for a TLUT load: a single 16-bit value is
 *                replicated into all four 16-bit lanes of each 64-bit word
 *                (on even byte offsets)
 *
 * Sets rdp_pipeline_crashed and returns without loading anything when a TLUT
 * load covers more than one span, or when the texture image size is
 * PIXEL_SIZE_4BIT.
 */
static void loading_pipeline(uint32_t wid, int start, int end, int tilenum, int coord_quad, int ltlut)
{

    /* NOTE(review): localdebugmode, cnt and xendsc are never read. */
    int localdebugmode = 0, cnt = 0;
    int i, j;

    int dsinc, dtinc;
    dsinc = state[wid].spans_ds;
    dtinc = state[wid].spans_dt;

    int s, t;
    int ss, st;
    int xstart, xend, xendsc;
    int sss = 0, sst = 0;
    int ti_index, length;

    uint32_t tmemidx0 = 0, tmemidx1 = 0, tmemidx2 = 0, tmemidx3 = 0;
    int dswap = 0;
    uint32_t readval0, readval1, readval2, readval3;
    uint32_t readidx32;
    uint64_t loadqword;
    uint16_t tempshort;
    int tmem_formatting = 0;
    uint32_t bit3fl = 0, hibit = 0;

    if (end > start && ltlut)
    {
        rdp_pipeline_crashed = 1;
        return;
    }

    /* Storage layout selector:
     *   0 - YUV tile
     *   1 - RGBA tile with 32-bit texels
     *   2 - everything else */
    if (state[wid].tile[tilenum].format == FORMAT_YUV)
        tmem_formatting = 0;
    else if (state[wid].tile[tilenum].format == FORMAT_RGBA && state[wid].tile[tilenum].size == PIXEL_SIZE_32BIT)
        tmem_formatting = 1;
    else
        tmem_formatting = 2;

    /* tiadvance:   bytes added to the source pointer per inner iteration.
     * spanadvance: amount the span counter j advances per inner iteration.
     * NOTE(review): there is no default case; for any other ti_size value
     * both stay 0 and the inner loop below could not advance - confirm
     * ti_size is limited to the four values handled here. */
    int tiadvance = 0, spanadvance = 0;
    int tiptr = 0;
    switch (state[wid].ti_size)
    {
    case PIXEL_SIZE_4BIT:
        rdp_pipeline_crashed = 1;
        return;
        break;
    case PIXEL_SIZE_8BIT:
        tiadvance = 8;
        spanadvance = 8;
        break;
    case PIXEL_SIZE_16BIT:
        if (!ltlut)
        {
            tiadvance = 8;
            spanadvance = 4;
        }
        else
        {
            tiadvance = 2;
            spanadvance = 1;
        }
        break;
    case PIXEL_SIZE_32BIT:
        tiadvance = 8;
        spanadvance = 2;
        break;
    }

    for (i = start; i <= end; i++)
    {
        xstart = state[wid].span[i].lx;
        xend = state[wid].span[i].unscrx;
        xendsc = state[wid].span[i].rx;
        s = state[wid].span[i].s;
        t = state[wid].span[i].t;

        /* Byte address of the first source pixel of this span. */
        ti_index = state[wid].ti_width * i + xend;
        tiptr = state[wid].ti_address + PIXELS_TO_BYTES(ti_index, state[wid].ti_size);

        length = (xstart - xend + 1) & 0xfff;

        for (j = 0; j < length; j+= spanadvance)
        {
            ss = s >> 16;
            st = t >> 16;

            sss = ss & 0xffff;
            sst = st & 0xffff;

            tc_pipeline_load(wid, &sss, &sst, tilenum, coord_quad);

            /* Odd T (after tc_pipeline_load) swaps the two 32-bit halves in
             * the tmem_formatting == 2 store below. */
            dswap = sst & 1;


            get_tmem_idx(wid, sss, sst, tilenum, &tmemidx0, &tmemidx1, &tmemidx2, &tmemidx3, &bit3fl, &hibit);

            /* Read four consecutive 32-bit words starting at the even word
             * index that contains tiptr. */
            readidx32 = (tiptr >> 2) & ~1;
            RREADIDX32(readval0, readidx32);
            readidx32++;
            RREADIDX32(readval1, readidx32);
            readidx32++;
            RREADIDX32(readval2, readidx32);
            readidx32++;
            RREADIDX32(readval3, readidx32);


            /* Assemble the 64-bit load word beginning at byte offset
             * (tiptr & 7) within those words.  For TLUT loads on even
             * offsets, one 16-bit value is replicated into all four lanes
             * instead; odd offsets do not check ltlut. */
            switch(tiptr & 7)
            {
            case 0:
                if (!ltlut)
                    loadqword = ((uint64_t)readval0 << 32) | readval1;
                else
                {
                    tempshort = readval0 >> 16;
                    loadqword = ((uint64_t)tempshort << 48) | ((uint64_t) tempshort << 32) | ((uint64_t) tempshort << 16) | tempshort;
                }
                break;
            case 1:
                loadqword = ((uint64_t)readval0 << 40) | ((uint64_t)readval1 << 8) | (readval2 >> 24);
                break;
            case 2:
                if (!ltlut)
                    loadqword = ((uint64_t)readval0 << 48) | ((uint64_t)readval1 << 16) | (readval2 >> 16);
                else
                {
                    tempshort = readval0 & 0xffff;
                    loadqword = ((uint64_t)tempshort << 48) | ((uint64_t) tempshort << 32) | ((uint64_t) tempshort << 16) | tempshort;
                }
                break;
            case 3:
                loadqword = ((uint64_t)readval0 << 56) | ((uint64_t)readval1 << 24) | (readval2 >> 8);
                break;
            case 4:
                if (!ltlut)
                    loadqword = ((uint64_t)readval1 << 32) | readval2;
                else
                {
                    tempshort = readval1 >> 16;
                    loadqword = ((uint64_t)tempshort << 48) | ((uint64_t) tempshort << 32) | ((uint64_t) tempshort << 16) | tempshort;
                }
                break;
            case 5:
                loadqword = ((uint64_t)readval1 << 40) | ((uint64_t)readval2 << 8) | (readval3 >> 24);
                break;
            case 6:
                if (!ltlut)
                    loadqword = ((uint64_t)readval1 << 48) | ((uint64_t)readval2 << 16) | (readval3 >> 16);
                else
                {
                    tempshort = readval1 & 0xffff;
                    loadqword = ((uint64_t)tempshort << 48) | ((uint64_t) tempshort << 32) | ((uint64_t) tempshort << 16) | tempshort;
                }
                break;
            case 7:
                loadqword = ((uint64_t)readval1 << 56) | ((uint64_t)readval2 << 24) | (readval3 >> 8);
                break;
            }


            switch(tmem_formatting)
            {
            case 0:
                /* De-interleave bytes: readval0 collects the bytes at bit
                 * offsets 56/40/24/8, readval1 those at 48/32/16/0.  readval0
                 * goes to the computed indices, readval1 to the same indices
                 * with bit 0x400 set.  bit3fl selects the tmemidx2/3 pair
                 * instead of tmemidx0/1. */
                readval0 = (uint32_t)((((loadqword >> 56) & 0xff) << 24) | (((loadqword >> 40) & 0xff) << 16) | (((loadqword >> 24) & 0xff) << 8) | (((loadqword >> 8) & 0xff) << 0));
                readval1 = (uint32_t)((((loadqword >> 48) & 0xff) << 24) | (((loadqword >> 32) & 0xff) << 16) | (((loadqword >> 16) & 0xff) << 8) | (((loadqword >> 0) & 0xff) << 0));
                if (bit3fl)
                {
                    tmem16[tmemidx2 ^ WORD_ADDR_XOR] = (uint16_t)(readval0 >> 16);
                    tmem16[tmemidx3 ^ WORD_ADDR_XOR] = (uint16_t)(readval0 & 0xffff);
                    tmem16[(tmemidx2 | 0x400) ^ WORD_ADDR_XOR] = (uint16_t)(readval1 >> 16);
                    tmem16[(tmemidx3 | 0x400) ^ WORD_ADDR_XOR] = (uint16_t)(readval1 & 0xffff);
                }
                else
                {
                    tmem16[tmemidx0 ^ WORD_ADDR_XOR] = (uint16_t)(readval0 >> 16);
                    tmem16[tmemidx1 ^ WORD_ADDR_XOR] = (uint16_t)(readval0 & 0xffff);
                    tmem16[(tmemidx0 | 0x400) ^ WORD_ADDR_XOR] = (uint16_t)(readval1 >> 16);
                    tmem16[(tmemidx1 | 0x400) ^ WORD_ADDR_XOR] = (uint16_t)(readval1 & 0xffff);
                }
                break;
            case 1:
                /* De-interleave 16-bit lanes: readval0 collects the lanes at
                 * bit offsets 48 and 16, readval1 those at 32 and 0.  The
                 * store pattern is the same as in case 0. */
                readval0 = (uint32_t)(((loadqword >> 48) << 16) | ((loadqword >> 16) & 0xffff));
                readval1 = (uint32_t)((((loadqword >> 32) & 0xffff) << 16) | (loadqword & 0xffff));

                if (bit3fl)
                {
                    tmem16[tmemidx2 ^ WORD_ADDR_XOR] = (uint16_t)(readval0 >> 16);
                    tmem16[tmemidx3 ^ WORD_ADDR_XOR] = (uint16_t)(readval0 & 0xffff);
                    tmem16[(tmemidx2 | 0x400) ^ WORD_ADDR_XOR] = (uint16_t)(readval1 >> 16);
                    tmem16[(tmemidx3 | 0x400) ^ WORD_ADDR_XOR] = (uint16_t)(readval1 & 0xffff);
                }
                else
                {
                    tmem16[tmemidx0 ^ WORD_ADDR_XOR] = (uint16_t)(readval0 >> 16);
                    tmem16[tmemidx1 ^ WORD_ADDR_XOR] = (uint16_t)(readval0 & 0xffff);
                    tmem16[(tmemidx0 | 0x400) ^ WORD_ADDR_XOR] = (uint16_t)(readval1 >> 16);
                    tmem16[(tmemidx1 | 0x400) ^ WORD_ADDR_XOR] = (uint16_t)(readval1 & 0xffff);
                }
                break;
            case 2:
                /* Store the four 16-bit lanes to tmemidx0..3.  dswap
                 * exchanges the upper and lower 32-bit halves; hibit sets
                 * bit 0x400 in every index. */
                if (!dswap)
                {
                    if (!hibit)
                    {
                        tmem16[tmemidx0 ^ WORD_ADDR_XOR] = (uint16_t)(loadqword >> 48);
                        tmem16[tmemidx1 ^ WORD_ADDR_XOR] = (uint16_t)(loadqword >> 32);
                        tmem16[tmemidx2 ^ WORD_ADDR_XOR] = (uint16_t)(loadqword >> 16);
                        tmem16[tmemidx3 ^ WORD_ADDR_XOR] = (uint16_t)(loadqword & 0xffff);
                    }
                    else
                    {
                        tmem16[(tmemidx0 | 0x400) ^ WORD_ADDR_XOR] = (uint16_t)(loadqword >> 48);
                        tmem16[(tmemidx1 | 0x400) ^ WORD_ADDR_XOR] = (uint16_t)(loadqword >> 32);
                        tmem16[(tmemidx2 | 0x400) ^ WORD_ADDR_XOR] = (uint16_t)(loadqword >> 16);
                        tmem16[(tmemidx3 | 0x400) ^ WORD_ADDR_XOR] = (uint16_t)(loadqword & 0xffff);
                    }
                }
                else
                {
                    if (!hibit)
                    {
                        tmem16[tmemidx0 ^ WORD_ADDR_XOR] = (uint16_t)(loadqword >> 16);
                        tmem16[tmemidx1 ^ WORD_ADDR_XOR] = (uint16_t)(loadqword & 0xffff);
                        tmem16[tmemidx2 ^ WORD_ADDR_XOR] = (uint16_t)(loadqword >> 48);
                        tmem16[tmemidx3 ^ WORD_ADDR_XOR] = (uint16_t)(loadqword >> 32);
                    }
                    else
                    {
                        tmem16[(tmemidx0 | 0x400) ^ WORD_ADDR_XOR] = (uint16_t)(loadqword >> 16);
                        tmem16[(tmemidx1 | 0x400) ^ WORD_ADDR_XOR] = (uint16_t)(loadqword & 0xffff);
                        tmem16[(tmemidx2 | 0x400) ^ WORD_ADDR_XOR] = (uint16_t)(loadqword >> 48);
                        tmem16[(tmemidx3 | 0x400) ^ WORD_ADDR_XOR] = (uint16_t)(loadqword >> 32);
                    }
                }
            break;
            }


            /* Step the texture coordinates (low 5 bits always cleared) and
             * the source pointer. */
            s = (s + dsinc) & ~0x1f;
            t = (t + dtinc) & ~0x1f;
            tiptr += tiadvance;
        }
    }
}
735 
edgewalker_for_loads(uint32_t wid,int32_t * lewdata)736 static void edgewalker_for_loads(uint32_t wid, int32_t* lewdata)
737 {
738     int j = 0;
739     int xleft = 0, xright = 0;
740     int xstart = 0, xend = 0;
741     int s = 0, t = 0, w = 0;
742     int dsdx = 0, dtdx = 0;
743     int dsdy = 0, dtdy = 0;
744     int dsde = 0, dtde = 0;
745     int tilenum = 0, flip = 0;
746     int32_t yl = 0, ym = 0, yh = 0;
747     int32_t xl = 0, xm = 0, xh = 0;
748     int32_t dxldy = 0, dxhdy = 0, dxmdy = 0;
749 
750     int cmd_id = CMD_ID(lewdata);
751     int ltlut = (cmd_id == CMD_ID_LOAD_TLUT);
752     int coord_quad = ltlut || (cmd_id == CMD_ID_LOAD_BLOCK);
753     flip = 1;
754     state[wid].max_level = 0;
755     tilenum = (lewdata[0] >> 16) & 7;
756 
757 
758     yl = SIGN(lewdata[0], 14);
759     ym = lewdata[1] >> 16;
760     ym = SIGN(ym, 14);
761     yh = SIGN(lewdata[1], 14);
762 
763     xl = SIGN(lewdata[2], 28);
764     xh = SIGN(lewdata[3], 28);
765     xm = SIGN(lewdata[4], 28);
766 
767     dxldy = 0;
768     dxhdy = 0;
769     dxmdy = 0;
770 
771 
772     s    = lewdata[5] & 0xffff0000;
773     t    = (lewdata[5] & 0xffff) << 16;
774     w    = 0;
775     dsdx = (lewdata[7] & 0xffff0000) | ((lewdata[6] >> 16) & 0xffff);
776     dtdx = ((lewdata[7] << 16) & 0xffff0000)    | (lewdata[6] & 0xffff);
777     dsde = 0;
778     dtde = (lewdata[9] & 0xffff) << 16;
779     dsdy = 0;
780     dtdy = (lewdata[8] & 0xffff) << 16;
781 
782     state[wid].spans_ds = dsdx & ~0x1f;
783     state[wid].spans_dt = dtdx & ~0x1f;
784     state[wid].spans_dw = 0;
785 
786 
787 
788 
789 
790 
791     xright = xh & ~0x1;
792     xleft = xm & ~0x1;
793 
794     int k = 0;
795 
796     int sign_dxhdy = 0;
797 
798     int do_offset = 0;
799 
800     int xfrac = 0;
801 
802 
803 
804 
805 
806 
807 #define ADJUST_ATTR_LOAD()                                      \
808 {                                                               \
809     state[wid].span[j].s = s & ~0x3ff;                                     \
810     state[wid].span[j].t = t & ~0x3ff;                                     \
811 }
812 
813 
814 #define ADDVALUES_LOAD() {  \
815             t += dtde;      \
816 }
817 
818     int32_t maxxmx, minxhx;
819 
820     int spix = 0;
821     int ycur =  yh & ~3;
822     int ylfar = yl | 3;
823 
824     int valid_y = 1;
825     int length = 0;
826     int32_t xrsc = 0, xlsc = 0, stickybit = 0;
827     int32_t yllimit = yl;
828     int32_t yhlimit = yh;
829 
830     xfrac = 0;
831     xend = xright >> 16;
832 
833 
834     for (k = ycur; k <= ylfar; k++)
835     {
836         if (k == ym)
837             xleft = xl & ~1;
838 
839         spix = k & 3;
840 
841         if (!(k & ~0xfff))
842         {
843             j = k >> 2;
844             valid_y = !(k < yhlimit || k >= yllimit);
845 
846             if (spix == 0)
847             {
848                 maxxmx = 0;
849                 minxhx = 0xfff;
850             }
851 
852             xrsc = (xright >> 13) & 0x7ffe;
853 
854 
855 
856             xlsc = (xleft >> 13) & 0x7ffe;
857 
858             if (valid_y)
859             {
860                 maxxmx = (((xlsc >> 3) & 0xfff) > maxxmx) ? (xlsc >> 3) & 0xfff : maxxmx;
861                 minxhx = (((xrsc >> 3) & 0xfff) < minxhx) ? (xrsc >> 3) & 0xfff : minxhx;
862             }
863 
864             if (spix == 0)
865             {
866                 state[wid].span[j].unscrx = xend;
867                 ADJUST_ATTR_LOAD();
868             }
869 
870             if (spix == 3)
871             {
872                 state[wid].span[j].lx = maxxmx;
873                 state[wid].span[j].rx = minxhx;
874 
875 
876             }
877 
878 
879         }
880 
881         if (spix == 3)
882         {
883             ADDVALUES_LOAD();
884         }
885 
886 
887 
888     }
889 
890     loading_pipeline(wid, yhlimit >> 2, yllimit >> 2, tilenum, coord_quad, ltlut);
891 }
892 
rdp_set_tile_size(uint32_t wid,const uint32_t * args)893 void rdp_set_tile_size(uint32_t wid, const uint32_t* args)
894 {
895     int tilenum = (args[1] >> 24) & 0x7;
896     state[wid].tile[tilenum].sl = (args[0] >> 12) & 0xfff;
897     state[wid].tile[tilenum].tl = (args[0] >>  0) & 0xfff;
898     state[wid].tile[tilenum].sh = (args[1] >> 12) & 0xfff;
899     state[wid].tile[tilenum].th = (args[1] >>  0) & 0xfff;
900 
901     calculate_clamp_diffs(&state[wid].tile[tilenum]);
902 }
903 
rdp_load_block(uint32_t wid,const uint32_t * args)904 void rdp_load_block(uint32_t wid, const uint32_t* args)
905 {
906     int tilenum = (args[1] >> 24) & 0x7;
907     int sl, sh, tl, dxt;
908 
909 
910     state[wid].tile[tilenum].sl = sl = ((args[0] >> 12) & 0xfff);
911     state[wid].tile[tilenum].tl = tl = ((args[0] >>  0) & 0xfff);
912     state[wid].tile[tilenum].sh = sh = ((args[1] >> 12) & 0xfff);
913     state[wid].tile[tilenum].th = dxt  = ((args[1] >>  0) & 0xfff);
914 
915     calculate_clamp_diffs(&state[wid].tile[tilenum]);
916 
917     int tlclamped = tl & 0x3ff;
918 
919     int32_t lewdata[10];
920 
921     lewdata[0] = (args[0] & 0xff000000) | (0x10 << 19) | (tilenum << 16) | ((tlclamped << 2) | 3);
922     lewdata[1] = (((tlclamped << 2) | 3) << 16) | (tlclamped << 2);
923     lewdata[2] = sh << 16;
924     lewdata[3] = sl << 16;
925     lewdata[4] = sh << 16;
926     lewdata[5] = ((sl << 3) << 16) | (tl << 3);
927     lewdata[6] = (dxt & 0xff) << 8;
928     lewdata[7] = ((0x80 >> state[wid].ti_size) << 16) | (dxt >> 8);
929     lewdata[8] = 0x20;
930     lewdata[9] = 0x20;
931 
932     edgewalker_for_loads(wid, lewdata);
933 
934 }
935 
tile_tlut_common_cs_decoder(uint32_t wid,const uint32_t * args)936 static void tile_tlut_common_cs_decoder(uint32_t wid, const uint32_t* args)
937 {
938     int tilenum = (args[1] >> 24) & 0x7;
939     int sl, tl, sh, th;
940 
941 
942     state[wid].tile[tilenum].sl = sl = ((args[0] >> 12) & 0xfff);
943     state[wid].tile[tilenum].tl = tl = ((args[0] >>  0) & 0xfff);
944     state[wid].tile[tilenum].sh = sh = ((args[1] >> 12) & 0xfff);
945     state[wid].tile[tilenum].th = th = ((args[1] >>  0) & 0xfff);
946 
947     calculate_clamp_diffs(&state[wid].tile[tilenum]);
948 
949 
950     int32_t lewdata[10];
951 
952     lewdata[0] = (args[0] & 0xff000000) | (0x10 << 19) | (tilenum << 16) | (th | 3);
953     lewdata[1] = ((th | 3) << 16) | (tl);
954     lewdata[2] = ((sh >> 2) << 16) | ((sh & 3) << 14);
955     lewdata[3] = ((sl >> 2) << 16) | ((sl & 3) << 14);
956     lewdata[4] = ((sh >> 2) << 16) | ((sh & 3) << 14);
957     lewdata[5] = ((sl << 3) << 16) | (tl << 3);
958     lewdata[6] = 0;
959     lewdata[7] = (0x200 >> state[wid].ti_size) << 16;
960     lewdata[8] = 0x20;
961     lewdata[9] = 0x20;
962 
963     edgewalker_for_loads(wid, lewdata);
964 }
965 
/*
 * Handler for the load-TLUT command.
 *
 * All of the work is delegated to tile_tlut_common_cs_decoder(), which is
 * also used by rdp_load_tile(); the arguments are forwarded untouched.
 */
void rdp_load_tlut(uint32_t wid, const uint32_t* args)
{
    tile_tlut_common_cs_decoder(wid, args);
}
970 
/*
 * Handler for the load-tile command.
 *
 * All of the work is delegated to tile_tlut_common_cs_decoder(), which is
 * also used by rdp_load_tlut(); the arguments are forwarded untouched.
 */
void rdp_load_tile(uint32_t wid, const uint32_t* args)
{
    tile_tlut_common_cs_decoder(wid, args);
}
975 
rdp_set_tile(uint32_t wid,const uint32_t * args)976 void rdp_set_tile(uint32_t wid, const uint32_t* args)
977 {
978     int tilenum = (args[1] >> 24) & 0x7;
979 
980     state[wid].tile[tilenum].format    = (args[0] >> 21) & 0x7;
981     state[wid].tile[tilenum].size      = (args[0] >> 19) & 0x3;
982     state[wid].tile[tilenum].line      = (args[0] >>  9) & 0x1ff;
983     state[wid].tile[tilenum].tmem      = (args[0] >>  0) & 0x1ff;
984     state[wid].tile[tilenum].palette   = (args[1] >> 20) & 0xf;
985     state[wid].tile[tilenum].ct        = (args[1] >> 19) & 0x1;
986     state[wid].tile[tilenum].mt        = (args[1] >> 18) & 0x1;
987     state[wid].tile[tilenum].mask_t    = (args[1] >> 14) & 0xf;
988     state[wid].tile[tilenum].shift_t   = (args[1] >> 10) & 0xf;
989     state[wid].tile[tilenum].cs        = (args[1] >>  9) & 0x1;
990     state[wid].tile[tilenum].ms        = (args[1] >>  8) & 0x1;
991     state[wid].tile[tilenum].mask_s    = (args[1] >>  4) & 0xf;
992     state[wid].tile[tilenum].shift_s   = (args[1] >>  0) & 0xf;
993 
994     calculate_tile_derivs(&state[wid].tile[tilenum]);
995 }
996 
rdp_set_texture_image(uint32_t wid,const uint32_t * args)997 void rdp_set_texture_image(uint32_t wid, const uint32_t* args)
998 {
999     state[wid].ti_format   = (args[0] >> 21) & 0x7;
1000     state[wid].ti_size     = (args[0] >> 19) & 0x3;
1001     state[wid].ti_width    = (args[0] & 0x3ff) + 1;
1002     state[wid].ti_address  = args[1] & 0x0ffffff;
1003 
1004 
1005 
1006 }
1007 
rdp_set_convert(uint32_t wid,const uint32_t * args)1008 void rdp_set_convert(uint32_t wid, const uint32_t* args)
1009 {
1010     int32_t k0 = (args[0] >> 13) & 0x1ff;
1011     int32_t k1 = (args[0] >> 4) & 0x1ff;
1012     int32_t k2 = ((args[0] & 0xf) << 5) | ((args[1] >> 27) & 0x1f);
1013     int32_t k3 = (args[1] >> 18) & 0x1ff;
1014     state[wid].k0_tf = (SIGN(k0, 9) << 1) + 1;
1015     state[wid].k1_tf = (SIGN(k1, 9) << 1) + 1;
1016     state[wid].k2_tf = (SIGN(k2, 9) << 1) + 1;
1017     state[wid].k3_tf = (SIGN(k3, 9) << 1) + 1;
1018     state[wid].k4 = (args[1] >> 9) & 0x1ff;
1019     state[wid].k5 = args[1] & 0x1ff;
1020 }
1021 
/*
 * Builds the lookup tables used by the texture code by running the TMEM
 * table initializer followed by the texture-coordinate table initializer.
 */
static void tex_init_lut(void)
{
    tmem_init_lut();
    tcoord_init_lut();
}
1027 
tex_init(uint32_t wid)1028 static void tex_init(uint32_t wid)
1029 {
1030     int i;
1031     tcoord_init(wid);
1032 
1033     for (i = 0; i < 8; i++)
1034     {
1035         calculate_tile_derivs(&state[wid].tile[i]);
1036         calculate_clamp_diffs(&state[wid].tile[i]);
1037     }
1038 }
1039