1 /////////////////////////////////////////////////////////////////////////
2 // $Id: voodoo_func.h 14297 2021-07-01 19:32:28Z vruppert $
3 /////////////////////////////////////////////////////////////////////////
4 /*
5 * Portion of this software comes with the following license
6 */
7
8 /***************************************************************************
9
10 Copyright Aaron Giles
11 All rights reserved.
12
13 Redistribution and use in source and binary forms, with or without
14 modification, are permitted provided that the following conditions are
15 met:
16
17 * Redistributions of source code must retain the above copyright
18 notice, this list of conditions and the following disclaimer.
19 * Redistributions in binary form must reproduce the above copyright
20 notice, this list of conditions and the following disclaimer in
21 the documentation and/or other materials provided with the
22 distribution.
23 * Neither the name 'MAME' nor the names of its contributors may be
24 used to endorse or promote products derived from this software
25 without specific prior written permission.
26
27 THIS SOFTWARE IS PROVIDED BY AARON GILES ''AS IS'' AND ANY EXPRESS OR
28 IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
29 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
30 DISCLAIMED. IN NO EVENT SHALL AARON GILES BE LIABLE FOR ANY DIRECT,
31 INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
32 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
33 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34 HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
35 STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
36 IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37 POSSIBILITY OF SUCH DAMAGE.
38
39 ***************************************************************************/
40
/* id of the last message logged; NOTE(review): initial value 255 presumably
   means "no message yet" — confirm against the logging code elsewhere */
Bit32u voodoo_last_msg = 255;


/* no-op stubs: these MAME-style poly/cpu throttling hooks are unused here */
#define poly_wait(x,y)
#define cpu_eat_cycles(x,y)

/* debug switches (all disabled) */
#define DEBUG_DEPTH (0)
#define DEBUG_LOD (0)

/* per-subsystem logging switches (all disabled) */
#define LOG_VBLANK_SWAP (0)
#define LOG_FIFO (0)
#define LOG_FIFO_VERBOSE (0)
#define LOG_REGISTERS (0)
#define LOG_WAITS (0)
#define LOG_LFB (0)
#define LOG_TEXTURE_RAM (0)
#define LOG_RASTERIZERS (0)
#define LOG_CMDFIFO (0)
#define LOG_CMDFIFO_VERBOSE (0)

/* per-pixel debug hook; expands to nothing (disabled) */
#define MODIFY_PIXEL(VV)

/* fifo thread variable */
BX_THREAD_VAR(fifo_thread_var);
/* CMDFIFO thread mutex (Voodoo2) */
BX_MUTEX(cmdfifo_mutex);
/* render mutex (Banshee) */
BX_MUTEX(render_mutex);
/* FIFO event stuff: mutex plus wakeup/not-full semaphores */
BX_MUTEX(fifo_mutex);
bx_thread_sem_t fifo_wakeup;
bx_thread_sem_t fifo_not_full;
static bx_thread_sem_t vertical_sem;

/* fast dither lookup */
static Bit8u dither4_lookup[256*16*2];
static Bit8u dither2_lookup[256*16*2];

/* fast reciprocal+log2 lookup */
Bit32u voodoo_reciplog[(2 << RECIPLOG_LOOKUP_BITS) + 2];
81
82
/* Generic scanline rasterizer for 0, 1 or 2 TMUs.  Interpolates color,
   Z, W and per-TMU S/T/W across one horizontal extent of a triangle and
   pushes every pixel through the macro-based texture, colorpath and
   pixel pipelines.  'tmus' selects how many texture units contribute;
   'threadid' selects the per-thread stats block. */
void raster_function(int tmus, void *destbase, Bit32s y, const poly_extent *extent, const void *extradata, int threadid) {
  const poly_extra_data *extra = (const poly_extra_data *) extradata;
  voodoo_state *v = extra->state;
  stats_block *stats = &v->thread_stats[threadid];
  DECLARE_DITHER_POINTERS;
  Bit32s startx = extent->startx;
  Bit32s stopx = extent->stopx;
  Bit32s iterr, iterg, iterb, itera;
  Bit32s iterz;
  Bit64s iterw, iterw0 = 0, iterw1 = 0;
  Bit64s iters0 = 0, iters1 = 0;
  Bit64s itert0 = 0, itert1 = 0;
  Bit16u *depth;
  Bit16u *dest;
  Bit32s dx, dy;
  Bit32s scry;
  Bit32s x;

  /* snapshot the relevant mode registers once for the whole extent */
  Bit32u fbzcolorpath= v->reg[fbzColorPath].u;
  Bit32u fbzmode= v->reg[fbzMode].u;
  Bit32u alphamode= v->reg[alphaMode].u;
  Bit32u fogmode= v->reg[fogMode].u;
  Bit32u texmode0= (tmus==0? 0 : v->tmu[0].reg[textureMode].u);
  Bit32u texmode1= (tmus<=1? 0 : v->tmu[1].reg[textureMode].u);

  /* determine the screen Y (optionally flipped around yorigin) */
  scry = y;
  if (FBZMODE_Y_ORIGIN(fbzmode))
    scry = (v->fbi.yorigin - y) & 0x3ff;

  /* compute dithering */
  COMPUTE_DITHER_POINTERS(fbzmode, y);

  /* apply clipping */
  if (FBZMODE_ENABLE_CLIPPING(fbzmode)) {
    Bit32s tempclip;

    /* Y clipping buys us the whole scanline */
    if (scry < (Bit32s) ((v->reg[clipLowYHighY].u >> 16) & 0x3ff)
        || scry >= (Bit32s) (v->reg[clipLowYHighY].u & 0x3ff)) {
      stats->pixels_in += stopx - startx;
      stats->clip_fail += stopx - startx;
      return;
    }

    /* X clipping: clamp the extent to [clipLeft, clipRight) */
    tempclip = (v->reg[clipLeftRight].u >> 16) & 0x3ff;
    if (startx < tempclip) {
      stats->pixels_in += tempclip - startx;
      startx = tempclip;
    }
    tempclip = v->reg[clipLeftRight].u & 0x3ff;
    if (stopx >= tempclip) {
      stats->pixels_in += stopx - tempclip;
      stopx = tempclip - 1;
    }
  }

  /* get pointers to the target buffer and depth buffer
     (depth is NULL when no aux buffer is configured) */
  dest = (Bit16u *) destbase + scry * v->fbi.rowpixels;
  depth =
      (v->fbi.auxoffs != (Bit32u) ~0) ?
          ((Bit16u *) (v->fbi.ram + v->fbi.auxoffs)
              + scry * v->fbi.rowpixels) :
          NULL;

  /* compute the starting parameters: offset the per-triangle start
     values by dx/dy from the 12.4 fixed-point vertex A */
  dx = startx - (extra->ax >> 4);
  dy = y - (extra->ay >> 4);
  iterr = extra->startr + dy * extra->drdy + dx * extra->drdx;
  iterg = extra->startg + dy * extra->dgdy + dx * extra->dgdx;
  iterb = extra->startb + dy * extra->dbdy + dx * extra->dbdx;
  itera = extra->starta + dy * extra->dady + dx * extra->dadx;
  iterz = extra->startz + dy * extra->dzdy + dx * extra->dzdx;
  iterw = extra->startw + dy * extra->dwdy + dx * extra->dwdx;
  if (tmus >= 1) {
    iterw0 = extra->startw0 + dy * extra->dw0dy + dx * extra->dw0dx;
    iters0 = extra->starts0 + dy * extra->ds0dy + dx * extra->ds0dx;
    itert0 = extra->startt0 + dy * extra->dt0dy + dx * extra->dt0dx;
  }
  if (tmus >= 2) {
    iterw1 = extra->startw1 + dy * extra->dw1dy + dx * extra->dw1dx;
    iters1 = extra->starts1 + dy * extra->ds1dy + dx * extra->ds1dx;
    itert1 = extra->startt1 + dy * extra->dt1dy + dx * extra->dt1dx;
  }

  /* loop in X */
  for (x = startx; x < stopx; x++) {
    rgb_union iterargb = { 0 };
    rgb_union texel = { 0 };

    /* pixel pipeline part 1 handles depth testing and stippling */
    PIXEL_PIPELINE_BEGIN(v, stats, x, y, fbzcolorpath, fbzmode,
        iterz, iterw)
    ;

    /* run the texture pipeline on TMU1 to produce a value in texel */
    /* note that they set LOD min to 8 to "disable" a TMU */
    if (tmus >= 2 && v->tmu[1].lodmin < (8 << 8))
      TEXTURE_PIPELINE(&v->tmu[1], x, dither4, texmode1, texel,
          v->tmu[1].lookup, extra->lodbase1, iters1, itert1,
          iterw1, texel);

    /* run the texture pipeline on TMU0 to produce a final */
    /* result in texel */
    /* note that they set LOD min to 8 to "disable" a TMU */
    if (tmus >= 1 && v->tmu[0].lodmin < (8 << 8)) {
      if (v->send_config == 0)
        TEXTURE_PIPELINE(&v->tmu[0], x, dither4, texmode0, texel,
            v->tmu[0].lookup, extra->lodbase0, iters0, itert0,
            iterw0, texel);
      /* send config data to the frame buffer */
      else
        texel.u = v->tmu_config;
    }
    /* colorpath pipeline selects source colors and does blending */
    CLAMPED_ARGB(iterr, iterg, iterb, itera, fbzcolorpath, iterargb);
    COLORPATH_PIPELINE(v, stats, fbzcolorpath, fbzmode, alphamode,
        texel, iterz, iterw, iterargb);

    /* pixel pipeline part 2 handles fog, alpha, and final output */
    PIXEL_PIPELINE_END(v, stats, dither, dither4, dither_lookup, x,
        dest, depth, fbzmode, fbzcolorpath, alphamode, fogmode,
        iterz, iterw, iterargb);

    /* update the iterated parameters for the next pixel */
    iterr += extra->drdx;
    iterg += extra->dgdx;
    iterb += extra->dbdx;
    itera += extra->dadx;
    iterz += extra->dzdx;
    iterw += extra->dwdx;
    if (tmus >= 1) {
      iterw0 += extra->dw0dx;
      iters0 += extra->ds0dx;
      itert0 += extra->dt0dx;
    }
    if (tmus >= 2) {
      iterw1 += extra->dw1dx;
      iters1 += extra->ds1dx;
      itert1 += extra->dt1dx;
    }
  }
}
227
228 /*************************************
229 *
230 * NCC table management
231 *
232 *************************************/
233
/* Handle a register write to one of the 12 NCC table registers.
   Writes may instead target the palette (when the high bit is set on an
   I/Q entry); otherwise they update the packed Y / I-RGB / Q-RGB values
   and mark the table dirty so it gets rebuilt on next use. */
void ncc_table_write(ncc_table *n, offs_t regnum, Bit32u data)
{
  /* I/Q entries reference the palette if the high bit is set */
  if (regnum >= 4 && (data & 0x80000000) && n->palette)
  {
    /* palette index: 7 bits from data plus the register's low bit */
    int index = ((data >> 23) & 0xfe) | (regnum & 1);

    /* set the ARGB for this palette index */
    n->palette[index] = 0xff000000 | data;

    /* if we have an ARGB palette as well, compute its value */
    if (n->palettea)
    {
      /* expand each 6-bit field to 8 bits by replicating the top bits */
      int a = ((data >> 16) & 0xfc) | ((data >> 22) & 0x03);
      int r = ((data >> 10) & 0xfc) | ((data >> 16) & 0x03);
      int g = ((data >> 4) & 0xfc) | ((data >> 10) & 0x03);
      int b = ((data << 2) & 0xfc) | ((data >> 4) & 0x03);
      n->palettea[index] = MAKE_ARGB(a, r, g, b);
    }

    /* this doesn't dirty the table or go to the registers, so bail */
    return;
  }

  /* if the register matches, don't update */
  if (data == n->reg[regnum].u)
    return;
  n->reg[regnum].u = data;

  /* first four entries are packed Y values (4 bytes per register) */
  if (regnum < 4)
  {
    regnum *= 4;
    n->y[regnum+0] = (data >> 0) & 0xff;
    n->y[regnum+1] = (data >> 8) & 0xff;
    n->y[regnum+2] = (data >> 16) & 0xff;
    n->y[regnum+3] = (data >> 24) & 0xff;
  }

  /* the second four entries are the I RGB values
     (each a signed 9-bit field, sign-extended via shift pairs) */
  else if (regnum < 8)
  {
    regnum &= 3;
    n->ir[regnum] = (Bit32s)(data << 5) >> 23;
    n->ig[regnum] = (Bit32s)(data << 14) >> 23;
    n->ib[regnum] = (Bit32s)(data << 23) >> 23;
  }

  /* the final four entries are the Q RGB values */
  else
  {
    regnum &= 3;
    n->qr[regnum] = (Bit32s)(data << 5) >> 23;
    n->qg[regnum] = (Bit32s)(data << 14) >> 23;
    n->qb[regnum] = (Bit32s)(data << 23) >> 23;
  }

  /* mark the table dirty */
  n->dirty = 1;
}
294
295
/* Rebuild the 256-entry NCC texel table from the current Y/I/Q values
   and clear the dirty flag.  Index layout: bits 7-4 select the Y entry,
   bits 3-2 the I entry, bits 1-0 the Q entry. */
void ncc_table_update(ncc_table *n)
{
  int idx;

  for (idx = 0; idx < 256; idx++)
  {
    int isel = (idx >> 2) & 0x03;
    int qsel = idx & 0x03;

    /* base intensity, then I and Q color contributions */
    int red = n->y[(idx >> 4) & 0x0f] + n->ir[isel] + n->qr[qsel];
    int grn = n->y[(idx >> 4) & 0x0f] + n->ig[isel] + n->qg[qsel];
    int blu = n->y[(idx >> 4) & 0x0f] + n->ib[isel] + n->qb[qsel];

    /* clamp each channel to [0,255] and store as opaque ARGB */
    CLAMP(red, 0, 255);
    CLAMP(grn, 0, 255);
    CLAMP(blu, 0, 255);
    n->texel[idx] = MAKE_ARGB(0xff, red, grn, blu);
  }

  /* table is now in sync with the registers */
  n->dirty = 0;
}
326
/* Recompute all derived texture state for one TMU from its registers:
   LOD min/max/bias, the set of present LODs, the base width/height masks,
   the per-LOD base offsets in texture RAM, the NCC/texel lookup tables
   and the detail-texture parameters.  Clears t->regdirty when done. */
void recompute_texture_params(tmu_state *t)
{
  int bppscale;
  Bit32u base;
  int lod;
  static Bit32u count = 0;  /* rate-limits the ZEROFRAC error message */

  /* Unimplemented switch */
  if (TEXLOD_LOD_ZEROFRAC(t->reg[tLOD].u)) {
    if (count < 50) BX_ERROR(("TEXLOD_LOD_ZEROFRAC not implemented yet"));
    count++;
  }
  /* Banshee: unimplemented switches */
  if (TEXLOD_TMIRROR_S(t->reg[tLOD].u)) {
    BX_ERROR(("TEXLOD_TMIRROR_S not implemented yet"));
  }
  if (TEXLOD_TMIRROR_T(t->reg[tLOD].u)) {
    BX_ERROR(("TEXLOD_TMIRROR_T not implemented yet"));
  }
  /* extract LOD parameters (converted to internal fixed-point scale) */
  t->lodmin = TEXLOD_LODMIN(t->reg[tLOD].u) << 6;
  t->lodmax = TEXLOD_LODMAX(t->reg[tLOD].u) << 6;
  t->lodbias = (Bit8s)(TEXLOD_LODBIAS(t->reg[tLOD].u) << 2) << 4;

  /* determine which LODs are present; in split mode only the even
     or only the odd LODs exist */
  t->lodmask = 0x1ff;
  if (TEXLOD_LOD_TSPLIT(t->reg[tLOD].u))
  {
    if (!TEXLOD_LOD_ODD(t->reg[tLOD].u))
      t->lodmask = 0x155;
    else
      t->lodmask = 0x0aa;
  }

  /* determine base texture width/height; the aspect field shrinks
     whichever dimension is narrower */
  t->wmask = t->hmask = 0xff;
  if (TEXLOD_LOD_S_IS_WIDER(t->reg[tLOD].u))
    t->hmask >>= TEXLOD_LOD_ASPECT(t->reg[tLOD].u);
  else
    t->wmask >>= TEXLOD_LOD_ASPECT(t->reg[tLOD].u);

  /* determine the bpp of the texture */
  bppscale = TEXMODE_FORMAT(t->reg[textureMode].u) >> 3;

  /* start with the base of LOD 0 */
  if (t->texaddr_shift == 0 && (t->reg[texBaseAddr].u & 1))
    BX_DEBUG(("Tiled texture"));
  base = (t->reg[texBaseAddr].u & t->texaddr_mask) << t->texaddr_shift;
  t->lodoffset[0] = base & t->mask;

  /* LODs 1-3 are different depending on whether we are in multitex mode */
  /* Several Voodoo 2 games leave the upper bits of TLOD == 0xff, meaning we think */
  /* they want multitex mode when they really don't -- disable for now */
  if (TEXLOD_TMULTIBASEADDR(t->reg[tLOD].u)) {
    BX_ERROR(("TEXLOD_TMULTIBASEADDR disabled for now"));
  }
  if (0)//TEXLOD_TMULTIBASEADDR(t->reg[tLOD].u))
  {
    /* multitex mode: LODs 1-3 have explicit base addresses */
    base = (t->reg[texBaseAddr_1].u & t->texaddr_mask) << t->texaddr_shift;
    t->lodoffset[1] = base & t->mask;
    base = (t->reg[texBaseAddr_2].u & t->texaddr_mask) << t->texaddr_shift;
    t->lodoffset[2] = base & t->mask;
    base = (t->reg[texBaseAddr_3_8].u & t->texaddr_mask) << t->texaddr_shift;
    t->lodoffset[3] = base & t->mask;
  }
  else
  {
    /* packed mode: each present LOD follows the previous one in RAM */
    if (t->lodmask & (1 << 0))
      base += (((t->wmask >> 0) + 1) * ((t->hmask >> 0) + 1)) << bppscale;
    t->lodoffset[1] = base & t->mask;
    if (t->lodmask & (1 << 1))
      base += (((t->wmask >> 1) + 1) * ((t->hmask >> 1) + 1)) << bppscale;
    t->lodoffset[2] = base & t->mask;
    if (t->lodmask & (1 << 2))
      base += (((t->wmask >> 2) + 1) * ((t->hmask >> 2) + 1)) << bppscale;
    t->lodoffset[3] = base & t->mask;
  }

  /* remaining LODs make sense; each occupies at least 4 texels */
  for (lod = 4; lod <= 8; lod++)
  {
    if (t->lodmask & (1 << (lod - 1)))
    {
      Bit32u size = ((t->wmask >> (lod - 1)) + 1) * ((t->hmask >> (lod - 1)) + 1);
      if (size < 4) size = 4;
      base += size << bppscale;
    }
    t->lodoffset[lod] = base & t->mask;
  }

  /* set the NCC lookup appropriately (formats 1 and 9 are NCC) */
  t->texel[1] = t->texel[9] = t->ncc[TEXMODE_NCC_TABLE_SELECT(t->reg[textureMode].u)].texel;

  /* pick the lookup table */
  t->lookup = t->texel[TEXMODE_FORMAT(t->reg[textureMode].u)];

  /* compute the detail parameters */
  t->detailmax = TEXDETAIL_DETAIL_MAX(t->reg[tDetail].u);
  t->detailbias = (Bit8s)(TEXDETAIL_DETAIL_BIAS(t->reg[tDetail].u) << 2) << 6;
  t->detailscale = TEXDETAIL_DETAIL_SCALE(t->reg[tDetail].u);

  /* no longer dirty */
  t->regdirty = 0;

  /* check for separate RGBA filtering */
  if (TEXDETAIL_SEPARATE_RGBA_FILTER(t->reg[tDetail].u))
    BX_PANIC(("Separate RGBA filters!"));
}
435
/* Prepare one TMU for rasterization: refresh dirty derived state (and
   NCC tables for NCC texture formats), then compute and return the base
   LOD value from the texture-coordinate gradients. */
BX_CPP_INLINE Bit32s prepare_tmu(tmu_state *t)
{
  Bit64s texdx, texdy;
  Bit32s lodbase;

  /* if the texture parameters are dirty, update them */
  if (t->regdirty) {
    recompute_texture_params(t);

    /* ensure that the NCC tables are up to date */
    if ((TEXMODE_FORMAT(t->reg[textureMode].u) & 7) == 1)
    {
      ncc_table *n = &t->ncc[TEXMODE_NCC_TABLE_SELECT(t->reg[textureMode].u)];
      t->texel[1] = t->texel[9] = n->texel;
      if (n->dirty)
        ncc_table_update(n);
    }
  }

  /* compute (ds^2 + dt^2) in both X and Y as 28.36 numbers */
  texdx = (Bit64s)(t->dsdx >> 14) * (Bit64s)(t->dsdx >> 14) + (Bit64s)(t->dtdx >> 14) * (Bit64s)(t->dtdx >> 14);
  texdy = (Bit64s)(t->dsdy >> 14) * (Bit64s)(t->dsdy >> 14) + (Bit64s)(t->dtdy >> 14) * (Bit64s)(t->dtdy >> 14);

  /* pick whichever is larger and shift off some high bits -> 28.20 */
  if (texdx < texdy)
    texdx = texdy;
  texdx >>= 16;

  /* use our fast reciprocal/log on this value; it expects input as a */
  /* 16.32 number, and returns the log of the reciprocal, so we have to */
  /* adjust the result: negative to get the log of the original value */
  /* plus 12 to account for the extra exponent, and divided by 2 to */
  /* get the log of the square root of texdx */
  (void)fast_reciplog(texdx, &lodbase);
  return (-lodbase + (12 << 8)) / 2;
}
472
473
round_coordinate(float value)474 BX_CPP_INLINE Bit32s round_coordinate(float value)
475 {
476 Bit32s result = (Bit32s)floor(value);
477 return result + (value - (float)result > 0.5f);
478 }
479
/* Rasterize one triangle into 'dest'.  Sorts the three vertices by Y,
   optionally clips against 'cliprect', then walks each scanline from the
   top to the bottom vertex, computing the left/right X extent and passing
   it to raster_function().  Returns the total pixel count processed.
   NOTE: the 'paramcount' argument is currently unused. */
Bit32u poly_render_triangle(void *dest, const rectangle *cliprect, int texcount, int paramcount, const poly_vertex *v1, const poly_vertex *v2, const poly_vertex *v3, poly_extra_data *extra)
{
  float dxdy_v1v2, dxdy_v1v3, dxdy_v2v3;
  const poly_vertex *tv;
  Bit32s curscan, scaninc=1;

  Bit32s v1yclip, v3yclip;
  Bit32s v1y, v3y;
  Bit32s pixels = 0;

  /* first sort by Y so that v1 is topmost and v3 bottommost */
  if (v2->y < v1->y)
  {
    tv = v1;
    v1 = v2;
    v2 = tv;
  }
  if (v3->y < v2->y)
  {
    tv = v2;
    v2 = v3;
    v3 = tv;
    if (v2->y < v1->y)
    {
      tv = v1;
      v1 = v2;
      v2 = tv;
    }
  }

  /* compute some integral X/Y vertex values */
  v1y = round_coordinate(v1->y);
  v3y = round_coordinate(v3->y);

  /* clip coordinates */
  v1yclip = v1y;
  v3yclip = v3y;
  if (cliprect != NULL)
  {
    v1yclip = MAX(v1yclip, cliprect->min_y);
    v3yclip = MIN(v3yclip, cliprect->max_y + 1);
  }
  /* fully clipped or degenerate in Y: nothing to draw */
  if (v3yclip - v1yclip <= 0)
    return 0;

  /* compute the slopes (dx/dy) for each edge of the triangle;
     horizontal edges get a slope of 0 to avoid division by zero */
  dxdy_v1v2 = (v2->y == v1->y) ? 0.0f : (v2->x - v1->x) / (v2->y - v1->y);
  dxdy_v1v3 = (v3->y == v1->y) ? 0.0f : (v3->x - v1->x) / (v3->y - v1->y);
  dxdy_v2v3 = (v3->y == v2->y) ? 0.0f : (v3->x - v2->x) / (v3->y - v2->y);

  /* compute the X extents for each scanline */
  poly_extent extent;
  int extnum=0;
  for (curscan = v1yclip; curscan < v3yclip; curscan += scaninc)
  {
    {
      /* sample the edges at the pixel-center Y of this scanline */
      float fully = (float)(curscan + extnum) + 0.5f;
      float startx = v1->x + (fully - v1->y) * dxdy_v1v3;
      float stopx;
      Bit32s istartx, istopx;

      /* compute the ending X based on which part of the triangle we're in */
      if (fully < v2->y)
        stopx = v1->x + (fully - v1->y) * dxdy_v1v2;
      else
        stopx = v2->x + (fully - v2->y) * dxdy_v2v3;

      /* clamp to full pixels */
      istartx = round_coordinate(startx);
      istopx = round_coordinate(stopx);

      /* force start < stop */
      if (istartx > istopx)
      {
        Bit32s temp = istartx;
        istartx = istopx;
        istopx = temp;
      }

      /* apply left/right clipping */
      if (cliprect != NULL)
      {
        if (istartx < cliprect->min_x)
          istartx = cliprect->min_x;
        if (istopx > cliprect->max_x)
          istopx = cliprect->max_x + 1;
      }

      /* set the extent and update the total pixel count */
      if (istartx >= istopx)
        istartx = istopx = 0;
      extent.startx = istartx;
      extent.stopx = istopx;
      raster_function(texcount,dest,curscan,&extent,extra,0);

      pixels += istopx - istartx;
    }
  }

  return pixels;
}
581
/* Build the per-triangle extra data from the global voodoo_state 'v'
   (vertex positions, iterator start values and gradients, plus per-TMU
   texture parameters for up to 'texcount' TMUs) and kick off the
   rasterization.  Returns the number of pixels rendered. */
Bit32s triangle_create_work_item(Bit16u *drawbuf, int texcount)
{
  poly_extra_data extra;
  poly_vertex vert[3];
  Bit32u retval;

  /* fill in the vertex data (convert from 12.4 fixed point to float) */
  vert[0].x = (float)v->fbi.ax * (1.0f / 16.0f);
  vert[0].y = (float)v->fbi.ay * (1.0f / 16.0f);
  vert[1].x = (float)v->fbi.bx * (1.0f / 16.0f);
  vert[1].y = (float)v->fbi.by * (1.0f / 16.0f);
  vert[2].x = (float)v->fbi.cx * (1.0f / 16.0f);
  vert[2].y = (float)v->fbi.cy * (1.0f / 16.0f);

  /* fill in the extra data */
  extra.state = v;

  /* fill in triangle parameters */
  extra.ax = v->fbi.ax;
  extra.ay = v->fbi.ay;
  extra.startr = v->fbi.startr;
  extra.startg = v->fbi.startg;
  extra.startb = v->fbi.startb;
  extra.starta = v->fbi.starta;
  extra.startz = v->fbi.startz;
  extra.startw = v->fbi.startw;
  extra.drdx = v->fbi.drdx;
  extra.dgdx = v->fbi.dgdx;
  extra.dbdx = v->fbi.dbdx;
  extra.dadx = v->fbi.dadx;
  extra.dzdx = v->fbi.dzdx;
  extra.dwdx = v->fbi.dwdx;
  extra.drdy = v->fbi.drdy;
  extra.dgdy = v->fbi.dgdy;
  extra.dbdy = v->fbi.dbdy;
  extra.dady = v->fbi.dady;
  extra.dzdy = v->fbi.dzdy;
  extra.dwdy = v->fbi.dwdy;

  /* fill in texture 0 parameters */
  if (texcount > 0)
  {
    extra.starts0 = v->tmu[0].starts;
    extra.startt0 = v->tmu[0].startt;
    extra.startw0 = v->tmu[0].startw;
    extra.ds0dx = v->tmu[0].dsdx;
    extra.dt0dx = v->tmu[0].dtdx;
    extra.dw0dx = v->tmu[0].dwdx;
    extra.ds0dy = v->tmu[0].dsdy;
    extra.dt0dy = v->tmu[0].dtdy;
    extra.dw0dy = v->tmu[0].dwdy;
    extra.lodbase0 = prepare_tmu(&v->tmu[0]);

    /* fill in texture 1 parameters */
    if (texcount > 1)
    {
      extra.starts1 = v->tmu[1].starts;
      extra.startt1 = v->tmu[1].startt;
      extra.startw1 = v->tmu[1].startw;
      extra.ds1dx = v->tmu[1].dsdx;
      extra.dt1dx = v->tmu[1].dtdx;
      extra.dw1dx = v->tmu[1].dwdx;
      extra.ds1dy = v->tmu[1].dsdy;
      extra.dt1dy = v->tmu[1].dtdy;
      extra.dw1dy = v->tmu[1].dwdy;
      extra.lodbase1 = prepare_tmu(&v->tmu[1]);
    }
  }

  /* farm the rasterization out to other threads */
  retval = poly_render_triangle(drawbuf, NULL, texcount, 0, &vert[0], &vert[1], &vert[2], &extra);

  return retval;
}
656
657
/* Execute the TRIANGLE command: determine the active TMU count, apply
   optional subpixel adjustment to the iterator start values, select the
   draw buffer and rasterize.  Returns the cycle count to charge
   (setup cost plus one cycle per pixel). */
Bit32s triangle()
{
  int texcount = 0;
  Bit16u *drawbuf;
  int destbuf;
  int pixels;

  /* determine the number of TMUs involved */
  texcount = 0;
  if (!FBIINIT3_DISABLE_TMUS(v->reg[fbiInit3].u) && FBZCP_TEXTURE_ENABLE(v->reg[fbzColorPath].u))
  {
    texcount = 1;
    /* chipmask bit 2 indicates a second TMU is present */
    if (v->chipmask & 0x04)
      texcount = 2;
  }

  /* perform subpixel adjustments */
  if (FBZCP_CCA_SUBPIXEL_ADJUST(v->reg[fbzColorPath].u))
  {
    /* offset from vertex A's 12.4 fraction to the pixel center */
    Bit32s dx = 8 - (v->fbi.ax & 15);
    Bit32s dy = 8 - (v->fbi.ay & 15);

    /* adjust iterated R,G,B,A and W/Z */
    v->fbi.startr += (dy * v->fbi.drdy + dx * v->fbi.drdx) >> 4;
    v->fbi.startg += (dy * v->fbi.dgdy + dx * v->fbi.dgdx) >> 4;
    v->fbi.startb += (dy * v->fbi.dbdy + dx * v->fbi.dbdx) >> 4;
    v->fbi.starta += (dy * v->fbi.dady + dx * v->fbi.dadx) >> 4;
    v->fbi.startw += (dy * v->fbi.dwdy + dx * v->fbi.dwdx) >> 4;
    v->fbi.startz += mul_32x32_shift(dy, v->fbi.dzdy, 4) + mul_32x32_shift(dx, v->fbi.dzdx, 4);

    /* adjust iterated W/S/T for TMU 0 */
    if (texcount >= 1)
    {
      v->tmu[0].startw += (dy * v->tmu[0].dwdy + dx * v->tmu[0].dwdx) >> 4;
      v->tmu[0].starts += (dy * v->tmu[0].dsdy + dx * v->tmu[0].dsdx) >> 4;
      v->tmu[0].startt += (dy * v->tmu[0].dtdy + dx * v->tmu[0].dtdx) >> 4;

      /* adjust iterated W/S/T for TMU 1 */
      if (texcount >= 2)
      {
        v->tmu[1].startw += (dy * v->tmu[1].dwdy + dx * v->tmu[1].dwdx) >> 4;
        v->tmu[1].starts += (dy * v->tmu[1].dsdy + dx * v->tmu[1].dsdx) >> 4;
        v->tmu[1].startt += (dy * v->tmu[1].dtdy + dx * v->tmu[1].dtdx) >> 4;
      }
    }
  }

  /* determine the draw buffer (Banshee always draws to the back buffer) */
  destbuf = (v->type >= VOODOO_BANSHEE) ? 1 : FBZMODE_DRAW_BUFFER(v->reg[fbzMode].u);
  switch (destbuf)
  {
    case 0: /* front buffer */
      drawbuf = (Bit16u *)(v->fbi.ram + v->fbi.rgboffs[v->fbi.frontbuf]);
      v->fbi.video_changed = 1;
      break;

    case 1: /* back buffer */
      drawbuf = (Bit16u *)(v->fbi.ram + v->fbi.rgboffs[v->fbi.backbuf]);
      break;

    default: /* reserved */
      return TRIANGLE_SETUP_CLOCKS;
  }

  /* find a rasterizer that matches our current state */
  pixels = triangle_create_work_item(/*v, */drawbuf, texcount);

  /* update stats */
  v->reg[fbiTrianglesOut].u++;

  /* 1 pixel per clock, plus some setup time */
  if (LOG_REGISTERS) BX_DEBUG(("cycles = %d", TRIANGLE_SETUP_CLOCKS + pixels));
  return TRIANGLE_SETUP_CLOCKS + pixels;
}
732
733
/* Convert the three buffered setup vertices (v->fbi.svert[0..2]) into the
   classic triangle registers: compute the plane-equation start values and
   X/Y gradients for every parameter enabled in sSetupMode, apply backface
   culling, then draw via triangle().  Returns the cycle count. */
static Bit32s setup_and_draw_triangle()
{
  float dx1, dy1, dx2, dy2;
  float divisor, tdiv;

  /* grab the X/Ys at least (convert to 12.4 fixed point) */
  v->fbi.ax = (Bit16s)(v->fbi.svert[0].x * 16.0);
  v->fbi.ay = (Bit16s)(v->fbi.svert[0].y * 16.0);
  v->fbi.bx = (Bit16s)(v->fbi.svert[1].x * 16.0);
  v->fbi.by = (Bit16s)(v->fbi.svert[1].y * 16.0);
  v->fbi.cx = (Bit16s)(v->fbi.svert[2].x * 16.0);
  v->fbi.cy = (Bit16s)(v->fbi.svert[2].y * 16.0);

  /* compute the divisor (reciprocal of twice the signed triangle area) */
  divisor = 1.0f / ((v->fbi.svert[0].x - v->fbi.svert[1].x) * (v->fbi.svert[0].y - v->fbi.svert[2].y) -
            (v->fbi.svert[0].x - v->fbi.svert[2].x) * (v->fbi.svert[0].y - v->fbi.svert[1].y));

  /* backface culling */
  if (v->reg[sSetupMode].u & 0x20000)
  {
    int culling_sign = (v->reg[sSetupMode].u >> 18) & 1;
    int divisor_sign = (divisor < 0);

    /* if doing strips and ping pong is enabled, apply the ping pong */
    if ((v->reg[sSetupMode].u & 0x90000) == 0x00000)
      culling_sign ^= (v->fbi.sverts - 3) & 1;

    /* if our sign matches the culling sign, we're done for */
    if (divisor_sign == culling_sign)
      return TRIANGLE_SETUP_CLOCKS;
  }

  /* compute the dx/dy values shared by all gradient computations */
  dx1 = v->fbi.svert[0].y - v->fbi.svert[2].y;
  dx2 = v->fbi.svert[0].y - v->fbi.svert[1].y;
  dy1 = v->fbi.svert[0].x - v->fbi.svert[1].x;
  dy2 = v->fbi.svert[0].x - v->fbi.svert[2].x;

  /* set up R,G,B (12.12 fixed point via the 4096 scale) */
  tdiv = divisor * 4096.0f;
  if (v->reg[sSetupMode].u & (1 << 0))
  {
    v->fbi.startr = (Bit32s)(v->fbi.svert[0].r * 4096.0f);
    v->fbi.drdx = (Bit32s)(((v->fbi.svert[0].r - v->fbi.svert[1].r) * dx1 - (v->fbi.svert[0].r - v->fbi.svert[2].r) * dx2) * tdiv);
    v->fbi.drdy = (Bit32s)(((v->fbi.svert[0].r - v->fbi.svert[2].r) * dy1 - (v->fbi.svert[0].r - v->fbi.svert[1].r) * dy2) * tdiv);
    v->fbi.startg = (Bit32s)(v->fbi.svert[0].g * 4096.0f);
    v->fbi.dgdx = (Bit32s)(((v->fbi.svert[0].g - v->fbi.svert[1].g) * dx1 - (v->fbi.svert[0].g - v->fbi.svert[2].g) * dx2) * tdiv);
    v->fbi.dgdy = (Bit32s)(((v->fbi.svert[0].g - v->fbi.svert[2].g) * dy1 - (v->fbi.svert[0].g - v->fbi.svert[1].g) * dy2) * tdiv);
    v->fbi.startb = (Bit32s)(v->fbi.svert[0].b * 4096.0f);
    v->fbi.dbdx = (Bit32s)(((v->fbi.svert[0].b - v->fbi.svert[1].b) * dx1 - (v->fbi.svert[0].b - v->fbi.svert[2].b) * dx2) * tdiv);
    v->fbi.dbdy = (Bit32s)(((v->fbi.svert[0].b - v->fbi.svert[2].b) * dy1 - (v->fbi.svert[0].b - v->fbi.svert[1].b) * dy2) * tdiv);
  }

  /* set up alpha */
  if (v->reg[sSetupMode].u & (1 << 1))
  {
    v->fbi.starta = (Bit32s)(v->fbi.svert[0].a * 4096.0);
    v->fbi.dadx = (Bit32s)(((v->fbi.svert[0].a - v->fbi.svert[1].a) * dx1 - (v->fbi.svert[0].a - v->fbi.svert[2].a) * dx2) * tdiv);
    v->fbi.dady = (Bit32s)(((v->fbi.svert[0].a - v->fbi.svert[2].a) * dy1 - (v->fbi.svert[0].a - v->fbi.svert[1].a) * dy2) * tdiv);
  }

  /* set up Z */
  if (v->reg[sSetupMode].u & (1 << 2))
  {
    v->fbi.startz = (Bit32s)(v->fbi.svert[0].z * 4096.0);
    v->fbi.dzdx = (Bit32s)(((v->fbi.svert[0].z - v->fbi.svert[1].z) * dx1 - (v->fbi.svert[0].z - v->fbi.svert[2].z) * dx2) * tdiv);
    v->fbi.dzdy = (Bit32s)(((v->fbi.svert[0].z - v->fbi.svert[2].z) * dy1 - (v->fbi.svert[0].z - v->fbi.svert[1].z) * dy2) * tdiv);
  }

  /* set up Wb (32.32 fixed point; also seeds both TMUs) */
  tdiv = divisor * 65536.0f * 65536.0f;
  if (v->reg[sSetupMode].u & (1 << 3))
  {
    v->fbi.startw = v->tmu[0].startw = v->tmu[1].startw = (Bit64s)(v->fbi.svert[0].wb * 65536.0f * 65536.0f);
    v->fbi.dwdx = v->tmu[0].dwdx = v->tmu[1].dwdx = (Bit64s)(((v->fbi.svert[0].wb - v->fbi.svert[1].wb) * dx1 - (v->fbi.svert[0].wb - v->fbi.svert[2].wb) * dx2) * tdiv);
    v->fbi.dwdy = v->tmu[0].dwdy = v->tmu[1].dwdy = (Bit64s)(((v->fbi.svert[0].wb - v->fbi.svert[2].wb) * dy1 - (v->fbi.svert[0].wb - v->fbi.svert[1].wb) * dy2) * tdiv);
  }

  /* set up W0 (overrides Wb for the TMUs; also seeds TMU1) */
  if (v->reg[sSetupMode].u & (1 << 4))
  {
    v->tmu[0].startw = v->tmu[1].startw = (Bit64s)(v->fbi.svert[0].w0 * 65536.0f * 65536.0f);
    v->tmu[0].dwdx = v->tmu[1].dwdx = (Bit64s)(((v->fbi.svert[0].w0 - v->fbi.svert[1].w0) * dx1 - (v->fbi.svert[0].w0 - v->fbi.svert[2].w0) * dx2) * tdiv);
    v->tmu[0].dwdy = v->tmu[1].dwdy = (Bit64s)(((v->fbi.svert[0].w0 - v->fbi.svert[2].w0) * dy1 - (v->fbi.svert[0].w0 - v->fbi.svert[1].w0) * dy2) * tdiv);
  }

  /* set up S0,T0 (also seeds TMU1) */
  if (v->reg[sSetupMode].u & (1 << 5))
  {
    v->tmu[0].starts = v->tmu[1].starts = (Bit64s)(v->fbi.svert[0].s0 * 65536.0f * 65536.0f);
    v->tmu[0].dsdx = v->tmu[1].dsdx = (Bit64s)(((v->fbi.svert[0].s0 - v->fbi.svert[1].s0) * dx1 - (v->fbi.svert[0].s0 - v->fbi.svert[2].s0) * dx2) * tdiv);
    v->tmu[0].dsdy = v->tmu[1].dsdy = (Bit64s)(((v->fbi.svert[0].s0 - v->fbi.svert[2].s0) * dy1 - (v->fbi.svert[0].s0 - v->fbi.svert[1].s0) * dy2) * tdiv);
    v->tmu[0].startt = v->tmu[1].startt = (Bit64s)(v->fbi.svert[0].t0 * 65536.0f * 65536.0f);
    v->tmu[0].dtdx = v->tmu[1].dtdx = (Bit64s)(((v->fbi.svert[0].t0 - v->fbi.svert[1].t0) * dx1 - (v->fbi.svert[0].t0 - v->fbi.svert[2].t0) * dx2) * tdiv);
    v->tmu[0].dtdy = v->tmu[1].dtdy = (Bit64s)(((v->fbi.svert[0].t0 - v->fbi.svert[2].t0) * dy1 - (v->fbi.svert[0].t0 - v->fbi.svert[1].t0) * dy2) * tdiv);
  }

  /* set up W1 (TMU1 only) */
  if (v->reg[sSetupMode].u & (1 << 6))
  {
    v->tmu[1].startw = (Bit64s)(v->fbi.svert[0].w1 * 65536.0f * 65536.0f);
    v->tmu[1].dwdx = (Bit64s)(((v->fbi.svert[0].w1 - v->fbi.svert[1].w1) * dx1 - (v->fbi.svert[0].w1 - v->fbi.svert[2].w1) * dx2) * tdiv);
    v->tmu[1].dwdy = (Bit64s)(((v->fbi.svert[0].w1 - v->fbi.svert[2].w1) * dy1 - (v->fbi.svert[0].w1 - v->fbi.svert[1].w1) * dy2) * tdiv);
  }

  /* set up S1,T1 (TMU1 only) */
  if (v->reg[sSetupMode].u & (1 << 7))
  {
    v->tmu[1].starts = (Bit64s)(v->fbi.svert[0].s1 * 65536.0f * 65536.0f);
    v->tmu[1].dsdx = (Bit64s)(((v->fbi.svert[0].s1 - v->fbi.svert[1].s1) * dx1 - (v->fbi.svert[0].s1 - v->fbi.svert[2].s1) * dx2) * tdiv);
    v->tmu[1].dsdy = (Bit64s)(((v->fbi.svert[0].s1 - v->fbi.svert[2].s1) * dy1 - (v->fbi.svert[0].s1 - v->fbi.svert[1].s1) * dy2) * tdiv);
    v->tmu[1].startt = (Bit64s)(v->fbi.svert[0].t1 * 65536.0f * 65536.0f);
    v->tmu[1].dtdx = (Bit64s)(((v->fbi.svert[0].t1 - v->fbi.svert[1].t1) * dx1 - (v->fbi.svert[0].t1 - v->fbi.svert[2].t1) * dx2) * tdiv);
    v->tmu[1].dtdy = (Bit64s)(((v->fbi.svert[0].t1 - v->fbi.svert[2].t1) * dy1 - (v->fbi.svert[0].t1 - v->fbi.svert[1].t1) * dy2) * tdiv);
  }

  /* draw the triangle */
  v->fbi.cheating_allowed = 1;
  return triangle();
}
854
855
begin_triangle()856 static Bit32s begin_triangle()
857 {
858 setup_vertex *sv = &v->fbi.svert[2];
859
860 /* extract all the data from registers */
861 sv->x = v->reg[sVx].f;
862 sv->y = v->reg[sVy].f;
863 sv->wb = v->reg[sWb].f;
864 sv->w0 = v->reg[sWtmu0].f;
865 sv->s0 = v->reg[sS_W0].f;
866 sv->t0 = v->reg[sT_W0].f;
867 sv->w1 = v->reg[sWtmu1].f;
868 sv->s1 = v->reg[sS_Wtmu1].f;
869 sv->t1 = v->reg[sT_Wtmu1].f;
870 sv->a = v->reg[sAlpha].f;
871 sv->r = v->reg[sRed].f;
872 sv->g = v->reg[sGreen].f;
873 sv->b = v->reg[sBlue].f;
874
875 /* spread it across all three verts and reset the count */
876 v->fbi.svert[0] = v->fbi.svert[1] = v->fbi.svert[2];
877 v->fbi.sverts = 1;
878
879 return 0;
880 }
881
882
draw_triangle()883 static Bit32s draw_triangle()
884 {
885 setup_vertex *sv = &v->fbi.svert[2];
886 int cycles = 0;
887
888 /* for strip mode, shuffle vertex 1 down to 0 */
889 if (!(v->reg[sSetupMode].u & (1 << 16)))
890 v->fbi.svert[0] = v->fbi.svert[1];
891
892 /* copy 2 down to 1 regardless */
893 v->fbi.svert[1] = v->fbi.svert[2];
894
895 /* extract all the data from registers */
896 sv->x = v->reg[sVx].f;
897 sv->y = v->reg[sVy].f;
898 sv->wb = v->reg[sWb].f;
899 sv->w0 = v->reg[sWtmu0].f;
900 sv->s0 = v->reg[sS_W0].f;
901 sv->t0 = v->reg[sT_W0].f;
902 sv->w1 = v->reg[sWtmu1].f;
903 sv->s1 = v->reg[sS_Wtmu1].f;
904 sv->t1 = v->reg[sT_Wtmu1].f;
905 sv->a = v->reg[sAlpha].f;
906 sv->r = v->reg[sRed].f;
907 sv->g = v->reg[sGreen].f;
908 sv->b = v->reg[sBlue].f;
909
910 /* if we have enough verts, go ahead and draw */
911 if (++v->fbi.sverts >= 3)
912 cycles = setup_and_draw_triangle();
913
914 return cycles;
915 }
916
917
raster_fastfill(void * destbase,Bit32s y,const poly_extent * extent,const void * extradata,int threadid)918 static void raster_fastfill(void *destbase, Bit32s y, const poly_extent *extent, const void *extradata, int threadid)
919 {
920 const poly_extra_data *extra = (const poly_extra_data *)extradata;
921 voodoo_state *v = extra->state;
922 stats_block *stats = &v->thread_stats[threadid];
923 Bit32s startx = extent->startx;
924 Bit32s stopx = extent->stopx;
925 int scry, x;
926
927 /* determine the screen Y */
928 scry = y;
929 if (FBZMODE_Y_ORIGIN(v->reg[fbzMode].u))
930 scry = (v->fbi.yorigin - y) & 0x3ff;
931
932 /* fill this RGB row */
933 if (FBZMODE_RGB_BUFFER_MASK(v->reg[fbzMode].u))
934 {
935 const Bit16u *ditherow = &extra->dither[(y & 3) * 4];
936 Bit64u expanded = *(Bit64u *)ditherow;
937 Bit16u *dest = (Bit16u *)destbase + scry * v->fbi.rowpixels;
938
939 for (x = startx; x < stopx && (x & 3) != 0; x++)
940 dest[x] = ditherow[x & 3];
941 for ( ; x < (stopx & ~3); x += 4)
942 *(Bit64u *)&dest[x] = expanded;
943 for ( ; x < stopx; x++)
944 dest[x] = ditherow[x & 3];
945 stats->pixels_out += stopx - startx;
946 }
947
948 /* fill this dest buffer row */
949 if (FBZMODE_AUX_BUFFER_MASK(v->reg[fbzMode].u) && v->fbi.auxoffs != (Bit32u)~0)
950 {
951 Bit16u color = v->reg[zaColor].u;
952 Bit64u expanded = ((Bit64u)color << 48) | ((Bit64u)color << 32) | (color << 16) | color;
953 Bit16u *dest = (Bit16u *)(v->fbi.ram + v->fbi.auxoffs) + scry * v->fbi.rowpixels;
954
955 for (x = startx; x < stopx && (x & 3) != 0; x++)
956 dest[x] = color;
957 for ( ; x < (stopx & ~3); x += 4)
958 *(Bit64u *)&dest[x] = expanded;
959 for ( ; x < stopx; x++)
960 dest[x] = color;
961 }
962 }
963
964
/* Render pre-computed per-scanline extents through raster_fastfill().
   Scanlines [startscanline, startscanline+numscanlines) are clipped against
   cliprect (may be NULL for no clipping) and each one is filled by calling
   raster_fastfill() with the corresponding entry of extents[].
   Returns the number of pixels inside the clipped extents. */
Bit32u poly_render_triangle_custom(void *dest, const rectangle *cliprect, int startscanline, int numscanlines, const poly_extent *extents, poly_extra_data *extra)
{
  Bit32s curscan, scaninc;
  Bit32s v1yclip, v3yclip;   /* first and one-past-last visible scanline */
  Bit32s pixels = 0;

  /* clip coordinates */
  if (cliprect != NULL)
  {
    v1yclip = MAX(startscanline, cliprect->min_y);
    v3yclip = MIN(startscanline + numscanlines, cliprect->max_y + 1);
  }
  else
  {
    v1yclip = startscanline;
    v3yclip = startscanline + numscanlines;
  }
  /* fully clipped away: nothing to do */
  if (v3yclip - v1yclip <= 0)
    return 0;

  /* compute the X extents for each scanline */
  for (curscan = v1yclip; curscan < v3yclip; curscan += scaninc)
  {
    int extnum=0;

    /* determine how much to advance to hit the next bucket */
    scaninc = 1;

    /* iterate over extents */
    {
      const poly_extent *extent = &extents[(curscan + extnum) - startscanline];
      Bit32s istartx = extent->startx, istopx = extent->stopx;

      /* force start < stop */
      if (istartx > istopx)
      {
        Bit32s temp = istartx;
        istartx = istopx;
        istopx = temp;
      }

      /* apply left/right clipping */
      if (cliprect != NULL)
      {
        if (istartx < cliprect->min_x)
          istartx = cliprect->min_x;
        if (istopx > cliprect->max_x)
          istopx = cliprect->max_x + 1;
      }

      /* set the extent and update the total pixel count */
      /* NOTE(review): the X clipping above only affects the returned pixel
         count; raster_fastfill() receives the unclipped extent.  The only
         caller in this file (fastfill) passes cliprect == NULL, so the two
         agree in practice -- confirm before adding other callers. */
      raster_fastfill(dest,curscan,extent,extra,0);
      if (istartx < istopx)
        pixels += istopx - istartx;
    }
  }
#if KEEP_STATISTICS
  poly->unit_max = MAX(poly->unit_max, poly->unit_next);
#endif

  return pixels;
}
1027
fastfill(voodoo_state * v)1028 Bit32s fastfill(voodoo_state *v)
1029 {
1030 int sx = (v->reg[clipLeftRight].u >> 16) & 0x3ff;
1031 int ex = (v->reg[clipLeftRight].u >> 0) & 0x3ff;
1032 int sy = (v->reg[clipLowYHighY].u >> 16) & 0x3ff;
1033 int ey = (v->reg[clipLowYHighY].u >> 0) & 0x3ff;
1034 poly_extent extents[64];
1035 Bit16u dithermatrix[16];
1036 Bit16u *drawbuf = NULL;
1037 Bit32u pixels = 0;
1038 int extnum, x, y;
1039
1040 /* if we're not clearing either, take no time */
1041 if (!FBZMODE_RGB_BUFFER_MASK(v->reg[fbzMode].u) && !FBZMODE_AUX_BUFFER_MASK(v->reg[fbzMode].u))
1042 return 0;
1043
1044 /* are we clearing the RGB buffer? */
1045 if (FBZMODE_RGB_BUFFER_MASK(v->reg[fbzMode].u))
1046 {
1047 /* determine the draw buffer */
1048 int destbuf = (v->type >= VOODOO_BANSHEE) ? 1 : FBZMODE_DRAW_BUFFER(v->reg[fbzMode].u);
1049 switch (destbuf)
1050 {
1051 case 0: /* front buffer */
1052 drawbuf = (Bit16u *)(v->fbi.ram + v->fbi.rgboffs[v->fbi.frontbuf]);
1053 break;
1054
1055 case 1: /* back buffer */
1056 drawbuf = (Bit16u *)(v->fbi.ram + v->fbi.rgboffs[v->fbi.backbuf]);
1057 break;
1058
1059 default: /* reserved */
1060 break;
1061 }
1062
1063 /* determine the dither pattern */
1064 for (y = 0; y < 4; y++)
1065 {
1066 DECLARE_DITHER_POINTERS;
1067 UNUSED(dither);
1068 COMPUTE_DITHER_POINTERS(v->reg[fbzMode].u, y);
1069 for (x = 0; x < 4; x++)
1070 {
1071 int r = v->reg[color1].rgb.r;
1072 int g = v->reg[color1].rgb.g;
1073 int b = v->reg[color1].rgb.b;
1074
1075 APPLY_DITHER(v->reg[fbzMode].u, x, dither_lookup, r, g, b);
1076 dithermatrix[y*4 + x] = (r << 11) | (g << 5) | b;
1077 }
1078 }
1079 }
1080
1081 /* fill in a block of extents */
1082 extents[0].startx = sx;
1083 extents[0].stopx = ex;
1084 for (extnum = 1; extnum < (int)ARRAY_LENGTH(extents); extnum++)
1085 extents[extnum] = extents[0];
1086
1087 poly_extra_data extra;
1088 /* iterate over blocks of extents */
1089 for (y = sy; y < ey; y += ARRAY_LENGTH(extents))
1090 {
1091 int count = MIN(ey - y, (int) ARRAY_LENGTH(extents));
1092
1093 extra.state = v;
1094 memcpy(extra.dither, dithermatrix, sizeof(extra.dither));
1095
1096 pixels += poly_render_triangle_custom(drawbuf, NULL, y, count, extents, &extra);
1097 }
1098
1099 /* 2 pixels per clock */
1100 return pixels / 2;
1101 }
1102
swap_buffers(voodoo_state * v)1103 void swap_buffers(voodoo_state *v)
1104 {
1105 int count;
1106
1107 /* force a partial update */
1108 v->fbi.video_changed = 1;
1109
1110 /* keep a history of swap intervals */
1111 count = v->fbi.vblank_count;
1112 if (count > 15)
1113 count = 15;
1114 v->reg[fbiSwapHistory].u = (v->reg[fbiSwapHistory].u << 4) | count;
1115
1116 /* rotate the buffers */
1117 if (v->type <= VOODOO_2)
1118 {
1119 if (v->type < VOODOO_2 || !v->fbi.vblank_dont_swap)
1120 {
1121 if (v->fbi.rgboffs[2] == (Bit32u)~0)
1122 {
1123 v->fbi.frontbuf = 1 - v->fbi.frontbuf;
1124 v->fbi.backbuf = 1 - v->fbi.frontbuf;
1125 }
1126 else
1127 {
1128 v->fbi.frontbuf = (v->fbi.frontbuf + 1) % 3;
1129 v->fbi.backbuf = (v->fbi.frontbuf + 1) % 3;
1130 }
1131 }
1132 }
1133 else
1134 v->fbi.rgboffs[0] = v->reg[leftOverlayBuf].u & v->fbi.mask & ~0x0f;
1135
1136 /* decrement the pending count and reset our state */
1137 if (v->fbi.swaps_pending)
1138 v->fbi.swaps_pending--;
1139 v->fbi.vblank_count = 0;
1140 v->fbi.vblank_swap_pending = 0;
1141 }
1142
1143 /*-------------------------------------------------
1144 swapbuffer - execute the 'swapbuffer'
1145 command
1146 -------------------------------------------------*/
/* Execute the 'swapbuffer' command.  data bit 0 selects whether the swap
   waits for vertical retrace; bits 8:1 hold the vsync count to wait; bit 9
   is the Voodoo2 don't-swap flag.  Returns a cycle estimate for the wait. */
Bit32s swapbuffer(voodoo_state *v, Bit32u data)
{
  /* set the don't swap value for Voodoo 2 */
  v->fbi.vblank_swap_pending = 1;
  v->fbi.vblank_swap = (data >> 1) & 0xff;   /* vsyncs to wait before swapping */
  v->fbi.vblank_dont_swap = (data >> 9) & 1; /* V2: log history but don't rotate */

  /* if we're not syncing to the retrace, process the command immediately */
  if (!(data & 1))
  {
    BX_LOCK(fifo_mutex);
    swap_buffers(v);
    BX_UNLOCK(fifo_mutex);
    return 0;
  } else {
    /* vsync-synchronized: block until the vertical timer signals retrace */
    if (v->vtimer_running) {
      bx_wait_sem(&vertical_sem);
    }
  }

  /* determine how many cycles to wait; we deliberately overshoot here because */
  /* the final count gets updated on the VBLANK */
  return (v->fbi.vblank_swap + 1) * v->freq / 30;
}
1171
1172
1173 /*************************************
1174 *
1175 * Statistics management
1176 *
1177 *************************************/
1178
accumulate_statistics(voodoo_state * v,const stats_block * stats)1179 static void accumulate_statistics(voodoo_state *v, const stats_block *stats)
1180 {
1181 /* apply internal voodoo statistics */
1182 v->reg[fbiPixelsIn].u += stats->pixels_in;
1183 v->reg[fbiPixelsOut].u += stats->pixels_out;
1184 v->reg[fbiChromaFail].u += stats->chroma_fail;
1185 v->reg[fbiZfuncFail].u += stats->zfunc_fail;
1186 v->reg[fbiAfuncFail].u += stats->afunc_fail;
1187 }
1188
update_statistics(voodoo_state * v,int accumulate)1189 static void update_statistics(voodoo_state *v, int accumulate)
1190 {
1191 int threadnum;
1192
1193 /* accumulate/reset statistics from all units */
1194 for (threadnum = 0; threadnum < WORK_MAX_THREADS; threadnum++)
1195 {
1196 if (accumulate)
1197 accumulate_statistics(v, &v->thread_stats[threadnum]);
1198 memset(&v->thread_stats[threadnum], 0, sizeof(v->thread_stats[threadnum]));
1199 }
1200
1201 /* accumulate/reset statistics from the LFB */
1202 if (accumulate)
1203 accumulate_statistics(v, &v->fbi.lfb_stats);
1204 memset(&v->fbi.lfb_stats, 0, sizeof(v->fbi.lfb_stats));
1205 }
1206
reset_counters(voodoo_state * v)1207 void reset_counters(voodoo_state *v)
1208 {
1209 update_statistics(v, FALSE);
1210 v->reg[fbiPixelsIn].u = 0;
1211 v->reg[fbiChromaFail].u = 0;
1212 v->reg[fbiZfuncFail].u = 0;
1213 v->reg[fbiAfuncFail].u = 0;
1214 v->reg[fbiPixelsOut].u = 0;
1215 }
1216
1217
soft_reset(voodoo_state * v)1218 void soft_reset(voodoo_state *v)
1219 {
1220 reset_counters(v);
1221 v->reg[fbiTrianglesOut].u = 0;
1222 fifo_reset(&v->fbi.fifo);
1223 fifo_reset(&v->pci.fifo);
1224 v->pci.op_pending = 0;
1225 }
1226
1227
/* Recompute the frame buffer memory layout from the fbiInit registers:
   tile geometry / row pitch, the offsets of the (up to three) RGB buffers
   and the aux buffer, and the location/size of the memory FIFO.  Called
   whenever an fbiInit register that affects the layout is written. */
void recompute_video_memory(voodoo_state *v)
{
  Bit32u buffer_pages = FBIINIT2_VIDEO_BUFFER_OFFSET(v->reg[fbiInit2].u);
  Bit32u fifo_start_page = FBIINIT4_MEMORY_FIFO_START_ROW(v->reg[fbiInit4].u);
  Bit32u fifo_last_page = FBIINIT4_MEMORY_FIFO_STOP_ROW(v->reg[fbiInit4].u);
  Bit32u memory_config;
  int buf;

  BX_DEBUG(("buffer_pages 0x%x", buffer_pages));
  /* memory config is determined differently between V1 and V2 */
  memory_config = FBIINIT2_ENABLE_TRIPLE_BUF(v->reg[fbiInit2].u);
  if (v->type == VOODOO_2 && memory_config == 0)
    memory_config = FBIINIT5_BUFFER_ALLOCATION(v->reg[fbiInit5].u);

  /* tiles are 64x16/32; x_tiles specifies how many half-tiles */
  v->fbi.tile_width = (v->type == VOODOO_1) ? 64 : 32;
  v->fbi.tile_height = (v->type == VOODOO_1) ? 16 : 32;
  v->fbi.x_tiles = FBIINIT1_X_VIDEO_TILES(v->reg[fbiInit1].u);
  if (v->type == VOODOO_2)
  {
    /* Voodoo2 extends the tile count with extra bits from fbiInit1/6 */
    v->fbi.x_tiles = (v->fbi.x_tiles << 1) |
                     (FBIINIT1_X_VIDEO_TILES_BIT5(v->reg[fbiInit1].u) << 5) |
                     (FBIINIT6_X_VIDEO_TILES_BIT0(v->reg[fbiInit6].u));
  }
  v->fbi.rowpixels = v->fbi.tile_width * v->fbi.x_tiles;

  /* first RGB buffer always starts at 0 */
  v->fbi.rgboffs[0] = 0;

  if (buffer_pages>0) {
    /* second RGB buffer starts immediately afterwards */
    v->fbi.rgboffs[1] = buffer_pages * 0x1000;

    /* remaining buffers are based on the config */
    switch (memory_config) {
      case 3: /* reserved */
        BX_ERROR(("Unexpected memory configuration in recompute_video_memory!"));
        break;

      case 0: /* 2 color buffers, 1 aux buffer */
        v->fbi.rgboffs[2] = ~0;   /* no third color buffer */
        v->fbi.auxoffs = 2 * buffer_pages * 0x1000;
        break;

      case 1: /* 3 color buffers, 0 aux buffers */
        v->fbi.rgboffs[2] = 2 * buffer_pages * 0x1000;
        v->fbi.auxoffs = 3 * buffer_pages * 0x1000;
        break;

      case 2: /* 3 color buffers, 1 aux buffers */
        v->fbi.rgboffs[2] = 2 * buffer_pages * 0x1000;
        v->fbi.auxoffs = 3 * buffer_pages * 0x1000;
        break;
    }
  }

  /* clamp the RGB buffers to video memory */
  for (buf = 0; buf < 3; buf++)
    if (v->fbi.rgboffs[buf] != (Bit32u)~0 && v->fbi.rgboffs[buf] > v->fbi.mask)
      v->fbi.rgboffs[buf] = v->fbi.mask;

  /* clamp the aux buffer to video memory */
  if (v->fbi.auxoffs != (Bit32u)~0 && v->fbi.auxoffs > v->fbi.mask)
    v->fbi.auxoffs = v->fbi.mask;

  /* compute the memory FIFO location and size */
  if (fifo_last_page > v->fbi.mask / 0x1000)
    fifo_last_page = v->fbi.mask / 0x1000;

  /* is it valid and enabled? */
  if ((fifo_start_page <= fifo_last_page) && v->fbi.fifo.enabled)
  {
    v->fbi.fifo.base = (Bit32u *)(v->fbi.ram + fifo_start_page * 0x1000);
    v->fbi.fifo.size = (fifo_last_page + 1 - fifo_start_page) * 0x1000 / 4;
    /* hardware limit on the memory FIFO depth */
    if (v->fbi.fifo.size > 65536*2)
      v->fbi.fifo.size = 65536*2;
  }

  /* if not, disable the FIFO */
  else
  {
    v->fbi.fifo.base = NULL;
    v->fbi.fifo.size = 0;
  }

  /* reset the FIFO */
  fifo_reset(&v->fbi.fifo);
  if (fifo_empty_locked(&v->pci.fifo)) v->pci.op_pending = 0;

  /* reset our front/back buffers if they are out of range */
  if (v->fbi.rgboffs[2] == (Bit32u)~0)
  {
    if (v->fbi.frontbuf == 2)
      v->fbi.frontbuf = 0;
    if (v->fbi.backbuf == 2)
      v->fbi.backbuf = 0;
  }
}
1326
1327
voodoo2_bitblt_mux(Bit8u rop,Bit8u * dst_ptr,Bit8u * src_ptr,int dpxsize)1328 void voodoo2_bitblt_mux(Bit8u rop, Bit8u *dst_ptr, Bit8u *src_ptr, int dpxsize)
1329 {
1330 Bit8u mask, inbits, outbits;
1331
1332 for (int i = 0; i < dpxsize; i++) {
1333 mask = 0x80;
1334 outbits = 0;
1335 for (int b = 7; b >= 0; b--) {
1336 inbits = (*dst_ptr & mask) > 0;
1337 inbits |= ((*src_ptr & mask) > 0) << 1;
1338 outbits |= ((rop & (1 << inbits)) > 0) << b;
1339 mask >>= 1;
1340 }
1341 *dst_ptr++ = outbits;
1342 src_ptr++;
1343 }
1344 }
1345
1346 #define BLT v->blt
1347
clip_check(Bit16u x,Bit16u y)1348 bool clip_check(Bit16u x, Bit16u y)
1349 {
1350 if (!BLT.clip_en)
1351 return 1;
1352 if ((x >= BLT.clipx0) && (x < BLT.clipx1) &&
1353 (y >= BLT.clipy0) && (y < BLT.clipy1)) {
1354 return 1;
1355 }
1356 return 0;
1357 }
1358
1359
chroma_check(Bit8u * ptr,Bit16u min,Bit16u max,bool dst)1360 Bit8u chroma_check(Bit8u *ptr, Bit16u min, Bit16u max, bool dst)
1361 {
1362 Bit8u pass = 0;
1363 Bit32u color;
1364 Bit8u r, g, b, rmin, rmax, gmin, gmax, bmin, bmax;
1365
1366 color = *ptr;
1367 color |= *(ptr + 1) << 8;
1368 r = (color >> 11);
1369 g = (color >> 5) & 0x3f;
1370 b = color & 0x1f;
1371 rmin = (min >> 11) & 0x1f;
1372 rmax = (max >> 11) & 0x1f;
1373 gmin = (min >> 5) & 0x3f;
1374 gmax = (max >> 5) & 0x3f;
1375 bmin = min & 0x1f;
1376 bmax = max & 0x1f;
1377 pass = ((r >= rmin) && (r <= rmax) && (g >= gmin) && (g <= gmax) &&
1378 (b >= bmin) && (b <= bmax));
1379 if (!dst) pass <<= 1;
1380 return pass;
1381 }
1382
voodoo2_bitblt(void)1383 void voodoo2_bitblt(void)
1384 {
1385 Bit8u cmd, rop = 0, *dst_ptr, *src_ptr;
1386 Bit16u c, cols, src_x, src_y, r, rows, size, x;
1387 Bit32u src_base, doffset, soffset, dstride, sstride;
1388 bool src_tiled, dst_tiled, x_dir, y_dir;
1389 int tmpval;
1390
1391 cmd = (Bit8u)(v->reg[bltCommand].u & 0x07);
1392 BLT.src_fmt = (Bit8u)((v->reg[bltCommand].u >> 3) & 0x1f);
1393 BLT.src_swizzle = (Bit8u)((v->reg[bltCommand].u >> 8) & 0x03);
1394 BLT.chroma_en = (Bit8u)((v->reg[bltCommand].u >> 10) & 0x01);
1395 BLT.chroma_en |= (Bit8u)((v->reg[bltCommand].u >> 11) & 0x02);
1396 src_tiled = ((v->reg[bltCommand].u >> 14) & 0x01);
1397 dst_tiled = ((v->reg[bltCommand].u >> 15) & 0x01);
1398 BLT.clip_en = ((v->reg[bltCommand].u >> 16) & 0x01);
1399 BLT.transp = ((v->reg[bltCommand].u >> 17) & 0x01);
1400 BLT.dst_w = (v->reg[bltSize].u & 0x7ff) + 1;
1401 x_dir = (v->reg[bltSize].u >> 11) & 1;
1402 tmpval = (v->reg[bltSize].u & 0xfff);
1403 if (x_dir && ((cmd == 0) || (cmd == 2))) {
1404 tmpval |= 0xfffff000;
1405 }
1406 BLT.dst_w = abs(tmpval) + 1;
1407 y_dir = (v->reg[bltSize].u >> 27) & 1;
1408 tmpval = ((v->reg[bltSize].u >> 16) & 0xfff);
1409 if (y_dir && ((cmd == 0) || (cmd == 2))) {
1410 tmpval |= 0xfffff000;
1411 }
1412 BLT.dst_h = abs(tmpval) + 1;
1413 BLT.dst_x = (Bit16u)(v->reg[bltDstXY].u & 0x7ff);
1414 BLT.dst_y = (Bit16u)((v->reg[bltDstXY].u >> 16) & 0x7ff);
1415 if (src_tiled) {
1416 src_base = (v->reg[bltSrcBaseAddr].u & 0x3ff) << 12;
1417 sstride = (v->reg[bltXYStrides].u & 0x3f) << 6;
1418 } else {
1419 src_base = v->reg[bltSrcBaseAddr].u & 0x3ffff8;
1420 sstride = v->reg[bltXYStrides].u & 0xff8;
1421 }
1422 if (dst_tiled) {
1423 BLT.dst_base = (v->reg[bltDstBaseAddr].u & 0x3ff) << 12;
1424 BLT.dst_pitch = (v->reg[bltXYStrides].u >> 10) & 0xfc0;
1425 } else {
1426 BLT.dst_base = v->reg[bltDstBaseAddr].u & 0x3ffff8;
1427 BLT.dst_pitch = (v->reg[bltXYStrides].u >> 16) & 0xff8;
1428 }
1429 BLT.h2s_mode = 0;
1430 switch (cmd) {
1431 case 0:
1432 BX_DEBUG(("Screen-to-Screen bitBLT: w = %d, h = %d, rop0 = %d",
1433 BLT.dst_w, BLT.dst_h, BLT.rop[0]));
1434 src_x = (Bit16u)(v->reg[bltSrcXY].u & 0x7ff);
1435 src_y = (Bit16u)((v->reg[bltSrcXY].u >> 16) & 0x7ff);
1436 cols = BLT.dst_w;
1437 rows = BLT.dst_h;
1438 dstride = BLT.dst_pitch;
1439 doffset = BLT.dst_base + BLT.dst_y * dstride + BLT.dst_x * 2;
1440 soffset = src_base + src_y * sstride + src_x * 2;
1441 for (r = 0; r <= rows; r++) {
1442 dst_ptr = &v->fbi.ram[doffset & v->fbi.mask];
1443 src_ptr = &v->fbi.ram[soffset & v->fbi.mask];
1444 x = BLT.dst_x;
1445 for (c = 0; c < cols; c++) {
1446 if (clip_check(x, BLT.dst_y)) {
1447 if (BLT.chroma_en & 1) {
1448 rop = chroma_check(src_ptr, BLT.src_col_min, BLT.src_col_max, 0);
1449 }
1450 if (BLT.chroma_en & 2) {
1451 rop |= chroma_check(dst_ptr, BLT.dst_col_min, BLT.dst_col_max, 1);
1452 }
1453 voodoo2_bitblt_mux(BLT.rop[rop], dst_ptr, src_ptr, 2);
1454 }
1455 if (x_dir) {
1456 dst_ptr -= 2;
1457 src_ptr -= 2;
1458 x--;
1459 } else {
1460 dst_ptr += 2;
1461 src_ptr += 2;
1462 x++;
1463 }
1464 }
1465 if (y_dir) {
1466 doffset -= dstride;
1467 soffset -= sstride;
1468 BLT.dst_y--;
1469 } else {
1470 doffset += dstride;
1471 soffset += sstride;
1472 BLT.dst_y++;
1473 }
1474 }
1475 break;
1476 case 1:
1477 BX_DEBUG(("CPU-to-Screen bitBLT: w = %d, h = %d, rop0 = %d",
1478 BLT.dst_w, BLT.dst_h, BLT.rop[0]));
1479 BLT.h2s_mode = 1;
1480 BLT.cur_x = BLT.dst_x;
1481 break;
1482 case 2:
1483 BX_DEBUG(("Rectangle fill: w = %d, h = %d, rop0 = %d",
1484 BLT.dst_w, BLT.dst_h, BLT.rop[0]));
1485 cols = BLT.dst_w;
1486 rows = BLT.dst_h;
1487 dstride = BLT.dst_pitch;
1488 doffset = BLT.dst_base + BLT.dst_y * dstride + BLT.dst_x * 2;
1489 src_ptr = BLT.fgcolor;
1490 for (r = 0; r <= rows; r++) {
1491 dst_ptr = &v->fbi.ram[doffset & v->fbi.mask];
1492 x = BLT.dst_x;
1493 for (c = 0; c < cols; c++) {
1494 if (clip_check(x, BLT.dst_y)) {
1495 if (BLT.chroma_en & 2) {
1496 rop = chroma_check(dst_ptr, BLT.dst_col_min, BLT.dst_col_max, 1);
1497 }
1498 voodoo2_bitblt_mux(BLT.rop[rop], dst_ptr, src_ptr, 2);
1499 }
1500 if (x_dir) {
1501 dst_ptr -= 2;
1502 x--;
1503 } else {
1504 dst_ptr += 2;
1505 x++;
1506 }
1507 }
1508 if (y_dir) {
1509 doffset -= dstride;
1510 BLT.dst_y--;
1511 } else {
1512 doffset += dstride;
1513 BLT.dst_y++;
1514 }
1515 }
1516 break;
1517 case 3:
1518 BLT.dst_x = (Bit16u)(v->reg[bltDstXY].u & 0x1ff);
1519 BLT.dst_y = (Bit16u)((v->reg[bltDstXY].u >> 16) & 0x3ff);
1520 cols = (Bit16u)(v->reg[bltSize].u & 0x1ff);
1521 rows = (Bit16u)((v->reg[bltSize].u >> 16) & 0x3ff);
1522 BX_DEBUG(("SGRAM fill: x = %d y = %d w = %d h = %d color = 0x%02x%02x",
1523 BLT.dst_x, BLT.dst_y, cols, rows, BLT.fgcolor[1], BLT.fgcolor[0]));
1524 dstride = (1 << 12);
1525 doffset = BLT.dst_y * dstride;
1526 for (r = 0; r <= rows; r++) {
1527 if (r == 0) {
1528 dst_ptr = &v->fbi.ram[(doffset + BLT.dst_x * 8) & v->fbi.mask];
1529 size = dstride / 2 - (BLT.dst_x * 4);
1530 } else {
1531 dst_ptr = &v->fbi.ram[doffset & v->fbi.mask];
1532 if (r == rows) {
1533 size = cols * 4;
1534 } else {
1535 size = dstride / 2;
1536 }
1537 }
1538 for (c = 0; c < size; c++) {
1539 *dst_ptr = BLT.fgcolor[0];
1540 *(dst_ptr + 1) = BLT.fgcolor[1];
1541 dst_ptr += 2;
1542 }
1543 doffset += dstride;
1544 }
1545 break;
1546 default:
1547 BX_ERROR(("Voodoo bitBLT: unknown command %d)", cmd));
1548 }
1549 v->fbi.video_changed = 1;
1550 }
1551
/* Consume one 32-bit host word of an armed CPU-to-screen bitBLT (cmd 1).
   The source color format (BLT.src_fmt low 3 bits) selects how 'data' is
   interpreted: 0 = 1bpp/32 pixels, 1 = 1bpp tiled 8x4, 2 = two RGB565
   pixels, 3..5 = one unpacked 24/32bpp pixel.  Advances BLT.cur_x/dst_y and
   clears BLT.h2s_mode when the last pixel of the transfer has been drawn. */
void voodoo2_bitblt_cpu_to_screen(Bit32u data)
{
  Bit8u rop = 0, *dst_ptr, *dst_ptr1, *src_ptr, color[2];
  Bit8u b, c, g, i, j, r;
  bool set;
  Bit8u colfmt = BLT.src_fmt & 7, rgbfmt = BLT.src_fmt >> 3;
  Bit16u count = BLT.dst_x + BLT.dst_w - BLT.cur_x;  /* pixels left on this row */
  Bit32u doffset = BLT.dst_base + BLT.dst_y * BLT.dst_pitch + BLT.cur_x * 2;
  dst_ptr = &v->fbi.ram[doffset & v->fbi.mask];

  /* optional byte/word swizzling of the incoming data */
  if (BLT.src_swizzle & 1) {
    data = bx_bswap32(data);
  }
  if (BLT.src_swizzle & 2) {
    data = (data >> 16) | (data << 16);
  }
  if ((colfmt == 0) || (colfmt == 1)) {
    /* 1bpp: each data bit selects fgcolor/bgcolor */
    if (colfmt == 0) {
      c = (count > 32) ? 32 : count;   /* linear: up to 32 pixels, 1 row */
      r = 1;
    } else {
      c = (count > 8) ? 8 : count;     /* tiled: 8 pixels x up to 4 rows */
      r = (BLT.dst_h > 4) ? 4 : BLT.dst_h;
    }
    for (j = 0; j < r; j++) {
      dst_ptr1 = dst_ptr;
      for (i = 0; i < c; i++) {
        /* bit order: MSB-first within each byte of the word */
        b = (i & 0x18) + (7 - (i & 7));
        set = (data & (1U << b)) > 0;
        if (set) {
          src_ptr = BLT.fgcolor;
        } else {
          src_ptr = BLT.bgcolor;
        }
        /* in transparent mode, clear bits are skipped entirely */
        if (set || !BLT.transp) {
          if (clip_check(BLT.cur_x + i, BLT.dst_y + j)) {
            if (BLT.chroma_en & 2) {
              rop = chroma_check(dst_ptr1, BLT.dst_col_min, BLT.dst_col_max, 1);
            }
            voodoo2_bitblt_mux(BLT.rop[rop], dst_ptr1, src_ptr, 2);
          }
        }
        dst_ptr1 += 2;
      }
      if (colfmt == 0) {
        /* advance within the row, or wrap to the next row / finish */
        if (c < count) {
          BLT.cur_x += c;
        } else {
          BLT.cur_x = BLT.dst_x;
          if (BLT.dst_h > 1) {
            BLT.dst_y++;
            BLT.dst_h--;
          } else {
            BLT.h2s_mode = 0;
          }
        }
      } else {
        /* tiled: next 8 bits feed the next scanline */
        data >>= 8;
        dst_ptr += BLT.dst_pitch;
      }
    }
    if (colfmt == 1) {
      /* tiled advance: 8 pixels across, 4 rows down */
      if (c < count) {
        BLT.cur_x += c;
      } else {
        BLT.cur_x = BLT.dst_x;
        if (BLT.dst_h > 4) {
          BLT.dst_y += 4;
          BLT.dst_h -= 4;
        } else {
          BLT.h2s_mode = 0;
        }
      }
    }
  } else if (colfmt == 2) {
    /* 16bpp: two RGB565 pixels per word */
    if (rgbfmt & 1) {
      BX_ERROR(("Voodoo bitBLT: color order other than RGB not supported yet"));
    }
#if BX_BIG_ENDIAN
    data = bx_bswap32(data);
#endif
    src_ptr = (Bit8u*)&data;
    c = (count > 2) ? 2 : count;
    for (i = 0; i < c; i++) {
      if (clip_check(BLT.cur_x, BLT.dst_y)) {
        if (BLT.chroma_en & 1) {
          rop = chroma_check(src_ptr, BLT.src_col_min, BLT.src_col_max, 0);
        }
        if (BLT.chroma_en & 2) {
          rop |= chroma_check(dst_ptr, BLT.dst_col_min, BLT.dst_col_max, 1);
        }
        voodoo2_bitblt_mux(BLT.rop[rop], dst_ptr, src_ptr, 2);
      }
      dst_ptr += 2;
      src_ptr += 2;
      BLT.cur_x++;
      if (--count == 0) {
        BLT.cur_x = BLT.dst_x;
        BLT.dst_y++;
        if (--BLT.dst_h == 0) {
          BLT.h2s_mode = 0;
        }
      }
    }
  } else if ((colfmt >= 3) && (colfmt <= 5)) {
    /* 24/32bpp: one pixel per word, converted down to RGB565 */
    if (colfmt > 3) {
      BX_ERROR(("Voodoo bitBLT: 24 bpp source dithering not supported yet"));
      colfmt = 3;
    }
    /* extract the 5/6/5 component bits according to the channel order */
    switch (rgbfmt) {
      case 1:
        r = (Bit8u)((data >> 3) & 0x1f);
        g = (Bit8u)((data >> 10) & 0x3f);
        b = (Bit8u)((data >> 19) & 0x1f);
        break;
      case 2:
        r = (Bit8u)((data >> 27) & 0x1f);
        g = (Bit8u)((data >> 18) & 0x3f);
        b = (Bit8u)((data >> 11) & 0x1f);
        break;
      case 3:
        r = (Bit8u)((data >> 11) & 0x1f);
        g = (Bit8u)((data >> 18) & 0x3f);
        b = (Bit8u)((data >> 27) & 0x1f);
        break;
      default:
        r = (Bit8u)((data >> 19) & 0x1f);
        g = (Bit8u)((data >> 10) & 0x3f);
        b = (Bit8u)((data >> 3) & 0x1f);
    }
    /* pack into little-endian RGB565 */
    color[0] = (Bit8u)((g << 5) | b);
    color[1] = (r << 3) | (g >> 3);
    src_ptr = color;
    if (clip_check(BLT.cur_x, BLT.dst_y)) {
      if (BLT.chroma_en & 1) {
        rop = chroma_check(src_ptr, BLT.src_col_min, BLT.src_col_max, 0);
      }
      if (BLT.chroma_en & 2) {
        rop |= chroma_check(dst_ptr, BLT.dst_col_min, BLT.dst_col_max, 1);
      }
      voodoo2_bitblt_mux(BLT.rop[rop], dst_ptr, src_ptr, 2);
    }
    BLT.cur_x++;
    if (--count == 0) {
      BLT.cur_x = BLT.dst_x;
      BLT.dst_y++;
      if (--BLT.dst_h == 0) {
        BLT.h2s_mode = 0;
      }
    }
  } else {
    BX_ERROR(("CPU-to-Screen bitBLT: unknown color format 0x%02x", colfmt));
  }
  v->fbi.video_changed = 1;
}
1707
1708
/* Write one byte to a RAMDAC register, tracking the multi-byte PLL access
   protocol: writing an address to PLLWMA/PLLRMA determines how many PLLDATA
   bytes follow (data_size counts them down), and completing a clk0 write
   sequence recomputes the video clock. */
void dacdata_w(dac_state *d, Bit8u regnum, Bit8u data)
{
  d->reg[regnum] = data;

  /* switch off the DAC register requested */
  switch (regnum) {
    case 4: // PLLWMA
    case 7: // PLLRMA
      /* PLL address 0x0e is a single-byte access, everything else two bytes */
      if (data == 0x0e) {
        d->data_size = 1;
      } else {
        d->data_size = 2;
      }
      break;
    case 5: // PLLDATA
      switch (d->reg[4]) { // PLLWMA
        case 0x00:
          /* clk0 PLL: first byte is M, second byte packs N (low 5 bits)
             and P (top 3 bits) */
          if (d->data_size == 2) {
            d->clk0_m = data;
          } else if (d->data_size == 1) {
            d->clk0_n = data & 0x1f;
            d->clk0_p = data >> 5;
          }
          break;
        case 0x0e:
          if ((d->data_size == 1) && (data == 0xf8)) {
            /* recompute the video clock from the PLL dividers
               (14.318184 MHz reference) */
            v->vidclk = 14318184.0f * ((float)(d->clk0_m + 2) / (float)(d->clk0_n + 2)) / (float)(1 << d->clk0_p);
            Bit8u dacr6 = d->reg[6] & 0xf0;
            /* NOTE(review): these register-6 mode values halve the clock
               here -- presumably pixel-doubled modes; confirm against the
               RAMDAC datasheet */
            if ((dacr6 == 0x20) || (dacr6 == 0x60) || (dacr6 == 0x70)) {
              v->vidclk /= 2.0f;
            }
            Voodoo_update_timing();
          }
          break;
      }
      d->data_size--;   /* one PLLDATA byte consumed */
      break;
  }
}
1748
1749
dacdata_r(dac_state * d,Bit8u regnum)1750 void dacdata_r(dac_state *d, Bit8u regnum)
1751 {
1752 Bit8u result = 0xff;
1753
1754 /* switch off the DAC register requested */
1755 switch (regnum) {
1756 case 5: // PLLDATA
1757 switch (d->reg[7]) { // PLLRMA
1758 case 0x00:
1759 if (d->data_size == 2) {
1760 result = d->clk0_m;
1761 } else if (d->data_size == 1) {
1762 result = d->clk0_n | (d->clk0_p << 5);
1763 }
1764 break;
1765 /* this is just to make startup happy */
1766 case 0x01: result = 0x55; break;
1767 case 0x07: result = 0x71; break;
1768 case 0x0b: result = 0x79; break;
1769 }
1770 d->data_size--;
1771 break;
1772
1773 default:
1774 result = d->reg[regnum];
1775 break;
1776 }
1777
1778 /* remember the read result; it is fetched elsewhere */
1779 d->read_result = result;
1780 }
1781
register_w(Bit32u offset,Bit32u data,bool log)1782 void register_w(Bit32u offset, Bit32u data, bool log)
1783 {
1784 Bit32u regnum = (offset) & 0xff;
1785 Bit32u chips = (offset>>8) & 0xf;
1786 Bit64s data64;
1787 static Bit32u count = 0;
1788
1789 if (chips == 0)
1790 chips = 0xf;
1791
1792 /* the first 64 registers can be aliased differently */
1793 if ((offset & 0x800c0) == 0x80000 && v->alt_regmap)
1794 regnum = register_alias_map[offset & 0x3f];
1795 else
1796 regnum = offset & 0xff;
1797
1798 if (log)
1799 BX_DEBUG(("write chip 0x%x reg 0x%x value 0x%08x(%s)", chips, regnum<<2, data, v->regnames[regnum]));
1800
1801 switch (regnum) {
1802 /* Vertex data is 12.4 formatted fixed point */
1803 case fvertexAx:
1804 data = float_to_Bit32s(data, 4);
1805 case vertexAx:
1806 if (chips & 1) v->fbi.ax = (Bit16s)data;
1807 break;
1808
1809 case fvertexAy:
1810 data = float_to_Bit32s(data, 4);
1811 case vertexAy:
1812 if (chips & 1) v->fbi.ay = (Bit16s)data;
1813 break;
1814
1815 case fvertexBx:
1816 data = float_to_Bit32s(data, 4);
1817 case vertexBx:
1818 if (chips & 1) v->fbi.bx = (Bit16s)data;
1819 break;
1820
1821 case fvertexBy:
1822 data = float_to_Bit32s(data, 4);
1823 case vertexBy:
1824 if (chips & 1) v->fbi.by = (Bit16s)data;
1825 break;
1826
1827 case fvertexCx:
1828 data = float_to_Bit32s(data, 4);
1829 case vertexCx:
1830 if (chips & 1) v->fbi.cx = (Bit16s)data;
1831 break;
1832
1833 case fvertexCy:
1834 data = float_to_Bit32s(data, 4);
1835 case vertexCy:
1836 if (chips & 1) v->fbi.cy = (Bit16s)data;
1837 break;
1838
1839 /* RGB data is 12.12 formatted fixed point */
1840 case fstartR:
1841 data = float_to_Bit32s(data, 12);
1842 case startR:
1843 if (chips & 1) v->fbi.startr = (Bit32s)(data << 8) >> 8;
1844 break;
1845
1846 case fstartG:
1847 data = float_to_Bit32s(data, 12);
1848 case startG:
1849 if (chips & 1) v->fbi.startg = (Bit32s)(data << 8) >> 8;
1850 break;
1851
1852 case fstartB:
1853 data = float_to_Bit32s(data, 12);
1854 case startB:
1855 if (chips & 1) v->fbi.startb = (Bit32s)(data << 8) >> 8;
1856 break;
1857
1858 case fstartA:
1859 data = float_to_Bit32s(data, 12);
1860 case startA:
1861 if (chips & 1) v->fbi.starta = (Bit32s)(data << 8) >> 8;
1862 break;
1863
1864 case fdRdX:
1865 data = float_to_Bit32s(data, 12);
1866 case dRdX:
1867 if (chips & 1) v->fbi.drdx = (Bit32s)(data << 8) >> 8;
1868 break;
1869
1870 case fdGdX:
1871 data = float_to_Bit32s(data, 12);
1872 case dGdX:
1873 if (chips & 1) v->fbi.dgdx = (Bit32s)(data << 8) >> 8;
1874 break;
1875
1876 case fdBdX:
1877 data = float_to_Bit32s(data, 12);
1878 case dBdX:
1879 if (chips & 1) v->fbi.dbdx = (Bit32s)(data << 8) >> 8;
1880 break;
1881
1882 case fdAdX:
1883 data = float_to_Bit32s(data, 12);
1884 case dAdX:
1885 if (chips & 1) v->fbi.dadx = (Bit32s)(data << 8) >> 8;
1886 break;
1887
1888 case fdRdY:
1889 data = float_to_Bit32s(data, 12);
1890 case dRdY:
1891 if (chips & 1) v->fbi.drdy = (Bit32s)(data << 8) >> 8;
1892 break;
1893
1894 case fdGdY:
1895 data = float_to_Bit32s(data, 12);
1896 case dGdY:
1897 if (chips & 1) v->fbi.dgdy = (Bit32s)(data << 8) >> 8;
1898 break;
1899
1900 case fdBdY:
1901 data = float_to_Bit32s(data, 12);
1902 case dBdY:
1903 if (chips & 1) v->fbi.dbdy = (Bit32s)(data << 8) >> 8;
1904 break;
1905
1906 case fdAdY:
1907 data = float_to_Bit32s(data, 12);
1908 case dAdY:
1909 if (chips & 1) v->fbi.dady = (Bit32s)(data << 8) >> 8;
1910 break;
1911
1912 /* Z data is 20.12 formatted fixed point */
1913 case fstartZ:
1914 data = float_to_Bit32s(data, 12);
1915 case startZ:
1916 if (chips & 1) v->fbi.startz = (Bit32s)data;
1917 break;
1918
1919 case fdZdX:
1920 data = float_to_Bit32s(data, 12);
1921 case dZdX:
1922 if (chips & 1) v->fbi.dzdx = (Bit32s)data;
1923 break;
1924
1925 case fdZdY:
1926 data = float_to_Bit32s(data, 12);
1927 case dZdY:
1928 if (chips & 1) v->fbi.dzdy = (Bit32s)data;
1929 break;
1930
1931 /* S,T data is 14.18 formatted fixed point, converted to 16.32 internally */
1932 case fstartS:
1933 data64 = float_to_Bit64s(data, 32);
1934 if (chips & 2) v->tmu[0].starts = data64;
1935 if (chips & 4) v->tmu[1].starts = data64;
1936 break;
1937 case startS:
1938 if (chips & 2) v->tmu[0].starts = (Bit64s)(Bit32s)data << 14;
1939 if (chips & 4) v->tmu[1].starts = (Bit64s)(Bit32s)data << 14;
1940 break;
1941
1942 case fstartT:
1943 data64 = float_to_Bit64s(data, 32);
1944 if (chips & 2) v->tmu[0].startt = data64;
1945 if (chips & 4) v->tmu[1].startt = data64;
1946 break;
1947 case startT:
1948 if (chips & 2) v->tmu[0].startt = (Bit64s)(Bit32s)data << 14;
1949 if (chips & 4) v->tmu[1].startt = (Bit64s)(Bit32s)data << 14;
1950 break;
1951
1952 case fdSdX:
1953 data64 = float_to_Bit64s(data, 32);
1954 if (chips & 2) v->tmu[0].dsdx = data64;
1955 if (chips & 4) v->tmu[1].dsdx = data64;
1956 break;
1957 case dSdX:
1958 if (chips & 2) v->tmu[0].dsdx = (Bit64s)(Bit32s)data << 14;
1959 if (chips & 4) v->tmu[1].dsdx = (Bit64s)(Bit32s)data << 14;
1960 break;
1961
1962 case fdTdX:
1963 data64 = float_to_Bit64s(data, 32);
1964 if (chips & 2) v->tmu[0].dtdx = data64;
1965 if (chips & 4) v->tmu[1].dtdx = data64;
1966 break;
1967 case dTdX:
1968 if (chips & 2) v->tmu[0].dtdx = (Bit64s)(Bit32s)data << 14;
1969 if (chips & 4) v->tmu[1].dtdx = (Bit64s)(Bit32s)data << 14;
1970 break;
1971
1972 case fdSdY:
1973 data64 = float_to_Bit64s(data, 32);
1974 if (chips & 2) v->tmu[0].dsdy = data64;
1975 if (chips & 4) v->tmu[1].dsdy = data64;
1976 break;
1977 case dSdY:
1978 if (chips & 2) v->tmu[0].dsdy = (Bit64s)(Bit32s)data << 14;
1979 if (chips & 4) v->tmu[1].dsdy = (Bit64s)(Bit32s)data << 14;
1980 break;
1981
1982 case fdTdY:
1983 data64 = float_to_Bit64s(data, 32);
1984 if (chips & 2) v->tmu[0].dtdy = data64;
1985 if (chips & 4) v->tmu[1].dtdy = data64;
1986 break;
1987 case dTdY:
1988 if (chips & 2) v->tmu[0].dtdy = (Bit64s)(Bit32s)data << 14;
1989 if (chips & 4) v->tmu[1].dtdy = (Bit64s)(Bit32s)data << 14;
1990 break;
1991
1992 /* W data is 2.30 formatted fixed point, converted to 16.32 internally */
1993 case fstartW:
1994 data64 = float_to_Bit64s(data, 32);
1995 if (chips & 1) v->fbi.startw = data64;
1996 if (chips & 2) v->tmu[0].startw = data64;
1997 if (chips & 4) v->tmu[1].startw = data64;
1998 break;
1999 case startW:
2000 if (chips & 1) v->fbi.startw = (Bit64s)(Bit32s)data << 2;
2001 if (chips & 2) v->tmu[0].startw = (Bit64s)(Bit32s)data << 2;
2002 if (chips & 4) v->tmu[1].startw = (Bit64s)(Bit32s)data << 2;
2003 break;
2004
2005 case fdWdX:
2006 data64 = float_to_Bit64s(data, 32);
2007 if (chips & 1) v->fbi.dwdx = data64;
2008 if (chips & 2) v->tmu[0].dwdx = data64;
2009 if (chips & 4) v->tmu[1].dwdx = data64;
2010 break;
2011 case dWdX:
2012 if (chips & 1) v->fbi.dwdx = (Bit64s)(Bit32s)data << 2;
2013 if (chips & 2) v->tmu[0].dwdx = (Bit64s)(Bit32s)data << 2;
2014 if (chips & 4) v->tmu[1].dwdx = (Bit64s)(Bit32s)data << 2;
2015 break;
2016
2017 case fdWdY:
2018 data64 = float_to_Bit64s(data, 32);
2019 if (chips & 1) v->fbi.dwdy = data64;
2020 if (chips & 2) v->tmu[0].dwdy = data64;
2021 if (chips & 4) v->tmu[1].dwdy = data64;
2022 break;
2023 case dWdY:
2024 if (chips & 1) v->fbi.dwdy = (Bit64s)(Bit32s)data << 2;
2025 if (chips & 2) v->tmu[0].dwdy = (Bit64s)(Bit32s)data << 2;
2026 if (chips & 4) v->tmu[1].dwdy = (Bit64s)(Bit32s)data << 2;
2027 break;
2028 /* setup bits */
2029 case sARGB:
2030 if (chips & 1)
2031 {
2032 v->reg[sAlpha].f = (float)RGB_ALPHA(data);
2033 v->reg[sRed].f = (float)RGB_RED(data);
2034 v->reg[sGreen].f = (float)RGB_GREEN(data);
2035 v->reg[sBlue].f = (float)RGB_BLUE(data);
2036 }
2037 break;
2038
2039 /* mask off invalid bits for different cards */
2040 case fbzColorPath:
2041 poly_wait(v->poly, v->regnames[regnum]);
2042 if (v->type < VOODOO_2)
2043 data &= 0x0fffffff;
2044 if (chips & 1) v->reg[fbzColorPath].u = data;
2045 break;
2046
2047 case fbzMode:
2048 poly_wait(v->poly, v->regnames[regnum]);
2049 if (v->type < VOODOO_2)
2050 data &= 0x001fffff;
2051 if (chips & 1) v->reg[fbzMode].u = data;
2052 break;
2053
2054 case fogMode:
2055 poly_wait(v->poly, v->regnames[regnum]);
2056 if (v->type < VOODOO_2)
2057 data &= 0x0000003f;
2058 if (chips & 1) v->reg[fogMode].u = data;
2059 break;
2060
2061 /* triangle drawing */
2062 case triangleCMD:
2063 v->fbi.cheating_allowed = (v->fbi.ax != 0 || v->fbi.ay != 0 || v->fbi.bx > 50 || v->fbi.by != 0 || v->fbi.cx != 0 || v->fbi.cy > 50);
2064 v->fbi.sign = data;
2065 triangle();
2066 break;
2067
2068 case ftriangleCMD:
2069 v->fbi.cheating_allowed = 1;
2070 v->fbi.sign = data;
2071 triangle();
2072 break;
2073
2074 case sBeginTriCMD:
2075 begin_triangle();
2076 break;
2077
2078 case sDrawTriCMD:
2079 draw_triangle();
2080 break;
2081
2082 /* other commands */
2083 case nopCMD:
2084 poly_wait(v->poly, v->regnames[regnum]);
2085 if (data & 1)
2086 reset_counters(v);
2087 if (data & 2)
2088 v->reg[fbiTrianglesOut].u = 0;
2089 break;
2090
2091 case fastfillCMD:
2092 fastfill(v);
2093 break;
2094
2095 case swapbufferCMD:
2096 poly_wait(v->poly, v->regnames[regnum]);
2097 swapbuffer(v, data);
2098 break;
2099 /* gamma table access -- Voodoo/Voodoo2 only */
2100 case clutData:
2101 if (v->type <= VOODOO_2 && (chips & 1))
2102 {
2103 poly_wait(v->poly, v->regnames[regnum]);
2104 if (!FBIINIT1_VIDEO_TIMING_RESET(v->reg[fbiInit1].u))
2105 {
2106 int index = data >> 24;
2107 if (index <= 32)
2108 {
2109 v->fbi.clut[index] = data;
2110 v->fbi.clut_dirty = 1;
2111 }
2112 }
2113 else
2114 BX_DEBUG(("clutData ignored because video timing reset = 1"));
2115 }
2116 break;
2117 /* nccTable entries are processed and expanded immediately */
2118 case nccTable+0:
2119 case nccTable+1:
2120 case nccTable+2:
2121 case nccTable+3:
2122 case nccTable+4:
2123 case nccTable+5:
2124 case nccTable+6:
2125 case nccTable+7:
2126 case nccTable+8:
2127 case nccTable+9:
2128 case nccTable+10:
2129 case nccTable+11:
2130 poly_wait(v->poly, v->regnames[regnum]);
2131 if (chips & 2) ncc_table_write(&v->tmu[0].ncc[0], regnum - nccTable, data);
2132 if (chips & 4) ncc_table_write(&v->tmu[1].ncc[0], regnum - nccTable, data);
2133 break;
2134
2135 case nccTable+12:
2136 case nccTable+13:
2137 case nccTable+14:
2138 case nccTable+15:
2139 case nccTable+16:
2140 case nccTable+17:
2141 case nccTable+18:
2142 case nccTable+19:
2143 case nccTable+20:
2144 case nccTable+21:
2145 case nccTable+22:
2146 case nccTable+23:
2147 poly_wait(v->poly, v->regnames[regnum]);
2148 if (chips & 2) ncc_table_write(&v->tmu[0].ncc[1], regnum - (nccTable+12), data);
2149 if (chips & 4) ncc_table_write(&v->tmu[1].ncc[1], regnum - (nccTable+12), data);
2150 break;
2151
2152 /* fogTable entries are processed and expanded immediately */
2153 case fogTable+0:
2154 case fogTable+1:
2155 case fogTable+2:
2156 case fogTable+3:
2157 case fogTable+4:
2158 case fogTable+5:
2159 case fogTable+6:
2160 case fogTable+7:
2161 case fogTable+8:
2162 case fogTable+9:
2163 case fogTable+10:
2164 case fogTable+11:
2165 case fogTable+12:
2166 case fogTable+13:
2167 case fogTable+14:
2168 case fogTable+15:
2169 case fogTable+16:
2170 case fogTable+17:
2171 case fogTable+18:
2172 case fogTable+19:
2173 case fogTable+20:
2174 case fogTable+21:
2175 case fogTable+22:
2176 case fogTable+23:
2177 case fogTable+24:
2178 case fogTable+25:
2179 case fogTable+26:
2180 case fogTable+27:
2181 case fogTable+28:
2182 case fogTable+29:
2183 case fogTable+30:
2184 case fogTable+31:
2185 poly_wait(v->poly, v->regnames[regnum]);
2186 if (chips & 1)
2187 {
2188 int base = 2 * (regnum - fogTable);
2189 v->fbi.fogdelta[base + 0] = (data >> 0) & 0xff;
2190 v->fbi.fogblend[base + 0] = (data >> 8) & 0xff;
2191 v->fbi.fogdelta[base + 1] = (data >> 16) & 0xff;
2192 v->fbi.fogblend[base + 1] = (data >> 24) & 0xff;
2193 }
2194 break;
2195
2196 /* texture modifications cause us to recompute everything */
2197 case textureMode:
2198 if (((chips & 6) > 0) && TEXMODE_TRILINEAR(data)) {
2199 if (count < 50) BX_INFO(("Trilinear textures not implemented yet"));
2200 count++;
2201 }
2202 case tLOD:
2203 case tDetail:
2204 case texBaseAddr:
2205 case texBaseAddr_1:
2206 case texBaseAddr_2:
2207 case texBaseAddr_3_8:
2208 poly_wait(v->poly, v->regnames[regnum]);
2209 if (chips & 2)
2210 {
2211 v->tmu[0].reg[regnum].u = data;
2212 v->tmu[0].regdirty = 1;
2213 }
2214 if (chips & 4)
2215 {
2216 v->tmu[1].reg[regnum].u = data;
2217 v->tmu[1].regdirty = 1;
2218 }
2219 break;
2220
2221 case trexInit1:
2222 /* send tmu config data to the frame buffer */
2223 v->send_config = TREXINIT_SEND_TMU_CONFIG(data);
2224 goto default_case;
2225 break;
2226
2227 case userIntrCMD:
2228 BX_ERROR(("Writing to register %s not supported yet", v->regnames[regnum]));
2229 v->reg[regnum].u = data;
2230 break;
2231
2232 case bltData:
2233 v->reg[regnum].u = data;
2234 if (BLT.h2s_mode) {
2235 voodoo2_bitblt_cpu_to_screen(data);
2236 } else {
2237 BX_ERROR(("Write to register %s ignored", v->regnames[regnum]));
2238 }
2239 break;
2240
2241 case bltSrcChromaRange:
2242 v->reg[regnum].u = data;
2243 BLT.src_col_min = (Bit16u)data;
2244 BLT.src_col_max = (Bit16u)(data >> 16);
2245 break;
2246
2247 case bltDstChromaRange:
2248 v->reg[regnum].u = data;
2249 BLT.dst_col_min = (Bit16u)data;
2250 BLT.dst_col_max = (Bit16u)(data >> 16);
2251 break;
2252
2253 case bltClipX:
2254 v->reg[regnum].u = data;
2255 BLT.clipx0 = (Bit16u)(data >> 16);
2256 BLT.clipx1 = (Bit16u)(data & 0x0fff);
2257 break;
2258
2259 case bltClipY:
2260 v->reg[regnum].u = data;
2261 BLT.clipy0 = (Bit16u)(data >> 16);
2262 BLT.clipy1 = (Bit16u)(data & 0x0fff);
2263 break;
2264
2265 case bltRop:
2266 v->reg[regnum].u = data;
2267 BLT.rop[0] = (Bit8u)(data & 0x0f);
2268 BLT.rop[1] = (Bit8u)((data >> 4) & 0x0f);
2269 BLT.rop[2] = (Bit8u)((data >> 8) & 0x0f);
2270 BLT.rop[3] = (Bit8u)((data >> 12) & 0x0f);
2271 break;
2272
2273 case bltColor:
2274 v->reg[regnum].u = data;
2275 BLT.fgcolor[0] = (Bit8u)data;
2276 BLT.fgcolor[1] = (Bit8u)(data >> 8);
2277 BLT.bgcolor[0] = (Bit8u)(data >> 16);
2278 BLT.bgcolor[1] = (Bit8u)(data >> 24);
2279 break;
2280
2281 case bltDstXY:
2282 case bltSize:
2283 case bltCommand:
2284 v->reg[regnum].u = data;
2285 if ((data >> 31) & 1) {
2286 voodoo2_bitblt();
2287 }
2288 break;
2289
2290 case colBufferAddr: /* Banshee */
2291 if (v->type >= VOODOO_BANSHEE && (chips & 1)) {
2292 v->fbi.rgboffs[1] = data & v->fbi.mask & ~0x0f;
2293 }
2294 break;
2295
2296 case colBufferStride: /* Banshee */
2297 if (v->type >= VOODOO_BANSHEE && (chips & 1)) {
2298 if (data & 0x8000)
2299 v->fbi.rowpixels = (data & 0x7f) << 6;
2300 else
2301 v->fbi.rowpixels = (data & 0x3fff) >> 1;
2302 }
2303 break;
2304
2305 case auxBufferAddr: /* Banshee */
2306 if (v->type >= VOODOO_BANSHEE && (chips & 1)) {
2307 v->fbi.auxoffs = data & v->fbi.mask & ~0x0f;
2308 }
2309 break;
2310
2311 case auxBufferStride: /* Banshee */
2312 if (v->type >= VOODOO_BANSHEE && (chips & 1)) {
2313 Bit32u rowpixels;
2314
2315 if (data & 0x8000)
2316 rowpixels = (data & 0x7f) << 6;
2317 else
2318 rowpixels = (data & 0x3fff) >> 1;
2319 if (v->fbi.rowpixels != rowpixels)
2320 BX_ERROR(("aux buffer stride differs from color buffer stride"));
2321 }
2322 break;
2323
2324 /* these registers are referenced in the renderer; we must wait for pending work before changing */
2325 case chromaRange:
2326 case chromaKey:
2327 case alphaMode:
2328 case fogColor:
2329 case stipple:
2330 case zaColor:
2331 case color1:
2332 case color0:
2333 case clipLowYHighY:
2334 case clipLeftRight:
2335 poly_wait(v->poly, v->regnames[regnum]);
2336 /* fall through to default implementation */
2337
2338 /* by default, just feed the data to the chips */
2339 default:
2340 default_case:
2341 if (chips & 1) v->reg[0x000 + regnum].u = data;
2342 if (chips & 2) v->reg[0x100 + regnum].u = data;
2343 if (chips & 4) v->reg[0x200 + regnum].u = data;
2344 if (chips & 8) v->reg[0x300 + regnum].u = data;
2345 break;
2346 }
2347 }
2348
texture_w(Bit32u offset,Bit32u data)2349 Bit32s texture_w(Bit32u offset, Bit32u data)
2350 {
2351 int tmunum = (offset >> 19) & 0x03;
2352 BX_DEBUG(("write TMU%d offset 0x%x value 0x%x", tmunum, offset, data));
2353
2354 tmu_state *t;
2355
2356 /* point to the right TMU */
2357 if (!(v->chipmask & (2 << tmunum)) || (tmunum >= MAX_TMU))
2358 return 0;
2359 t = &v->tmu[tmunum];
2360
2361 if (TEXLOD_TDIRECT_WRITE(t->reg[tLOD].u))
2362 BX_PANIC(("Texture direct write!"));
2363
2364 /* wait for any outstanding work to finish */
2365 poly_wait(v->poly, "Texture write");
2366
2367 /* update texture info if dirty */
2368 if (t->regdirty)
2369 recompute_texture_params(t);
2370
2371 /* swizzle the data */
2372 if (TEXLOD_TDATA_SWIZZLE(t->reg[tLOD].u))
2373 data = bx_bswap32(data);
2374 if (TEXLOD_TDATA_SWAP(t->reg[tLOD].u))
2375 data = (data >> 16) | (data << 16);
2376
2377 /* 8-bit texture case */
2378 if (TEXMODE_FORMAT(t->reg[textureMode].u) < 8)
2379 {
2380 int lod, tt, ts;
2381 Bit32u tbaseaddr;
2382 Bit8u *dest;
2383
2384 /* extract info */
2385 if (v->type <= VOODOO_2)
2386 {
2387 lod = (offset >> 15) & 0x0f;
2388 tt = (offset >> 7) & 0xff;
2389
2390 /* old code has a bit about how this is broken in gauntleg unless we always look at TMU0 */
2391 if (TEXMODE_SEQ_8_DOWNLD(v->tmu[0].reg[textureMode].u))
2392 ts = (offset << 2) & 0xfc;
2393 else
2394 ts = (offset << 1) & 0xfc;
2395
2396 /* validate parameters */
2397 if (lod > 8)
2398 return 0;
2399
2400 /* compute the base address */
2401 tbaseaddr = t->lodoffset[lod];
2402 tbaseaddr += tt * ((t->wmask >> lod) + 1) + ts;
2403
2404 if (LOG_TEXTURE_RAM) BX_DEBUG(("Texture 8-bit w: lod=%d s=%d t=%d data=0x%08x", lod, ts, tt, data));
2405 }
2406 else
2407 {
2408 tbaseaddr = t->lodoffset[0] + offset*4;
2409
2410 if (LOG_TEXTURE_RAM) BX_DEBUG(("Texture 16-bit w: offset=0x%x data=0x%08x", offset*4, data));
2411 }
2412
2413 /* write the four bytes in little-endian order */
2414 dest = t->ram;
2415 tbaseaddr &= t->mask;
2416 dest[BYTE4_XOR_LE(tbaseaddr + 0)] = (data >> 0) & 0xff;
2417 dest[BYTE4_XOR_LE(tbaseaddr + 1)] = (data >> 8) & 0xff;
2418 dest[BYTE4_XOR_LE(tbaseaddr + 2)] = (data >> 16) & 0xff;
2419 dest[BYTE4_XOR_LE(tbaseaddr + 3)] = (data >> 24) & 0xff;
2420 }
2421
2422 /* 16-bit texture case */
2423 else
2424 {
2425 int lod, tt, ts;
2426 Bit32u tbaseaddr;
2427 Bit16u *dest;
2428
2429 /* extract info */
2430 if (v->type <= VOODOO_2)
2431 {
2432 tmunum = (offset >> 19) & 0x03;
2433 lod = (offset >> 15) & 0x0f;
2434 tt = (offset >> 7) & 0xff;
2435 ts = (offset << 1) & 0xfe;
2436
2437 /* validate parameters */
2438 if (lod > 8)
2439 return 0;
2440
2441 /* compute the base address */
2442 tbaseaddr = t->lodoffset[lod];
2443 tbaseaddr += 2 * (tt * ((t->wmask >> lod) + 1) + ts);
2444
2445 if (LOG_TEXTURE_RAM) BX_DEBUG(("Texture 16-bit w: lod=%d s=%d t=%d data=%08X", lod, ts, tt, data));
2446 }
2447 else
2448 {
2449 tbaseaddr = t->lodoffset[0] + offset*4;
2450
2451 if (LOG_TEXTURE_RAM) BX_DEBUG(("Texture 16-bit w: offset=0x%x data=0x%08x", offset*4, data));
2452 }
2453
2454 /* write the two words in little-endian order */
2455 dest = (Bit16u *)t->ram;
2456 tbaseaddr &= t->mask;
2457 tbaseaddr >>= 1;
2458 dest[BYTE_XOR_LE(tbaseaddr + 0)] = (data >> 0) & 0xffff;
2459 dest[BYTE_XOR_LE(tbaseaddr + 1)] = (data >> 16) & 0xffff;
2460 }
2461
2462 return 0;
2463 }
2464
/*
 * lfb_w - handle a 32-bit write to the linear frame buffer.
 *
 * offset   - dword offset into LFB space (converted to x/y below)
 * data     - 32-bit value written by the host
 * mem_mask - byte-enable mask for the write (which lanes are valid)
 *
 * Decodes up to two pixels of color/alpha/depth from 'data' according to
 * the lfbMode write format and RGBA lane ordering, then either stores them
 * raw or pushes them through the full pixel pipeline (depth test, chroma
 * key, alpha test, fog, dithering).  Always returns 0.
 *
 * 'mask' holds two nibbles of LFB_*_PRESENT flags, one per pixel; the
 * per-pixel loops below consume it four bits at a time.
 */
Bit32u lfb_w(Bit32u offset, Bit32u data, Bit32u mem_mask)
{
  Bit16u *dest, *depth;
  Bit32u destmax, depthmax;
  Bit32u forcefront=0;

  /* per-pixel staging: color channels, alpha, and depth for up to 2 pixels */
  int sr[2], sg[2], sb[2], sa[2], sw[2];
  int x, y, scry, mask;
  int pix, destbuf;

  BX_DEBUG(("write LFB offset 0x%x value 0x%08x", offset, data));

  /* byte swizzling */
  if (LFBMODE_BYTE_SWIZZLE_WRITES(v->reg[lfbMode].u))
  {
    data = bx_bswap32(data);
    mem_mask = bx_bswap32(mem_mask);
  }

  /* word swapping */
  if (LFBMODE_WORD_SWAP_WRITES(v->reg[lfbMode].u))
  {
    data = (data << 16) | (data >> 16);
    mem_mask = (mem_mask << 16) | (mem_mask >> 16);
  }

  /* extract default depth and alpha values from zaColor */
  sw[0] = sw[1] = v->reg[zaColor].u & 0xffff;
  sa[0] = sa[1] = v->reg[zaColor].u >> 24;

  /* first extract A,R,G,B from the data; 16-bit formats pack two pixels
     per dword, so those cases double the X offset (offset <<= 1) */
  switch (LFBMODE_WRITE_FORMAT(v->reg[lfbMode].u) + 16 * LFBMODE_RGBA_LANES(v->reg[lfbMode].u))
  {
    case 16*0 + 0:    /* ARGB, 16-bit RGB 5-6-5 */
    case 16*2 + 0:    /* RGBA, 16-bit RGB 5-6-5 */
      EXTRACT_565_TO_888(data, sr[0], sg[0], sb[0]);
      EXTRACT_565_TO_888(data >> 16, sr[1], sg[1], sb[1]);
      mask = LFB_RGB_PRESENT | (LFB_RGB_PRESENT << 4);
      offset <<= 1;
      break;
    case 16*1 + 0:    /* ABGR, 16-bit RGB 5-6-5 */
    case 16*3 + 0:    /* BGRA, 16-bit RGB 5-6-5 */
      EXTRACT_565_TO_888(data, sb[0], sg[0], sr[0]);
      EXTRACT_565_TO_888(data >> 16, sb[1], sg[1], sr[1]);
      mask = LFB_RGB_PRESENT | (LFB_RGB_PRESENT << 4);
      offset <<= 1;
      break;

    case 16*0 + 1:    /* ARGB, 16-bit RGB x-5-5-5 */
      EXTRACT_x555_TO_888(data, sr[0], sg[0], sb[0]);
      EXTRACT_x555_TO_888(data >> 16, sr[1], sg[1], sb[1]);
      mask = LFB_RGB_PRESENT | (LFB_RGB_PRESENT << 4);
      offset <<= 1;
      break;
    case 16*1 + 1:    /* ABGR, 16-bit RGB x-5-5-5 */
      EXTRACT_x555_TO_888(data, sb[0], sg[0], sr[0]);
      EXTRACT_x555_TO_888(data >> 16, sb[1], sg[1], sr[1]);
      mask = LFB_RGB_PRESENT | (LFB_RGB_PRESENT << 4);
      offset <<= 1;
      break;
    case 16*2 + 1:    /* RGBA, 16-bit RGB x-5-5-5 */
      EXTRACT_555x_TO_888(data, sr[0], sg[0], sb[0]);
      EXTRACT_555x_TO_888(data >> 16, sr[1], sg[1], sb[1]);
      mask = LFB_RGB_PRESENT | (LFB_RGB_PRESENT << 4);
      offset <<= 1;
      break;
    case 16*3 + 1:    /* BGRA, 16-bit RGB x-5-5-5 */
      EXTRACT_555x_TO_888(data, sb[0], sg[0], sr[0]);
      EXTRACT_555x_TO_888(data >> 16, sb[1], sg[1], sr[1]);
      mask = LFB_RGB_PRESENT | (LFB_RGB_PRESENT << 4);
      offset <<= 1;
      break;

    case 16*0 + 2:    /* ARGB, 16-bit ARGB 1-5-5-5 */
      EXTRACT_1555_TO_8888(data, sa[0], sr[0], sg[0], sb[0]);
      EXTRACT_1555_TO_8888(data >> 16, sa[1], sr[1], sg[1], sb[1]);
      mask = LFB_RGB_PRESENT | LFB_ALPHA_PRESENT | ((LFB_RGB_PRESENT | LFB_ALPHA_PRESENT) << 4);
      offset <<= 1;
      break;
    case 16*1 + 2:    /* ABGR, 16-bit ARGB 1-5-5-5 */
      EXTRACT_1555_TO_8888(data, sa[0], sb[0], sg[0], sr[0]);
      EXTRACT_1555_TO_8888(data >> 16, sa[1], sb[1], sg[1], sr[1]);
      mask = LFB_RGB_PRESENT | LFB_ALPHA_PRESENT | ((LFB_RGB_PRESENT | LFB_ALPHA_PRESENT) << 4);
      offset <<= 1;
      break;
    case 16*2 + 2:    /* RGBA, 16-bit ARGB 1-5-5-5 */
      EXTRACT_5551_TO_8888(data, sr[0], sg[0], sb[0], sa[0]);
      EXTRACT_5551_TO_8888(data >> 16, sr[1], sg[1], sb[1], sa[1]);
      mask = LFB_RGB_PRESENT | LFB_ALPHA_PRESENT | ((LFB_RGB_PRESENT | LFB_ALPHA_PRESENT) << 4);
      offset <<= 1;
      break;
    case 16*3 + 2:    /* BGRA, 16-bit ARGB 1-5-5-5 */
      EXTRACT_5551_TO_8888(data, sb[0], sg[0], sr[0], sa[0]);
      EXTRACT_5551_TO_8888(data >> 16, sb[1], sg[1], sr[1], sa[1]);
      mask = LFB_RGB_PRESENT | LFB_ALPHA_PRESENT | ((LFB_RGB_PRESENT | LFB_ALPHA_PRESENT) << 4);
      offset <<= 1;
      break;

    case 16*0 + 4:    /* ARGB, 32-bit RGB x-8-8-8 */
      EXTRACT_x888_TO_888(data, sr[0], sg[0], sb[0]);
      mask = LFB_RGB_PRESENT;
      break;
    case 16*1 + 4:    /* ABGR, 32-bit RGB x-8-8-8 */
      EXTRACT_x888_TO_888(data, sb[0], sg[0], sr[0]);
      mask = LFB_RGB_PRESENT;
      break;
    case 16*2 + 4:    /* RGBA, 32-bit RGB x-8-8-8 */
      EXTRACT_888x_TO_888(data, sr[0], sg[0], sb[0]);
      mask = LFB_RGB_PRESENT;
      break;
    case 16*3 + 4:    /* BGRA, 32-bit RGB x-8-8-8 */
      EXTRACT_888x_TO_888(data, sb[0], sg[0], sr[0]);
      mask = LFB_RGB_PRESENT;
      break;

    case 16*0 + 5:    /* ARGB, 32-bit ARGB 8-8-8-8 */
      EXTRACT_8888_TO_8888(data, sa[0], sr[0], sg[0], sb[0]);
      mask = LFB_RGB_PRESENT | LFB_ALPHA_PRESENT;
      break;
    case 16*1 + 5:    /* ABGR, 32-bit ARGB 8-8-8-8 */
      EXTRACT_8888_TO_8888(data, sa[0], sb[0], sg[0], sr[0]);
      mask = LFB_RGB_PRESENT | LFB_ALPHA_PRESENT;
      break;
    case 16*2 + 5:    /* RGBA, 32-bit ARGB 8-8-8-8 */
      EXTRACT_8888_TO_8888(data, sr[0], sg[0], sb[0], sa[0]);
      mask = LFB_RGB_PRESENT | LFB_ALPHA_PRESENT;
      break;
    case 16*3 + 5:    /* BGRA, 32-bit ARGB 8-8-8-8 */
      EXTRACT_8888_TO_8888(data, sb[0], sg[0], sr[0], sa[0]);
      mask = LFB_RGB_PRESENT | LFB_ALPHA_PRESENT;
      break;

    case 16*0 + 12:   /* ARGB, 32-bit depth+RGB 5-6-5 */
    case 16*2 + 12:   /* RGBA, 32-bit depth+RGB 5-6-5 */
      sw[0] = data >> 16;
      EXTRACT_565_TO_888(data, sr[0], sg[0], sb[0]);
      mask = LFB_RGB_PRESENT | LFB_DEPTH_PRESENT_MSW;
      break;
    case 16*1 + 12:   /* ABGR, 32-bit depth+RGB 5-6-5 */
    case 16*3 + 12:   /* BGRA, 32-bit depth+RGB 5-6-5 */
      sw[0] = data >> 16;
      EXTRACT_565_TO_888(data, sb[0], sg[0], sr[0]);
      mask = LFB_RGB_PRESENT | LFB_DEPTH_PRESENT_MSW;
      break;

    case 16*0 + 13:   /* ARGB, 32-bit depth+RGB x-5-5-5 */
      sw[0] = data >> 16;
      EXTRACT_x555_TO_888(data, sr[0], sg[0], sb[0]);
      mask = LFB_RGB_PRESENT | LFB_DEPTH_PRESENT_MSW;
      break;
    case 16*1 + 13:   /* ABGR, 32-bit depth+RGB x-5-5-5 */
      sw[0] = data >> 16;
      EXTRACT_x555_TO_888(data, sb[0], sg[0], sr[0]);
      mask = LFB_RGB_PRESENT | LFB_DEPTH_PRESENT_MSW;
      break;
    case 16*2 + 13:   /* RGBA, 32-bit depth+RGB x-5-5-5 */
      sw[0] = data >> 16;
      EXTRACT_555x_TO_888(data, sr[0], sg[0], sb[0]);
      mask = LFB_RGB_PRESENT | LFB_DEPTH_PRESENT_MSW;
      break;
    case 16*3 + 13:   /* BGRA, 32-bit depth+RGB x-5-5-5 */
      sw[0] = data >> 16;
      EXTRACT_555x_TO_888(data, sb[0], sg[0], sr[0]);
      mask = LFB_RGB_PRESENT | LFB_DEPTH_PRESENT_MSW;
      break;

    case 16*0 + 14:   /* ARGB, 32-bit depth+ARGB 1-5-5-5 */
      sw[0] = data >> 16;
      EXTRACT_1555_TO_8888(data, sa[0], sr[0], sg[0], sb[0]);
      mask = LFB_RGB_PRESENT | LFB_ALPHA_PRESENT | LFB_DEPTH_PRESENT_MSW;
      break;
    case 16*1 + 14:   /* ABGR, 32-bit depth+ARGB 1-5-5-5 */
      sw[0] = data >> 16;
      EXTRACT_1555_TO_8888(data, sa[0], sb[0], sg[0], sr[0]);
      mask = LFB_RGB_PRESENT | LFB_ALPHA_PRESENT | LFB_DEPTH_PRESENT_MSW;
      break;
    case 16*2 + 14:   /* RGBA, 32-bit depth+ARGB 1-5-5-5 */
      sw[0] = data >> 16;
      EXTRACT_5551_TO_8888(data, sr[0], sg[0], sb[0], sa[0]);
      mask = LFB_RGB_PRESENT | LFB_ALPHA_PRESENT | LFB_DEPTH_PRESENT_MSW;
      break;
    case 16*3 + 14:   /* BGRA, 32-bit depth+ARGB 1-5-5-5 */
      sw[0] = data >> 16;
      EXTRACT_5551_TO_8888(data, sb[0], sg[0], sr[0], sa[0]);
      mask = LFB_RGB_PRESENT | LFB_ALPHA_PRESENT | LFB_DEPTH_PRESENT_MSW;
      break;

    case 16*0 + 15:   /* ARGB, 16-bit depth */
    case 16*1 + 15:   /* ARGB, 16-bit depth */
    case 16*2 + 15:   /* ARGB, 16-bit depth */
    case 16*3 + 15:   /* ARGB, 16-bit depth */
      sw[0] = data & 0xffff;
      sw[1] = data >> 16;
      mask = LFB_DEPTH_PRESENT | (LFB_DEPTH_PRESENT << 4);
      offset <<= 1;
      break;

    default:          /* reserved */
      return 0;
  }

  /* compute X,Y */
  x = (offset << 0) & ((1 << v->fbi.lfb_stride) - 1);
  y = (offset >> v->fbi.lfb_stride) & 0x7ff;

  /* adjust the mask based on which half of the data is written */
  if (!ACCESSING_BITS_0_15)
    mask &= ~(0x0f - LFB_DEPTH_PRESENT_MSW);
  if (!ACCESSING_BITS_16_31)
    mask &= ~(0xf0 + LFB_DEPTH_PRESENT_MSW);

  /* select the target buffer */
  /* NOTE(review): on Banshee forcefront is always 0 here, so destbuf is
     always 1 (back buffer) on that path — confirm this is intended */
  destbuf = (v->type >= VOODOO_BANSHEE) ? (!forcefront) : LFBMODE_WRITE_BUFFER_SELECT(v->reg[lfbMode].u);
  switch (destbuf)
  {
    case 0:     /* front buffer */
      dest = (Bit16u *)(v->fbi.ram + v->fbi.rgboffs[v->fbi.frontbuf]);
      destmax = (v->fbi.mask + 1 - v->fbi.rgboffs[v->fbi.frontbuf]) / 2;
      v->fbi.video_changed = 1;
      break;

    case 1:     /* back buffer */
      dest = (Bit16u *)(v->fbi.ram + v->fbi.rgboffs[v->fbi.backbuf]);
      destmax = (v->fbi.mask + 1 - v->fbi.rgboffs[v->fbi.backbuf]) / 2;
      break;

    default:    /* reserved */
      return 0;
  }
  depth = (Bit16u *)(v->fbi.ram + v->fbi.auxoffs);
  depthmax = (v->fbi.mask + 1 - v->fbi.auxoffs) / 2;

  /* wait for any outstanding work to finish */
  poly_wait(v->poly, "LFB Write");

  /* simple case: no pipeline */
  if (!LFBMODE_ENABLE_PIXEL_PIPELINE(v->reg[lfbMode].u))
  {
    DECLARE_DITHER_POINTERS;
    UNUSED(dither);
    Bit32u bufoffs;

    if (LOG_LFB) BX_DEBUG(("VOODOO.%d.LFB:write raw mode %X (%d,%d) = %08X & %08X", v->index, LFBMODE_WRITE_FORMAT(v->reg[lfbMode].u), x, y, data, mem_mask));

    /* determine the screen Y */
    scry = y;
    if (LFBMODE_Y_ORIGIN(v->reg[lfbMode].u))
      scry = (v->fbi.yorigin - y) & 0x3ff;

    /* advance pointers to the proper row */
    bufoffs = scry * v->fbi.rowpixels + x;

    /* compute dithering */
    COMPUTE_DITHER_POINTERS(v->reg[fbzMode].u, y);

    /* loop over up to two pixels (one mask nibble per pixel) */
    for (pix = 0; mask; pix++)
    {
      /* make sure we care about this pixel */
      if (mask & 0x0f)
      {
        /* write to the RGB buffer */
        if ((mask & LFB_RGB_PRESENT) && bufoffs < destmax)
        {
          /* apply dithering and write to the screen as RGB 5-6-5 */
          APPLY_DITHER(v->reg[fbzMode].u, x, dither_lookup, sr[pix], sg[pix], sb[pix]);
          dest[bufoffs] = (sr[pix] << 11) | (sg[pix] << 5) | sb[pix];
        }

        /* make sure we have an aux buffer to write to */
        if (depth && bufoffs < depthmax)
        {
          /* write to the alpha buffer */
          if ((mask & LFB_ALPHA_PRESENT) && FBZMODE_ENABLE_ALPHA_PLANES(v->reg[fbzMode].u))
            depth[bufoffs] = sa[pix];

          /* write to the depth buffer */
          if ((mask & (LFB_DEPTH_PRESENT | LFB_DEPTH_PRESENT_MSW)) && !FBZMODE_ENABLE_ALPHA_PLANES(v->reg[fbzMode].u))
            depth[bufoffs] = sw[pix];
        }

        /* track pixel writes to the frame buffer regardless of mask */
        v->reg[fbiPixelsOut].u++;
      }

      /* advance our pointers */
      bufoffs++;
      x++;
      mask >>= 4;
    }
  }
  /* tricky case: run the full pixel pipeline on the pixel */
  else
  {
    DECLARE_DITHER_POINTERS;

    if (LOG_LFB) BX_DEBUG(("VOODOO.%d.LFB:write pipelined mode %X (%d,%d) = %08X & %08X", v->index, LFBMODE_WRITE_FORMAT(v->reg[lfbMode].u), x, y, data, mem_mask));

    /* determine the screen Y (pipelined writes honor fbzMode's Y origin) */
    scry = y;
    if (FBZMODE_Y_ORIGIN(v->reg[fbzMode].u))
      scry = (v->fbi.yorigin - y) & 0x3ff;

    /* advance pointers to the proper row */
    dest += scry * v->fbi.rowpixels;
    if (depth)
      depth += scry * v->fbi.rowpixels;

    /* compute dithering */
    COMPUTE_DITHER_POINTERS(v->reg[fbzMode].u, y);

    /* loop over up to two pixels (one mask nibble per pixel) */
    for (pix = 0; mask; pix++)
    {
      /* make sure we care about this pixel */
      if (mask & 0x0f)
      {
        stats_block *stats = &v->fbi.lfb_stats;
        /* expand the 16-bit depth value into the iterated W/Z formats */
        Bit64s iterw = sw[pix] << (30-16);
        Bit32s iterz = sw[pix] << 12;
        rgb_union color;

        /* apply clipping */
        if (FBZMODE_ENABLE_CLIPPING(v->reg[fbzMode].u))
        {
          if (x < (int)((v->reg[clipLeftRight].u >> 16) & 0x3ff) ||
            x >= (int)(v->reg[clipLeftRight].u & 0x3ff) ||
            scry < (int)((v->reg[clipLowYHighY].u >> 16) & 0x3ff) ||
            scry >= (int)(v->reg[clipLowYHighY].u & 0x3ff))
          {
            stats->pixels_in++;
            stats->clip_fail++;
            goto nextpixel;
          }
        }

        /* pixel pipeline part 1 handles depth testing and stippling */
        PIXEL_PIPELINE_BEGIN(v, stats, x, y, v->reg[fbzColorPath].u, v->reg[fbzMode].u, iterz, iterw);

        /* use the RGBA we stashed above */
        color.rgb.r = r = sr[pix];
        color.rgb.g = g = sg[pix];
        color.rgb.b = b = sb[pix];
        color.rgb.a = a = sa[pix];

        /* apply chroma key, alpha mask, and alpha testing */
        APPLY_CHROMAKEY(v, stats, v->reg[fbzMode].u, color);
        APPLY_ALPHAMASK(v, stats, v->reg[fbzMode].u, color.rgb.a);
        APPLY_ALPHATEST(v, stats, v->reg[alphaMode].u, color.rgb.a);

        /* pixel pipeline part 2 handles color combine, fog, alpha, and final output */
        PIXEL_PIPELINE_END(v, stats, dither, dither4, dither_lookup, x, dest, depth, v->reg[fbzMode].u, v->reg[fbzColorPath].u, v->reg[alphaMode].u, v->reg[fogMode].u, iterz, iterw, v->reg[zaColor]);
      }
nextpixel:
      /* advance our pointers */
      x++;
      mask >>= 4;
    }
  }

  return 0;
}
2827
cmdfifo_calc_depth_needed(cmdfifo_info * f)2828 Bit32u cmdfifo_calc_depth_needed(cmdfifo_info *f)
2829 {
2830 Bit32u command, needed = BX_MAX_BIT32U;
2831 Bit8u type;
2832 int i, count = 0;
2833
2834 if (f->depth == 0)
2835 return needed;
2836 command = *(Bit32u*)(&v->fbi.ram[f->rdptr & v->fbi.mask]);
2837 type = (Bit8u)(command & 0x07);
2838 switch (type) {
2839 case 0:
2840 if (((command >> 3) & 7) == 4) {
2841 needed = 2;
2842 } else {
2843 needed = 1;
2844 }
2845 break;
2846 case 1:
2847 needed = 1 + (command >> 16);
2848 break;
2849 case 2:
2850 for (i = 3; i <= 31; i++)
2851 if (command & (1 << i)) count++;
2852 needed = 1 + count;
2853 break;
2854 case 3:
2855 count = 2; /* X/Y */
2856 if (command & (1 << 28)) {
2857 if (command & (3 << 10)) count++; /* ARGB */
2858 } else {
2859 if (command & (1 << 10)) count += 3; /* RGB */
2860 if (command & (1 << 11)) count++; /* A */
2861 }
2862 if (command & (1 << 12)) count++; /* Z */
2863 if (command & (1 << 13)) count++; /* Wb */
2864 if (command & (1 << 14)) count++; /* W0 */
2865 if (command & (1 << 15)) count += 2; /* S0/T0 */
2866 if (command & (1 << 16)) count++; /* W1 */
2867 if (command & (1 << 17)) count += 2; /* S1/T1 */
2868 count *= (command >> 6) & 15; /* numverts */
2869 needed = 1 + count + (command >> 29);
2870 break;
2871 case 4:
2872 for (i = 15; i <= 28; i++)
2873 if (command & (1 << i)) count++;
2874 needed = 1 + count + (command >> 29);
2875 break;
2876 case 5:
2877 needed = 2 + ((command >> 3) & 0x7ffff);
2878 break;
2879 default:
2880 BX_ERROR(("CMDFIFO: unsupported packet type %d", type));
2881 }
2882 return needed;
2883 }
2884
/*
 * cmdfifo_w - store a 32-bit host write into CMDFIFO memory and update
 * the FIFO bookkeeping.
 *
 * f          - the CMDFIFO being written
 * fbi_offset - byte offset into frame buffer RAM where the word lands
 * data       - the 32-bit value written
 *
 * When hole counting is enabled, out-of-order writes are tracked via the
 * amin/amax address window and a count of unwritten "holes" inside it;
 * depth only grows once the data is known to be contiguous.  When a full
 * packet's worth of words is available, the command is flagged ready and
 * the FIFO thread is woken (unless the vertical timer will pick it up).
 */
void cmdfifo_w(cmdfifo_info *f, Bit32u fbi_offset, Bit32u data)
{
  BX_LOCK(cmdfifo_mutex);
  *(Bit32u*)(&v->fbi.ram[fbi_offset]) = data;
  /* count holes? */
  if (f->count_holes) {
    if ((f->holes == 0) && (fbi_offset == (f->amin + 4))) {
      /* in-order, no holes */
      f->amin = f->amax = fbi_offset;
      f->depth++;
    } else if (fbi_offset < f->amin) {
      /* out-of-order, below the minimum */
      if (f->holes != 0) {
        BX_ERROR(("Unexpected CMDFIFO: AMin=0x%08x AMax=0x%08x Holes=%d WroteTo:0x%08x RdPtr:0x%08x",
                  f->amin, f->amax, f->holes, fbi_offset, f->rdptr));
      }
      f->amin = f->amax = fbi_offset;
      f->depth++;
    } else if (fbi_offset < f->amax) {
      /* out-of-order, but within the min-max range: one hole filled */
      f->holes--;
      if (f->holes == 0) {
        /* all holes plugged: the whole amin..amax window is now valid */
        f->depth += (f->amax - f->amin) / 4;
        f->amin = f->amax;
      }
    } else {
      /* out-of-order, bumping max: skipped words become new holes */
      f->holes += (fbi_offset - f->amax) / 4 - 1;
      f->amax = fbi_offset;
    }
  }
  /* (re)compute the pending packet's required word count if unknown */
  if (f->depth_needed == BX_MAX_BIT32U) {
    f->depth_needed = cmdfifo_calc_depth_needed(f);
  }
  if (f->depth >= f->depth_needed) {
    /* a complete command is available: flag it and wake the FIFO thread */
    f->cmd_ready = 1;
    if (!v->vtimer_running) {
      bx_set_sem(&fifo_wakeup);
    }
  }
  BX_UNLOCK(cmdfifo_mutex);
}
2927
cmdfifo_r(cmdfifo_info * f)2928 Bit32u cmdfifo_r(cmdfifo_info *f)
2929 {
2930 Bit32u data;
2931
2932 data = *(Bit32u*)(&v->fbi.ram[f->rdptr & v->fbi.mask]);
2933 f->rdptr += 4;
2934 if (f->rdptr >= f->end) {
2935 BX_INFO(("CMDFIFO RdPtr rollover"));
2936 f->rdptr = f->base;
2937 }
2938 f->depth--;
2939 return data;
2940 }
2941
cmdfifo_process(cmdfifo_info * f)2942 void cmdfifo_process(cmdfifo_info *f)
2943 {
2944 Bit32u command, data, mask, nwords, regaddr;
2945 Bit8u type, code, nvertex, smode, disbytes;
2946 bool inc, pcolor;
2947 voodoo_reg reg;
2948 int i, w0, wn;
2949 setup_vertex svert = {0};
2950
2951 command = cmdfifo_r(f);
2952 type = (Bit8u)(command & 0x07);
2953 switch (type) {
2954 case 0:
2955 code = (Bit8u)((command >> 3) & 0x07);
2956 switch (code) {
2957 case 0: // NOP
2958 break;
2959 case 3: // JMP
2960 f->rdptr = (command >> 4) & 0xfffffc;
2961 if (f->count_holes) {
2962 BX_DEBUG(("cmdfifo_process(): JMP 0x%08x", f->rdptr));
2963 }
2964 break;
2965 case 4: // TODO: JMP AGP
2966 data = cmdfifo_r(f);
2967 default:
2968 BX_ERROR(("CMDFIFO packet type 0: unsupported code %d", code));
2969 }
2970 break;
2971 case 1:
2972 nwords = (command >> 16);
2973 regaddr = (command & 0x7ff8) >> 3;
2974 inc = (command >> 15) & 1;
2975 for (i = 0; i < (int)nwords; i++) {
2976 data = cmdfifo_r(f);
2977 BX_UNLOCK(cmdfifo_mutex);
2978 Voodoo_reg_write(regaddr, data);
2979 BX_LOCK(cmdfifo_mutex);
2980 if (inc) regaddr++;
2981 }
2982 break;
2983 case 2:
2984 mask = (command >> 3);
2985 if (v->type < VOODOO_BANSHEE) {
2986 regaddr = bltSrcBaseAddr;
2987 } else {
2988 regaddr = blt_clip0Min;
2989 }
2990 while (mask) {
2991 if (mask & 1) {
2992 data = cmdfifo_r(f);
2993 BX_UNLOCK(cmdfifo_mutex);
2994 if (v->type < VOODOO_BANSHEE) {
2995 register_w(regaddr, data, 1);
2996 } else {
2997 Banshee_2D_write(regaddr, data);
2998 }
2999 BX_LOCK(cmdfifo_mutex);
3000 }
3001 regaddr++;
3002 mask >>= 1;
3003 }
3004 break;
3005 case 3:
3006 nwords = (command >> 29);
3007 pcolor = (command >> 28) & 1;
3008 smode = (command >> 22) & 0x3f;
3009 mask = (command >> 10) & 0xff;
3010 nvertex = (command >> 6) & 0x0f;
3011 code = (command >> 3) & 0x07;
3012 /* copy relevant bits into the setup mode register */
3013 v->reg[sSetupMode].u = ((smode << 16) | mask);
3014 /* loop over triangles */
3015 for (i = 0; i < nvertex; i++) {
3016 reg.u = cmdfifo_r(f);
3017 svert.x = reg.f;
3018 reg.u = cmdfifo_r(f);
3019 svert.y = reg.f;
3020 if (pcolor) {
3021 if (mask & 0x03) {
3022 data = cmdfifo_r(f);
3023 if (mask & 0x01) {
3024 svert.r = (float)RGB_RED(data);
3025 svert.g = (float)RGB_GREEN(data);
3026 svert.b = (float)RGB_BLUE(data);
3027 }
3028 if (mask & 0x02) {
3029 svert.a = (float)RGB_ALPHA(data);
3030 }
3031 }
3032 } else {
3033 if (mask & 0x01) {
3034 reg.u = cmdfifo_r(f);
3035 svert.r = reg.f;
3036 reg.u = cmdfifo_r(f);
3037 svert.g = reg.f;
3038 reg.u = cmdfifo_r(f);
3039 svert.b = reg.f;
3040 }
3041 if (mask & 0x02) {
3042 reg.u = cmdfifo_r(f);
3043 svert.a = reg.f;
3044 }
3045 }
3046 if (mask & 0x04) {
3047 reg.u = cmdfifo_r(f);
3048 svert.z = reg.f;
3049 }
3050 if (mask & 0x08) {
3051 reg.u = cmdfifo_r(f);
3052 svert.wb = reg.f;
3053 }
3054 if (mask & 0x10) {
3055 reg.u = cmdfifo_r(f);
3056 svert.w0 = reg.f;
3057 }
3058 if (mask & 0x20) {
3059 reg.u = cmdfifo_r(f);
3060 svert.s0 = reg.f;
3061 reg.u = cmdfifo_r(f);
3062 svert.t0 = reg.f;
3063 }
3064 if (mask & 0x40) {
3065 reg.u = cmdfifo_r(f);
3066 svert.w1 = reg.f;
3067 }
3068 if (mask & 0x80) {
3069 reg.u = cmdfifo_r(f);
3070 svert.s1 = reg.f;
3071 reg.u = cmdfifo_r(f);
3072 svert.t1 = reg.f;
3073 }
3074 /* if we're starting a new strip, or if this is the first of a set of verts */
3075 /* for a series of individual triangles, initialize all the verts */
3076 if ((code == 1 && i == 0) || (code == 0 && i % 3 == 0)) {
3077 v->fbi.sverts = 1;
3078 v->fbi.svert[0] = v->fbi.svert[1] = v->fbi.svert[2] = svert;
3079 } else { /* otherwise, add this to the list */
3080 /* for strip mode, shuffle vertex 1 down to 0 */
3081 if (!(smode & 1))
3082 v->fbi.svert[0] = v->fbi.svert[1];
3083
3084 /* copy 2 down to 1 and add our new one regardless */
3085 v->fbi.svert[1] = v->fbi.svert[2];
3086 v->fbi.svert[2] = svert;
3087
3088 /* if we have enough, draw */
3089 if (++v->fbi.sverts >= 3) {
3090 BX_UNLOCK(cmdfifo_mutex);
3091 setup_and_draw_triangle();
3092 BX_LOCK(cmdfifo_mutex);
3093 }
3094 }
3095 }
3096 while (nwords--) cmdfifo_r(f);
3097 break;
3098 case 4:
3099 nwords = (command >> 29);
3100 mask = (command >> 15) & 0x3fff;
3101 regaddr = (command & 0x7ff8) >> 3;
3102 while (mask) {
3103 if (mask & 1) {
3104 data = cmdfifo_r(f);
3105 BX_UNLOCK(cmdfifo_mutex);
3106 Voodoo_reg_write(regaddr, data);
3107 BX_LOCK(cmdfifo_mutex);
3108 }
3109 regaddr++;
3110 mask >>= 1;
3111 }
3112 while (nwords--) cmdfifo_r(f);
3113 break;
3114 case 5:
3115 nwords = (command >> 3) & 0x7ffff;
3116 regaddr = (cmdfifo_r(f) & 0xffffff) >> 2;
3117 code = (command >> 30);
3118 disbytes = (command >> 22) & 0xff;
3119 if ((disbytes > 0) && (code != 0) && (code != 3)) {
3120 BX_ERROR(("CMDFIFO packet type 5: byte disable not supported yet (dest code = %d disbytes = 0x%02x)", code, disbytes));
3121 }
3122 switch (code) {
3123 case 0:
3124 regaddr <<= 2;
3125 w0 = 0;
3126 wn = nwords;
3127 if ((disbytes & 0xf0) > 0) {
3128 data = cmdfifo_r(f);
3129 if ((disbytes & 0xf0) == 0x30) {
3130 data >>= 16;
3131 } else if ((disbytes & 0xf0) == 0xc0) {
3132 data &= 0xffff;
3133 } else {
3134 BX_ERROR(("CMDFIFO packet type 5: byte disable not complete (dest code = 0)"));
3135 }
3136 BX_UNLOCK(cmdfifo_mutex);
3137 Banshee_LFB_write(regaddr, data, 2);
3138 BX_LOCK(cmdfifo_mutex);
3139 w0++;
3140 regaddr += 4;
3141 }
3142 for (i = w0; i < wn; i++) {
3143 data = cmdfifo_r(f);
3144 BX_UNLOCK(cmdfifo_mutex);
3145 Banshee_LFB_write(regaddr, data, 4);
3146 BX_LOCK(cmdfifo_mutex);
3147 regaddr += 4;
3148 }
3149 if ((disbytes & 0x0f) > 0) {
3150 BX_ERROR(("CMDFIFO packet type 5: byte disable not complete (dest code = 0)"));
3151 }
3152 break;
3153 case 2:
3154 for (i = 0; i < (int)nwords; i++) {
3155 data = cmdfifo_r(f);
3156 BX_UNLOCK(cmdfifo_mutex);
3157 lfb_w(regaddr, data, 0xffffffff);
3158 BX_LOCK(cmdfifo_mutex);
3159 regaddr++;
3160 }
3161 break;
3162 case 3:
3163 w0 = 0;
3164 wn = nwords;
3165 if ((disbytes & 0xf0) > 0) {
3166 data = cmdfifo_r(f);
3167 if ((disbytes & 0xf0) == 0x30) {
3168 data >>= 16;
3169 } else if ((disbytes & 0xf0) == 0xc0) {
3170 data &= 0xffff;
3171 } else if ((disbytes & 0xf0) == 0xe0) {
3172 data &= 0xff;
3173 } else {
3174 BX_ERROR(("CMDFIFO packet type 5: byte disable not complete (dest code = 3)"));
3175 }
3176 BX_UNLOCK(cmdfifo_mutex);
3177 texture_w(regaddr, data);
3178 BX_LOCK(cmdfifo_mutex);
3179 w0++;
3180 regaddr++;
3181 }
3182 for (i = w0; i < wn; i++) {
3183 data = cmdfifo_r(f);
3184 BX_UNLOCK(cmdfifo_mutex);
3185 texture_w(regaddr, data);
3186 BX_LOCK(cmdfifo_mutex);
3187 regaddr++;
3188 }
3189 if ((disbytes & 0x0f) > 0) {
3190 BX_ERROR(("CMDFIFO packet type 5: byte disable not complete (dest code = 3)"));
3191 }
3192 break;
3193 default:
3194 BX_ERROR(("CMDFIFO packet type 5: unsupported destination type %d", code));
3195 }
3196 break;
3197 case 6:
3198 // TODO: AGP to VRAM transfer
3199 cmdfifo_r(f);
3200 cmdfifo_r(f);
3201 cmdfifo_r(f);
3202 cmdfifo_r(f);
3203 default:
3204 BX_ERROR(("CMDFIFO: unsupported packet type %d", type));
3205 }
3206 f->depth_needed = cmdfifo_calc_depth_needed(f);
3207 if (f->depth < f->depth_needed) {
3208 f->cmd_ready = 0;
3209 }
3210 }
3211
3212
3213 #define FBI_TRICK 1
3214 #if FBI_TRICK
fifo_add_fbi(Bit32u type_offset,Bit32u data)3215 bool fifo_add_fbi(Bit32u type_offset, Bit32u data)
3216 {
3217 bool ret = 0;
3218
3219 BX_LOCK(fifo_mutex);
3220 if (v->fbi.fifo.enabled) {
3221 fifo_add(&v->fbi.fifo, type_offset, data);
3222 ret = 1;
3223 if ((fifo_space(&v->fbi.fifo)/2) <= 0xe000)
3224 bx_set_sem(&fifo_wakeup);
3225 }
3226 BX_UNLOCK(fifo_mutex);
3227 return ret;
3228 }
3229
fifo_add_common(Bit32u type_offset,Bit32u data)3230 bool fifo_add_common(Bit32u type_offset, Bit32u data)
3231 {
3232 bool ret = 0;
3233
3234 BX_LOCK(fifo_mutex);
3235 if (v->fbi.fifo.enabled) {
3236 fifo_add(&v->fbi.fifo, type_offset, data);
3237 ret = 1;
3238 if ((fifo_space(&v->fbi.fifo)/2) <= 0xe000)
3239 bx_set_sem(&fifo_wakeup);
3240 } else
3241 if (v->pci.fifo.enabled) {
3242 fifo_add(&v->pci.fifo, type_offset, data);
3243 ret = 1;
3244 if ((fifo_space(&v->pci.fifo)/2) <= 16)
3245 bx_set_sem(&fifo_wakeup);
3246 }
3247 BX_UNLOCK(fifo_mutex);
3248 return ret;
3249 }
3250 #else
fifo_add_common(Bit32u type_offset,Bit32u data)3251 bool fifo_add_common(Bit32u type_offset, Bit32u data)
3252 {
3253 bool ret = 0;
3254
3255 BX_LOCK(fifo_mutex);
3256 if (v->pci.fifo.enabled) {
3257 fifo_add(&v->pci.fifo, type_offset, data);
3258 ret = 1;
3259 if (v->fbi.fifo.enabled) {
3260 if ((fifo_space(&v->pci.fifo)/2) <= 16) {
3261 fifo_move(&v->pci.fifo, &v->fbi.fifo);
3262 }
3263 if ((fifo_space(&v->fbi.fifo)/2) <= 0xe000) {
3264 bx_set_sem(&fifo_wakeup);
3265 }
3266 } else {
3267 if ((fifo_space(&v->pci.fifo)/2) <= 16) {
3268 bx_set_sem(&fifo_wakeup);
3269 }
3270 }
3271 }
3272 BX_UNLOCK(fifo_mutex);
3273 return ret;
3274 }
3275 #endif
3276
3277
/* Common entry point for writes to the Voodoo register space.
   'offset' is a 32-bit word address: bits 7:0 select the register,
   bits 11:8 the target chip mask, and (on Voodoo 2) bit 19 the CMDFIFO
   aperture.  Writes are either stored into the CMDFIFO, queued into the
   PCI/memory FIFOs, or executed immediately via register_w(). */
void register_w_common(Bit32u offset, Bit32u data)
{
  Bit32u regnum = (offset) & 0xff;
  Bit32u chips = (offset>>8) & 0xf;

  /* Voodoo 2 CMDFIFO handling */
  if ((v->type == VOODOO_2) && v->fbi.cmdfifo[0].enabled) {
    if ((offset & 0x80000) > 0) {
      /* write lands inside the CMDFIFO aperture: store into frame buffer
         memory at the CMDFIFO base instead of hitting a register */
      if (!FBIINIT7_CMDFIFO_MEMORY_STORE(v->reg[fbiInit7].u)) {
        BX_ERROR(("CMDFIFO-to-FIFO mode not supported yet"));
      } else {
        Bit32u fbi_offset = (v->fbi.cmdfifo[0].base + ((offset & 0xffff) << 2)) & v->fbi.mask;
        if (LOG_CMDFIFO) BX_DEBUG(("CMDFIFO write: FBI offset=0x%08x, data=0x%08x", fbi_offset, data));
        cmdfifo_w(&v->fbi.cmdfifo[0], fbi_offset, data);
      }
      return;
    } else {
      /* direct register write while CMDFIFO is active: only write-through
         registers pass; swapbufferCMD just counts a pending swap */
      if (v->regaccess[regnum] & REGISTER_WRITETHRU) {
        BX_DEBUG(("Writing to register %s in CMDFIFO mode", v->regnames[regnum]));
      } else if (regnum == swapbufferCMD) {
        v->fbi.swaps_pending++;
        return;
      } else {
        BX_DEBUG(("Invalid attempt to write %s in CMDFIFO mode", v->regnames[regnum]));
        return;
      }
    }
  }

  /* a chip mask of 0 means broadcast to all chips */
  if (chips == 0)
    chips = 0xf;

  /* the first 64 registers can be aliased differently */
  if ((offset & 0x800c0) == 0x80000 && v->alt_regmap)
    regnum = register_alias_map[offset & 0x3f];
  else
    regnum = offset & 0xff;

  /* first make sure this register is writable */
  if (!(v->regaccess[regnum] & REGISTER_WRITE)) {
    BX_DEBUG(("Invalid attempt to write %s", v->regnames[regnum]));
    return;
  }

  BX_DEBUG(("write chip 0x%x reg 0x%x value 0x%08x(%s)", chips, regnum<<2, data, v->regnames[regnum]));

  switch (regnum) {
    /* external DAC access -- Voodoo/Voodoo2 only */
    case dacData:
      if (v->type <= VOODOO_2 /*&& (chips & 1)*/)
      {
        poly_wait(v->poly, v->regnames[regnum]);
        /* bit 11 selects read (1) vs write (0) of DAC register bits 10:8 */
        if (!(data & 0x800))
          dacdata_w(&v->dac, (data >> 8) & 7, data & 0xff);
        else
          dacdata_r(&v->dac, (data >> 8) & 7);
      }
      break;

    /* vertical sync rate -- Voodoo/Voodoo2 only */
    case hSync:
    case vSync:
    case backPorch:
    case videoDimensions:
      if (v->type <= VOODOO_2 && (chips & 1))
      {
        poly_wait(v->poly, v->regnames[regnum]);
        v->reg[regnum].u = data;
        /* only recompute timing once all three timing registers are set */
        if (v->reg[hSync].u != 0 && v->reg[vSync].u != 0 && v->reg[videoDimensions].u != 0)
        {
          int htotal = ((v->reg[hSync].u >> 16) & 0x3ff) + 1 + (v->reg[hSync].u & 0xff) + 1;
          int vtotal = ((v->reg[vSync].u >> 16) & 0xfff) + (v->reg[vSync].u & 0xfff);
          int hvis = v->reg[videoDimensions].u & 0x3ff;
          int vvis = (v->reg[videoDimensions].u >> 16) & 0x3ff;
          int hbp = (v->reg[backPorch].u & 0xff) + 2;
          int vbp = (v->reg[backPorch].u >> 16) & 0xff;
          rectangle visarea;

          /* create a new visarea */
          visarea.min_x = hbp;
          visarea.max_x = hbp + hvis - 1;
          visarea.min_y = vbp;
          visarea.max_y = vbp + vvis - 1;

          /* keep within bounds */
          visarea.max_x = MIN(visarea.max_x, htotal - 1);
          visarea.max_y = MIN(visarea.max_y, vtotal - 1);

          BX_DEBUG(("hSync=%08X vSync=%08X backPorch=%08X videoDimensions=%08X",
            v->reg[hSync].u, v->reg[vSync].u, v->reg[backPorch].u, v->reg[videoDimensions].u));
          BX_DEBUG(("Horiz: %d-%d (%d total) Vert: %d-%d (%d total) -- ", visarea.min_x, visarea.max_x, htotal, visarea.min_y, visarea.max_y, vtotal));

          /* configure the new framebuffer info */
          v->fbi.width = hvis + 1;
          v->fbi.height = vvis;
          v->fbi.xoffs = hbp;
          v->fbi.yoffs = vbp;
          v->fbi.vsyncscan = (v->reg[vSync].u >> 16) & 0xfff;

          /* if changing dimensions, update video memory layout */
          if (regnum == videoDimensions)
            recompute_video_memory(v);

          Voodoo_UpdateScreenStart();
        }
      }
      break;

    /* fbiInit0 can only be written if initEnable says we can -- Voodoo/Voodoo2 only */
    case fbiInit0:
      poly_wait(v->poly, v->regnames[regnum]);
      if (v->type <= VOODOO_2 && (chips & 1) && INITEN_ENABLE_HW_INIT(v->pci.init_enable)) {
        /* bit 0 enables video output */
        Voodoo_Output_Enable(data & 1);
        if (v->fbi.fifo.enabled != FBIINIT0_ENABLE_MEMORY_FIFO(data)) {
          v->fbi.fifo.enabled = FBIINIT0_ENABLE_MEMORY_FIFO(data);
          BX_INFO(("memory FIFO now %sabled",
                   v->fbi.fifo.enabled ? "en" : "dis"));
        }
        v->reg[fbiInit0].u = data;
        if (FBIINIT0_GRAPHICS_RESET(data))
          soft_reset(v);
        if (FBIINIT0_FIFO_RESET(data))
          fifo_reset(&v->pci.fifo);
        recompute_video_memory(v);
      }
      break;

    /* fbiInitX can only be written if initEnable says we can -- Voodoo/Voodoo2 only */
    /* most of these affect memory layout, so always recompute that when done */
    case fbiInit1:
    case fbiInit2:
    case fbiInit4:
    case fbiInit5:
    case fbiInit6:
      poly_wait(v->poly, v->regnames[regnum]);

      if (v->type <= VOODOO_2 && (chips & 1) && INITEN_ENABLE_HW_INIT(v->pci.init_enable))
      {
        v->reg[regnum].u = data;
        recompute_video_memory(v);
        v->fbi.video_changed = 1;
        v->fbi.clut_dirty = 1;
      }
      break;

    case fbiInit3:
      poly_wait(v->poly, v->regnames[regnum]);
      if (v->type <= VOODOO_2 && (chips & 1) && INITEN_ENABLE_HW_INIT(v->pci.init_enable))
      {
        v->reg[regnum].u = data;
        /* fbiInit3 also controls register aliasing and the Y origin */
        v->alt_regmap = FBIINIT3_TRI_REGISTER_REMAP(data);
        v->fbi.yorigin = FBIINIT3_YORIGIN_SUBTRACT(v->reg[fbiInit3].u);
        recompute_video_memory(v);
      }
      break;

    case fbiInit7:
      /* case swapPending: -- Banshee */
      poly_wait(v->poly, v->regnames[regnum]);

      if (v->type == VOODOO_2 && (chips & 1) && INITEN_ENABLE_HW_INIT(v->pci.init_enable))
      {
        /* fbiInit7 configures the Voodoo 2 CMDFIFO */
        v->fbi.cmdfifo[0].count_holes = !FBIINIT7_DISABLE_CMDFIFO_HOLES(data);
        if (v->fbi.cmdfifo[0].enabled != FBIINIT7_CMDFIFO_ENABLE(data)) {
          v->fbi.cmdfifo[0].enabled = FBIINIT7_CMDFIFO_ENABLE(data);
          BX_INFO(("CMDFIFO now %sabled", v->fbi.cmdfifo[0].enabled ? "en" : "dis"));
        }
        v->reg[regnum].u = data;
      } else if (v->type >= VOODOO_BANSHEE) {
        /* on Banshee this register slot is swapPending */
        v->fbi.swaps_pending++;
      }
      break;

    case cmdFifoBaseAddr:
      BX_LOCK(cmdfifo_mutex);
      /* low 10 bits = base page, bits 25:16 = last page (inclusive) */
      v->fbi.cmdfifo[0].base = (data & 0x3ff) << 12;
      v->fbi.cmdfifo[0].end = (((data >> 16) & 0x3ff) + 1) << 12;
      BX_UNLOCK(cmdfifo_mutex);
      break;

    case cmdFifoRdPtr:
      BX_LOCK(cmdfifo_mutex);
      v->fbi.cmdfifo[0].rdptr = data;
      BX_UNLOCK(cmdfifo_mutex);
      break;

    case cmdFifoAMin:
      /* case colBufferAddr: -- Banshee */
      if (v->type == VOODOO_2 && (chips & 1)) {
        BX_LOCK(cmdfifo_mutex);
        v->fbi.cmdfifo[0].amin = data;
        BX_UNLOCK(cmdfifo_mutex);
      } else if (v->type >= VOODOO_BANSHEE && (chips & 1))
        v->fbi.rgboffs[1] = data & v->fbi.mask & ~0x0f;
      break;

    case cmdFifoAMax:
      /* case colBufferStride: -- Banshee */
      if (v->type == VOODOO_2 && (chips & 1)) {
        BX_LOCK(cmdfifo_mutex);
        v->fbi.cmdfifo[0].amax = data;
        BX_UNLOCK(cmdfifo_mutex);
      } else if (v->type >= VOODOO_BANSHEE && (chips & 1)) {
        /* bit 15 selects tiled (stride in tiles) vs linear (stride in bytes) */
        if (data & 0x8000)
          v->fbi.rowpixels = (data & 0x7f) << 6;
        else
          v->fbi.rowpixels = (data & 0x3fff) >> 1;
      }
      break;

    case cmdFifoDepth:
      /* case auxBufferAddr: -- Banshee */
      if (v->type == VOODOO_2 && (chips & 1)) {
        BX_LOCK(cmdfifo_mutex);
        v->fbi.cmdfifo[0].depth = data & 0xffff;
        v->fbi.cmdfifo[0].depth_needed = BX_MAX_BIT32U;
        BX_UNLOCK(cmdfifo_mutex);
      } else if (v->type >= VOODOO_BANSHEE && (chips & 1)) {
        v->fbi.auxoffs = data & v->fbi.mask & ~0x0f;
      }
      break;

    case cmdFifoHoles:
      /* case auxBufferStride: -- Banshee */
      if (v->type == VOODOO_2 && (chips & 1)) {
        BX_LOCK(cmdfifo_mutex);
        v->fbi.cmdfifo[0].holes = data;
        BX_UNLOCK(cmdfifo_mutex);
      } else if (v->type >= VOODOO_BANSHEE && (chips & 1)) {
        Bit32u rowpixels;

        if (data & 0x8000)
          rowpixels = (data & 0x7f) << 6;
        else
          rowpixels = (data & 0x3fff) >> 1;
        if (v->fbi.rowpixels != rowpixels)
          BX_PANIC(("aux buffer stride differs from color buffer stride"));
      }
      break;

    case intrCtrl:
      BX_ERROR(("Writing to register %s not supported yet", v->regnames[regnum]));
      break;

    default:
      /* everything else goes through the FIFO when one is enabled;
         command registers bump the pending-op count and wake the thread */
      if (fifo_add_common(FIFO_WR_REG | offset, data)) {
        BX_LOCK(fifo_mutex);
        if ((regnum == triangleCMD) || (regnum == ftriangleCMD) || (regnum == nopCMD) ||
            (regnum == fastfillCMD) || (regnum == swapbufferCMD)) {
          v->pci.op_pending++;
          if (regnum == swapbufferCMD) {
            v->fbi.swaps_pending++;
          }
          bx_set_sem(&fifo_wakeup);
        }
        BX_UNLOCK(fifo_mutex);
      } else {
        register_w(offset, data, 0);
      }
  }
}
3539
3540
/* Read a Voodoo register.  'offset' is a 32-bit word address with the
   register number in bits 7:0 and the chip select in bits 11:8.
   Most registers return their stored value; status and a few others are
   computed dynamically. */
Bit32u register_r(Bit32u offset)
{
  Bit32u regnum = (offset) & 0xff;
  Bit32u chips = (offset>>8) & 0xf;

  if (!((voodoo_last_msg == regnum) && (regnum == status))) //show status reg only once
    BX_DEBUG(("read chip 0x%x reg 0x%x (%s)", chips, regnum<<2, v->regnames[regnum]));
  voodoo_last_msg = regnum;

  /* first make sure this register is readable */
  if (!(v->regaccess[regnum] & REGISTER_READ)) {
    BX_DEBUG(("Invalid attempt to read %s", v->regnames[regnum]));
    return 0;
  }
  if ((v->type == VOODOO_2) && v->fbi.cmdfifo[0].enabled && ((offset & 0x80000) > 0)) {
    BX_DEBUG(("Invalid attempt to read from CMDFIFO"));
    return 0;
  }

  Bit32u result;

  /* default result is the FBI register value */
  result = v->reg[regnum].u;

  /* some registers are dynamic; compute them */
  switch (regnum) {
    case status:

      /* start with a blank slate */
      result = 0;

      /* bits 5:0 are the PCI FIFO free space */
      if (fifo_empty_locked(&v->pci.fifo))
        result |= 0x3f << 0;
      else
      {
        BX_LOCK(fifo_mutex);
        int temp = fifo_space(&v->pci.fifo)/2;
        BX_UNLOCK(fifo_mutex);
        if (temp > 0x3f)
          temp = 0x3f;
        result |= temp << 0;
      }

      /* bit 6 is the vertical retrace */
      result |= (Voodoo_get_retrace(0) > 0) << 6;

      /* bit 7 is FBI graphics engine busy */
      if (v->pci.op_pending)
        result |= 1 << 7;

      /* bit 8 is TREX busy */
      if (v->pci.op_pending)
        result |= 1 << 8;

      /* bit 9 is overall busy */
      if (v->pci.op_pending)
        result |= 1 << 9;

      if (v->type == VOODOO_2) {
        /* a non-empty CMDFIFO sets all three busy bits (9:7) */
        if (v->fbi.cmdfifo[0].enabled && v->fbi.cmdfifo[0].depth > 0)
          result |= 7 << 7;
      }
      /* Banshee is different starting here */
      if (v->type < VOODOO_BANSHEE)
      {
        /* bits 11:10 specifies which buffer is visible */
        result |= v->fbi.frontbuf << 10;

        /* bits 27:12 indicate memory FIFO freespace */
        if (!v->fbi.fifo.enabled || fifo_empty_locked(&v->fbi.fifo))
          result |= 0xffff << 12;
        else
        {
          BX_LOCK(fifo_mutex);
          int temp = fifo_space(&v->fbi.fifo)/2;
          BX_UNLOCK(fifo_mutex);
          if (temp > 0xffff)
            temp = 0xffff;
          result |= temp << 12;
        }
      }
      else
      {
        /* bit 10 is 2D busy (3 << 9 also keeps busy bit 9 set) */
        if (v->banshee.blt.busy)
          result |= 3 << 9;

        /* bit 11 is cmd FIFO 0 busy (5 << 9 also keeps busy bit 9 set) */
        if (v->fbi.cmdfifo[0].enabled && v->fbi.cmdfifo[0].depth > 0)
          result |= 5 << 9;

        /* bit 12 is cmd FIFO 1 busy (9 << 9 also keeps busy bit 9 set) */
        if (v->fbi.cmdfifo[1].enabled && v->fbi.cmdfifo[1].depth > 0)
          result |= 9 << 9;
      }

      /* bits 30:28 are the number of pending swaps */
      if (v->fbi.swaps_pending > 7)
        result |= 7 << 28;
      else
        result |= v->fbi.swaps_pending << 28;

      /* bit 31 is not used */

      /* eat some cycles since people like polling here */
      cpu_eat_cycles(v->cpu, 1000);
      break;

    /* bit 2 of the initEnable register maps this to dacRead */
    case fbiInit2:
      if (INITEN_REMAP_INIT_TO_DAC(v->pci.init_enable))
        result = v->dac.read_result;
      break;

    case vRetrace:
      result = Voodoo_get_retrace(0) & 0x1fff;
      break;

    case hvRetrace:
      result = Voodoo_get_retrace(1);
      break;

    /* pack the CMDFIFO base/end pages back into register format */
    case cmdFifoBaseAddr:
      result = (v->fbi.cmdfifo[0].base >> 12) | ((v->fbi.cmdfifo[0].end >> 12) << 16);
      break;

    case cmdFifoRdPtr:
      result = v->fbi.cmdfifo[0].rdptr;
      break;

    case cmdFifoDepth:
      result = v->fbi.cmdfifo[0].depth;
      break;

    case cmdFifoAMin:
      result = v->fbi.cmdfifo[0].amin;
      break;

    case cmdFifoAMax:
      result = v->fbi.cmdfifo[0].amax;
      break;
  }

  return result;
}
3687
/* Read a 32-bit value from the linear frame buffer: two adjacent 16bpp
   pixels packed into one dword.  Returns 0xffffffff for reserved or
   out-of-range accesses. */
Bit32u lfb_r(Bit32u offset)
{
  Bit16u *buffer;
  Bit32u bufmax;
  Bit32u bufoffs;
  Bit32u data;
  bool forcefront=false;
  int x, y, scry;
  Bit32u destbuf;

  BX_DEBUG(("read LFB offset 0x%x", offset));

  /* compute X,Y (two pixels per dword, so X advances in steps of 2) */
  x = (offset << 1) & 0x3fe;
  y = (offset >> 9) & 0x7ff;

  /* select the target buffer; on Banshee this is always the back buffer
     here since forcefront is false */
  destbuf = (v->type >= VOODOO_BANSHEE) ? (!forcefront) : LFBMODE_READ_BUFFER_SELECT(v->reg[lfbMode].u);
  switch (destbuf)
  {
    case 0: /* front buffer */
      buffer = (Bit16u *)(v->fbi.ram + v->fbi.rgboffs[v->fbi.frontbuf]);
      bufmax = (v->fbi.mask + 1 - v->fbi.rgboffs[v->fbi.frontbuf]) / 2;
      break;

    case 1: /* back buffer */
      buffer = (Bit16u *)(v->fbi.ram + v->fbi.rgboffs[v->fbi.backbuf]);
      bufmax = (v->fbi.mask + 1 - v->fbi.rgboffs[v->fbi.backbuf]) / 2;
      break;

    case 2: /* aux buffer */
      if (v->fbi.auxoffs == (Bit32u)~0)
        return 0xffffffff;
      buffer = (Bit16u *)(v->fbi.ram + v->fbi.auxoffs);
      bufmax = (v->fbi.mask + 1 - v->fbi.auxoffs) / 2;
      break;

    default: /* reserved */
      return 0xffffffff;
  }

  /* determine the screen Y (optionally flipped relative to yorigin) */
  scry = y;
  if (LFBMODE_Y_ORIGIN(v->reg[lfbMode].u))
    scry = (v->fbi.yorigin - y) & 0x3ff;

  /* advance pointers to the proper row */
  bufoffs = scry * v->fbi.rowpixels + x;
  if (bufoffs >= bufmax)
    return 0xffffffff;

  /* wait for any outstanding work to finish */
  poly_wait(v->poly, "LFB read");

  /* compute the data */
  data = buffer[bufoffs + 0] | (buffer[bufoffs + 1] << 16);

  /* word swapping */
  if (LFBMODE_WORD_SWAP_READS(v->reg[lfbMode].u))
    data = (data << 16) | (data >> 16);

  /* byte swizzling */
  if (LFBMODE_BYTE_SWIZZLE_READS(v->reg[lfbMode].u))
    data = bx_bswap32(data);

  if (LOG_LFB) BX_DEBUG(("VOODOO.%d.LFB:read (%d,%d) = %08X", v->index, x, y, data));
  return data;
}
3756
/* Dispatch a 32-bit write to the Voodoo memory aperture.  'offset' is a
   32-bit word address; 'mask' marks the valid portions of 'data'.
   Writes are routed to register, texture or LFB space, queued through a
   FIFO when one accepts them, and executed directly otherwise. */
void voodoo_w(Bit32u offset, Bit32u data, Bit32u mask)
{
  Bit32u type;

  /* address bits 23:22 clear -> register space */
  if ((offset & (0xc00000/4)) == 0)
    register_w_common(offset, data);
  else if (offset & (0x800000/4)) {
    /* bit 23 set -> texture space */
    if (!fifo_add_common(FIFO_WR_TEX | offset, data)) {
      texture_w(offset, data);
    }
  } else {
    /* otherwise LFB space: tag the FIFO entry with which 16-bit half is
       valid (low bit of mask -> low half, per the FIFO_WR_FBI_16L name) */
    if (mask == 0xffffffff) {
      type = FIFO_WR_FBI_32;
    } else if (mask & 1) {
      type = FIFO_WR_FBI_16L;
    } else {
      type = FIFO_WR_FBI_16H;
    }
#if FBI_TRICK
    if (!fifo_add_fbi(type | offset, data)) {
#else
    if (!fifo_add_common(type | offset, data)) {
#endif
      lfb_w(offset, data, mask);
    }
  }
}
3784
3785 Bit32u voodoo_r(Bit32u offset)
3786 {
3787 if (!(offset & (0xc00000/4)))
3788 return register_r(offset);
3789 else
3790 return lfb_r(offset);
3791
3792 return 0xffffffff;
3793 }
3794
/* Initialize one texture mapping unit (TMU).
   't'      TMU state to set up
   'reg'    the TMU's register file within v->reg
   'memory' texture RAM base (may be NULL here; the caller can assign
            t->ram afterwards, as voodoo_init() does)
   'tmem'   texture RAM size in bytes; used as (tmem - 1) address mask,
            so it must be a power of two */
void init_tmu(voodoo_state *v, tmu_state *t, voodoo_reg *reg, void *memory, int tmem)
{
  /* allocate texture RAM */
  t->ram = (Bit8u *)memory;
  t->mask = tmem - 1;
  t->reg = reg;
  t->regdirty = 1;
  /* Voodoo 1 only honors the upper 4 bits of the bilinear blend factor */
  t->bilinear_mask = (v->type >= VOODOO_2) ? 0xff : 0xf0;

  /* mark the NCC tables dirty and configure their registers */
  t->ncc[0].dirty = t->ncc[1].dirty = 1;
  t->ncc[0].reg = &t->reg[nccTable+0];
  t->ncc[1].reg = &t->reg[nccTable+12];

  /* create pointers to all the tables (indexed by texture format code;
     formats 7 and 15 have no lookup table) */
  t->texel[0] = v->tmushare.rgb332;
  t->texel[1] = t->ncc[0].texel;
  t->texel[2] = v->tmushare.alpha8;
  t->texel[3] = v->tmushare.int8;
  t->texel[4] = v->tmushare.ai44;
  t->texel[5] = t->palette;
  t->texel[6] = (v->type >= VOODOO_2) ? t->palettea : NULL;
  t->texel[7] = NULL;
  t->texel[8] = v->tmushare.rgb332;
  t->texel[9] = t->ncc[0].texel;
  t->texel[10] = v->tmushare.rgb565;
  t->texel[11] = v->tmushare.argb1555;
  t->texel[12] = v->tmushare.argb4444;
  t->texel[13] = v->tmushare.int8;
  t->texel[14] = t->palette;
  t->texel[15] = NULL;
  t->lookup = t->texel[0];

  /* attach the palette to NCC table 0 */
  t->ncc[0].palette = t->palette;
  if (v->type >= VOODOO_2)
    t->ncc[0].palettea = t->palettea;

  /* set up texture address calculations */
  if (v->type <= VOODOO_2)
  {
    t->texaddr_mask = 0x0fffff;
    t->texaddr_shift = 3;
  } else {
    t->texaddr_mask = 0xfffff0;
    t->texaddr_shift = 0;
  }
}
3843
3844 void init_tmu_shared(tmu_shared_state *s)
3845 {
3846 int val;
3847
3848 /* build static 8-bit texel tables */
3849 for (val = 0; val < 256; val++) {
3850 int r, g, b, a;
3851
3852 /* 8-bit RGB (3-3-2) */
3853 EXTRACT_332_TO_888(val, r, g, b);
3854 s->rgb332[val] = MAKE_ARGB(0xff, r, g, b);
3855
3856 /* 8-bit alpha */
3857 s->alpha8[val] = MAKE_ARGB(val, val, val, val);
3858
3859 /* 8-bit intensity */
3860 s->int8[val] = MAKE_ARGB(0xff, val, val, val);
3861
3862 /* 8-bit alpha, intensity */
3863 a = ((val >> 0) & 0xf0) | ((val >> 4) & 0x0f);
3864 r = ((val << 4) & 0xf0) | ((val << 0) & 0x0f);
3865 s->ai44[val] = MAKE_ARGB(a, r, r, r);
3866 }
3867
3868 /* build static 16-bit texel tables */
3869 for (val = 0; val < 65536; val++) {
3870 int r, g, b, a;
3871
3872 /* table 10 = 16-bit RGB (5-6-5) */
3873 EXTRACT_565_TO_888(val, r, g, b);
3874 s->rgb565[val] = MAKE_ARGB(0xff, r, g, b);
3875
3876 /* table 11 = 16 ARGB (1-5-5-5) */
3877 EXTRACT_1555_TO_8888(val, a, r, g, b);
3878 s->argb1555[val] = MAKE_ARGB(a, r, g, b);
3879
3880 /* table 12 = 16-bit ARGB (4-4-4-4) */
3881 EXTRACT_4444_TO_8888(val, a, r, g, b);
3882 s->argb4444[val] = MAKE_ARGB(a, r, g, b);
3883 }
3884 }
3885
/* Install the forward/backward ROP handlers and flags for ROP code 'num'.
   Wrapped in do { } while (0) WITHOUT a trailing semicolon so the macro
   expands to a single statement; the original definition ended in
   'while (0);', which defeats the idiom and breaks use inside an
   unbraced if/else.  All existing call sites already supply the ';'. */
#define SETUP_BITBLT(num, name, flags) \
  do { \
    v->banshee.blt.rop_handler[0][num] = bitblt_rop_fwd_##name; \
    v->banshee.blt.rop_handler[1][num] = bitblt_rop_bkwd_##name; \
    v->banshee.blt.rop_flags[num] = flags; \
  } while (0)
3892
/* Populate the Banshee 2D blitter ROP dispatch tables.  All 256 ROP
   codes first default to a no-op, then the subset this emulation
   implements is installed.  Trailing comments give the ROP mnemonics
   (S = source, D = destination, P = pattern, n = NOT, a/o/x = and/or/xor);
   entries flagged BX_ROP_PATTERN take the pattern as their operand. */
void banshee_bitblt_init()
{
  for (int i = 0; i < 0x100; i++) {
    SETUP_BITBLT(i, nop, BX_ROP_PATTERN);
  }
  SETUP_BITBLT(0x00, 0, 0); // 0
  SETUP_BITBLT(0x05, notsrc_and_notdst, BX_ROP_PATTERN); // PSan
  SETUP_BITBLT(0x0a, notsrc_and_dst, BX_ROP_PATTERN); // DPna
  SETUP_BITBLT(0x0f, notsrc, BX_ROP_PATTERN); // Pn
  SETUP_BITBLT(0x11, notsrc_and_notdst, 0); // DSon
  SETUP_BITBLT(0x22, notsrc_and_dst, 0); // DSna
  SETUP_BITBLT(0x33, notsrc, 0); // Sn
  SETUP_BITBLT(0x44, src_and_notdst, 0); // SDna
  SETUP_BITBLT(0x50, src_and_notdst, 0); // PDna
  SETUP_BITBLT(0x55, notdst, 0); // Dn
  SETUP_BITBLT(0x5a, src_xor_dst, BX_ROP_PATTERN); // DPx
  SETUP_BITBLT(0x5f, notsrc_or_notdst, BX_ROP_PATTERN); // DSan
  SETUP_BITBLT(0x66, src_xor_dst, 0); // DSx
  SETUP_BITBLT(0x77, notsrc_or_notdst, 0); // DSan
  SETUP_BITBLT(0x88, src_and_dst, 0); // DSa
  SETUP_BITBLT(0x99, src_notxor_dst, 0); // DSxn
  SETUP_BITBLT(0xaa, nop, 0); // D
  SETUP_BITBLT(0xad, src_and_dst, BX_ROP_PATTERN); // DPa
  SETUP_BITBLT(0xaf, notsrc_or_dst, BX_ROP_PATTERN); // DPno
  SETUP_BITBLT(0xbb, notsrc_or_dst, 0); // DSno
  SETUP_BITBLT(0xcc, src, 0); // S
  SETUP_BITBLT(0xdd, src_and_notdst, 0); // SDna
  SETUP_BITBLT(0xee, src_or_dst, 0); // DSo
  SETUP_BITBLT(0xf0, src, BX_ROP_PATTERN); // P
  SETUP_BITBLT(0xf5, src_or_notdst, BX_ROP_PATTERN); // PDno
  SETUP_BITBLT(0xfa, src_or_dst, BX_ROP_PATTERN); // DPo
  SETUP_BITBLT(0xff, 1, 0); // 1
}
3926
/* One-time initialization of the emulated Voodoo device.
   '_type' selects the board model (VOODOO_1/2/BANSHEE/3) and determines
   the register access/name tables, LFB stride, chip mask and memory
   sizes.  Builds all lookup tables and ends with a soft reset. */
void voodoo_init(Bit8u _type)
{
  int pen;
  int val;

  /* power-on defaults for the fbiInit registers */
  v->reg[lfbMode].u = 0;
  v->reg[fbiInit0].u = (1 << 4) | (0x10 << 6);
  v->reg[fbiInit1].u = (1 << 1) | (1 << 8) | (1 << 12) | (2 << 20);
  v->reg[fbiInit2].u = (1 << 6) | (0x100 << 23);
  v->reg[fbiInit3].u = (2 << 13) | (0xf << 17);
  v->reg[fbiInit4].u = (1 << 0);
  v->type = _type;
  v->chipmask = 0x01 | 0x02 | 0x04 | 0x08;
  /* per-model register maps and LFB layout */
  switch (v->type) {
    case VOODOO_1:
      v->regaccess = voodoo_register_access;
      v->regnames = voodoo_reg_name;
      v->alt_regmap = 0;
      v->fbi.lfb_stride = 10;
      break;

    case VOODOO_2:
      v->regaccess = voodoo2_register_access;
      v->regnames = voodoo_reg_name;
      v->alt_regmap = 0;
      v->fbi.lfb_stride = 10;
      break;

    case VOODOO_BANSHEE:
      v->regaccess = banshee_register_access;
      v->regnames = banshee_reg_name;
      v->alt_regmap = 1;
      v->fbi.lfb_stride = 11;
      v->chipmask = 0x01 | 0x02;
      break;

    case VOODOO_3:
      v->regaccess = banshee_register_access;
      v->regnames = banshee_reg_name;
      v->alt_regmap = 1;
      v->fbi.lfb_stride = 11;
      v->chipmask = 0x01 | 0x02 | 0x04;
      break;
  }
  /* reset the DAC and program its default PLL values */
  memset(v->dac.reg, 0, sizeof(v->dac.reg));
  v->dac.read_result = 0;
  v->dac.clk0_m = 0x37;
  v->dac.clk0_n = 0x02;
  v->dac.clk0_p = 0x03;

  /* set up the PCI FIFO */
  v->pci.fifo.base = v->pci.fifo_mem;
  v->pci.fifo.size = 64*2;
  v->pci.fifo.in = v->pci.fifo.out = 0;

  /* create a table of precomputed 1/n and log2(n) values */
  /* n ranges from 1.0000 to 2.0000 */
  for (val = 0; val <= (1 << RECIPLOG_LOOKUP_BITS); val++) {
    Bit32u value = (1 << RECIPLOG_LOOKUP_BITS) + val;
    voodoo_reciplog[val*2 + 0] = (1 << (RECIPLOG_LOOKUP_PREC + RECIPLOG_LOOKUP_BITS)) / value;
    voodoo_reciplog[val*2 + 1] = (Bit32u)(LOGB2((double)value / (double)(1 << RECIPLOG_LOOKUP_BITS)) * (double)(1 << RECIPLOG_LOOKUP_PREC));
  }

  /* create dithering tables (index packs green-flag, x, color, y;
     note this loop's 'val' shadows the outer declaration above) */
  for (int val = 0; val < 256*16*2; val++) {
    int g = (val >> 0) & 1;
    int x = (val >> 1) & 3;
    int color = (val >> 3) & 0xff;
    int y = (val >> 11) & 3;

    if (!g) {
      /* red/blue are 5 bits wide, green 6 bits -- hence >>3 vs >>2 */
      dither4_lookup[val] = DITHER_RB(color, dither_matrix_4x4[y * 4 + x]) >> 3;
      dither2_lookup[val] = DITHER_RB(color, dither_matrix_2x2[y * 4 + x]) >> 3;
    } else {
      dither4_lookup[val] = DITHER_G(color, dither_matrix_4x4[y * 4 + x]) >> 2;
      dither2_lookup[val] = DITHER_G(color, dither_matrix_2x2[y * 4 + x]) >> 2;
    }
  }

  /* init the pens */
  v->fbi.clut_dirty = 1;
  if (v->type <= VOODOO_2) {
    /* Voodoo/Voodoo2: 33-entry internal CLUT, default linear ramp */
    for (pen = 0; pen < 32; pen++)
      v->fbi.clut[pen] = MAKE_ARGB(pen, pal5bit(pen), pal5bit(pen), pal5bit(pen));
    v->fbi.clut[32] = MAKE_ARGB(32,0xff,0xff,0xff);
  } else {
    /* Banshee and later: 512-entry CLUT, default linear ramp */
    for (pen = 0; pen < 512; pen++)
      v->fbi.clut[pen] = MAKE_RGB(pen,pen,pen);
  }
  /* frame buffer RAM: 4MB for Voodoo/Voodoo2, 16MB for Banshee/Voodoo3 */
  /* NOTE(review): malloc results are not checked -- assumes allocation succeeds */
  if (v->type < VOODOO_BANSHEE) {
    v->fbi.ram = (Bit8u*)malloc(4<<20);
    v->fbi.mask = (4<<20)-1;
  } else {
    v->fbi.ram = (Bit8u*)malloc(16<<20);
    v->fbi.mask = (16<<20)-1;
  }
  v->fbi.frontbuf = 0;
  v->fbi.backbuf = 1;
  v->fbi.width = 640;
  v->fbi.height = 480;
  v->fbi.rowpixels = v->fbi.width;
  v->fbi.fogdelta_mask = (v->type < VOODOO_2) ? 0xff : 0xfc;

  /* build shared TMU tables */
  init_tmu_shared(&v->tmushare);

  /* TMU RAM is passed as 0 here and assigned just below */
  init_tmu(v, &v->tmu[0], &v->reg[0x100], 0, 4 << 20);
  init_tmu(v, &v->tmu[1], &v->reg[0x200], 0, 4 << 20);

  v->tmu[0].reg = &v->reg[0x100];
  v->tmu[1].reg = &v->reg[0x200];

  /* Voodoo/Voodoo2 have dedicated texture RAM; Banshee/Voodoo3 share
     the frame buffer RAM with the TMUs */
  if (v->type < VOODOO_BANSHEE) {
    v->tmu[0].ram = (Bit8u*)malloc(4<<20);
    v->tmu[1].ram = (Bit8u*)malloc(4<<20);
    v->tmu[0].mask = (4<<20)-1;
    v->tmu[1].mask = (4<<20)-1;
  } else {
    v->tmu[0].ram = v->fbi.ram;
    v->tmu[1].ram = v->fbi.ram;
    v->tmu[0].mask = (16<<20)-1;
    v->tmu[1].mask = (16<<20)-1;
  }

  v->tmu_config = 64;

  /* per-thread rendering statistics */
  v->thread_stats = new stats_block[16];

  soft_reset(v);
}
4057
/* Rebuild the 16-bit (RGB 5-6-5) -> 32-bit RGB pen lookup table from the
   CLUT when it has been marked dirty; no-op otherwise. */
void update_pens(void)
{
  int x, y;

  /* if the CLUT is dirty, recompute the pens array */
  if (v->fbi.clut_dirty) {
    Bit8u rtable[32], gtable[64], btable[32];

    /* Voodoo/Voodoo-2 have an internal 33-entry CLUT */
    if (v->type <= VOODOO_2) {
      /* kludge: some of the Midway games write 0 to the last entry when they obviously mean FF */
      if ((v->fbi.clut[32] & 0xffffff) == 0 && (v->fbi.clut[31] & 0xffffff) != 0)
        v->fbi.clut[32] = 0x20ffffff;

      /* compute the R/G/B pens first */
      for (x = 0; x < 32; x++) {
        /* treat X as a 5-bit value, scale up to 8 bits, and linear interpolate for red/blue */
        y = (x << 3) | (x >> 2);
        rtable[x] = (RGB_RED(v->fbi.clut[y >> 3]) * (8 - (y & 7)) + RGB_RED(v->fbi.clut[(y >> 3) + 1]) * (y & 7)) >> 3;
        btable[x] = (RGB_BLUE(v->fbi.clut[y >> 3]) * (8 - (y & 7)) + RGB_BLUE(v->fbi.clut[(y >> 3) + 1]) * (y & 7)) >> 3;

        /* treat X as a 6-bit value with LSB=0, scale up to 8 bits, and linear interpolate */
        y = (x * 2) + 0;
        y = (y << 2) | (y >> 4);
        gtable[x*2+0] = (RGB_GREEN(v->fbi.clut[y >> 3]) * (8 - (y & 7)) + RGB_GREEN(v->fbi.clut[(y >> 3) + 1]) * (y & 7)) >> 3;

        /* treat X as a 6-bit value with LSB=1, scale up to 8 bits, and linear interpolate */
        y = (x * 2) + 1;
        y = (y << 2) | (y >> 4);
        gtable[x*2+1] = (RGB_GREEN(v->fbi.clut[y >> 3]) * (8 - (y & 7)) + RGB_GREEN(v->fbi.clut[(y >> 3) + 1]) * (y & 7)) >> 3;
      }
    }

    /* Banshee and later have a 512-entry CLUT that can be bypassed */
    else
    {
      /* vidProcCfg selects 2D vs 3D mode, which half of the CLUT to use,
         and whether to bypass the CLUT entirely */
      int mode3d = (v->banshee.io[io_vidProcCfg] >> 8) & 1;
      int which = (v->banshee.io[io_vidProcCfg] >> (12 + mode3d)) & 1;
      int bypass = (v->banshee.io[io_vidProcCfg] >> (10 + mode3d)) & 1;

      /* compute R/G/B pens first */
      for (x = 0; x < 32; x++) {
        /* treat X as a 5-bit value, scale up to 8 bits */
        y = (x << 3) | (x >> 2);
        rtable[x] = bypass ? y : RGB_RED(v->fbi.clut[which * 256 + y]);
        btable[x] = bypass ? y : RGB_BLUE(v->fbi.clut[which * 256 + y]);

        /* treat X as a 6-bit value with LSB=0, scale up to 8 bits */
        y = (x * 2) + 0;
        y = (y << 2) | (y >> 4);
        gtable[x*2+0] = bypass ? y : RGB_GREEN(v->fbi.clut[which * 256 + y]);

        /* treat X as a 6-bit value with LSB=1, scale up to 8 bits, and linear interpolate */
        y = (x * 2) + 1;
        y = (y << 2) | (y >> 4);
        gtable[x*2+1] = bypass ? y : RGB_GREEN(v->fbi.clut[which * 256 + y]);
      }
    }

    /* now compute the actual pens array: 16-bit pixel is R5:G6:B5 */
    for (x = 0; x < 65536; x++) {
      int r = rtable[(x >> 11) & 0x1f];
      int g = gtable[(x >> 5) & 0x3f];
      int b = btable[x & 0x1f];
      v->fbi.pen[x] = MAKE_RGB(r, g, b);
    }
    /* no longer dirty */
    v->fbi.clut_dirty = 0;
  }
}
4128