1 /////////////////////////////////////////////////////////////////////////
2 // $Id: voodoo_func.h 14297 2021-07-01 19:32:28Z vruppert $
3 /////////////////////////////////////////////////////////////////////////
4 /*
5  *  Portion of this software comes with the following license
6  */
7 
8 /***************************************************************************
9 
10     Copyright Aaron Giles
11     All rights reserved.
12 
13     Redistribution and use in source and binary forms, with or without
14     modification, are permitted provided that the following conditions are
15     met:
16 
17         * Redistributions of source code must retain the above copyright
18           notice, this list of conditions and the following disclaimer.
19         * Redistributions in binary form must reproduce the above copyright
20           notice, this list of conditions and the following disclaimer in
21           the documentation and/or other materials provided with the
22           distribution.
23         * Neither the name 'MAME' nor the names of its contributors may be
24           used to endorse or promote products derived from this software
25           without specific prior written permission.
26 
27     THIS SOFTWARE IS PROVIDED BY AARON GILES ''AS IS'' AND ANY EXPRESS OR
28     IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
29     WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
30     DISCLAIMED. IN NO EVENT SHALL AARON GILES BE LIABLE FOR ANY DIRECT,
31     INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
32     (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
33     SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34     HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
35     STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
36     IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37     POSSIBILITY OF SUCH DAMAGE.
38 
39 ***************************************************************************/
40 
/* NOTE(review): initialised to 255; the meaning of that value is not visible
 * in this part of the file -- confirm against the code that reads it. */
Bit32u voodoo_last_msg = 255;


/* Both helpers expand to nothing here, so any use of them compiles away. */
#define poly_wait(x,y)
#define cpu_eat_cycles(x,y)

/* Compile-time debug switches (both disabled). */
#define DEBUG_DEPTH     (0)
#define DEBUG_LOD     (0)

/* Compile-time logging switches (all disabled). LOG_REGISTERS, for example, */
/* guards the cycle-count BX_DEBUG message in triangle(). */
#define LOG_VBLANK_SWAP   (0)
#define LOG_FIFO      (0)
#define LOG_FIFO_VERBOSE  (0)
#define LOG_REGISTERS   (0)
#define LOG_WAITS     (0)
#define LOG_LFB       (0)
#define LOG_TEXTURE_RAM   (0)
#define LOG_RASTERIZERS   (0)
#define LOG_CMDFIFO     (0)
#define LOG_CMDFIFO_VERBOSE (0)

/* Expands to nothing here. */
#define MODIFY_PIXEL(VV)

/* fifo thread variable */
BX_THREAD_VAR(fifo_thread_var);
/* CMDFIFO thread mutex (Voodoo2) */
BX_MUTEX(cmdfifo_mutex);
/* render mutex (Banshee) */
BX_MUTEX(render_mutex);
/* FIFO event stuff */
BX_MUTEX(fifo_mutex);
bx_thread_sem_t fifo_wakeup;
bx_thread_sem_t fifo_not_full;
/* file-local semaphore (static); its users are outside this part of the file */
static bx_thread_sem_t vertical_sem;

/* fast dither lookup */
static Bit8u dither4_lookup[256*16*2];
static Bit8u dither2_lookup[256*16*2];

/* fast reciprocal+log2 lookup; holds (2 << RECIPLOG_LOOKUP_BITS) + 2 entries */
Bit32u voodoo_reciplog[(2 << RECIPLOG_LOOKUP_BITS) + 2];
81 
82 
/*
 * Rasterize one scanline of a triangle.
 *
 *   tmus      - number of texture units to run: 0 skips texturing, >= 1 runs
 *               TMU 0, >= 2 additionally runs TMU 1 first
 *   destbase  - base of the color buffer, addressed as 16-bit pixels
 *   y         - scanline being drawn (before the optional Y-origin flip)
 *   extent    - supplies startx/stopx; pixels startx <= x < stopx are visited
 *   extradata - really a const poly_extra_data *, carrying the voodoo state
 *               and the triangle's start values and gradients
 *   threadid  - selects which entry of v->thread_stats receives the counters
 *
 * The per-pixel work is done by the PIXEL_PIPELINE_BEGIN, TEXTURE_PIPELINE,
 * CLAMPED_ARGB, COLORPATH_PIPELINE and PIXEL_PIPELINE_END macros, which are
 * defined outside this part of the file.
 */
void raster_function(int tmus, void *destbase, Bit32s y, const poly_extent *extent, const void *extradata, int threadid) {
	const poly_extra_data *extra = (const poly_extra_data *) extradata;
	voodoo_state *v = extra->state;
	stats_block *stats = &v->thread_stats[threadid];
	DECLARE_DITHER_POINTERS;
	Bit32s startx = extent->startx;
	Bit32s stopx = extent->stopx;
	Bit32s iterr, iterg, iterb, itera;
	Bit32s iterz;
	Bit64s iterw, iterw0 = 0, iterw1 = 0;
	Bit64s iters0 = 0, iters1 = 0;
	Bit64s itert0 = 0, itert1 = 0;
	Bit16u *depth;
	Bit16u *dest;
	Bit32s dx, dy;
	Bit32s scry;
	Bit32s x;

	/* snapshot the mode registers once; a texture mode is only read for */
	/* the TMUs that actually take part (0 otherwise) */
	Bit32u fbzcolorpath= v->reg[fbzColorPath].u;
	Bit32u fbzmode= v->reg[fbzMode].u;
	Bit32u alphamode= v->reg[alphaMode].u;
	Bit32u fogmode= v->reg[fogMode].u;
	Bit32u texmode0= (tmus==0? 0 : v->tmu[0].reg[textureMode].u);
	Bit32u texmode1= (tmus<=1? 0 : v->tmu[1].reg[textureMode].u);

	/* determine the screen Y */
	scry = y;
	if (FBZMODE_Y_ORIGIN(fbzmode))
		scry = (v->fbi.yorigin - y) & 0x3ff;

	/* compute dithering */
	COMPUTE_DITHER_POINTERS(fbzmode, y);

	/* apply clipping */
	if (FBZMODE_ENABLE_CLIPPING(fbzmode)) {
		Bit32s tempclip;

		/* Y clipping buys us the whole scanline */
		/* (every pixel of the extent is counted as both "in" and "clip fail") */
		if (scry < (Bit32s) ((v->reg[clipLowYHighY].u >> 16) & 0x3ff)
				|| scry >= (Bit32s) (v->reg[clipLowYHighY].u & 0x3ff)) {
			stats->pixels_in += stopx - startx;
			stats->clip_fail += stopx - startx;
			return;
		}

		/* X clipping */
		/* left edge: upper 16 bits of clipLeftRight, masked to 10 bits */
		tempclip = (v->reg[clipLeftRight].u >> 16) & 0x3ff;
		if (startx < tempclip) {
			stats->pixels_in += tempclip - startx;
			startx = tempclip;
		}
		/* right edge: low 10 bits of clipLeftRight */
		/* NOTE(review): stopx becomes tempclip - 1 while the loop below treats */
		/* stopx as exclusive, so the last pixel drawn is tempclip - 2 -- */
		/* confirm that this matches the intended clip rectangle */
		tempclip = v->reg[clipLeftRight].u & 0x3ff;
		if (stopx >= tempclip) {
			stats->pixels_in += stopx - tempclip;
			stopx = tempclip - 1;
		}
	}

	/* get pointers to the target buffer and depth buffer */
	/* (depth is NULL when auxoffs is all ones) */
	dest = (Bit16u *) destbase + scry * v->fbi.rowpixels;
	depth =
			(v->fbi.auxoffs != (Bit32u) ~0) ?
					((Bit16u *) (v->fbi.ram + v->fbi.auxoffs)
							+ scry * v->fbi.rowpixels) :
					NULL;

	/* compute the starting parameters */
	/* dx/dy are whole-pixel offsets from vertex A (ax/ay with the low 4 bits dropped) */
	dx = startx - (extra->ax >> 4);
	dy = y - (extra->ay >> 4);
	iterr = extra->startr + dy * extra->drdy + dx * extra->drdx;
	iterg = extra->startg + dy * extra->dgdy + dx * extra->dgdx;
	iterb = extra->startb + dy * extra->dbdy + dx * extra->dbdx;
	itera = extra->starta + dy * extra->dady + dx * extra->dadx;
	iterz = extra->startz + dy * extra->dzdy + dx * extra->dzdx;
	iterw = extra->startw + dy * extra->dwdy + dx * extra->dwdx;
	if (tmus >= 1) {
		iterw0 = extra->startw0 + dy * extra->dw0dy + dx * extra->dw0dx;
		iters0 = extra->starts0 + dy * extra->ds0dy + dx * extra->ds0dx;
		itert0 = extra->startt0 + dy * extra->dt0dy + dx * extra->dt0dx;
	}
	if (tmus >= 2) {
		iterw1 = extra->startw1 + dy * extra->dw1dy + dx * extra->dw1dx;
		iters1 = extra->starts1 + dy * extra->ds1dy + dx * extra->ds1dx;
		itert1 = extra->startt1 + dy * extra->dt1dy + dx * extra->dt1dx;
	}

	/* loop in X */
	for (x = startx; x < stopx; x++) {
		rgb_union iterargb = { 0 };
		rgb_union texel = { 0 };

		/* pixel pipeline part 1 handles depth testing and stippling */
		/* NOTE(review): BEGIN/END presumably open and close a scope around */
		/* the statements in between -- confirm against the macro definitions */
		PIXEL_PIPELINE_BEGIN(v, stats, x, y, fbzcolorpath, fbzmode,
				iterz, iterw)
			;

			/* run the texture pipeline on TMU1 to produce a value in texel */
			/* note that they set LOD min to 8 to "disable" a TMU */
			if (tmus >= 2 && v->tmu[1].lodmin < (8 << 8))
				TEXTURE_PIPELINE(&v->tmu[1], x, dither4, texmode1, texel,
						v->tmu[1].lookup, extra->lodbase1, iters1, itert1,
						iterw1, texel);

			/* run the texture pipeline on TMU0 to produce a final */
			/* result in texel */
			/* note that they set LOD min to 8 to "disable" a TMU */
			if (tmus >= 1 && v->tmu[0].lodmin < (8 << 8)) {
				if (v->send_config == 0)
					TEXTURE_PIPELINE(&v->tmu[0], x, dither4, texmode0, texel,
							v->tmu[0].lookup, extra->lodbase0, iters0, itert0,
							iterw0, texel);
				/* send config data to the frame buffer */
				else
					texel.u = v->tmu_config;
			}
			/* colorpath pipeline selects source colors and does blending */
			CLAMPED_ARGB(iterr, iterg, iterb, itera, fbzcolorpath, iterargb);
			COLORPATH_PIPELINE(v, stats, fbzcolorpath, fbzmode, alphamode,
					texel, iterz, iterw, iterargb);

			/* pixel pipeline part 2 handles fog, alpha, and final output */
			PIXEL_PIPELINE_END(v, stats, dither, dither4, dither_lookup, x,
					dest, depth, fbzmode, fbzcolorpath, alphamode, fogmode,
					iterz, iterw, iterargb);

		/* update the iterated parameters */
		/* (step every interpolant one pixel to the right) */
		iterr += extra->drdx;
		iterg += extra->dgdx;
		iterb += extra->dbdx;
		itera += extra->dadx;
		iterz += extra->dzdx;
		iterw += extra->dwdx;
		if (tmus >= 1) {
			iterw0 += extra->dw0dx;
			iters0 += extra->ds0dx;
			itert0 += extra->dt0dx;
		}
		if (tmus >= 2) {
			iterw1 += extra->dw1dx;
			iters1 += extra->ds1dx;
			itert1 += extra->dt1dx;
		}
	}
}
227 
228 /*************************************
229  *
230  *  NCC table management
231  *
232  *************************************/
233 
/*
 * Handle a write of 'data' to NCC table register 'regnum' of table 'n'.
 *
 * Registers 0-3 each hold four packed 8-bit Y values, registers 4-7 hold the
 * I entries and the remaining registers hold the Q entries; every I/Q entry
 * packs three signed 9-bit R/G/B fields.  A write to an I/Q register with
 * bit 31 set is redirected to the palette (when one is attached) and leaves
 * the NCC registers and the dirty flag untouched.
 */
void ncc_table_write(ncc_table *n, offs_t regnum, Bit32u data)
{
  /* I/Q entries reference the palette if the high bit is set */
  if (regnum >= 4 && (data & 0x80000000) && n->palette)
  {
    const int slot = ((data >> 23) & 0xfe) | (regnum & 1);

    /* store the value as an opaque color at this palette index */
    n->palette[slot] = 0xff000000 | data;

    /* the optional ARGB palette expands four 6-bit fields to 8 bits each */
    /* by replicating the top two bits of a field into its low two bits */
    if (n->palettea)
    {
      int alpha = ((data >> 16) & 0xfc) | ((data >> 22) & 0x03);
      int red   = ((data >> 10) & 0xfc) | ((data >> 16) & 0x03);
      int green = ((data >>  4) & 0xfc) | ((data >> 10) & 0x03);
      int blue  = ((data <<  2) & 0xfc) | ((data >>  4) & 0x03);
      n->palettea[slot] = MAKE_ARGB(alpha, red, green, blue);
    }

    /* palette writes never touch the registers or dirty the table */
    return;
  }

  /* writing the value a register already holds changes nothing */
  if (n->reg[regnum].u == data)
    return;
  n->reg[regnum].u = data;

  if (regnum < 4)
  {
    /* unpack the four Y bytes, lowest byte first */
    const offs_t first = regnum * 4;
    int lane;

    for (lane = 0; lane < 4; lane++)
      n->y[first + lane] = (data >> (8 * lane)) & 0xff;
  }
  else
  {
    /* shift each 9-bit field to the top of the word, then arithmetic-shift */
    /* it back down so that it comes out sign-extended */
    const offs_t entry = regnum & 3;
    const Bit32s red   = (Bit32s)(data <<  5) >> 23;
    const Bit32s green = (Bit32s)(data << 14) >> 23;
    const Bit32s blue  = (Bit32s)(data << 23) >> 23;

    if (regnum < 8)
    {
      /* I entry */
      n->ir[entry] = red;
      n->ig[entry] = green;
      n->ib[entry] = blue;
    }
    else
    {
      /* Q entry */
      n->qr[entry] = red;
      n->qg[entry] = green;
      n->qb[entry] = blue;
    }
  }

  /* the texel lookup has to be regenerated */
  n->dirty = 1;
}
294 
295 
/*
 * Regenerate the 256-entry texel lookup of NCC table 'n' from its Y, I and Q
 * entries and clear the table's dirty flag.
 *
 * A texel index is laid out as YYYYIIQQ: the top four bits select the Y
 * value and the two 2-bit fields select the I and Q entries.
 */
void ncc_table_update(ncc_table *n)
{
  int y_sel, i_sel, q_sel;

  /* generate all 256 possibilities, in ascending index order */
  for (y_sel = 0; y_sel < 16; y_sel++)
  {
    for (i_sel = 0; i_sel < 4; i_sel++)
    {
      for (q_sel = 0; q_sel < 4; q_sel++)
      {
        /* every channel starts from the shared intensity */
        int red = n->y[y_sel];
        int green = red;
        int blue = red;

        /* add the I and Q color offsets */
        red   += n->ir[i_sel] + n->qr[q_sel];
        green += n->ig[i_sel] + n->qg[q_sel];
        blue  += n->ib[i_sel] + n->qb[q_sel];

        /* keep each channel within 0..255 */
        CLAMP(red, 0, 255);
        CLAMP(green, 0, 255);
        CLAMP(blue, 0, 255);

        /* store the opaque result */
        n->texel[(y_sel << 4) | (i_sel << 2) | q_sel] = MAKE_ARGB(0xff, red, green, blue);
      }
    }
  }

  /* the lookup now matches the registers */
  n->dirty = 0;
}
326 
/*
 * Rebuild the cached texture parameters of TMU 't' from its tLOD,
 * textureMode, texBaseAddr and tDetail registers: LOD range and bias, the
 * set of LODs present, base width/height masks, the offset of each LOD
 * level, the texel lookup table and the detail parameters.  Clears
 * t->regdirty.  Unsupported register settings are only reported.
 */
void recompute_texture_params(tmu_state *t)
{
  /* number of times the ZEROFRAC setting has been seen (persists across calls) */
  static Bit32u zerofrac_seen = 0;
  const Bit32u tlod = t->reg[tLOD].u;
  const Bit32u texmode = t->reg[textureMode].u;
  const Bit32u detail = t->reg[tDetail].u;
  Bit32u base;
  int bppscale;
  int lod;

  /* Unimplemented switch: reported for the first 50 occurrences only */
  if (TEXLOD_LOD_ZEROFRAC(tlod)) {
    if (zerofrac_seen < 50) BX_ERROR(("TEXLOD_LOD_ZEROFRAC not implemented yet"));
    zerofrac_seen++;
  }
  /* Banshee: unimplemented switches */
  if (TEXLOD_TMIRROR_S(tlod)) {
    BX_ERROR(("TEXLOD_TMIRROR_S not implemented yet"));
  }
  if (TEXLOD_TMIRROR_T(tlod)) {
    BX_ERROR(("TEXLOD_TMIRROR_T not implemented yet"));
  }

  /* LOD range and bias */
  t->lodmin = TEXLOD_LODMIN(tlod) << 6;
  t->lodmax = TEXLOD_LODMAX(tlod) << 6;
  t->lodbias = (Bit8s)(TEXLOD_LODBIAS(tlod) << 2) << 4;

  /* which LODs are present: all nine, or only every other one */
  /* (0x155 when the ODD bit is clear, 0x0aa when it is set) */
  if (!TEXLOD_LOD_TSPLIT(tlod))
    t->lodmask = 0x1ff;
  else if (TEXLOD_LOD_ODD(tlod))
    t->lodmask = 0x0aa;
  else
    t->lodmask = 0x155;

  /* base texture width/height: the aspect setting shrinks one dimension */
  t->wmask = t->hmask = 0xff;
  if (TEXLOD_LOD_S_IS_WIDER(tlod))
    t->hmask >>= TEXLOD_LOD_ASPECT(tlod);
  else
    t->wmask >>= TEXLOD_LOD_ASPECT(tlod);

  /* shift that converts a texel count into a byte count for this format */
  bppscale = TEXMODE_FORMAT(texmode) >> 3;

  /* LOD 0 starts at the base address */
  if (t->texaddr_shift == 0 && (t->reg[texBaseAddr].u & 1))
    BX_DEBUG(("Tiled texture"));
  base = (t->reg[texBaseAddr].u & t->texaddr_mask) << t->texaddr_shift;
  t->lodoffset[0] = base & t->mask;

  /* LODs 1-3 are different depending on whether we are in multitex mode */
  /* Several Voodoo 2 games leave the upper bits of TLOD == 0xff, meaning we think */
  /* they want multitex mode when they really don't -- disable for now */
  if (TEXLOD_TMULTIBASEADDR(tlod)) {
    BX_ERROR(("TEXLOD_TMULTIBASEADDR disabled for now"));
  }
  if (0) /* disabled; the condition was TEXLOD_TMULTIBASEADDR(t->reg[tLOD].u) */
  {
    base = (t->reg[texBaseAddr_1].u & t->texaddr_mask) << t->texaddr_shift;
    t->lodoffset[1] = base & t->mask;
    base = (t->reg[texBaseAddr_2].u & t->texaddr_mask) << t->texaddr_shift;
    t->lodoffset[2] = base & t->mask;
    base = (t->reg[texBaseAddr_3_8].u & t->texaddr_mask) << t->texaddr_shift;
    t->lodoffset[3] = base & t->mask;
  }
  else
  {
    /* each level starts right after the previous level, if that one is present */
    for (lod = 1; lod <= 3; lod++)
    {
      if (t->lodmask & (1 << (lod - 1)))
        base += (((t->wmask >> (lod - 1)) + 1) * ((t->hmask >> (lod - 1)) + 1)) << bppscale;
      t->lodoffset[lod] = base & t->mask;
    }
  }

  /* the remaining levels follow the same rule, except that a level */
  /* never advances the offset by fewer than 4 texels */
  for (lod = 4; lod <= 8; lod++)
  {
    if (t->lodmask & (1 << (lod - 1)))
    {
      Bit32u texels = ((t->wmask >> (lod - 1)) + 1) * ((t->hmask >> (lod - 1)) + 1);
      if (texels < 4) texels = 4;
      base += texels << bppscale;
    }
    t->lodoffset[lod] = base & t->mask;
  }

  /* formats 1 and 9 look up through the selected NCC table */
  t->texel[1] = t->texel[9] = t->ncc[TEXMODE_NCC_TABLE_SELECT(texmode)].texel;

  /* pick the lookup table for the current format */
  t->lookup = t->texel[TEXMODE_FORMAT(texmode)];

  /* detail texture parameters */
  t->detailmax = TEXDETAIL_DETAIL_MAX(detail);
  t->detailbias = (Bit8s)(TEXDETAIL_DETAIL_BIAS(detail) << 2) << 6;
  t->detailscale = TEXDETAIL_DETAIL_SCALE(detail);

  /* the cached values are now current */
  t->regdirty = 0;

  /* separate RGBA filtering is not supported */
  if (TEXDETAIL_SEPARATE_RGBA_FILTER(detail))
    BX_PANIC(("Separate RGBA filters!"));
}
435 
prepare_tmu(tmu_state * t)436 BX_CPP_INLINE Bit32s prepare_tmu(tmu_state *t)
437 {
438   Bit64s texdx, texdy;
439   Bit32s lodbase;
440 
441   /* if the texture parameters are dirty, update them */
442   if (t->regdirty) {
443     recompute_texture_params(t);
444 
445     /* ensure that the NCC tables are up to date */
446     if ((TEXMODE_FORMAT(t->reg[textureMode].u) & 7) == 1)
447     {
448       ncc_table *n = &t->ncc[TEXMODE_NCC_TABLE_SELECT(t->reg[textureMode].u)];
449       t->texel[1] = t->texel[9] = n->texel;
450       if (n->dirty)
451         ncc_table_update(n);
452     }
453   }
454 
455   /* compute (ds^2 + dt^2) in both X and Y as 28.36 numbers */
456   texdx = (Bit64s)(t->dsdx >> 14) * (Bit64s)(t->dsdx >> 14) + (Bit64s)(t->dtdx >> 14) * (Bit64s)(t->dtdx >> 14);
457   texdy = (Bit64s)(t->dsdy >> 14) * (Bit64s)(t->dsdy >> 14) + (Bit64s)(t->dtdy >> 14) * (Bit64s)(t->dtdy >> 14);
458 
459   /* pick whichever is larger and shift off some high bits -> 28.20 */
460   if (texdx < texdy)
461     texdx = texdy;
462   texdx >>= 16;
463 
464   /* use our fast reciprocal/log on this value; it expects input as a */
465   /* 16.32 number, and returns the log of the reciprocal, so we have to */
466   /* adjust the result: negative to get the log of the original value */
467   /* plus 12 to account for the extra exponent, and divided by 2 to */
468   /* get the log of the square root of texdx */
469   (void)fast_reciplog(texdx, &lodbase);
470   return (-lodbase + (12 << 8)) / 2;
471 }
472 
473 
round_coordinate(float value)474 BX_CPP_INLINE Bit32s round_coordinate(float value)
475 {
476   Bit32s result = (Bit32s)floor(value);
477   return result + (value - (float)result > 0.5f);
478 }
479 
poly_render_triangle(void * dest,const rectangle * cliprect,int texcount,int paramcount,const poly_vertex * v1,const poly_vertex * v2,const poly_vertex * v3,poly_extra_data * extra)480 Bit32u poly_render_triangle(void *dest, const rectangle *cliprect, int texcount, int paramcount, const poly_vertex *v1, const poly_vertex *v2, const poly_vertex *v3, poly_extra_data *extra)
481 {
482   float dxdy_v1v2, dxdy_v1v3, dxdy_v2v3;
483   const poly_vertex *tv;
484   Bit32s curscan, scaninc=1;
485 
486   Bit32s v1yclip, v3yclip;
487   Bit32s v1y, v3y;
488   Bit32s pixels = 0;
489 
490   /* first sort by Y */
491   if (v2->y < v1->y)
492   {
493     tv = v1;
494     v1 = v2;
495     v2 = tv;
496   }
497   if (v3->y < v2->y)
498   {
499     tv = v2;
500     v2 = v3;
501     v3 = tv;
502     if (v2->y < v1->y)
503     {
504       tv = v1;
505       v1 = v2;
506       v2 = tv;
507     }
508   }
509 
510   /* compute some integral X/Y vertex values */
511   v1y = round_coordinate(v1->y);
512   v3y = round_coordinate(v3->y);
513 
514   /* clip coordinates */
515   v1yclip = v1y;
516   v3yclip = v3y;
517   if (cliprect != NULL)
518   {
519     v1yclip = MAX(v1yclip, cliprect->min_y);
520     v3yclip = MIN(v3yclip, cliprect->max_y + 1);
521   }
522   if (v3yclip - v1yclip <= 0)
523     return 0;
524 
525   /* compute the slopes for each portion of the triangle */
526   dxdy_v1v2 = (v2->y == v1->y) ? 0.0f : (v2->x - v1->x) / (v2->y - v1->y);
527   dxdy_v1v3 = (v3->y == v1->y) ? 0.0f : (v3->x - v1->x) / (v3->y - v1->y);
528   dxdy_v2v3 = (v3->y == v2->y) ? 0.0f : (v3->x - v2->x) / (v3->y - v2->y);
529 
530   /* compute the X extents for each scanline */
531   poly_extent extent;
532   int extnum=0;
533   for (curscan = v1yclip; curscan < v3yclip; curscan += scaninc)
534   {
535     {
536       float fully = (float)(curscan + extnum) + 0.5f;
537       float startx = v1->x + (fully - v1->y) * dxdy_v1v3;
538       float stopx;
539       Bit32s istartx, istopx;
540 
541       /* compute the ending X based on which part of the triangle we're in */
542       if (fully < v2->y)
543         stopx = v1->x + (fully - v1->y) * dxdy_v1v2;
544       else
545         stopx = v2->x + (fully - v2->y) * dxdy_v2v3;
546 
547       /* clamp to full pixels */
548       istartx = round_coordinate(startx);
549       istopx = round_coordinate(stopx);
550 
551       /* force start < stop */
552       if (istartx > istopx)
553       {
554         Bit32s temp = istartx;
555         istartx = istopx;
556         istopx = temp;
557       }
558 
559       /* apply left/right clipping */
560       if (cliprect != NULL)
561       {
562         if (istartx < cliprect->min_x)
563           istartx = cliprect->min_x;
564         if (istopx > cliprect->max_x)
565           istopx = cliprect->max_x + 1;
566       }
567 
568       /* set the extent and update the total pixel count */
569       if (istartx >= istopx)
570         istartx = istopx = 0;
571       extent.startx = istartx;
572       extent.stopx = istopx;
573       raster_function(texcount,dest,curscan,&extent,extra,0);
574 
575       pixels += istopx - istartx;
576     }
577   }
578 
579   return pixels;
580 }
581 
triangle_create_work_item(Bit16u * drawbuf,int texcount)582 Bit32s triangle_create_work_item(Bit16u *drawbuf, int texcount)
583 {
584   poly_extra_data extra;
585   poly_vertex vert[3];
586   Bit32u retval;
587 
588   /* fill in the vertex data */
589   vert[0].x = (float)v->fbi.ax * (1.0f / 16.0f);
590   vert[0].y = (float)v->fbi.ay * (1.0f / 16.0f);
591   vert[1].x = (float)v->fbi.bx * (1.0f / 16.0f);
592   vert[1].y = (float)v->fbi.by * (1.0f / 16.0f);
593   vert[2].x = (float)v->fbi.cx * (1.0f / 16.0f);
594   vert[2].y = (float)v->fbi.cy * (1.0f / 16.0f);
595 
596   /* fill in the extra data */
597   extra.state = v;
598 
599   /* fill in triangle parameters */
600   extra.ax = v->fbi.ax;
601   extra.ay = v->fbi.ay;
602   extra.startr = v->fbi.startr;
603   extra.startg = v->fbi.startg;
604   extra.startb = v->fbi.startb;
605   extra.starta = v->fbi.starta;
606   extra.startz = v->fbi.startz;
607   extra.startw = v->fbi.startw;
608   extra.drdx = v->fbi.drdx;
609   extra.dgdx = v->fbi.dgdx;
610   extra.dbdx = v->fbi.dbdx;
611   extra.dadx = v->fbi.dadx;
612   extra.dzdx = v->fbi.dzdx;
613   extra.dwdx = v->fbi.dwdx;
614   extra.drdy = v->fbi.drdy;
615   extra.dgdy = v->fbi.dgdy;
616   extra.dbdy = v->fbi.dbdy;
617   extra.dady = v->fbi.dady;
618   extra.dzdy = v->fbi.dzdy;
619   extra.dwdy = v->fbi.dwdy;
620 
621   /* fill in texture 0 parameters */
622   if (texcount > 0)
623   {
624     extra.starts0 = v->tmu[0].starts;
625     extra.startt0 = v->tmu[0].startt;
626     extra.startw0 = v->tmu[0].startw;
627     extra.ds0dx = v->tmu[0].dsdx;
628     extra.dt0dx = v->tmu[0].dtdx;
629     extra.dw0dx = v->tmu[0].dwdx;
630     extra.ds0dy = v->tmu[0].dsdy;
631     extra.dt0dy = v->tmu[0].dtdy;
632     extra.dw0dy = v->tmu[0].dwdy;
633     extra.lodbase0 = prepare_tmu(&v->tmu[0]);
634 
635     /* fill in texture 1 parameters */
636     if (texcount > 1)
637     {
638       extra.starts1 = v->tmu[1].starts;
639       extra.startt1 = v->tmu[1].startt;
640       extra.startw1 = v->tmu[1].startw;
641       extra.ds1dx = v->tmu[1].dsdx;
642       extra.dt1dx = v->tmu[1].dtdx;
643       extra.dw1dx = v->tmu[1].dwdx;
644       extra.ds1dy = v->tmu[1].dsdy;
645       extra.dt1dy = v->tmu[1].dtdy;
646       extra.dw1dy = v->tmu[1].dwdy;
647       extra.lodbase1 = prepare_tmu(&v->tmu[1]);
648     }
649   }
650 
651   /* farm the rasterization out to other threads */
652   retval = poly_render_triangle(drawbuf, NULL, texcount, 0, &vert[0], &vert[1], &vert[2], &extra);
653 
654   return retval;
655 }
656 
657 
triangle()658 Bit32s triangle()
659 {
660   int texcount = 0;
661   Bit16u *drawbuf;
662   int destbuf;
663   int pixels;
664 
665   /* determine the number of TMUs involved */
666   texcount = 0;
667   if (!FBIINIT3_DISABLE_TMUS(v->reg[fbiInit3].u) && FBZCP_TEXTURE_ENABLE(v->reg[fbzColorPath].u))
668   {
669     texcount = 1;
670     if (v->chipmask & 0x04)
671       texcount = 2;
672   }
673 
674   /* perform subpixel adjustments */
675   if (FBZCP_CCA_SUBPIXEL_ADJUST(v->reg[fbzColorPath].u))
676   {
677     Bit32s dx = 8 - (v->fbi.ax & 15);
678     Bit32s dy = 8 - (v->fbi.ay & 15);
679 
680     /* adjust iterated R,G,B,A and W/Z */
681     v->fbi.startr += (dy * v->fbi.drdy + dx * v->fbi.drdx) >> 4;
682     v->fbi.startg += (dy * v->fbi.dgdy + dx * v->fbi.dgdx) >> 4;
683     v->fbi.startb += (dy * v->fbi.dbdy + dx * v->fbi.dbdx) >> 4;
684     v->fbi.starta += (dy * v->fbi.dady + dx * v->fbi.dadx) >> 4;
685     v->fbi.startw += (dy * v->fbi.dwdy + dx * v->fbi.dwdx) >> 4;
686     v->fbi.startz += mul_32x32_shift(dy, v->fbi.dzdy, 4) + mul_32x32_shift(dx, v->fbi.dzdx, 4);
687 
688     /* adjust iterated W/S/T for TMU 0 */
689     if (texcount >= 1)
690     {
691       v->tmu[0].startw += (dy * v->tmu[0].dwdy + dx * v->tmu[0].dwdx) >> 4;
692       v->tmu[0].starts += (dy * v->tmu[0].dsdy + dx * v->tmu[0].dsdx) >> 4;
693       v->tmu[0].startt += (dy * v->tmu[0].dtdy + dx * v->tmu[0].dtdx) >> 4;
694 
695       /* adjust iterated W/S/T for TMU 1 */
696       if (texcount >= 2)
697       {
698         v->tmu[1].startw += (dy * v->tmu[1].dwdy + dx * v->tmu[1].dwdx) >> 4;
699         v->tmu[1].starts += (dy * v->tmu[1].dsdy + dx * v->tmu[1].dsdx) >> 4;
700         v->tmu[1].startt += (dy * v->tmu[1].dtdy + dx * v->tmu[1].dtdx) >> 4;
701       }
702     }
703   }
704 
705   /* determine the draw buffer */
706   destbuf = (v->type >= VOODOO_BANSHEE) ? 1 : FBZMODE_DRAW_BUFFER(v->reg[fbzMode].u);
707   switch (destbuf)
708   {
709     case 0:   /* front buffer */
710       drawbuf = (Bit16u *)(v->fbi.ram + v->fbi.rgboffs[v->fbi.frontbuf]);
711       v->fbi.video_changed = 1;
712       break;
713 
714     case 1:   /* back buffer */
715       drawbuf = (Bit16u *)(v->fbi.ram + v->fbi.rgboffs[v->fbi.backbuf]);
716       break;
717 
718     default:  /* reserved */
719       return TRIANGLE_SETUP_CLOCKS;
720   }
721 
722   /* find a rasterizer that matches our current state */
723   pixels = triangle_create_work_item(/*v, */drawbuf, texcount);
724 
725   /* update stats */
726   v->reg[fbiTrianglesOut].u++;
727 
728   /* 1 pixel per clock, plus some setup time */
729   if (LOG_REGISTERS) BX_DEBUG(("cycles = %d", TRIANGLE_SETUP_CLOCKS + pixels));
730   return TRIANGLE_SETUP_CLOCKS + pixels;
731 }
732 
733 
/*
 * Convert the three setup vertices in v->fbi.svert[] into the start values
 * and X/Y gradients used for drawing, then draw the triangle via triangle().
 *
 * Which parameter groups are set up is selected by the low bits of the
 * sSetupMode register: bit 0 RGB, bit 1 alpha, bit 2 Z, bit 3 Wb, bit 4 W0,
 * bit 5 S0/T0, bit 6 W1, bit 7 S1/T1.  Groups whose bit is clear keep their
 * previous values.  When culling is enabled (bit 17) and the triangle faces
 * the culled direction, nothing is drawn.
 *
 * Returns TRIANGLE_SETUP_CLOCKS for a culled triangle, otherwise the result
 * of triangle().
 */
static Bit32s setup_and_draw_triangle()
{
  float dx1, dy1, dx2, dy2;
  float divisor, tdiv;

  /* grab the X/Ys at least */
  /* (scaled by 16 and truncated to 16-bit signed integers) */
  v->fbi.ax = (Bit16s)(v->fbi.svert[0].x * 16.0);
  v->fbi.ay = (Bit16s)(v->fbi.svert[0].y * 16.0);
  v->fbi.bx = (Bit16s)(v->fbi.svert[1].x * 16.0);
  v->fbi.by = (Bit16s)(v->fbi.svert[1].y * 16.0);
  v->fbi.cx = (Bit16s)(v->fbi.svert[2].x * 16.0);
  v->fbi.cy = (Bit16s)(v->fbi.svert[2].y * 16.0);

  /* compute the divisor */
  /* (reciprocal of the cross product of the edges 0->1 and 0->2) */
  /* NOTE(review): the cross product is zero for a degenerate triangle and */
  /* there is no guard before the division -- confirm this is acceptable */
  divisor = 1.0f / ((v->fbi.svert[0].x - v->fbi.svert[1].x) * (v->fbi.svert[0].y - v->fbi.svert[2].y) -
            (v->fbi.svert[0].x - v->fbi.svert[2].x) * (v->fbi.svert[0].y - v->fbi.svert[1].y));

  /* backface culling */
  if (v->reg[sSetupMode].u & 0x20000)
  {
    int culling_sign = (v->reg[sSetupMode].u >> 18) & 1;
    int divisor_sign = (divisor < 0);

    /* if doing strips and ping pong is enabled, apply the ping pong */
    /* (bits 16 and 19 both clear; the sign then alternates with the vertex count) */
    if ((v->reg[sSetupMode].u & 0x90000) == 0x00000)
      culling_sign ^= (v->fbi.sverts - 3) & 1;

    /* if our sign matches the culling sign, we're done for */
    if (divisor_sign == culling_sign)
      return TRIANGLE_SETUP_CLOCKS;
  }

  /* compute the dx/dy values */
  /* note: dx1/dx2 hold Y differences and dy1/dy2 hold X differences; they */
  /* are the factors used to form the d/dx and d/dy gradients respectively */
  dx1 = v->fbi.svert[0].y - v->fbi.svert[2].y;
  dx2 = v->fbi.svert[0].y - v->fbi.svert[1].y;
  dy1 = v->fbi.svert[0].x - v->fbi.svert[1].x;
  dy2 = v->fbi.svert[0].x - v->fbi.svert[2].x;

  /* set up R,G,B */
  /* (color, alpha and Z values are scaled by 4096 when converted to integers) */
  tdiv = divisor * 4096.0f;
  if (v->reg[sSetupMode].u & (1 << 0))
  {
    v->fbi.startr = (Bit32s)(v->fbi.svert[0].r * 4096.0f);
    v->fbi.drdx = (Bit32s)(((v->fbi.svert[0].r - v->fbi.svert[1].r) * dx1 - (v->fbi.svert[0].r - v->fbi.svert[2].r) * dx2) * tdiv);
    v->fbi.drdy = (Bit32s)(((v->fbi.svert[0].r - v->fbi.svert[2].r) * dy1 - (v->fbi.svert[0].r - v->fbi.svert[1].r) * dy2) * tdiv);
    v->fbi.startg = (Bit32s)(v->fbi.svert[0].g * 4096.0f);
    v->fbi.dgdx = (Bit32s)(((v->fbi.svert[0].g - v->fbi.svert[1].g) * dx1 - (v->fbi.svert[0].g - v->fbi.svert[2].g) * dx2) * tdiv);
    v->fbi.dgdy = (Bit32s)(((v->fbi.svert[0].g - v->fbi.svert[2].g) * dy1 - (v->fbi.svert[0].g - v->fbi.svert[1].g) * dy2) * tdiv);
    v->fbi.startb = (Bit32s)(v->fbi.svert[0].b * 4096.0f);
    v->fbi.dbdx = (Bit32s)(((v->fbi.svert[0].b - v->fbi.svert[1].b) * dx1 - (v->fbi.svert[0].b - v->fbi.svert[2].b) * dx2) * tdiv);
    v->fbi.dbdy = (Bit32s)(((v->fbi.svert[0].b - v->fbi.svert[2].b) * dy1 - (v->fbi.svert[0].b - v->fbi.svert[1].b) * dy2) * tdiv);
  }

  /* set up alpha */
  if (v->reg[sSetupMode].u & (1 << 1))
  {
    v->fbi.starta = (Bit32s)(v->fbi.svert[0].a * 4096.0);
    v->fbi.dadx = (Bit32s)(((v->fbi.svert[0].a - v->fbi.svert[1].a) * dx1 - (v->fbi.svert[0].a - v->fbi.svert[2].a) * dx2) * tdiv);
    v->fbi.dady = (Bit32s)(((v->fbi.svert[0].a - v->fbi.svert[2].a) * dy1 - (v->fbi.svert[0].a - v->fbi.svert[1].a) * dy2) * tdiv);
  }

  /* set up Z */
  if (v->reg[sSetupMode].u & (1 << 2))
  {
    v->fbi.startz = (Bit32s)(v->fbi.svert[0].z * 4096.0);
    v->fbi.dzdx = (Bit32s)(((v->fbi.svert[0].z - v->fbi.svert[1].z) * dx1 - (v->fbi.svert[0].z - v->fbi.svert[2].z) * dx2) * tdiv);
    v->fbi.dzdy = (Bit32s)(((v->fbi.svert[0].z - v->fbi.svert[2].z) * dy1 - (v->fbi.svert[0].z - v->fbi.svert[1].z) * dy2) * tdiv);
  }

  /* set up Wb */
  /* (W, S and T values are scaled by 65536 * 65536 and stored as 64-bit integers) */
  /* Wb is also written to both TMUs; the W0 and W1 groups below override it */
  tdiv = divisor * 65536.0f * 65536.0f;
  if (v->reg[sSetupMode].u & (1 << 3))
  {
    v->fbi.startw = v->tmu[0].startw = v->tmu[1].startw = (Bit64s)(v->fbi.svert[0].wb * 65536.0f * 65536.0f);
    v->fbi.dwdx = v->tmu[0].dwdx = v->tmu[1].dwdx = (Bit64s)(((v->fbi.svert[0].wb - v->fbi.svert[1].wb) * dx1 - (v->fbi.svert[0].wb - v->fbi.svert[2].wb) * dx2) * tdiv);
    v->fbi.dwdy = v->tmu[0].dwdy = v->tmu[1].dwdy = (Bit64s)(((v->fbi.svert[0].wb - v->fbi.svert[2].wb) * dy1 - (v->fbi.svert[0].wb - v->fbi.svert[1].wb) * dy2) * tdiv);
  }

  /* set up W0 */
  /* (written to both TMUs; the W1 group below overrides TMU 1) */
  if (v->reg[sSetupMode].u & (1 << 4))
  {
    v->tmu[0].startw = v->tmu[1].startw = (Bit64s)(v->fbi.svert[0].w0 * 65536.0f * 65536.0f);
    v->tmu[0].dwdx = v->tmu[1].dwdx = (Bit64s)(((v->fbi.svert[0].w0 - v->fbi.svert[1].w0) * dx1 - (v->fbi.svert[0].w0 - v->fbi.svert[2].w0) * dx2) * tdiv);
    v->tmu[0].dwdy = v->tmu[1].dwdy = (Bit64s)(((v->fbi.svert[0].w0 - v->fbi.svert[2].w0) * dy1 - (v->fbi.svert[0].w0 - v->fbi.svert[1].w0) * dy2) * tdiv);
  }

  /* set up S0,T0 */
  /* (written to both TMUs; the S1,T1 group below overrides TMU 1) */
  if (v->reg[sSetupMode].u & (1 << 5))
  {
    v->tmu[0].starts = v->tmu[1].starts = (Bit64s)(v->fbi.svert[0].s0 * 65536.0f * 65536.0f);
    v->tmu[0].dsdx = v->tmu[1].dsdx = (Bit64s)(((v->fbi.svert[0].s0 - v->fbi.svert[1].s0) * dx1 - (v->fbi.svert[0].s0 - v->fbi.svert[2].s0) * dx2) * tdiv);
    v->tmu[0].dsdy = v->tmu[1].dsdy = (Bit64s)(((v->fbi.svert[0].s0 - v->fbi.svert[2].s0) * dy1 - (v->fbi.svert[0].s0 - v->fbi.svert[1].s0) * dy2) * tdiv);
    v->tmu[0].startt = v->tmu[1].startt = (Bit64s)(v->fbi.svert[0].t0 * 65536.0f * 65536.0f);
    v->tmu[0].dtdx = v->tmu[1].dtdx = (Bit64s)(((v->fbi.svert[0].t0 - v->fbi.svert[1].t0) * dx1 - (v->fbi.svert[0].t0 - v->fbi.svert[2].t0) * dx2) * tdiv);
    v->tmu[0].dtdy = v->tmu[1].dtdy = (Bit64s)(((v->fbi.svert[0].t0 - v->fbi.svert[2].t0) * dy1 - (v->fbi.svert[0].t0 - v->fbi.svert[1].t0) * dy2) * tdiv);
  }

  /* set up W1 */
  if (v->reg[sSetupMode].u & (1 << 6))
  {
    v->tmu[1].startw = (Bit64s)(v->fbi.svert[0].w1 * 65536.0f * 65536.0f);
    v->tmu[1].dwdx = (Bit64s)(((v->fbi.svert[0].w1 - v->fbi.svert[1].w1) * dx1 - (v->fbi.svert[0].w1 - v->fbi.svert[2].w1) * dx2) * tdiv);
    v->tmu[1].dwdy = (Bit64s)(((v->fbi.svert[0].w1 - v->fbi.svert[2].w1) * dy1 - (v->fbi.svert[0].w1 - v->fbi.svert[1].w1) * dy2) * tdiv);
  }

  /* set up S1,T1 */
  if (v->reg[sSetupMode].u & (1 << 7))
  {
    v->tmu[1].starts = (Bit64s)(v->fbi.svert[0].s1 * 65536.0f * 65536.0f);
    v->tmu[1].dsdx = (Bit64s)(((v->fbi.svert[0].s1 - v->fbi.svert[1].s1) * dx1 - (v->fbi.svert[0].s1 - v->fbi.svert[2].s1) * dx2) * tdiv);
    v->tmu[1].dsdy = (Bit64s)(((v->fbi.svert[0].s1 - v->fbi.svert[2].s1) * dy1 - (v->fbi.svert[0].s1 - v->fbi.svert[1].s1) * dy2) * tdiv);
    v->tmu[1].startt = (Bit64s)(v->fbi.svert[0].t1 * 65536.0f * 65536.0f);
    v->tmu[1].dtdx = (Bit64s)(((v->fbi.svert[0].t1 - v->fbi.svert[1].t1) * dx1 - (v->fbi.svert[0].t1 - v->fbi.svert[2].t1) * dx2) * tdiv);
    v->tmu[1].dtdy = (Bit64s)(((v->fbi.svert[0].t1 - v->fbi.svert[2].t1) * dy1 - (v->fbi.svert[0].t1 - v->fbi.svert[1].t1) * dy2) * tdiv);
  }

  /* draw the triangle */
  v->fbi.cheating_allowed = 1;
  return triangle();
}
854 
855 
begin_triangle()856 static Bit32s begin_triangle()
857 {
858   setup_vertex *sv = &v->fbi.svert[2];
859 
860   /* extract all the data from registers */
861   sv->x = v->reg[sVx].f;
862   sv->y = v->reg[sVy].f;
863   sv->wb = v->reg[sWb].f;
864   sv->w0 = v->reg[sWtmu0].f;
865   sv->s0 = v->reg[sS_W0].f;
866   sv->t0 = v->reg[sT_W0].f;
867   sv->w1 = v->reg[sWtmu1].f;
868   sv->s1 = v->reg[sS_Wtmu1].f;
869   sv->t1 = v->reg[sT_Wtmu1].f;
870   sv->a = v->reg[sAlpha].f;
871   sv->r = v->reg[sRed].f;
872   sv->g = v->reg[sGreen].f;
873   sv->b = v->reg[sBlue].f;
874 
875   /* spread it across all three verts and reset the count */
876   v->fbi.svert[0] = v->fbi.svert[1] = v->fbi.svert[2];
877   v->fbi.sverts = 1;
878 
879   return 0;
880 }
881 
882 
draw_triangle()883 static Bit32s draw_triangle()
884 {
885   setup_vertex *sv = &v->fbi.svert[2];
886   int cycles = 0;
887 
888   /* for strip mode, shuffle vertex 1 down to 0 */
889   if (!(v->reg[sSetupMode].u & (1 << 16)))
890     v->fbi.svert[0] = v->fbi.svert[1];
891 
892   /* copy 2 down to 1 regardless */
893   v->fbi.svert[1] = v->fbi.svert[2];
894 
895   /* extract all the data from registers */
896   sv->x = v->reg[sVx].f;
897   sv->y = v->reg[sVy].f;
898   sv->wb = v->reg[sWb].f;
899   sv->w0 = v->reg[sWtmu0].f;
900   sv->s0 = v->reg[sS_W0].f;
901   sv->t0 = v->reg[sT_W0].f;
902   sv->w1 = v->reg[sWtmu1].f;
903   sv->s1 = v->reg[sS_Wtmu1].f;
904   sv->t1 = v->reg[sT_Wtmu1].f;
905   sv->a = v->reg[sAlpha].f;
906   sv->r = v->reg[sRed].f;
907   sv->g = v->reg[sGreen].f;
908   sv->b = v->reg[sBlue].f;
909 
910   /* if we have enough verts, go ahead and draw */
911   if (++v->fbi.sverts >= 3)
912     cycles = setup_and_draw_triangle();
913 
914   return cycles;
915 }
916 
917 
/*
 * raster_fastfill - scanline callback used by the fast-fill command.
 *
 * Fills pixels [extent->startx, extent->stopx) of scanline y in the RGB
 * buffer and/or the aux buffer, depending on the write masks in fbzMode.
 *
 *   destbase  - start of the RGB draw buffer (16-bit pixels).  fastfill()
 *               passes NULL when the selected draw buffer is a reserved
 *               value; the RGB fill is skipped in that case.
 *   y         - scanline to fill, before the optional Y-origin flip
 *   extent    - horizontal span to fill
 *   extradata - poly_extra_data carrying the voodoo state and the 4x4
 *               matrix of (dithered) fill colors
 *   threadid  - index of the stats block that receives the pixel count
 */
static void raster_fastfill(void *destbase, Bit32s y, const poly_extent *extent, const void *extradata, int threadid)
{
  const poly_extra_data *extra = (const poly_extra_data *)extradata;
  voodoo_state *v = extra->state;
  stats_block *stats = &v->thread_stats[threadid];
  Bit32s startx = extent->startx;
  Bit32s stopx = extent->stopx;
  int scry, x;

  /* determine the screen Y */
  scry = y;
  if (FBZMODE_Y_ORIGIN(v->reg[fbzMode].u))
    scry = (v->fbi.yorigin - y) & 0x3ff;

  /* fill this RGB row (only when there is a buffer to write to) */
  if (FBZMODE_RGB_BUFFER_MASK(v->reg[fbzMode].u) && destbase != NULL)
  {
    /* one row of the dither matrix: 4 pixels, repeated along the scanline */
    const Bit16u *ditherow = &extra->dither[(y & 3) * 4];
    Bit64u expanded = *(Bit64u *)ditherow;
    Bit16u *dest = (Bit16u *)destbase + scry * v->fbi.rowpixels;

    /* leading pixels up to the next multiple of 4 */
    for (x = startx; x < stopx && (x & 3) != 0; x++)
      dest[x] = ditherow[x & 3];
    /* whole groups of 4 pixels, written 64 bits at a time */
    for ( ; x < (stopx & ~3); x += 4)
      *(Bit64u *)&dest[x] = expanded;
    /* trailing pixels */
    for ( ; x < stopx; x++)
      dest[x] = ditherow[x & 3];
    stats->pixels_out += stopx - startx;
  }

  /* fill this aux buffer row with the low 16 bits of zaColor */
  if (FBZMODE_AUX_BUFFER_MASK(v->reg[fbzMode].u) && v->fbi.auxoffs != (Bit32u)~0)
  {
    Bit16u color = v->reg[zaColor].u;
    /* widen before shifting: a plain (color << 16) is evaluated as int and */
    /* sign-extends into the upper 32 bits whenever bit 15 of color is set */
    Bit64u wide = color;
    Bit64u expanded = (wide << 48) | (wide << 32) | (wide << 16) | wide;
    Bit16u *dest = (Bit16u *)(v->fbi.ram + v->fbi.auxoffs) + scry * v->fbi.rowpixels;

    for (x = startx; x < stopx && (x & 3) != 0; x++)
      dest[x] = color;
    for ( ; x < (stopx & ~3); x += 4)
      *(Bit64u *)&dest[x] = expanded;
    for ( ; x < stopx; x++)
      dest[x] = color;
  }
}
963 
964 
poly_render_triangle_custom(void * dest,const rectangle * cliprect,int startscanline,int numscanlines,const poly_extent * extents,poly_extra_data * extra)965 Bit32u poly_render_triangle_custom(void *dest, const rectangle *cliprect, int startscanline, int numscanlines, const poly_extent *extents, poly_extra_data *extra)
966 {
967   Bit32s curscan, scaninc;
968   Bit32s v1yclip, v3yclip;
969   Bit32s pixels = 0;
970 
971   /* clip coordinates */
972   if (cliprect != NULL)
973   {
974     v1yclip = MAX(startscanline, cliprect->min_y);
975     v3yclip = MIN(startscanline + numscanlines, cliprect->max_y + 1);
976   }
977   else
978   {
979     v1yclip = startscanline;
980     v3yclip = startscanline + numscanlines;
981   }
982   if (v3yclip - v1yclip <= 0)
983     return 0;
984 
985   /* compute the X extents for each scanline */
986   for (curscan = v1yclip; curscan < v3yclip; curscan += scaninc)
987   {
988     int extnum=0;
989 
990     /* determine how much to advance to hit the next bucket */
991     scaninc = 1;
992 
993     /* iterate over extents */
994     {
995       const poly_extent *extent = &extents[(curscan + extnum) - startscanline];
996       Bit32s istartx = extent->startx, istopx = extent->stopx;
997 
998       /* force start < stop */
999       if (istartx > istopx)
1000       {
1001         Bit32s temp = istartx;
1002         istartx = istopx;
1003         istopx = temp;
1004       }
1005 
1006       /* apply left/right clipping */
1007       if (cliprect != NULL)
1008       {
1009         if (istartx < cliprect->min_x)
1010           istartx = cliprect->min_x;
1011         if (istopx > cliprect->max_x)
1012           istopx = cliprect->max_x + 1;
1013       }
1014 
1015       /* set the extent and update the total pixel count */
1016       raster_fastfill(dest,curscan,extent,extra,0);
1017       if (istartx < istopx)
1018         pixels += istopx - istartx;
1019     }
1020   }
1021 #if KEEP_STATISTICS
1022   poly->unit_max = MAX(poly->unit_max, poly->unit_next);
1023 #endif
1024 
1025   return pixels;
1026 }
1027 
fastfill(voodoo_state * v)1028 Bit32s fastfill(voodoo_state *v)
1029 {
1030   int sx = (v->reg[clipLeftRight].u >> 16) & 0x3ff;
1031   int ex = (v->reg[clipLeftRight].u >> 0) & 0x3ff;
1032   int sy = (v->reg[clipLowYHighY].u >> 16) & 0x3ff;
1033   int ey = (v->reg[clipLowYHighY].u >> 0) & 0x3ff;
1034   poly_extent extents[64];
1035   Bit16u dithermatrix[16];
1036   Bit16u *drawbuf = NULL;
1037   Bit32u pixels = 0;
1038   int extnum, x, y;
1039 
1040   /* if we're not clearing either, take no time */
1041   if (!FBZMODE_RGB_BUFFER_MASK(v->reg[fbzMode].u) && !FBZMODE_AUX_BUFFER_MASK(v->reg[fbzMode].u))
1042     return 0;
1043 
1044   /* are we clearing the RGB buffer? */
1045   if (FBZMODE_RGB_BUFFER_MASK(v->reg[fbzMode].u))
1046   {
1047     /* determine the draw buffer */
1048     int destbuf = (v->type >= VOODOO_BANSHEE) ? 1 : FBZMODE_DRAW_BUFFER(v->reg[fbzMode].u);
1049     switch (destbuf)
1050     {
1051       case 0:   /* front buffer */
1052         drawbuf = (Bit16u *)(v->fbi.ram + v->fbi.rgboffs[v->fbi.frontbuf]);
1053         break;
1054 
1055       case 1:   /* back buffer */
1056         drawbuf = (Bit16u *)(v->fbi.ram + v->fbi.rgboffs[v->fbi.backbuf]);
1057         break;
1058 
1059       default:  /* reserved */
1060         break;
1061     }
1062 
1063     /* determine the dither pattern */
1064     for (y = 0; y < 4; y++)
1065     {
1066       DECLARE_DITHER_POINTERS;
1067       UNUSED(dither);
1068       COMPUTE_DITHER_POINTERS(v->reg[fbzMode].u, y);
1069       for (x = 0; x < 4; x++)
1070       {
1071         int r = v->reg[color1].rgb.r;
1072         int g = v->reg[color1].rgb.g;
1073         int b = v->reg[color1].rgb.b;
1074 
1075         APPLY_DITHER(v->reg[fbzMode].u, x, dither_lookup, r, g, b);
1076         dithermatrix[y*4 + x] = (r << 11) | (g << 5) | b;
1077       }
1078     }
1079   }
1080 
1081   /* fill in a block of extents */
1082   extents[0].startx = sx;
1083   extents[0].stopx = ex;
1084   for (extnum = 1; extnum < (int)ARRAY_LENGTH(extents); extnum++)
1085     extents[extnum] = extents[0];
1086 
1087   poly_extra_data extra;
1088   /* iterate over blocks of extents */
1089   for (y = sy; y < ey; y += ARRAY_LENGTH(extents))
1090   {
1091     int count = MIN(ey - y, (int) ARRAY_LENGTH(extents));
1092 
1093     extra.state = v;
1094     memcpy(extra.dither, dithermatrix, sizeof(extra.dither));
1095 
1096     pixels += poly_render_triangle_custom(drawbuf, NULL, y, count, extents, &extra);
1097   }
1098 
1099   /* 2 pixels per clock */
1100   return pixels / 2;
1101 }
1102 
/*
 * swap_buffers - carry out a buffer swap.
 *
 * Marks the video as changed, shifts the number of vblanks counted since
 * the last swap (capped at 15) into the low nibble of fbiSwapHistory,
 * selects the new front/back buffers and clears the pending-swap state.
 *
 * For types up to VOODOO_2 the front/back indices rotate through two or
 * three buffers (Voodoo 2 skips the rotation when vblank_dont_swap is
 * set); for later types RGB buffer 0 is reloaded from leftOverlayBuf.
 */
void swap_buffers(voodoo_state *v)
{
  int interval;

  /* force a partial update */
  v->fbi.video_changed = 1;

  /* keep a history of swap intervals, 4 bits per entry */
  interval = (v->fbi.vblank_count > 15) ? 15 : v->fbi.vblank_count;
  v->reg[fbiSwapHistory].u = (v->reg[fbiSwapHistory].u << 4) | interval;

  if (v->type > VOODOO_2)
  {
    /* newer chips: take the new front buffer address from the register */
    v->fbi.rgboffs[0] = v->reg[leftOverlayBuf].u & v->fbi.mask & ~0x0f;
  }
  else if (v->type < VOODOO_2 || !v->fbi.vblank_dont_swap)
  {
    if (v->fbi.rgboffs[2] != (Bit32u)~0)
    {
      /* third buffer present: rotate through all three */
      v->fbi.frontbuf = (v->fbi.frontbuf + 1) % 3;
      v->fbi.backbuf = (v->fbi.frontbuf + 1) % 3;
    }
    else
    {
      /* two buffers: front and back simply trade places */
      v->fbi.frontbuf = 1 - v->fbi.frontbuf;
      v->fbi.backbuf = 1 - v->fbi.frontbuf;
    }
  }

  /* decrement the pending count and reset our state */
  if (v->fbi.swaps_pending)
    v->fbi.swaps_pending--;
  v->fbi.vblank_count = 0;
  v->fbi.vblank_swap_pending = 0;
}
1142 
1143 /*-------------------------------------------------
1144     swapbuffer - execute the 'swapbuffer'
1145     command
1146 -------------------------------------------------*/
swapbuffer(voodoo_state * v,Bit32u data)1147 Bit32s swapbuffer(voodoo_state *v, Bit32u data)
1148 {
1149   /* set the don't swap value for Voodoo 2 */
1150   v->fbi.vblank_swap_pending = 1;
1151   v->fbi.vblank_swap = (data >> 1) & 0xff;
1152   v->fbi.vblank_dont_swap = (data >> 9) & 1;
1153 
1154   /* if we're not syncing to the retrace, process the command immediately */
1155   if (!(data & 1))
1156   {
1157     BX_LOCK(fifo_mutex);
1158     swap_buffers(v);
1159     BX_UNLOCK(fifo_mutex);
1160     return 0;
1161   } else {
1162     if (v->vtimer_running) {
1163       bx_wait_sem(&vertical_sem);
1164     }
1165   }
1166 
1167   /* determine how many cycles to wait; we deliberately overshoot here because */
1168   /* the final count gets updated on the VBLANK */
1169   return (v->fbi.vblank_swap + 1) * v->freq / 30;
1170 }
1171 
1172 
1173 /*************************************
1174  *
1175  *  Statistics management
1176  *
1177  *************************************/
1178 
/*
 * accumulate_statistics - add the counters of one stats block to the
 * corresponding fbi* statistics registers.  The stats block itself is
 * left untouched.
 */
static void accumulate_statistics(voodoo_state *v, const stats_block *stats)
{
  /* pixel throughput */
  v->reg[fbiPixelsIn].u += stats->pixels_in;
  v->reg[fbiPixelsOut].u += stats->pixels_out;

  /* pixels rejected by the chroma, depth and alpha tests */
  v->reg[fbiChromaFail].u += stats->chroma_fail;
  v->reg[fbiZfuncFail].u += stats->zfunc_fail;
  v->reg[fbiAfuncFail].u += stats->afunc_fail;
}
1188 
/*
 * update_statistics - flush the per-thread and LFB statistics blocks.
 *
 * When accumulate is non-zero each block is first added to the fbi*
 * statistics registers; in either case every block is zeroed afterwards.
 */
static void update_statistics(voodoo_state *v, int accumulate)
{
  int unit;

  /* accumulate/reset statistics from all units */
  for (unit = 0; unit < WORK_MAX_THREADS; unit++)
  {
    stats_block *block = &v->thread_stats[unit];

    if (accumulate)
      accumulate_statistics(v, block);
    memset(block, 0, sizeof(*block));
  }

  /* accumulate/reset statistics from the LFB */
  if (accumulate)
    accumulate_statistics(v, &v->fbi.lfb_stats);
  memset(&v->fbi.lfb_stats, 0, sizeof(v->fbi.lfb_stats));
}
1206 
/*
 * reset_counters - zero all pixel statistics.
 *
 * Discards whatever the per-thread and LFB stats blocks have collected
 * (without accumulating it) and clears the five fbi* counter registers.
 */
void reset_counters(voodoo_state *v)
{
  /* drop the pending statistics instead of adding them to the registers */
  update_statistics(v, FALSE);

  /* pixel throughput */
  v->reg[fbiPixelsIn].u = 0;
  v->reg[fbiPixelsOut].u = 0;

  /* test failure counts */
  v->reg[fbiChromaFail].u = 0;
  v->reg[fbiZfuncFail].u = 0;
  v->reg[fbiAfuncFail].u = 0;
}
1216 
1217 
/*
 * soft_reset - reset the statistics and both FIFOs.
 *
 * Clears the triangle counter and all pixel counters, resets the FBI and
 * PCI FIFOs and drops the pending-operation flag.
 */
void soft_reset(voodoo_state *v)
{
  /* statistics */
  v->reg[fbiTrianglesOut].u = 0;
  reset_counters(v);

  /* command queues */
  fifo_reset(&v->fbi.fifo);
  fifo_reset(&v->pci.fifo);
  v->pci.op_pending = 0;
}
1226 
1227 
/*
 * recompute_video_memory - derive the frame buffer layout from the
 * fbiInit registers.
 *
 * Recomputes the tile geometry and row width, the offsets of the RGB and
 * aux buffers, and the location/size of the memory FIFO; then resets the
 * FBI FIFO and pulls the front/back buffer indices back into range when
 * there is no third RGB buffer.
 */
void recompute_video_memory(voodoo_state *v)
{
  Bit32u buffer_pages = FBIINIT2_VIDEO_BUFFER_OFFSET(v->reg[fbiInit2].u);
  Bit32u fifo_start_page = FBIINIT4_MEMORY_FIFO_START_ROW(v->reg[fbiInit4].u);
  Bit32u fifo_last_page = FBIINIT4_MEMORY_FIFO_STOP_ROW(v->reg[fbiInit4].u);
  Bit32u memory_config = FBIINIT2_ENABLE_TRIPLE_BUF(v->reg[fbiInit2].u);
  int buf;

  BX_DEBUG(("buffer_pages 0x%x", buffer_pages));

  /* memory config is determined differently between V1 and V2: on a */
  /* Voodoo 2 a zero triple-buffer field defers to fbiInit5 */
  if (memory_config == 0 && v->type == VOODOO_2)
    memory_config = FBIINIT5_BUFFER_ALLOCATION(v->reg[fbiInit5].u);

  /* tiles are 64x16/32; x_tiles specifies how many half-tiles */
  if (v->type == VOODOO_1)
  {
    v->fbi.tile_width = 64;
    v->fbi.tile_height = 16;
  }
  else
  {
    v->fbi.tile_width = 32;
    v->fbi.tile_height = 32;
  }
  v->fbi.x_tiles = FBIINIT1_X_VIDEO_TILES(v->reg[fbiInit1].u);
  if (v->type == VOODOO_2)
  {
    /* Voodoo 2 adds an extra high bit (fbiInit1) and low bit (fbiInit6) */
    v->fbi.x_tiles = (v->fbi.x_tiles << 1) |
            (FBIINIT1_X_VIDEO_TILES_BIT5(v->reg[fbiInit1].u) << 5) |
            (FBIINIT6_X_VIDEO_TILES_BIT0(v->reg[fbiInit6].u));
  }
  v->fbi.rowpixels = v->fbi.tile_width * v->fbi.x_tiles;

  /* first RGB buffer always starts at 0 */
  v->fbi.rgboffs[0] = 0;

  /* the remaining offsets are only recomputed for a non-zero buffer size */
  if (buffer_pages > 0) {
    /* second RGB buffer starts immediately afterwards (pages are 0x1000 bytes) */
    v->fbi.rgboffs[1] = buffer_pages * 0x1000;

    /* remaining buffers are based on the config */
    switch (memory_config) {
      case 0: /* 2 color buffers, 1 aux buffer */
        v->fbi.rgboffs[2] = ~0;
        v->fbi.auxoffs = 2 * buffer_pages * 0x1000;
        break;

      case 1: /* 3 color buffers, 0 aux buffers */
      case 2: /* 3 color buffers, 1 aux buffers */
        /* NOTE(review): config 1 is labelled "0 aux buffers" yet gets the */
        /* same aux offset as config 2 - confirm this is intended */
        v->fbi.rgboffs[2] = 2 * buffer_pages * 0x1000;
        v->fbi.auxoffs = 3 * buffer_pages * 0x1000;
        break;

      case 3: /* reserved: offsets are left as they were */
        BX_ERROR(("Unexpected memory configuration in recompute_video_memory!"));
        break;
    }
  }

  /* clamp the RGB buffers to video memory; ~0 marks an absent buffer */
  for (buf = 0; buf < 3; buf++)
  {
    if (v->fbi.rgboffs[buf] == (Bit32u)~0)
      continue;
    if (v->fbi.rgboffs[buf] > v->fbi.mask)
      v->fbi.rgboffs[buf] = v->fbi.mask;
  }

  /* clamp the aux buffer to video memory */
  if (v->fbi.auxoffs != (Bit32u)~0 && v->fbi.auxoffs > v->fbi.mask)
    v->fbi.auxoffs = v->fbi.mask;

  /* compute the memory FIFO location and size */
  if (fifo_last_page > v->fbi.mask / 0x1000)
    fifo_last_page = v->fbi.mask / 0x1000;

  if (!v->fbi.fifo.enabled || (fifo_start_page > fifo_last_page))
  {
    /* invalid range or not enabled: disable the FIFO */
    v->fbi.fifo.base = NULL;
    v->fbi.fifo.size = 0;
  }
  else
  {
    /* size is counted in 32-bit words and capped at 65536*2 */
    v->fbi.fifo.base = (Bit32u *)(v->fbi.ram + fifo_start_page * 0x1000);
    v->fbi.fifo.size = (fifo_last_page + 1 - fifo_start_page) * 0x1000 / 4;
    if (v->fbi.fifo.size > 65536*2)
      v->fbi.fifo.size = 65536*2;
  }

  /* reset the FIFO */
  fifo_reset(&v->fbi.fifo);
  if (fifo_empty_locked(&v->pci.fifo)) v->pci.op_pending = 0;

  /* reset our front/back buffers if they are out of range */
  if (v->fbi.rgboffs[2] == (Bit32u)~0)
  {
    if (v->fbi.frontbuf == 2)
      v->fbi.frontbuf = 0;
    if (v->fbi.backbuf == 2)
      v->fbi.backbuf = 0;
  }
}
1326 
1327 
/*
 * voodoo2_bitblt_mux - combine source and destination bytes with a
 * 4-entry raster operation.
 *
 * The low four bits of rop form a truth table indexed by
 * (source bit << 1) | destination bit; the selected table bit becomes the
 * new destination bit.  The operation is applied to dpxsize bytes and the
 * result is written back through dst_ptr.
 */
void voodoo2_bitblt_mux(Bit8u rop, Bit8u *dst_ptr, Bit8u *src_ptr, int dpxsize)
{
  /* expand each truth-table entry into an all-ones / all-zeros byte mask */
  const Bit8u when_s0_d0 = (rop & 0x01) ? 0xff : 0x00;
  const Bit8u when_s0_d1 = (rop & 0x02) ? 0xff : 0x00;
  const Bit8u when_s1_d0 = (rop & 0x04) ? 0xff : 0x00;
  const Bit8u when_s1_d1 = (rop & 0x08) ? 0xff : 0x00;
  int n;

  for (n = 0; n < dpxsize; n++) {
    const Bit8u s = src_ptr[n];
    const Bit8u d = dst_ptr[n];

    /* evaluate all eight bit positions of the byte at once */
    dst_ptr[n] = (Bit8u)((when_s0_d0 & ~s & ~d) |
                         (when_s0_d1 & ~s &  d) |
                         (when_s1_d0 &  s & ~d) |
                         (when_s1_d1 &  s &  d));
  }
}
1345 
1346 #define BLT v->blt
1347 
/*
 * clip_check - test a pixel position against the blitter clip rectangle.
 *
 * Returns 1 when clipping is disabled or when (x, y) lies inside
 * [clipx0, clipx1) x [clipy0, clipy1); otherwise 0.
 */
bool clip_check(Bit16u x, Bit16u y)
{
  if (BLT.clip_en) {
    /* lower bounds are inclusive, upper bounds exclusive */
    if ((x < BLT.clipx0) || (x >= BLT.clipx1))
      return 0;
    if ((y < BLT.clipy0) || (y >= BLT.clipy1))
      return 0;
  }
  return 1;
}
1358 
1359 
/*
 * chroma_check - test a 16-bit 5-6-5 pixel against a chroma range.
 *
 *   ptr  - points at the pixel, low byte first
 *   min  - lower bound of the range, 5-6-5 format (inclusive)
 *   max  - upper bound of the range, 5-6-5 format (inclusive)
 *   dst  - true when a destination pixel is being tested
 *
 * Returns 0 when any component lies outside its [min, max] range.  When
 * all three components are in range the result is 1 for a destination
 * pixel and 2 for a source pixel, so both results can be OR-ed into a
 * 2-bit selector.
 */
Bit8u chroma_check(Bit8u *ptr, Bit16u min, Bit16u max, bool dst)
{
  const Bit16u pixel = (Bit16u)(ptr[0] | (ptr[1] << 8));
  const int red = pixel >> 11;
  const int green = (pixel >> 5) & 0x3f;
  const int blue = pixel & 0x1f;

  /* each component is compared separately against its own bounds */
  if ((red < ((min >> 11) & 0x1f)) || (red > ((max >> 11) & 0x1f)))
    return 0;
  if ((green < ((min >> 5) & 0x3f)) || (green > ((max >> 5) & 0x3f)))
    return 0;
  if ((blue < (min & 0x1f)) || (blue > (max & 0x1f)))
    return 0;

  /* in range: bit 0 reports a destination hit, bit 1 a source hit */
  return dst ? 1 : 2;
}
1382 
voodoo2_bitblt(void)1383 void voodoo2_bitblt(void)
1384 {
1385   Bit8u cmd, rop = 0, *dst_ptr, *src_ptr;
1386   Bit16u c, cols, src_x, src_y, r, rows, size, x;
1387   Bit32u src_base, doffset, soffset, dstride, sstride;
1388   bool src_tiled, dst_tiled, x_dir, y_dir;
1389   int tmpval;
1390 
1391   cmd = (Bit8u)(v->reg[bltCommand].u & 0x07);
1392   BLT.src_fmt = (Bit8u)((v->reg[bltCommand].u >> 3) & 0x1f);
1393   BLT.src_swizzle = (Bit8u)((v->reg[bltCommand].u >> 8) & 0x03);
1394   BLT.chroma_en = (Bit8u)((v->reg[bltCommand].u >> 10) & 0x01);
1395   BLT.chroma_en |= (Bit8u)((v->reg[bltCommand].u >> 11) & 0x02);
1396   src_tiled = ((v->reg[bltCommand].u >> 14) & 0x01);
1397   dst_tiled = ((v->reg[bltCommand].u >> 15) & 0x01);
1398   BLT.clip_en = ((v->reg[bltCommand].u >> 16) & 0x01);
1399   BLT.transp = ((v->reg[bltCommand].u >> 17) & 0x01);
1400   BLT.dst_w = (v->reg[bltSize].u & 0x7ff) + 1;
1401   x_dir = (v->reg[bltSize].u >> 11) & 1;
1402   tmpval = (v->reg[bltSize].u & 0xfff);
1403   if (x_dir && ((cmd == 0) || (cmd == 2))) {
1404     tmpval |= 0xfffff000;
1405   }
1406   BLT.dst_w = abs(tmpval) + 1;
1407   y_dir = (v->reg[bltSize].u >> 27) & 1;
1408   tmpval = ((v->reg[bltSize].u >> 16) & 0xfff);
1409   if (y_dir && ((cmd == 0) || (cmd == 2))) {
1410     tmpval |= 0xfffff000;
1411   }
1412   BLT.dst_h = abs(tmpval) + 1;
1413   BLT.dst_x = (Bit16u)(v->reg[bltDstXY].u & 0x7ff);
1414   BLT.dst_y = (Bit16u)((v->reg[bltDstXY].u >> 16) & 0x7ff);
1415   if (src_tiled) {
1416     src_base = (v->reg[bltSrcBaseAddr].u & 0x3ff) << 12;
1417     sstride = (v->reg[bltXYStrides].u & 0x3f) << 6;
1418   } else {
1419     src_base = v->reg[bltSrcBaseAddr].u & 0x3ffff8;
1420     sstride = v->reg[bltXYStrides].u & 0xff8;
1421   }
1422   if (dst_tiled) {
1423     BLT.dst_base = (v->reg[bltDstBaseAddr].u & 0x3ff) << 12;
1424     BLT.dst_pitch = (v->reg[bltXYStrides].u >> 10) & 0xfc0;
1425   } else {
1426     BLT.dst_base = v->reg[bltDstBaseAddr].u & 0x3ffff8;
1427     BLT.dst_pitch = (v->reg[bltXYStrides].u >> 16) & 0xff8;
1428   }
1429   BLT.h2s_mode = 0;
1430   switch (cmd) {
1431     case 0:
1432       BX_DEBUG(("Screen-to-Screen bitBLT: w = %d, h = %d, rop0 = %d",
1433                 BLT.dst_w, BLT.dst_h, BLT.rop[0]));
1434       src_x = (Bit16u)(v->reg[bltSrcXY].u & 0x7ff);
1435       src_y = (Bit16u)((v->reg[bltSrcXY].u >> 16) & 0x7ff);
1436       cols = BLT.dst_w;
1437       rows = BLT.dst_h;
1438       dstride = BLT.dst_pitch;
1439       doffset = BLT.dst_base + BLT.dst_y * dstride + BLT.dst_x * 2;
1440       soffset = src_base + src_y * sstride + src_x * 2;
1441       for (r = 0; r <= rows; r++) {
1442         dst_ptr = &v->fbi.ram[doffset & v->fbi.mask];
1443         src_ptr = &v->fbi.ram[soffset & v->fbi.mask];
1444         x = BLT.dst_x;
1445         for (c = 0; c < cols; c++) {
1446           if (clip_check(x, BLT.dst_y)) {
1447             if (BLT.chroma_en & 1) {
1448               rop = chroma_check(src_ptr, BLT.src_col_min, BLT.src_col_max, 0);
1449             }
1450             if (BLT.chroma_en & 2) {
1451               rop |= chroma_check(dst_ptr, BLT.dst_col_min, BLT.dst_col_max, 1);
1452             }
1453             voodoo2_bitblt_mux(BLT.rop[rop], dst_ptr, src_ptr, 2);
1454           }
1455           if (x_dir) {
1456             dst_ptr -= 2;
1457             src_ptr -= 2;
1458             x--;
1459           } else {
1460             dst_ptr += 2;
1461             src_ptr += 2;
1462             x++;
1463           }
1464         }
1465         if (y_dir) {
1466           doffset -= dstride;
1467           soffset -= sstride;
1468           BLT.dst_y--;
1469         } else {
1470           doffset += dstride;
1471           soffset += sstride;
1472           BLT.dst_y++;
1473         }
1474       }
1475       break;
1476     case 1:
1477       BX_DEBUG(("CPU-to-Screen bitBLT: w = %d, h = %d, rop0 = %d",
1478                 BLT.dst_w, BLT.dst_h, BLT.rop[0]));
1479       BLT.h2s_mode = 1;
1480       BLT.cur_x = BLT.dst_x;
1481       break;
1482     case 2:
1483       BX_DEBUG(("Rectangle fill: w = %d, h = %d, rop0 = %d",
1484                 BLT.dst_w, BLT.dst_h, BLT.rop[0]));
1485       cols = BLT.dst_w;
1486       rows = BLT.dst_h;
1487       dstride = BLT.dst_pitch;
1488       doffset = BLT.dst_base + BLT.dst_y * dstride + BLT.dst_x * 2;
1489       src_ptr = BLT.fgcolor;
1490       for (r = 0; r <= rows; r++) {
1491         dst_ptr = &v->fbi.ram[doffset & v->fbi.mask];
1492         x = BLT.dst_x;
1493         for (c = 0; c < cols; c++) {
1494           if (clip_check(x, BLT.dst_y)) {
1495             if (BLT.chroma_en & 2) {
1496               rop = chroma_check(dst_ptr, BLT.dst_col_min, BLT.dst_col_max, 1);
1497             }
1498             voodoo2_bitblt_mux(BLT.rop[rop], dst_ptr, src_ptr, 2);
1499           }
1500           if (x_dir) {
1501             dst_ptr -= 2;
1502             x--;
1503           } else {
1504             dst_ptr += 2;
1505             x++;
1506           }
1507         }
1508         if (y_dir) {
1509           doffset -= dstride;
1510           BLT.dst_y--;
1511         } else {
1512           doffset += dstride;
1513           BLT.dst_y++;
1514         }
1515       }
1516       break;
1517     case 3:
1518       BLT.dst_x = (Bit16u)(v->reg[bltDstXY].u & 0x1ff);
1519       BLT.dst_y = (Bit16u)((v->reg[bltDstXY].u >> 16) & 0x3ff);
1520       cols = (Bit16u)(v->reg[bltSize].u & 0x1ff);
1521       rows = (Bit16u)((v->reg[bltSize].u >> 16) & 0x3ff);
1522       BX_DEBUG(("SGRAM fill: x = %d y = %d w = %d h = %d color = 0x%02x%02x",
1523                 BLT.dst_x, BLT.dst_y, cols, rows, BLT.fgcolor[1], BLT.fgcolor[0]));
1524       dstride = (1 << 12);
1525       doffset = BLT.dst_y * dstride;
1526       for (r = 0; r <= rows; r++) {
1527         if (r == 0) {
1528           dst_ptr = &v->fbi.ram[(doffset + BLT.dst_x * 8) & v->fbi.mask];
1529           size = dstride / 2 - (BLT.dst_x * 4);
1530         } else {
1531           dst_ptr = &v->fbi.ram[doffset & v->fbi.mask];
1532           if (r == rows) {
1533             size = cols * 4;
1534           } else {
1535             size = dstride / 2;
1536           }
1537         }
1538         for (c = 0; c < size; c++) {
1539           *dst_ptr = BLT.fgcolor[0];
1540           *(dst_ptr + 1) = BLT.fgcolor[1];
1541           dst_ptr += 2;
1542         }
1543         doffset += dstride;
1544       }
1545       break;
1546     default:
1547       BX_ERROR(("Voodoo bitBLT: unknown command %d)", cmd));
1548   }
1549   v->fbi.video_changed = 1;
1550 }
1551 
/*
 * voodoo2_bitblt_cpu_to_screen - consume one dword of a CPU-to-screen
 * blit that was set up by voodoo2_bitblt().
 *
 * The source format (BLT.src_fmt bits 2:0) decides how many pixels one
 * dword carries:
 *   0     - monochrome, up to 32 pixels of one row
 *   1     - monochrome, up to 8 pixels in each of up to 4 rows
 *   2     - 16 bpp, up to 2 pixels
 *   3..5  - 24/32 bpp, 1 pixel (formats 4 and 5 are handled like 3)
 *
 * BLT.cur_x, BLT.dst_y and BLT.dst_h are advanced as pixels are written;
 * BLT.h2s_mode is cleared once the last row is complete.
 */
void voodoo2_bitblt_cpu_to_screen(Bit32u data)
{
  Bit8u rop = 0, *dst_ptr, *dst_ptr1, *src_ptr, color[2];
  Bit8u b, c, g, i, j, r;
  bool set;
  Bit8u colfmt = BLT.src_fmt & 7, rgbfmt = BLT.src_fmt >> 3;
  /* pixels left in the current row */
  Bit16u count = BLT.dst_x + BLT.dst_w - BLT.cur_x;
  Bit32u doffset = BLT.dst_base + BLT.dst_y * BLT.dst_pitch + BLT.cur_x * 2;
  dst_ptr = &v->fbi.ram[doffset & v->fbi.mask];

  /* optional byte swap and word swap of the incoming data */
  if (BLT.src_swizzle & 1) {
    data = bx_bswap32(data);
  }
  if (BLT.src_swizzle & 2) {
    data = (data >> 16) | (data << 16);
  }
  if ((colfmt == 0) || (colfmt == 1)) {
    /* monochrome source: c pixels per row, r rows per dword */
    if (colfmt == 0) {
      c = (count > 32) ? 32 : count;
      r = 1;
    } else {
      c = (count > 8) ? 8 : count;
      r = (BLT.dst_h > 4) ? 4 : BLT.dst_h;
    }
    for (j = 0; j < r; j++) {
      dst_ptr1 = dst_ptr;
      for (i = 0; i < c; i++) {
        /* within each byte the most significant bit is the leftmost pixel */
        b = (i & 0x18) + (7 - (i & 7));
        set = (data & (1U << b)) > 0;
        if (set) {
          src_ptr = BLT.fgcolor;
        } else {
          src_ptr = BLT.bgcolor;
        }
        /* in transparent mode cleared bits leave the destination alone */
        if (set || !BLT.transp) {
          if (clip_check(BLT.cur_x + i, BLT.dst_y + j)) {
            if (BLT.chroma_en & 2) {
              rop = chroma_check(dst_ptr1, BLT.dst_col_min, BLT.dst_col_max, 1);
            }
            voodoo2_bitblt_mux(BLT.rop[rop], dst_ptr1, src_ptr, 2);
          }
        }
        dst_ptr1 += 2;
      }
      if (colfmt == 0) {
        if (c < count) {
          BLT.cur_x += c;
        } else {
          BLT.cur_x = BLT.dst_x;
          if (BLT.dst_h > 1) {
            BLT.dst_y++;
            BLT.dst_h--;
          } else {
            BLT.h2s_mode = 0;
          }
        }
      } else {
        /* next row of the block uses the next source byte */
        data >>= 8;
        dst_ptr += BLT.dst_pitch;
      }
    }
    if (colfmt == 1) {
      if (c < count) {
        BLT.cur_x += c;
      } else {
        BLT.cur_x = BLT.dst_x;
        if (BLT.dst_h > 4) {
          BLT.dst_y += 4;
          BLT.dst_h -= 4;
        } else {
          BLT.h2s_mode = 0;
        }
      }
    }
  } else if (colfmt == 2) {
    if (rgbfmt & 1) {
      BX_ERROR(("Voodoo bitBLT: color order other than RGB not supported yet"));
    }
#if BX_BIG_ENDIAN
    data = bx_bswap32(data);
#endif
    /* the dword is read as two consecutive 16-bit pixels */
    src_ptr = (Bit8u*)&data;
    c = (count > 2) ? 2 : count;
    for (i = 0; i < c; i++) {
      if (clip_check(BLT.cur_x, BLT.dst_y)) {
        /* start from ROP 0 for every pixel so that the chroma result of */
        /* the first pixel cannot leak into the second one */
        rop = 0;
        if (BLT.chroma_en & 1) {
          rop = chroma_check(src_ptr, BLT.src_col_min, BLT.src_col_max, 0);
        }
        if (BLT.chroma_en & 2) {
          rop |= chroma_check(dst_ptr, BLT.dst_col_min, BLT.dst_col_max, 1);
        }
        voodoo2_bitblt_mux(BLT.rop[rop], dst_ptr, src_ptr, 2);
      }
      dst_ptr += 2;
      src_ptr += 2;
      BLT.cur_x++;
      if (--count == 0) {
        BLT.cur_x = BLT.dst_x;
        BLT.dst_y++;
        if (--BLT.dst_h == 0) {
          BLT.h2s_mode = 0;
        }
      }
    }
  } else if ((colfmt >= 3) && (colfmt <= 5)) {
    if (colfmt > 3) {
      BX_ERROR(("Voodoo bitBLT: 24 bpp source dithering not supported yet"));
      colfmt = 3;
    }
    /* take the top 5/6/5 bits of each 8-bit component; rgbfmt selects */
    /* where the components sit in the dword */
    switch (rgbfmt) {
      case 1:
        r = (Bit8u)((data >> 3) & 0x1f);
        g = (Bit8u)((data >> 10) & 0x3f);
        b = (Bit8u)((data >> 19) & 0x1f);
        break;
      case 2:
        r = (Bit8u)((data >> 27) & 0x1f);
        g = (Bit8u)((data >> 18) & 0x3f);
        b = (Bit8u)((data >> 11) & 0x1f);
        break;
      case 3:
        r = (Bit8u)((data >> 11) & 0x1f);
        g = (Bit8u)((data >> 18) & 0x3f);
        b = (Bit8u)((data >> 27) & 0x1f);
        break;
      default:
        r = (Bit8u)((data >> 19) & 0x1f);
        g = (Bit8u)((data >> 10) & 0x3f);
        b = (Bit8u)((data >> 3) & 0x1f);
    }
    /* repack as a 5-6-5 pixel, low byte first */
    color[0] = (Bit8u)((g << 5) | b);
    color[1] = (r << 3) | (g >> 3);
    src_ptr = color;
    if (clip_check(BLT.cur_x, BLT.dst_y)) {
      if (BLT.chroma_en & 1) {
        rop = chroma_check(src_ptr, BLT.src_col_min, BLT.src_col_max, 0);
      }
      if (BLT.chroma_en & 2) {
        rop |= chroma_check(dst_ptr, BLT.dst_col_min, BLT.dst_col_max, 1);
      }
      voodoo2_bitblt_mux(BLT.rop[rop], dst_ptr, src_ptr, 2);
    }
    BLT.cur_x++;
    if (--count == 0) {
      BLT.cur_x = BLT.dst_x;
      BLT.dst_y++;
      if (--BLT.dst_h == 0) {
        BLT.h2s_mode = 0;
      }
    }
  } else {
    BX_ERROR(("CPU-to-Screen bitBLT: unknown color format 0x%02x", colfmt));
  }
  v->fbi.video_changed = 1;
}
1707 
1708 
/*
 * dacdata_w - handle a write to a DAC register.
 *
 * The value is always stored in d->reg[regnum].  In addition:
 *   regs 4/7 (PLLWMA/PLLRMA) - restart the PLL data sequence; the
 *                              sequence is 1 byte long for address 0x0e
 *                              and 2 bytes otherwise
 *   reg 5 (PLLDATA)          - for PLL address 0x00 the first byte is
 *                              clk0_m and the second holds clk0_n (bits
 *                              4:0) and clk0_p (bits 7:5); writing 0xf8
 *                              to address 0x0e recomputes the video
 *                              clock and updates the timing
 */
void dacdata_w(dac_state *d, Bit8u regnum, Bit8u data)
{
  d->reg[regnum] = data;

  /* PLLWMA / PLLRMA: a new address restarts the data byte counter */
  if ((regnum == 4) || (regnum == 7)) {
    d->data_size = (data == 0x0e) ? 1 : 2;
    return;
  }

  /* everything else except PLLDATA is a plain register store */
  if (regnum != 5) {
    return;
  }

  /* PLLDATA: the meaning depends on the PLL write address (reg 4) */
  if (d->reg[4] == 0x00) {
    if (d->data_size == 2) {
      d->clk0_m = data;
    } else if (d->data_size == 1) {
      d->clk0_n = data & 0x1f;
      d->clk0_p = data >> 5;
    }
  } else if (d->reg[4] == 0x0e) {
    if ((d->data_size == 1) && (data == 0xf8)) {
      /* 14318184 * (m + 2) / (n + 2) / 2^p */
      v->vidclk = 14318184.0f * ((float)(d->clk0_m + 2) / (float)(d->clk0_n + 2)) / (float)(1 << d->clk0_p);
      Bit8u dacr6 = d->reg[6] & 0xf0;
      /* these upper-nibble values of reg 6 halve the clock */
      if ((dacr6 == 0x20) || (dacr6 == 0x60) || (dacr6 == 0x70)) {
        v->vidclk /= 2.0f;
      }
      Voodoo_update_timing();
    }
  }

  /* one byte of the data sequence has been consumed */
  d->data_size--;
}
1748 
1749 
/*
 * dacdata_r - handle a read from a DAC register.
 *
 * The value is not returned; it is left in d->read_result.  Reads of
 * register 5 (PLLDATA) depend on the PLL read address in register 7 and
 * consume one byte of the data sequence; unknown PLL addresses read as
 * 0xff.  Every other register simply reads back its stored value.
 */
void dacdata_r(dac_state *d, Bit8u regnum)
{
  Bit8u value;

  if (regnum == 5) {
    /* PLLDATA, selected by PLLRMA */
    value = 0xff;
    switch (d->reg[7]) {
      case 0x00:
        /* first byte is m, second byte packs n (bits 4:0) and p (bits 7:5) */
        if (d->data_size == 2) {
          value = d->clk0_m;
        } else if (d->data_size == 1) {
          value = d->clk0_n | (d->clk0_p << 5);
        }
        break;
      /* this is just to make startup happy */
      case 0x01:
        value = 0x55;
        break;
      case 0x07:
        value = 0x71;
        break;
      case 0x0b:
        value = 0x79;
        break;
    }
    d->data_size--;
  } else {
    value = d->reg[regnum];
  }

  /* remember the read result; it is fetched elsewhere */
  d->read_result = value;
}
1781 
register_w(Bit32u offset,Bit32u data,bool log)1782 void register_w(Bit32u offset, Bit32u data, bool log)
1783 {
1784   Bit32u regnum  = (offset) & 0xff;
1785   Bit32u chips   = (offset>>8) & 0xf;
1786   Bit64s data64;
1787   static Bit32u count = 0;
1788 
1789   if (chips == 0)
1790     chips = 0xf;
1791 
1792   /* the first 64 registers can be aliased differently */
1793   if ((offset & 0x800c0) == 0x80000 && v->alt_regmap)
1794     regnum = register_alias_map[offset & 0x3f];
1795   else
1796     regnum = offset & 0xff;
1797 
1798   if (log)
1799     BX_DEBUG(("write chip 0x%x reg 0x%x value 0x%08x(%s)", chips, regnum<<2, data, v->regnames[regnum]));
1800 
1801   switch (regnum) {
1802     /* Vertex data is 12.4 formatted fixed point */
1803     case fvertexAx:
1804       data = float_to_Bit32s(data, 4);
1805     case vertexAx:
1806       if (chips & 1) v->fbi.ax = (Bit16s)data;
1807       break;
1808 
1809     case fvertexAy:
1810       data = float_to_Bit32s(data, 4);
1811     case vertexAy:
1812       if (chips & 1) v->fbi.ay = (Bit16s)data;
1813       break;
1814 
1815     case fvertexBx:
1816       data = float_to_Bit32s(data, 4);
1817     case vertexBx:
1818       if (chips & 1) v->fbi.bx = (Bit16s)data;
1819       break;
1820 
1821     case fvertexBy:
1822       data = float_to_Bit32s(data, 4);
1823     case vertexBy:
1824       if (chips & 1) v->fbi.by = (Bit16s)data;
1825       break;
1826 
1827     case fvertexCx:
1828       data = float_to_Bit32s(data, 4);
1829     case vertexCx:
1830       if (chips & 1) v->fbi.cx = (Bit16s)data;
1831       break;
1832 
1833     case fvertexCy:
1834       data = float_to_Bit32s(data, 4);
1835     case vertexCy:
1836       if (chips & 1) v->fbi.cy = (Bit16s)data;
1837       break;
1838 
1839     /* RGB data is 12.12 formatted fixed point */
1840     case fstartR:
1841       data = float_to_Bit32s(data, 12);
1842     case startR:
1843       if (chips & 1) v->fbi.startr = (Bit32s)(data << 8) >> 8;
1844       break;
1845 
1846     case fstartG:
1847       data = float_to_Bit32s(data, 12);
1848     case startG:
1849       if (chips & 1) v->fbi.startg = (Bit32s)(data << 8) >> 8;
1850       break;
1851 
1852     case fstartB:
1853       data = float_to_Bit32s(data, 12);
1854     case startB:
1855       if (chips & 1) v->fbi.startb = (Bit32s)(data << 8) >> 8;
1856       break;
1857 
1858     case fstartA:
1859       data = float_to_Bit32s(data, 12);
1860     case startA:
1861       if (chips & 1) v->fbi.starta = (Bit32s)(data << 8) >> 8;
1862       break;
1863 
1864     case fdRdX:
1865       data = float_to_Bit32s(data, 12);
1866     case dRdX:
1867       if (chips & 1) v->fbi.drdx = (Bit32s)(data << 8) >> 8;
1868       break;
1869 
1870     case fdGdX:
1871       data = float_to_Bit32s(data, 12);
1872     case dGdX:
1873       if (chips & 1) v->fbi.dgdx = (Bit32s)(data << 8) >> 8;
1874       break;
1875 
1876     case fdBdX:
1877       data = float_to_Bit32s(data, 12);
1878     case dBdX:
1879       if (chips & 1) v->fbi.dbdx = (Bit32s)(data << 8) >> 8;
1880       break;
1881 
1882     case fdAdX:
1883       data = float_to_Bit32s(data, 12);
1884     case dAdX:
1885       if (chips & 1) v->fbi.dadx = (Bit32s)(data << 8) >> 8;
1886       break;
1887 
1888     case fdRdY:
1889       data = float_to_Bit32s(data, 12);
1890     case dRdY:
1891       if (chips & 1) v->fbi.drdy = (Bit32s)(data << 8) >> 8;
1892       break;
1893 
1894     case fdGdY:
1895       data = float_to_Bit32s(data, 12);
1896     case dGdY:
1897       if (chips & 1) v->fbi.dgdy = (Bit32s)(data << 8) >> 8;
1898       break;
1899 
1900     case fdBdY:
1901       data = float_to_Bit32s(data, 12);
1902     case dBdY:
1903       if (chips & 1) v->fbi.dbdy = (Bit32s)(data << 8) >> 8;
1904       break;
1905 
1906     case fdAdY:
1907       data = float_to_Bit32s(data, 12);
1908     case dAdY:
1909       if (chips & 1) v->fbi.dady = (Bit32s)(data << 8) >> 8;
1910       break;
1911 
1912     /* Z data is 20.12 formatted fixed point */
1913     case fstartZ:
1914       data = float_to_Bit32s(data, 12);
1915     case startZ:
1916       if (chips & 1) v->fbi.startz = (Bit32s)data;
1917       break;
1918 
1919     case fdZdX:
1920       data = float_to_Bit32s(data, 12);
1921     case dZdX:
1922       if (chips & 1) v->fbi.dzdx = (Bit32s)data;
1923       break;
1924 
1925     case fdZdY:
1926       data = float_to_Bit32s(data, 12);
1927     case dZdY:
1928       if (chips & 1) v->fbi.dzdy = (Bit32s)data;
1929       break;
1930 
1931     /* S,T data is 14.18 formatted fixed point, converted to 16.32 internally */
1932     case fstartS:
1933       data64 = float_to_Bit64s(data, 32);
1934       if (chips & 2) v->tmu[0].starts = data64;
1935       if (chips & 4) v->tmu[1].starts = data64;
1936       break;
1937     case startS:
1938       if (chips & 2) v->tmu[0].starts = (Bit64s)(Bit32s)data << 14;
1939       if (chips & 4) v->tmu[1].starts = (Bit64s)(Bit32s)data << 14;
1940       break;
1941 
1942     case fstartT:
1943       data64 = float_to_Bit64s(data, 32);
1944       if (chips & 2) v->tmu[0].startt = data64;
1945       if (chips & 4) v->tmu[1].startt = data64;
1946       break;
1947     case startT:
1948       if (chips & 2) v->tmu[0].startt = (Bit64s)(Bit32s)data << 14;
1949       if (chips & 4) v->tmu[1].startt = (Bit64s)(Bit32s)data << 14;
1950       break;
1951 
1952     case fdSdX:
1953       data64 = float_to_Bit64s(data, 32);
1954       if (chips & 2) v->tmu[0].dsdx = data64;
1955       if (chips & 4) v->tmu[1].dsdx = data64;
1956       break;
1957     case dSdX:
1958       if (chips & 2) v->tmu[0].dsdx = (Bit64s)(Bit32s)data << 14;
1959       if (chips & 4) v->tmu[1].dsdx = (Bit64s)(Bit32s)data << 14;
1960       break;
1961 
1962     case fdTdX:
1963       data64 = float_to_Bit64s(data, 32);
1964       if (chips & 2) v->tmu[0].dtdx = data64;
1965       if (chips & 4) v->tmu[1].dtdx = data64;
1966       break;
1967     case dTdX:
1968       if (chips & 2) v->tmu[0].dtdx = (Bit64s)(Bit32s)data << 14;
1969       if (chips & 4) v->tmu[1].dtdx = (Bit64s)(Bit32s)data << 14;
1970       break;
1971 
1972     case fdSdY:
1973       data64 = float_to_Bit64s(data, 32);
1974       if (chips & 2) v->tmu[0].dsdy = data64;
1975       if (chips & 4) v->tmu[1].dsdy = data64;
1976       break;
1977     case dSdY:
1978       if (chips & 2) v->tmu[0].dsdy = (Bit64s)(Bit32s)data << 14;
1979       if (chips & 4) v->tmu[1].dsdy = (Bit64s)(Bit32s)data << 14;
1980       break;
1981 
1982     case fdTdY:
1983       data64 = float_to_Bit64s(data, 32);
1984       if (chips & 2) v->tmu[0].dtdy = data64;
1985       if (chips & 4) v->tmu[1].dtdy = data64;
1986       break;
1987     case dTdY:
1988       if (chips & 2) v->tmu[0].dtdy = (Bit64s)(Bit32s)data << 14;
1989       if (chips & 4) v->tmu[1].dtdy = (Bit64s)(Bit32s)data << 14;
1990       break;
1991 
1992     /* W data is 2.30 formatted fixed point, converted to 16.32 internally */
1993     case fstartW:
1994       data64 = float_to_Bit64s(data, 32);
1995       if (chips & 1) v->fbi.startw = data64;
1996       if (chips & 2) v->tmu[0].startw = data64;
1997       if (chips & 4) v->tmu[1].startw = data64;
1998       break;
1999     case startW:
2000       if (chips & 1) v->fbi.startw = (Bit64s)(Bit32s)data << 2;
2001       if (chips & 2) v->tmu[0].startw = (Bit64s)(Bit32s)data << 2;
2002       if (chips & 4) v->tmu[1].startw = (Bit64s)(Bit32s)data << 2;
2003       break;
2004 
2005     case fdWdX:
2006       data64 = float_to_Bit64s(data, 32);
2007       if (chips & 1) v->fbi.dwdx = data64;
2008       if (chips & 2) v->tmu[0].dwdx = data64;
2009       if (chips & 4) v->tmu[1].dwdx = data64;
2010       break;
2011     case dWdX:
2012       if (chips & 1) v->fbi.dwdx = (Bit64s)(Bit32s)data << 2;
2013       if (chips & 2) v->tmu[0].dwdx = (Bit64s)(Bit32s)data << 2;
2014       if (chips & 4) v->tmu[1].dwdx = (Bit64s)(Bit32s)data << 2;
2015       break;
2016 
2017     case fdWdY:
2018       data64 = float_to_Bit64s(data, 32);
2019       if (chips & 1) v->fbi.dwdy = data64;
2020       if (chips & 2) v->tmu[0].dwdy = data64;
2021       if (chips & 4) v->tmu[1].dwdy = data64;
2022       break;
2023     case dWdY:
2024       if (chips & 1) v->fbi.dwdy = (Bit64s)(Bit32s)data << 2;
2025       if (chips & 2) v->tmu[0].dwdy = (Bit64s)(Bit32s)data << 2;
2026       if (chips & 4) v->tmu[1].dwdy = (Bit64s)(Bit32s)data << 2;
2027       break;
2028     /* setup bits */
2029     case sARGB:
2030       if (chips & 1)
2031       {
2032         v->reg[sAlpha].f = (float)RGB_ALPHA(data);
2033         v->reg[sRed].f = (float)RGB_RED(data);
2034         v->reg[sGreen].f = (float)RGB_GREEN(data);
2035         v->reg[sBlue].f = (float)RGB_BLUE(data);
2036       }
2037       break;
2038 
2039     /* mask off invalid bits for different cards */
2040     case fbzColorPath:
2041       poly_wait(v->poly, v->regnames[regnum]);
2042       if (v->type < VOODOO_2)
2043         data &= 0x0fffffff;
2044       if (chips & 1) v->reg[fbzColorPath].u = data;
2045       break;
2046 
2047     case fbzMode:
2048       poly_wait(v->poly, v->regnames[regnum]);
2049       if (v->type < VOODOO_2)
2050         data &= 0x001fffff;
2051       if (chips & 1) v->reg[fbzMode].u = data;
2052       break;
2053 
2054     case fogMode:
2055       poly_wait(v->poly, v->regnames[regnum]);
2056       if (v->type < VOODOO_2)
2057         data &= 0x0000003f;
2058       if (chips & 1) v->reg[fogMode].u = data;
2059       break;
2060 
2061     /* triangle drawing */
2062     case triangleCMD:
2063       v->fbi.cheating_allowed = (v->fbi.ax != 0 || v->fbi.ay != 0 || v->fbi.bx > 50 || v->fbi.by != 0 || v->fbi.cx != 0 || v->fbi.cy > 50);
2064       v->fbi.sign = data;
2065       triangle();
2066       break;
2067 
2068     case ftriangleCMD:
2069       v->fbi.cheating_allowed = 1;
2070       v->fbi.sign = data;
2071       triangle();
2072       break;
2073 
2074     case sBeginTriCMD:
2075       begin_triangle();
2076       break;
2077 
2078     case sDrawTriCMD:
2079       draw_triangle();
2080       break;
2081 
2082     /* other commands */
2083     case nopCMD:
2084       poly_wait(v->poly, v->regnames[regnum]);
2085       if (data & 1)
2086         reset_counters(v);
2087       if (data & 2)
2088         v->reg[fbiTrianglesOut].u = 0;
2089       break;
2090 
2091     case fastfillCMD:
2092       fastfill(v);
2093       break;
2094 
2095     case swapbufferCMD:
2096       poly_wait(v->poly, v->regnames[regnum]);
2097       swapbuffer(v, data);
2098       break;
2099     /* gamma table access -- Voodoo/Voodoo2 only */
2100     case clutData:
2101       if (v->type <= VOODOO_2 && (chips & 1))
2102       {
2103         poly_wait(v->poly, v->regnames[regnum]);
2104         if (!FBIINIT1_VIDEO_TIMING_RESET(v->reg[fbiInit1].u))
2105         {
2106           int index = data >> 24;
2107           if (index <= 32)
2108           {
2109             v->fbi.clut[index] = data;
2110             v->fbi.clut_dirty = 1;
2111           }
2112         }
2113         else
2114           BX_DEBUG(("clutData ignored because video timing reset = 1"));
2115       }
2116       break;
2117     /* nccTable entries are processed and expanded immediately */
2118     case nccTable+0:
2119     case nccTable+1:
2120     case nccTable+2:
2121     case nccTable+3:
2122     case nccTable+4:
2123     case nccTable+5:
2124     case nccTable+6:
2125     case nccTable+7:
2126     case nccTable+8:
2127     case nccTable+9:
2128     case nccTable+10:
2129     case nccTable+11:
2130       poly_wait(v->poly, v->regnames[regnum]);
2131       if (chips & 2) ncc_table_write(&v->tmu[0].ncc[0], regnum - nccTable, data);
2132       if (chips & 4) ncc_table_write(&v->tmu[1].ncc[0], regnum - nccTable, data);
2133       break;
2134 
2135     case nccTable+12:
2136     case nccTable+13:
2137     case nccTable+14:
2138     case nccTable+15:
2139     case nccTable+16:
2140     case nccTable+17:
2141     case nccTable+18:
2142     case nccTable+19:
2143     case nccTable+20:
2144     case nccTable+21:
2145     case nccTable+22:
2146     case nccTable+23:
2147       poly_wait(v->poly, v->regnames[regnum]);
2148       if (chips & 2) ncc_table_write(&v->tmu[0].ncc[1], regnum - (nccTable+12), data);
2149       if (chips & 4) ncc_table_write(&v->tmu[1].ncc[1], regnum - (nccTable+12), data);
2150       break;
2151 
2152     /* fogTable entries are processed and expanded immediately */
2153     case fogTable+0:
2154     case fogTable+1:
2155     case fogTable+2:
2156     case fogTable+3:
2157     case fogTable+4:
2158     case fogTable+5:
2159     case fogTable+6:
2160     case fogTable+7:
2161     case fogTable+8:
2162     case fogTable+9:
2163     case fogTable+10:
2164     case fogTable+11:
2165     case fogTable+12:
2166     case fogTable+13:
2167     case fogTable+14:
2168     case fogTable+15:
2169     case fogTable+16:
2170     case fogTable+17:
2171     case fogTable+18:
2172     case fogTable+19:
2173     case fogTable+20:
2174     case fogTable+21:
2175     case fogTable+22:
2176     case fogTable+23:
2177     case fogTable+24:
2178     case fogTable+25:
2179     case fogTable+26:
2180     case fogTable+27:
2181     case fogTable+28:
2182     case fogTable+29:
2183     case fogTable+30:
2184     case fogTable+31:
2185       poly_wait(v->poly, v->regnames[regnum]);
2186       if (chips & 1)
2187       {
2188         int base = 2 * (regnum - fogTable);
2189         v->fbi.fogdelta[base + 0] = (data >> 0) & 0xff;
2190         v->fbi.fogblend[base + 0] = (data >> 8) & 0xff;
2191         v->fbi.fogdelta[base + 1] = (data >> 16) & 0xff;
2192         v->fbi.fogblend[base + 1] = (data >> 24) & 0xff;
2193       }
2194       break;
2195 
2196     /* texture modifications cause us to recompute everything */
2197     case textureMode:
2198       if (((chips & 6) > 0) && TEXMODE_TRILINEAR(data)) {
2199         if (count < 50) BX_INFO(("Trilinear textures not implemented yet"));
2200         count++;
2201       }
2202     case tLOD:
2203     case tDetail:
2204     case texBaseAddr:
2205     case texBaseAddr_1:
2206     case texBaseAddr_2:
2207     case texBaseAddr_3_8:
2208       poly_wait(v->poly, v->regnames[regnum]);
2209       if (chips & 2)
2210       {
2211         v->tmu[0].reg[regnum].u = data;
2212         v->tmu[0].regdirty = 1;
2213       }
2214       if (chips & 4)
2215       {
2216         v->tmu[1].reg[regnum].u = data;
2217         v->tmu[1].regdirty = 1;
2218       }
2219       break;
2220 
2221     case trexInit1:
2222       /* send tmu config data to the frame buffer */
2223       v->send_config = TREXINIT_SEND_TMU_CONFIG(data);
2224       goto default_case;
2225       break;
2226 
2227     case userIntrCMD:
2228       BX_ERROR(("Writing to register %s not supported yet", v->regnames[regnum]));
2229       v->reg[regnum].u = data;
2230       break;
2231 
2232     case bltData:
2233       v->reg[regnum].u = data;
2234       if (BLT.h2s_mode) {
2235         voodoo2_bitblt_cpu_to_screen(data);
2236       } else {
2237         BX_ERROR(("Write to register %s ignored", v->regnames[regnum]));
2238       }
2239       break;
2240 
2241     case bltSrcChromaRange:
2242       v->reg[regnum].u = data;
2243       BLT.src_col_min = (Bit16u)data;
2244       BLT.src_col_max = (Bit16u)(data >> 16);
2245       break;
2246 
2247     case bltDstChromaRange:
2248       v->reg[regnum].u = data;
2249       BLT.dst_col_min = (Bit16u)data;
2250       BLT.dst_col_max = (Bit16u)(data >> 16);
2251       break;
2252 
2253     case bltClipX:
2254       v->reg[regnum].u = data;
2255       BLT.clipx0 = (Bit16u)(data >> 16);
2256       BLT.clipx1 = (Bit16u)(data & 0x0fff);
2257       break;
2258 
2259     case bltClipY:
2260       v->reg[regnum].u = data;
2261       BLT.clipy0 = (Bit16u)(data >> 16);
2262       BLT.clipy1 = (Bit16u)(data & 0x0fff);
2263       break;
2264 
2265     case bltRop:
2266       v->reg[regnum].u = data;
2267       BLT.rop[0] = (Bit8u)(data & 0x0f);
2268       BLT.rop[1] = (Bit8u)((data >> 4) & 0x0f);
2269       BLT.rop[2] = (Bit8u)((data >> 8) & 0x0f);
2270       BLT.rop[3] = (Bit8u)((data >> 12) & 0x0f);
2271       break;
2272 
2273     case bltColor:
2274       v->reg[regnum].u = data;
2275       BLT.fgcolor[0] = (Bit8u)data;
2276       BLT.fgcolor[1] = (Bit8u)(data >> 8);
2277       BLT.bgcolor[0] = (Bit8u)(data >> 16);
2278       BLT.bgcolor[1] = (Bit8u)(data >> 24);
2279       break;
2280 
2281     case bltDstXY:
2282     case bltSize:
2283     case bltCommand:
2284       v->reg[regnum].u = data;
2285       if ((data >> 31) & 1) {
2286         voodoo2_bitblt();
2287       }
2288       break;
2289 
2290     case colBufferAddr: /* Banshee */
2291       if (v->type >= VOODOO_BANSHEE && (chips & 1)) {
2292         v->fbi.rgboffs[1] = data & v->fbi.mask & ~0x0f;
2293       }
2294       break;
2295 
2296     case colBufferStride: /* Banshee */
2297       if (v->type >= VOODOO_BANSHEE && (chips & 1)) {
2298         if (data & 0x8000)
2299           v->fbi.rowpixels = (data & 0x7f) << 6;
2300         else
2301           v->fbi.rowpixels = (data & 0x3fff) >> 1;
2302       }
2303       break;
2304 
2305     case auxBufferAddr: /* Banshee */
2306       if (v->type >= VOODOO_BANSHEE && (chips & 1)) {
2307         v->fbi.auxoffs = data & v->fbi.mask & ~0x0f;
2308       }
2309       break;
2310 
2311     case auxBufferStride: /* Banshee */
2312       if (v->type >= VOODOO_BANSHEE && (chips & 1)) {
2313         Bit32u rowpixels;
2314 
2315         if (data & 0x8000)
2316           rowpixels = (data & 0x7f) << 6;
2317         else
2318           rowpixels = (data & 0x3fff) >> 1;
2319         if (v->fbi.rowpixels != rowpixels)
2320           BX_ERROR(("aux buffer stride differs from color buffer stride"));
2321       }
2322       break;
2323 
2324     /* these registers are referenced in the renderer; we must wait for pending work before changing */
2325     case chromaRange:
2326     case chromaKey:
2327     case alphaMode:
2328     case fogColor:
2329     case stipple:
2330     case zaColor:
2331     case color1:
2332     case color0:
2333     case clipLowYHighY:
2334     case clipLeftRight:
2335       poly_wait(v->poly, v->regnames[regnum]);
2336       /* fall through to default implementation */
2337 
2338     /* by default, just feed the data to the chips */
2339     default:
2340 default_case:
2341       if (chips & 1) v->reg[0x000 + regnum].u = data;
2342       if (chips & 2) v->reg[0x100 + regnum].u = data;
2343       if (chips & 4) v->reg[0x200 + regnum].u = data;
2344       if (chips & 8) v->reg[0x300 + regnum].u = data;
2345       break;
2346   }
2347 }
2348 
texture_w(Bit32u offset,Bit32u data)2349 Bit32s texture_w(Bit32u offset, Bit32u data)
2350 {
2351   int tmunum = (offset >> 19) & 0x03;
2352   BX_DEBUG(("write TMU%d offset 0x%x value 0x%x", tmunum, offset, data));
2353 
2354   tmu_state *t;
2355 
2356   /* point to the right TMU */
2357   if (!(v->chipmask & (2 << tmunum)) || (tmunum >= MAX_TMU))
2358     return 0;
2359   t = &v->tmu[tmunum];
2360 
2361   if (TEXLOD_TDIRECT_WRITE(t->reg[tLOD].u))
2362     BX_PANIC(("Texture direct write!"));
2363 
2364   /* wait for any outstanding work to finish */
2365   poly_wait(v->poly, "Texture write");
2366 
2367   /* update texture info if dirty */
2368   if (t->regdirty)
2369     recompute_texture_params(t);
2370 
2371   /* swizzle the data */
2372   if (TEXLOD_TDATA_SWIZZLE(t->reg[tLOD].u))
2373     data = bx_bswap32(data);
2374   if (TEXLOD_TDATA_SWAP(t->reg[tLOD].u))
2375     data = (data >> 16) | (data << 16);
2376 
2377   /* 8-bit texture case */
2378   if (TEXMODE_FORMAT(t->reg[textureMode].u) < 8)
2379   {
2380     int lod, tt, ts;
2381     Bit32u tbaseaddr;
2382     Bit8u *dest;
2383 
2384     /* extract info */
2385     if (v->type <= VOODOO_2)
2386     {
2387       lod = (offset >> 15) & 0x0f;
2388       tt = (offset >> 7) & 0xff;
2389 
2390       /* old code has a bit about how this is broken in gauntleg unless we always look at TMU0 */
2391       if (TEXMODE_SEQ_8_DOWNLD(v->tmu[0].reg[textureMode].u))
2392         ts = (offset << 2) & 0xfc;
2393       else
2394         ts = (offset << 1) & 0xfc;
2395 
2396       /* validate parameters */
2397       if (lod > 8)
2398         return 0;
2399 
2400       /* compute the base address */
2401       tbaseaddr = t->lodoffset[lod];
2402       tbaseaddr += tt * ((t->wmask >> lod) + 1) + ts;
2403 
2404       if (LOG_TEXTURE_RAM) BX_DEBUG(("Texture 8-bit w: lod=%d s=%d t=%d data=0x%08x", lod, ts, tt, data));
2405     }
2406     else
2407     {
2408       tbaseaddr = t->lodoffset[0] + offset*4;
2409 
2410       if (LOG_TEXTURE_RAM) BX_DEBUG(("Texture 16-bit w: offset=0x%x data=0x%08x", offset*4, data));
2411     }
2412 
2413     /* write the four bytes in little-endian order */
2414     dest = t->ram;
2415     tbaseaddr &= t->mask;
2416     dest[BYTE4_XOR_LE(tbaseaddr + 0)] = (data >> 0) & 0xff;
2417     dest[BYTE4_XOR_LE(tbaseaddr + 1)] = (data >> 8) & 0xff;
2418     dest[BYTE4_XOR_LE(tbaseaddr + 2)] = (data >> 16) & 0xff;
2419     dest[BYTE4_XOR_LE(tbaseaddr + 3)] = (data >> 24) & 0xff;
2420   }
2421 
2422   /* 16-bit texture case */
2423   else
2424   {
2425     int lod, tt, ts;
2426     Bit32u tbaseaddr;
2427     Bit16u *dest;
2428 
2429     /* extract info */
2430     if (v->type <= VOODOO_2)
2431     {
2432       tmunum = (offset >> 19) & 0x03;
2433       lod = (offset >> 15) & 0x0f;
2434       tt = (offset >> 7) & 0xff;
2435       ts = (offset << 1) & 0xfe;
2436 
2437       /* validate parameters */
2438       if (lod > 8)
2439         return 0;
2440 
2441       /* compute the base address */
2442       tbaseaddr = t->lodoffset[lod];
2443       tbaseaddr += 2 * (tt * ((t->wmask >> lod) + 1) + ts);
2444 
2445       if (LOG_TEXTURE_RAM) BX_DEBUG(("Texture 16-bit w: lod=%d s=%d t=%d data=%08X", lod, ts, tt, data));
2446     }
2447     else
2448     {
2449       tbaseaddr = t->lodoffset[0] + offset*4;
2450 
2451       if (LOG_TEXTURE_RAM) BX_DEBUG(("Texture 16-bit w: offset=0x%x data=0x%08x", offset*4, data));
2452     }
2453 
2454     /* write the two words in little-endian order */
2455     dest = (Bit16u *)t->ram;
2456     tbaseaddr &= t->mask;
2457     tbaseaddr >>= 1;
2458     dest[BYTE_XOR_LE(tbaseaddr + 0)] = (data >> 0) & 0xffff;
2459     dest[BYTE_XOR_LE(tbaseaddr + 1)] = (data >> 16) & 0xffff;
2460   }
2461 
2462   return 0;
2463 }
2464 
lfb_w(Bit32u offset,Bit32u data,Bit32u mem_mask)2465 Bit32u lfb_w(Bit32u offset, Bit32u data, Bit32u mem_mask)
2466 {
2467   Bit16u *dest, *depth;
2468   Bit32u destmax, depthmax;
2469   Bit32u forcefront=0;
2470 
2471   int sr[2], sg[2], sb[2], sa[2], sw[2];
2472   int x, y, scry, mask;
2473   int pix, destbuf;
2474 
2475   BX_DEBUG(("write LFB offset 0x%x value 0x%08x", offset, data));
2476 
2477   /* byte swizzling */
2478   if (LFBMODE_BYTE_SWIZZLE_WRITES(v->reg[lfbMode].u))
2479   {
2480     data = bx_bswap32(data);
2481     mem_mask = bx_bswap32(mem_mask);
2482   }
2483 
2484   /* word swapping */
2485   if (LFBMODE_WORD_SWAP_WRITES(v->reg[lfbMode].u))
2486   {
2487     data = (data << 16) | (data >> 16);
2488     mem_mask = (mem_mask << 16) | (mem_mask >> 16);
2489   }
2490 
2491   /* extract default depth and alpha values */
2492   sw[0] = sw[1] = v->reg[zaColor].u & 0xffff;
2493   sa[0] = sa[1] = v->reg[zaColor].u >> 24;
2494 
2495   /* first extract A,R,G,B from the data */
2496   switch (LFBMODE_WRITE_FORMAT(v->reg[lfbMode].u) + 16 * LFBMODE_RGBA_LANES(v->reg[lfbMode].u))
2497   {
2498     case 16*0 + 0:    /* ARGB, 16-bit RGB 5-6-5 */
2499     case 16*2 + 0:    /* RGBA, 16-bit RGB 5-6-5 */
2500       EXTRACT_565_TO_888(data, sr[0], sg[0], sb[0]);
2501       EXTRACT_565_TO_888(data >> 16, sr[1], sg[1], sb[1]);
2502       mask = LFB_RGB_PRESENT | (LFB_RGB_PRESENT << 4);
2503       offset <<= 1;
2504       break;
2505     case 16*1 + 0:    /* ABGR, 16-bit RGB 5-6-5 */
2506     case 16*3 + 0:    /* BGRA, 16-bit RGB 5-6-5 */
2507       EXTRACT_565_TO_888(data, sb[0], sg[0], sr[0]);
2508       EXTRACT_565_TO_888(data >> 16, sb[1], sg[1], sr[1]);
2509       mask = LFB_RGB_PRESENT | (LFB_RGB_PRESENT << 4);
2510       offset <<= 1;
2511       break;
2512 
2513     case 16*0 + 1:    /* ARGB, 16-bit RGB x-5-5-5 */
2514       EXTRACT_x555_TO_888(data, sr[0], sg[0], sb[0]);
2515       EXTRACT_x555_TO_888(data >> 16, sr[1], sg[1], sb[1]);
2516       mask = LFB_RGB_PRESENT | (LFB_RGB_PRESENT << 4);
2517       offset <<= 1;
2518       break;
2519     case 16*1 + 1:    /* ABGR, 16-bit RGB x-5-5-5 */
2520       EXTRACT_x555_TO_888(data, sb[0], sg[0], sr[0]);
2521       EXTRACT_x555_TO_888(data >> 16, sb[1], sg[1], sr[1]);
2522       mask = LFB_RGB_PRESENT | (LFB_RGB_PRESENT << 4);
2523       offset <<= 1;
2524       break;
2525     case 16*2 + 1:    /* RGBA, 16-bit RGB x-5-5-5 */
2526       EXTRACT_555x_TO_888(data, sr[0], sg[0], sb[0]);
2527       EXTRACT_555x_TO_888(data >> 16, sr[1], sg[1], sb[1]);
2528       mask = LFB_RGB_PRESENT | (LFB_RGB_PRESENT << 4);
2529       offset <<= 1;
2530       break;
2531     case 16*3 + 1:    /* BGRA, 16-bit RGB x-5-5-5 */
2532       EXTRACT_555x_TO_888(data, sb[0], sg[0], sr[0]);
2533       EXTRACT_555x_TO_888(data >> 16, sb[1], sg[1], sr[1]);
2534       mask = LFB_RGB_PRESENT | (LFB_RGB_PRESENT << 4);
2535       offset <<= 1;
2536       break;
2537 
2538     case 16*0 + 2:    /* ARGB, 16-bit ARGB 1-5-5-5 */
2539       EXTRACT_1555_TO_8888(data, sa[0], sr[0], sg[0], sb[0]);
2540       EXTRACT_1555_TO_8888(data >> 16, sa[1], sr[1], sg[1], sb[1]);
2541       mask = LFB_RGB_PRESENT | LFB_ALPHA_PRESENT | ((LFB_RGB_PRESENT | LFB_ALPHA_PRESENT) << 4);
2542       offset <<= 1;
2543       break;
2544     case 16*1 + 2:    /* ABGR, 16-bit ARGB 1-5-5-5 */
2545       EXTRACT_1555_TO_8888(data, sa[0], sb[0], sg[0], sr[0]);
2546       EXTRACT_1555_TO_8888(data >> 16, sa[1], sb[1], sg[1], sr[1]);
2547       mask = LFB_RGB_PRESENT | LFB_ALPHA_PRESENT | ((LFB_RGB_PRESENT | LFB_ALPHA_PRESENT) << 4);
2548       offset <<= 1;
2549       break;
2550     case 16*2 + 2:    /* RGBA, 16-bit ARGB 1-5-5-5 */
2551       EXTRACT_5551_TO_8888(data, sr[0], sg[0], sb[0], sa[0]);
2552       EXTRACT_5551_TO_8888(data >> 16, sr[1], sg[1], sb[1], sa[1]);
2553       mask = LFB_RGB_PRESENT | LFB_ALPHA_PRESENT | ((LFB_RGB_PRESENT | LFB_ALPHA_PRESENT) << 4);
2554       offset <<= 1;
2555       break;
2556     case 16*3 + 2:    /* BGRA, 16-bit ARGB 1-5-5-5 */
2557       EXTRACT_5551_TO_8888(data, sb[0], sg[0], sr[0], sa[0]);
2558       EXTRACT_5551_TO_8888(data >> 16, sb[1], sg[1], sr[1], sa[1]);
2559       mask = LFB_RGB_PRESENT | LFB_ALPHA_PRESENT | ((LFB_RGB_PRESENT | LFB_ALPHA_PRESENT) << 4);
2560       offset <<= 1;
2561       break;
2562 
2563     case 16*0 + 4:    /* ARGB, 32-bit RGB x-8-8-8 */
2564       EXTRACT_x888_TO_888(data, sr[0], sg[0], sb[0]);
2565       mask = LFB_RGB_PRESENT;
2566       break;
2567     case 16*1 + 4:    /* ABGR, 32-bit RGB x-8-8-8 */
2568       EXTRACT_x888_TO_888(data, sb[0], sg[0], sr[0]);
2569       mask = LFB_RGB_PRESENT;
2570       break;
2571     case 16*2 + 4:    /* RGBA, 32-bit RGB x-8-8-8 */
2572       EXTRACT_888x_TO_888(data, sr[0], sg[0], sb[0]);
2573       mask = LFB_RGB_PRESENT;
2574       break;
2575     case 16*3 + 4:    /* BGRA, 32-bit RGB x-8-8-8 */
2576       EXTRACT_888x_TO_888(data, sb[0], sg[0], sr[0]);
2577       mask = LFB_RGB_PRESENT;
2578       break;
2579 
2580     case 16*0 + 5:    /* ARGB, 32-bit ARGB 8-8-8-8 */
2581       EXTRACT_8888_TO_8888(data, sa[0], sr[0], sg[0], sb[0]);
2582       mask = LFB_RGB_PRESENT | LFB_ALPHA_PRESENT;
2583       break;
2584     case 16*1 + 5:    /* ABGR, 32-bit ARGB 8-8-8-8 */
2585       EXTRACT_8888_TO_8888(data, sa[0], sb[0], sg[0], sr[0]);
2586       mask = LFB_RGB_PRESENT | LFB_ALPHA_PRESENT;
2587       break;
2588     case 16*2 + 5:    /* RGBA, 32-bit ARGB 8-8-8-8 */
2589       EXTRACT_8888_TO_8888(data, sr[0], sg[0], sb[0], sa[0]);
2590       mask = LFB_RGB_PRESENT | LFB_ALPHA_PRESENT;
2591       break;
2592     case 16*3 + 5:    /* BGRA, 32-bit ARGB 8-8-8-8 */
2593       EXTRACT_8888_TO_8888(data, sb[0], sg[0], sr[0], sa[0]);
2594       mask = LFB_RGB_PRESENT | LFB_ALPHA_PRESENT;
2595       break;
2596 
2597     case 16*0 + 12:   /* ARGB, 32-bit depth+RGB 5-6-5 */
2598     case 16*2 + 12:   /* RGBA, 32-bit depth+RGB 5-6-5 */
2599       sw[0] = data >> 16;
2600       EXTRACT_565_TO_888(data, sr[0], sg[0], sb[0]);
2601       mask = LFB_RGB_PRESENT | LFB_DEPTH_PRESENT_MSW;
2602       break;
2603     case 16*1 + 12:   /* ABGR, 32-bit depth+RGB 5-6-5 */
2604     case 16*3 + 12:   /* BGRA, 32-bit depth+RGB 5-6-5 */
2605       sw[0] = data >> 16;
2606       EXTRACT_565_TO_888(data, sb[0], sg[0], sr[0]);
2607       mask = LFB_RGB_PRESENT | LFB_DEPTH_PRESENT_MSW;
2608       break;
2609 
2610     case 16*0 + 13:   /* ARGB, 32-bit depth+RGB x-5-5-5 */
2611       sw[0] = data >> 16;
2612       EXTRACT_x555_TO_888(data, sr[0], sg[0], sb[0]);
2613       mask = LFB_RGB_PRESENT | LFB_DEPTH_PRESENT_MSW;
2614       break;
2615     case 16*1 + 13:   /* ABGR, 32-bit depth+RGB x-5-5-5 */
2616       sw[0] = data >> 16;
2617       EXTRACT_x555_TO_888(data, sb[0], sg[0], sr[0]);
2618       mask = LFB_RGB_PRESENT | LFB_DEPTH_PRESENT_MSW;
2619       break;
2620     case 16*2 + 13:   /* RGBA, 32-bit depth+RGB x-5-5-5 */
2621       sw[0] = data >> 16;
2622       EXTRACT_555x_TO_888(data, sr[0], sg[0], sb[0]);
2623       mask = LFB_RGB_PRESENT | LFB_DEPTH_PRESENT_MSW;
2624       break;
2625     case 16*3 + 13:   /* BGRA, 32-bit depth+RGB x-5-5-5 */
2626       sw[0] = data >> 16;
2627       EXTRACT_555x_TO_888(data, sb[0], sg[0], sr[0]);
2628       mask = LFB_RGB_PRESENT | LFB_DEPTH_PRESENT_MSW;
2629       break;
2630 
2631     case 16*0 + 14:   /* ARGB, 32-bit depth+ARGB 1-5-5-5 */
2632       sw[0] = data >> 16;
2633       EXTRACT_1555_TO_8888(data, sa[0], sr[0], sg[0], sb[0]);
2634       mask = LFB_RGB_PRESENT | LFB_ALPHA_PRESENT | LFB_DEPTH_PRESENT_MSW;
2635       break;
2636     case 16*1 + 14:   /* ABGR, 32-bit depth+ARGB 1-5-5-5 */
2637       sw[0] = data >> 16;
2638       EXTRACT_1555_TO_8888(data, sa[0], sb[0], sg[0], sr[0]);
2639       mask = LFB_RGB_PRESENT | LFB_ALPHA_PRESENT | LFB_DEPTH_PRESENT_MSW;
2640       break;
2641     case 16*2 + 14:   /* RGBA, 32-bit depth+ARGB 1-5-5-5 */
2642       sw[0] = data >> 16;
2643       EXTRACT_5551_TO_8888(data, sr[0], sg[0], sb[0], sa[0]);
2644       mask = LFB_RGB_PRESENT | LFB_ALPHA_PRESENT | LFB_DEPTH_PRESENT_MSW;
2645       break;
2646     case 16*3 + 14:   /* BGRA, 32-bit depth+ARGB 1-5-5-5 */
2647       sw[0] = data >> 16;
2648       EXTRACT_5551_TO_8888(data, sb[0], sg[0], sr[0], sa[0]);
2649       mask = LFB_RGB_PRESENT | LFB_ALPHA_PRESENT | LFB_DEPTH_PRESENT_MSW;
2650       break;
2651 
2652     case 16*0 + 15:   /* ARGB, 16-bit depth */
2653     case 16*1 + 15:   /* ARGB, 16-bit depth */
2654     case 16*2 + 15:   /* ARGB, 16-bit depth */
2655     case 16*3 + 15:   /* ARGB, 16-bit depth */
2656       sw[0] = data & 0xffff;
2657       sw[1] = data >> 16;
2658       mask = LFB_DEPTH_PRESENT | (LFB_DEPTH_PRESENT << 4);
2659       offset <<= 1;
2660       break;
2661 
2662     default:      /* reserved */
2663       return 0;
2664   }
2665 
2666   /* compute X,Y */
2667   x = (offset << 0) & ((1 << v->fbi.lfb_stride) - 1);
2668   y = (offset >> v->fbi.lfb_stride) & 0x7ff;
2669 
2670   /* adjust the mask based on which half of the data is written */
2671   if (!ACCESSING_BITS_0_15)
2672     mask &= ~(0x0f - LFB_DEPTH_PRESENT_MSW);
2673   if (!ACCESSING_BITS_16_31)
2674     mask &= ~(0xf0 + LFB_DEPTH_PRESENT_MSW);
2675 
2676   /* select the target buffer */
2677   destbuf = (v->type >= VOODOO_BANSHEE) ? (!forcefront) : LFBMODE_WRITE_BUFFER_SELECT(v->reg[lfbMode].u);
2678   switch (destbuf)
2679   {
2680     case 0:     /* front buffer */
2681       dest = (Bit16u *)(v->fbi.ram + v->fbi.rgboffs[v->fbi.frontbuf]);
2682       destmax = (v->fbi.mask + 1 - v->fbi.rgboffs[v->fbi.frontbuf]) / 2;
2683       v->fbi.video_changed = 1;
2684       break;
2685 
2686     case 1:     /* back buffer */
2687       dest = (Bit16u *)(v->fbi.ram + v->fbi.rgboffs[v->fbi.backbuf]);
2688       destmax = (v->fbi.mask + 1 - v->fbi.rgboffs[v->fbi.backbuf]) / 2;
2689       break;
2690 
2691     default:    /* reserved */
2692       return 0;
2693   }
2694   depth = (Bit16u *)(v->fbi.ram + v->fbi.auxoffs);
2695   depthmax = (v->fbi.mask + 1 - v->fbi.auxoffs) / 2;
2696 
2697   /* wait for any outstanding work to finish */
2698   poly_wait(v->poly, "LFB Write");
2699 
2700   /* simple case: no pipeline */
2701   if (!LFBMODE_ENABLE_PIXEL_PIPELINE(v->reg[lfbMode].u))
2702   {
2703     DECLARE_DITHER_POINTERS;
2704     UNUSED(dither);
2705     Bit32u bufoffs;
2706 
2707     if (LOG_LFB) BX_DEBUG(("VOODOO.%d.LFB:write raw mode %X (%d,%d) = %08X & %08X", v->index, LFBMODE_WRITE_FORMAT(v->reg[lfbMode].u), x, y, data, mem_mask));
2708 
2709     /* determine the screen Y */
2710     scry = y;
2711     if (LFBMODE_Y_ORIGIN(v->reg[lfbMode].u))
2712       scry = (v->fbi.yorigin - y) & 0x3ff;
2713 
2714     /* advance pointers to the proper row */
2715     bufoffs = scry * v->fbi.rowpixels + x;
2716 
2717     /* compute dithering */
2718     COMPUTE_DITHER_POINTERS(v->reg[fbzMode].u, y);
2719 
2720     /* loop over up to two pixels */
2721     for (pix = 0; mask; pix++)
2722     {
2723       /* make sure we care about this pixel */
2724       if (mask & 0x0f)
2725       {
2726         /* write to the RGB buffer */
2727         if ((mask & LFB_RGB_PRESENT) && bufoffs < destmax)
2728         {
2729           /* apply dithering and write to the screen */
2730           APPLY_DITHER(v->reg[fbzMode].u, x, dither_lookup, sr[pix], sg[pix], sb[pix]);
2731           dest[bufoffs] = (sr[pix] << 11) | (sg[pix] << 5) | sb[pix];
2732         }
2733 
2734         /* make sure we have an aux buffer to write to */
2735         if (depth && bufoffs < depthmax)
2736         {
2737           /* write to the alpha buffer */
2738           if ((mask & LFB_ALPHA_PRESENT) && FBZMODE_ENABLE_ALPHA_PLANES(v->reg[fbzMode].u))
2739             depth[bufoffs] = sa[pix];
2740 
2741           /* write to the depth buffer */
2742           if ((mask & (LFB_DEPTH_PRESENT | LFB_DEPTH_PRESENT_MSW)) && !FBZMODE_ENABLE_ALPHA_PLANES(v->reg[fbzMode].u))
2743             depth[bufoffs] = sw[pix];
2744         }
2745 
2746         /* track pixel writes to the frame buffer regardless of mask */
2747         v->reg[fbiPixelsOut].u++;
2748       }
2749 
2750       /* advance our pointers */
2751       bufoffs++;
2752       x++;
2753       mask >>= 4;
2754     }
2755   }
2756   /* tricky case: run the full pixel pipeline on the pixel */
2757   else
2758   {
2759     DECLARE_DITHER_POINTERS;
2760 
2761     if (LOG_LFB) BX_DEBUG(("VOODOO.%d.LFB:write pipelined mode %X (%d,%d) = %08X & %08X", v->index, LFBMODE_WRITE_FORMAT(v->reg[lfbMode].u), x, y, data, mem_mask));
2762 
2763     /* determine the screen Y */
2764     scry = y;
2765     if (FBZMODE_Y_ORIGIN(v->reg[fbzMode].u))
2766       scry = (v->fbi.yorigin - y) & 0x3ff;
2767 
2768     /* advance pointers to the proper row */
2769     dest += scry * v->fbi.rowpixels;
2770     if (depth)
2771       depth += scry * v->fbi.rowpixels;
2772 
2773     /* compute dithering */
2774     COMPUTE_DITHER_POINTERS(v->reg[fbzMode].u, y);
2775 
2776     /* loop over up to two pixels */
2777     for (pix = 0; mask; pix++)
2778     {
2779       /* make sure we care about this pixel */
2780       if (mask & 0x0f)
2781       {
2782         stats_block *stats = &v->fbi.lfb_stats;
2783         Bit64s iterw = sw[pix] << (30-16);
2784         Bit32s iterz = sw[pix] << 12;
2785         rgb_union color;
2786 
2787         /* apply clipping */
2788         if (FBZMODE_ENABLE_CLIPPING(v->reg[fbzMode].u))
2789         {
2790           if (x < (int)((v->reg[clipLeftRight].u >> 16) & 0x3ff) ||
2791             x >= (int)(v->reg[clipLeftRight].u & 0x3ff) ||
2792             scry < (int)((v->reg[clipLowYHighY].u >> 16) & 0x3ff) ||
2793             scry >= (int)(v->reg[clipLowYHighY].u & 0x3ff))
2794           {
2795             stats->pixels_in++;
2796             stats->clip_fail++;
2797             goto nextpixel;
2798           }
2799         }
2800 
2801         /* pixel pipeline part 1 handles depth testing and stippling */
2802         PIXEL_PIPELINE_BEGIN(v, stats, x, y, v->reg[fbzColorPath].u, v->reg[fbzMode].u, iterz, iterw);
2803 
2804         /* use the RGBA we stashed above */
2805         color.rgb.r = r = sr[pix];
2806         color.rgb.g = g = sg[pix];
2807         color.rgb.b = b = sb[pix];
2808         color.rgb.a = a = sa[pix];
2809 
2810         /* apply chroma key, alpha mask, and alpha testing */
2811         APPLY_CHROMAKEY(v, stats, v->reg[fbzMode].u, color);
2812         APPLY_ALPHAMASK(v, stats, v->reg[fbzMode].u, color.rgb.a);
2813         APPLY_ALPHATEST(v, stats, v->reg[alphaMode].u, color.rgb.a);
2814 
2815         /* pixel pipeline part 2 handles color combine, fog, alpha, and final output */
2816         PIXEL_PIPELINE_END(v, stats, dither, dither4, dither_lookup, x, dest, depth, v->reg[fbzMode].u, v->reg[fbzColorPath].u, v->reg[alphaMode].u, v->reg[fogMode].u, iterz, iterw, v->reg[zaColor]);
2817       }
2818 nextpixel:
2819       /* advance our pointers */
2820       x++;
2821       mask >>= 4;
2822     }
2823   }
2824 
2825   return 0;
2826 }
2827 
cmdfifo_calc_depth_needed(cmdfifo_info * f)2828 Bit32u cmdfifo_calc_depth_needed(cmdfifo_info *f)
2829 {
2830   Bit32u command, needed = BX_MAX_BIT32U;
2831   Bit8u type;
2832   int i, count = 0;
2833 
2834   if (f->depth == 0)
2835     return needed;
2836   command = *(Bit32u*)(&v->fbi.ram[f->rdptr & v->fbi.mask]);
2837   type = (Bit8u)(command & 0x07);
2838   switch (type) {
2839     case 0:
2840       if (((command >> 3) & 7) == 4) {
2841         needed = 2;
2842       } else {
2843         needed = 1;
2844       }
2845       break;
2846     case 1:
2847       needed = 1 + (command >> 16);
2848       break;
2849     case 2:
2850       for (i = 3; i <= 31; i++)
2851         if (command & (1 << i)) count++;
2852       needed = 1 + count;
2853       break;
2854     case 3:
2855       count = 2;                             /* X/Y */
2856       if (command & (1 << 28)) {
2857         if (command & (3 << 10)) count++;    /* ARGB */
2858       } else {
2859         if (command & (1 << 10)) count += 3; /* RGB */
2860         if (command & (1 << 11)) count++;    /* A */
2861       }
2862       if (command & (1 << 12)) count++;      /* Z */
2863       if (command & (1 << 13)) count++;      /* Wb */
2864       if (command & (1 << 14)) count++;      /* W0 */
2865       if (command & (1 << 15)) count += 2;   /* S0/T0 */
2866       if (command & (1 << 16)) count++;      /* W1 */
2867       if (command & (1 << 17)) count += 2;   /* S1/T1 */
2868       count *= (command >> 6) & 15;          /* numverts */
2869       needed = 1 + count + (command >> 29);
2870       break;
2871     case 4:
2872       for (i = 15; i <= 28; i++)
2873         if (command & (1 << i)) count++;
2874       needed = 1 + count + (command >> 29);
2875       break;
2876     case 5:
2877       needed = 2 + ((command >> 3) & 0x7ffff);
2878       break;
2879     default:
2880       BX_ERROR(("CMDFIFO: unsupported packet type %d", type));
2881   }
2882   return needed;
2883 }
2884 
cmdfifo_w(cmdfifo_info * f,Bit32u fbi_offset,Bit32u data)2885 void cmdfifo_w(cmdfifo_info *f, Bit32u fbi_offset, Bit32u data)
2886 {
2887   BX_LOCK(cmdfifo_mutex);
2888   *(Bit32u*)(&v->fbi.ram[fbi_offset]) = data;
2889   /* count holes? */
2890   if (f->count_holes) {
2891     if ((f->holes == 0) && (fbi_offset == (f->amin + 4))) {
2892       /* in-order, no holes */
2893       f->amin = f->amax = fbi_offset;
2894       f->depth++;
2895     } else if (fbi_offset < f->amin) {
2896       /* out-of-order, below the minimum */
2897       if (f->holes != 0) {
2898         BX_ERROR(("Unexpected CMDFIFO: AMin=0x%08x AMax=0x%08x Holes=%d WroteTo:0x%08x RdPtr:0x%08x",
2899                   f->amin, f->amax, f->holes, fbi_offset, f->rdptr));
2900       }
2901       f->amin = f->amax = fbi_offset;
2902       f->depth++;
2903     } else if (fbi_offset < f->amax) {
2904       /* out-of-order, but within the min-max range */
2905       f->holes--;
2906       if (f->holes == 0) {
2907         f->depth += (f->amax - f->amin) / 4;
2908         f->amin = f->amax;
2909       }
2910     } else {
2911       /* out-of-order, bumping max */
2912       f->holes += (fbi_offset - f->amax) / 4 - 1;
2913       f->amax = fbi_offset;
2914     }
2915   }
2916   if (f->depth_needed == BX_MAX_BIT32U) {
2917     f->depth_needed = cmdfifo_calc_depth_needed(f);
2918   }
2919   if (f->depth >= f->depth_needed) {
2920     f->cmd_ready = 1;
2921     if (!v->vtimer_running) {
2922       bx_set_sem(&fifo_wakeup);
2923     }
2924   }
2925   BX_UNLOCK(cmdfifo_mutex);
2926 }
2927 
cmdfifo_r(cmdfifo_info * f)2928 Bit32u cmdfifo_r(cmdfifo_info *f)
2929 {
2930   Bit32u data;
2931 
2932   data = *(Bit32u*)(&v->fbi.ram[f->rdptr & v->fbi.mask]);
2933   f->rdptr += 4;
2934   if (f->rdptr >= f->end) {
2935     BX_INFO(("CMDFIFO RdPtr rollover"));
2936     f->rdptr = f->base;
2937   }
2938   f->depth--;
2939   return data;
2940 }
2941 
cmdfifo_process(cmdfifo_info * f)2942 void cmdfifo_process(cmdfifo_info *f)
2943 {
2944   Bit32u command, data, mask, nwords, regaddr;
2945   Bit8u type, code, nvertex, smode, disbytes;
2946   bool inc, pcolor;
2947   voodoo_reg reg;
2948   int i, w0, wn;
2949   setup_vertex svert = {0};
2950 
2951   command = cmdfifo_r(f);
2952   type = (Bit8u)(command & 0x07);
2953   switch (type) {
2954     case 0:
2955       code = (Bit8u)((command >> 3) & 0x07);
2956       switch (code) {
2957         case 0: // NOP
2958           break;
2959         case 3: // JMP
2960           f->rdptr = (command >> 4) & 0xfffffc;
2961           if (f->count_holes) {
2962             BX_DEBUG(("cmdfifo_process(): JMP 0x%08x", f->rdptr));
2963           }
2964           break;
2965         case 4: // TODO: JMP AGP
2966           data = cmdfifo_r(f);
2967         default:
2968           BX_ERROR(("CMDFIFO packet type 0: unsupported code %d", code));
2969       }
2970       break;
2971     case 1:
2972       nwords = (command >> 16);
2973       regaddr = (command & 0x7ff8) >> 3;
2974       inc = (command >> 15) & 1;
2975       for (i = 0; i < (int)nwords; i++) {
2976         data = cmdfifo_r(f);
2977         BX_UNLOCK(cmdfifo_mutex);
2978         Voodoo_reg_write(regaddr, data);
2979         BX_LOCK(cmdfifo_mutex);
2980         if (inc) regaddr++;
2981       }
2982       break;
2983     case 2:
2984       mask = (command >> 3);
2985       if (v->type < VOODOO_BANSHEE) {
2986         regaddr = bltSrcBaseAddr;
2987       } else {
2988         regaddr = blt_clip0Min;
2989       }
2990       while (mask) {
2991         if (mask & 1) {
2992           data = cmdfifo_r(f);
2993           BX_UNLOCK(cmdfifo_mutex);
2994           if (v->type < VOODOO_BANSHEE) {
2995             register_w(regaddr, data, 1);
2996           } else {
2997             Banshee_2D_write(regaddr, data);
2998           }
2999           BX_LOCK(cmdfifo_mutex);
3000         }
3001         regaddr++;
3002         mask >>= 1;
3003       }
3004       break;
3005     case 3:
3006       nwords = (command >> 29);
3007       pcolor = (command >> 28) & 1;
3008       smode = (command >> 22) & 0x3f;
3009       mask = (command >> 10) & 0xff;
3010       nvertex = (command >> 6) & 0x0f;
3011       code = (command >> 3) & 0x07;
3012       /* copy relevant bits into the setup mode register */
3013       v->reg[sSetupMode].u = ((smode << 16) | mask);
3014       /* loop over triangles */
3015       for (i = 0; i < nvertex; i++) {
3016         reg.u = cmdfifo_r(f);
3017         svert.x = reg.f;
3018         reg.u = cmdfifo_r(f);
3019         svert.y = reg.f;
3020         if (pcolor) {
3021           if (mask & 0x03) {
3022             data = cmdfifo_r(f);
3023             if (mask & 0x01) {
3024               svert.r = (float)RGB_RED(data);
3025               svert.g = (float)RGB_GREEN(data);
3026               svert.b = (float)RGB_BLUE(data);
3027             }
3028             if (mask & 0x02) {
3029               svert.a = (float)RGB_ALPHA(data);
3030             }
3031           }
3032         } else {
3033           if (mask & 0x01) {
3034             reg.u = cmdfifo_r(f);
3035             svert.r = reg.f;
3036             reg.u = cmdfifo_r(f);
3037             svert.g = reg.f;
3038             reg.u = cmdfifo_r(f);
3039             svert.b = reg.f;
3040           }
3041           if (mask & 0x02) {
3042             reg.u = cmdfifo_r(f);
3043             svert.a = reg.f;
3044           }
3045         }
3046         if (mask & 0x04) {
3047           reg.u = cmdfifo_r(f);
3048           svert.z = reg.f;
3049         }
3050         if (mask & 0x08) {
3051           reg.u = cmdfifo_r(f);
3052           svert.wb = reg.f;
3053         }
3054         if (mask & 0x10) {
3055           reg.u = cmdfifo_r(f);
3056           svert.w0 = reg.f;
3057         }
3058         if (mask & 0x20) {
3059           reg.u = cmdfifo_r(f);
3060           svert.s0 = reg.f;
3061           reg.u = cmdfifo_r(f);
3062           svert.t0 = reg.f;
3063         }
3064         if (mask & 0x40) {
3065           reg.u = cmdfifo_r(f);
3066           svert.w1 = reg.f;
3067         }
3068         if (mask & 0x80) {
3069           reg.u = cmdfifo_r(f);
3070           svert.s1 = reg.f;
3071           reg.u = cmdfifo_r(f);
3072           svert.t1 = reg.f;
3073         }
3074         /* if we're starting a new strip, or if this is the first of a set of verts */
3075         /* for a series of individual triangles, initialize all the verts */
3076         if ((code == 1 && i == 0) || (code == 0 && i % 3 == 0)) {
3077           v->fbi.sverts = 1;
3078           v->fbi.svert[0] = v->fbi.svert[1] = v->fbi.svert[2] = svert;
3079         } else { /* otherwise, add this to the list */
3080           /* for strip mode, shuffle vertex 1 down to 0 */
3081           if (!(smode & 1))
3082             v->fbi.svert[0] = v->fbi.svert[1];
3083 
3084           /* copy 2 down to 1 and add our new one regardless */
3085           v->fbi.svert[1] = v->fbi.svert[2];
3086           v->fbi.svert[2] = svert;
3087 
3088           /* if we have enough, draw */
3089           if (++v->fbi.sverts >= 3) {
3090             BX_UNLOCK(cmdfifo_mutex);
3091             setup_and_draw_triangle();
3092             BX_LOCK(cmdfifo_mutex);
3093           }
3094         }
3095       }
3096       while (nwords--) cmdfifo_r(f);
3097       break;
3098     case 4:
3099       nwords = (command >> 29);
3100       mask = (command >> 15) & 0x3fff;
3101       regaddr = (command & 0x7ff8) >> 3;
3102       while (mask) {
3103         if (mask & 1) {
3104           data = cmdfifo_r(f);
3105           BX_UNLOCK(cmdfifo_mutex);
3106           Voodoo_reg_write(regaddr, data);
3107           BX_LOCK(cmdfifo_mutex);
3108         }
3109         regaddr++;
3110         mask >>= 1;
3111       }
3112       while (nwords--) cmdfifo_r(f);
3113       break;
3114     case 5:
3115       nwords = (command >> 3) & 0x7ffff;
3116       regaddr = (cmdfifo_r(f) & 0xffffff) >> 2;
3117       code = (command >> 30);
3118       disbytes = (command >> 22) & 0xff;
3119       if ((disbytes > 0) && (code != 0) && (code != 3)) {
3120         BX_ERROR(("CMDFIFO packet type 5: byte disable not supported yet (dest code = %d disbytes = 0x%02x)", code, disbytes));
3121       }
3122       switch (code) {
3123         case 0:
3124           regaddr <<= 2;
3125           w0 = 0;
3126           wn = nwords;
3127           if ((disbytes & 0xf0) > 0) {
3128             data = cmdfifo_r(f);
3129             if ((disbytes & 0xf0) == 0x30) {
3130               data >>= 16;
3131             } else if ((disbytes & 0xf0) == 0xc0) {
3132               data &= 0xffff;
3133             } else {
3134               BX_ERROR(("CMDFIFO packet type 5: byte disable not complete (dest code = 0)"));
3135             }
3136             BX_UNLOCK(cmdfifo_mutex);
3137             Banshee_LFB_write(regaddr, data, 2);
3138             BX_LOCK(cmdfifo_mutex);
3139             w0++;
3140             regaddr += 4;
3141           }
3142           for (i = w0; i < wn; i++) {
3143             data = cmdfifo_r(f);
3144             BX_UNLOCK(cmdfifo_mutex);
3145             Banshee_LFB_write(regaddr, data, 4);
3146             BX_LOCK(cmdfifo_mutex);
3147             regaddr += 4;
3148           }
3149           if ((disbytes & 0x0f) > 0) {
3150             BX_ERROR(("CMDFIFO packet type 5: byte disable not complete (dest code = 0)"));
3151           }
3152           break;
3153         case 2:
3154           for (i = 0; i < (int)nwords; i++) {
3155             data = cmdfifo_r(f);
3156             BX_UNLOCK(cmdfifo_mutex);
3157             lfb_w(regaddr, data, 0xffffffff);
3158             BX_LOCK(cmdfifo_mutex);
3159             regaddr++;
3160           }
3161           break;
3162         case 3:
3163           w0 = 0;
3164           wn = nwords;
3165           if ((disbytes & 0xf0) > 0) {
3166             data = cmdfifo_r(f);
3167             if ((disbytes & 0xf0) == 0x30) {
3168               data >>= 16;
3169             } else if ((disbytes & 0xf0) == 0xc0) {
3170               data &= 0xffff;
3171             } else if ((disbytes & 0xf0) == 0xe0) {
3172               data &= 0xff;
3173             } else {
3174               BX_ERROR(("CMDFIFO packet type 5: byte disable not complete (dest code = 3)"));
3175             }
3176             BX_UNLOCK(cmdfifo_mutex);
3177             texture_w(regaddr, data);
3178             BX_LOCK(cmdfifo_mutex);
3179             w0++;
3180             regaddr++;
3181           }
3182           for (i = w0; i < wn; i++) {
3183             data = cmdfifo_r(f);
3184             BX_UNLOCK(cmdfifo_mutex);
3185             texture_w(regaddr, data);
3186             BX_LOCK(cmdfifo_mutex);
3187             regaddr++;
3188           }
3189           if ((disbytes & 0x0f) > 0) {
3190             BX_ERROR(("CMDFIFO packet type 5: byte disable not complete (dest code = 3)"));
3191           }
3192           break;
3193         default:
3194           BX_ERROR(("CMDFIFO packet type 5: unsupported destination type %d", code));
3195       }
3196       break;
3197     case 6:
3198       // TODO: AGP to VRAM transfer
3199       cmdfifo_r(f);
3200       cmdfifo_r(f);
3201       cmdfifo_r(f);
3202       cmdfifo_r(f);
3203     default:
3204       BX_ERROR(("CMDFIFO: unsupported packet type %d", type));
3205   }
3206   f->depth_needed = cmdfifo_calc_depth_needed(f);
3207   if (f->depth < f->depth_needed) {
3208     f->cmd_ready = 0;
3209   }
3210 }
3211 
3212 
3213 #define FBI_TRICK 1
3214 #if FBI_TRICK
fifo_add_fbi(Bit32u type_offset,Bit32u data)3215 bool fifo_add_fbi(Bit32u type_offset, Bit32u data)
3216 {
3217   bool ret = 0;
3218 
3219   BX_LOCK(fifo_mutex);
3220   if (v->fbi.fifo.enabled) {
3221     fifo_add(&v->fbi.fifo, type_offset, data);
3222     ret = 1;
3223     if ((fifo_space(&v->fbi.fifo)/2) <= 0xe000)
3224       bx_set_sem(&fifo_wakeup);
3225   }
3226   BX_UNLOCK(fifo_mutex);
3227   return ret;
3228 }
3229 
fifo_add_common(Bit32u type_offset,Bit32u data)3230 bool fifo_add_common(Bit32u type_offset, Bit32u data)
3231 {
3232   bool ret = 0;
3233 
3234   BX_LOCK(fifo_mutex);
3235   if (v->fbi.fifo.enabled) {
3236     fifo_add(&v->fbi.fifo, type_offset, data);
3237     ret = 1;
3238     if ((fifo_space(&v->fbi.fifo)/2) <= 0xe000)
3239       bx_set_sem(&fifo_wakeup);
3240   } else
3241   if (v->pci.fifo.enabled) {
3242     fifo_add(&v->pci.fifo, type_offset, data);
3243     ret = 1;
3244     if ((fifo_space(&v->pci.fifo)/2) <= 16)
3245       bx_set_sem(&fifo_wakeup);
3246   }
3247   BX_UNLOCK(fifo_mutex);
3248   return ret;
3249 }
3250 #else
fifo_add_common(Bit32u type_offset,Bit32u data)3251 bool fifo_add_common(Bit32u type_offset, Bit32u data)
3252 {
3253   bool ret = 0;
3254 
3255   BX_LOCK(fifo_mutex);
3256   if (v->pci.fifo.enabled) {
3257     fifo_add(&v->pci.fifo, type_offset, data);
3258     ret = 1;
3259     if (v->fbi.fifo.enabled) {
3260       if ((fifo_space(&v->pci.fifo)/2) <= 16) {
3261         fifo_move(&v->pci.fifo, &v->fbi.fifo);
3262       }
3263       if ((fifo_space(&v->fbi.fifo)/2) <= 0xe000) {
3264         bx_set_sem(&fifo_wakeup);
3265       }
3266     } else {
3267       if ((fifo_space(&v->pci.fifo)/2) <= 16) {
3268         bx_set_sem(&fifo_wakeup);
3269       }
3270     }
3271   }
3272   BX_UNLOCK(fifo_mutex);
3273   return ret;
3274 }
3275 #endif
3276 
3277 
register_w_common(Bit32u offset,Bit32u data)3278 void register_w_common(Bit32u offset, Bit32u data)
3279 {
3280   Bit32u regnum  = (offset) & 0xff;
3281   Bit32u chips   = (offset>>8) & 0xf;
3282 
3283   /* Voodoo 2 CMDFIFO handling */
3284   if ((v->type == VOODOO_2) && v->fbi.cmdfifo[0].enabled) {
3285     if ((offset & 0x80000) > 0) {
3286       if (!FBIINIT7_CMDFIFO_MEMORY_STORE(v->reg[fbiInit7].u)) {
3287         BX_ERROR(("CMDFIFO-to-FIFO mode not supported yet"));
3288       } else {
3289         Bit32u fbi_offset = (v->fbi.cmdfifo[0].base + ((offset & 0xffff) << 2)) & v->fbi.mask;
3290         if (LOG_CMDFIFO) BX_DEBUG(("CMDFIFO write: FBI offset=0x%08x, data=0x%08x", fbi_offset, data));
3291         cmdfifo_w(&v->fbi.cmdfifo[0], fbi_offset, data);
3292       }
3293       return;
3294     } else {
3295       if (v->regaccess[regnum] & REGISTER_WRITETHRU) {
3296         BX_DEBUG(("Writing to register %s in CMDFIFO mode", v->regnames[regnum]));
3297       } else if (regnum == swapbufferCMD) {
3298         v->fbi.swaps_pending++;
3299         return;
3300       } else {
3301         BX_DEBUG(("Invalid attempt to write %s in CMDFIFO mode", v->regnames[regnum]));
3302         return;
3303       }
3304     }
3305   }
3306 
3307   if (chips == 0)
3308     chips = 0xf;
3309 
3310   /* the first 64 registers can be aliased differently */
3311   if ((offset & 0x800c0) == 0x80000 && v->alt_regmap)
3312     regnum = register_alias_map[offset & 0x3f];
3313   else
3314     regnum = offset & 0xff;
3315 
3316   /* first make sure this register is writable */
3317   if (!(v->regaccess[regnum] & REGISTER_WRITE)) {
3318     BX_DEBUG(("Invalid attempt to write %s", v->regnames[regnum]));
3319     return;
3320   }
3321 
3322   BX_DEBUG(("write chip 0x%x reg 0x%x value 0x%08x(%s)", chips, regnum<<2, data, v->regnames[regnum]));
3323 
3324   switch (regnum) {
3325     /* external DAC access -- Voodoo/Voodoo2 only */
3326     case dacData:
3327       if (v->type <= VOODOO_2 /*&& (chips & 1)*/)
3328       {
3329         poly_wait(v->poly, v->regnames[regnum]);
3330         if (!(data & 0x800))
3331           dacdata_w(&v->dac, (data >> 8) & 7, data & 0xff);
3332         else
3333           dacdata_r(&v->dac, (data >> 8) & 7);
3334       }
3335       break;
3336 
3337     /* vertical sync rate -- Voodoo/Voodoo2 only */
3338     case hSync:
3339     case vSync:
3340     case backPorch:
3341     case videoDimensions:
3342       if (v->type <= VOODOO_2 && (chips & 1))
3343       {
3344         poly_wait(v->poly, v->regnames[regnum]);
3345         v->reg[regnum].u = data;
3346         if (v->reg[hSync].u != 0 && v->reg[vSync].u != 0 && v->reg[videoDimensions].u != 0)
3347         {
3348           int htotal = ((v->reg[hSync].u >> 16) & 0x3ff) + 1 + (v->reg[hSync].u & 0xff) + 1;
3349           int vtotal = ((v->reg[vSync].u >> 16) & 0xfff) + (v->reg[vSync].u & 0xfff);
3350           int hvis = v->reg[videoDimensions].u & 0x3ff;
3351           int vvis = (v->reg[videoDimensions].u >> 16) & 0x3ff;
3352           int hbp = (v->reg[backPorch].u & 0xff) + 2;
3353           int vbp = (v->reg[backPorch].u >> 16) & 0xff;
3354           rectangle visarea;
3355 
3356           /* create a new visarea */
3357           visarea.min_x = hbp;
3358           visarea.max_x = hbp + hvis - 1;
3359           visarea.min_y = vbp;
3360           visarea.max_y = vbp + vvis - 1;
3361 
3362           /* keep within bounds */
3363           visarea.max_x = MIN(visarea.max_x, htotal - 1);
3364           visarea.max_y = MIN(visarea.max_y, vtotal - 1);
3365 
3366           BX_DEBUG(("hSync=%08X  vSync=%08X  backPorch=%08X  videoDimensions=%08X",
3367             v->reg[hSync].u, v->reg[vSync].u, v->reg[backPorch].u, v->reg[videoDimensions].u));
3368           BX_DEBUG(("Horiz: %d-%d (%d total)  Vert: %d-%d (%d total) -- ", visarea.min_x, visarea.max_x, htotal, visarea.min_y, visarea.max_y, vtotal));
3369 
3370           /* configure the new framebuffer info */
3371           v->fbi.width = hvis + 1;
3372           v->fbi.height = vvis;
3373           v->fbi.xoffs = hbp;
3374           v->fbi.yoffs = vbp;
3375           v->fbi.vsyncscan = (v->reg[vSync].u >> 16) & 0xfff;
3376 
3377           /* if changing dimensions, update video memory layout */
3378           if (regnum == videoDimensions)
3379             recompute_video_memory(v);
3380 
3381           Voodoo_UpdateScreenStart();
3382         }
3383       }
3384       break;
3385 
3386     /* fbiInit0 can only be written if initEnable says we can -- Voodoo/Voodoo2 only */
3387     case fbiInit0:
3388       poly_wait(v->poly, v->regnames[regnum]);
3389       if (v->type <= VOODOO_2 && (chips & 1) && INITEN_ENABLE_HW_INIT(v->pci.init_enable)) {
3390         Voodoo_Output_Enable(data & 1);
3391         if (v->fbi.fifo.enabled != FBIINIT0_ENABLE_MEMORY_FIFO(data)) {
3392           v->fbi.fifo.enabled = FBIINIT0_ENABLE_MEMORY_FIFO(data);
3393           BX_INFO(("memory FIFO now %sabled",
3394                    v->fbi.fifo.enabled ? "en" : "dis"));
3395         }
3396         v->reg[fbiInit0].u = data;
3397         if (FBIINIT0_GRAPHICS_RESET(data))
3398           soft_reset(v);
3399         if (FBIINIT0_FIFO_RESET(data))
3400           fifo_reset(&v->pci.fifo);
3401         recompute_video_memory(v);
3402       }
3403       break;
3404 
3405     /* fbiInitX can only be written if initEnable says we can -- Voodoo/Voodoo2 only */
3406     /* most of these affect memory layout, so always recompute that when done */
3407     case fbiInit1:
3408     case fbiInit2:
3409     case fbiInit4:
3410     case fbiInit5:
3411     case fbiInit6:
3412       poly_wait(v->poly, v->regnames[regnum]);
3413 
3414       if (v->type <= VOODOO_2 && (chips & 1) && INITEN_ENABLE_HW_INIT(v->pci.init_enable))
3415       {
3416         v->reg[regnum].u = data;
3417         recompute_video_memory(v);
3418         v->fbi.video_changed = 1;
3419         v->fbi.clut_dirty = 1;
3420       }
3421       break;
3422 
3423     case fbiInit3:
3424       poly_wait(v->poly, v->regnames[regnum]);
3425       if (v->type <= VOODOO_2 && (chips & 1) && INITEN_ENABLE_HW_INIT(v->pci.init_enable))
3426       {
3427         v->reg[regnum].u = data;
3428         v->alt_regmap = FBIINIT3_TRI_REGISTER_REMAP(data);
3429         v->fbi.yorigin = FBIINIT3_YORIGIN_SUBTRACT(v->reg[fbiInit3].u);
3430         recompute_video_memory(v);
3431       }
3432       break;
3433 
3434     case fbiInit7:
3435 /*  case swapPending: -- Banshee */
3436       poly_wait(v->poly, v->regnames[regnum]);
3437 
3438       if (v->type == VOODOO_2 && (chips & 1) && INITEN_ENABLE_HW_INIT(v->pci.init_enable))
3439       {
3440         v->fbi.cmdfifo[0].count_holes = !FBIINIT7_DISABLE_CMDFIFO_HOLES(data);
3441         if (v->fbi.cmdfifo[0].enabled != FBIINIT7_CMDFIFO_ENABLE(data)) {
3442           v->fbi.cmdfifo[0].enabled = FBIINIT7_CMDFIFO_ENABLE(data);
3443           BX_INFO(("CMDFIFO now %sabled", v->fbi.cmdfifo[0].enabled ? "en" : "dis"));
3444         }
3445         v->reg[regnum].u = data;
3446       } else if (v->type >= VOODOO_BANSHEE) {
3447         v->fbi.swaps_pending++;
3448       }
3449       break;
3450 
3451     case cmdFifoBaseAddr:
3452       BX_LOCK(cmdfifo_mutex);
3453       v->fbi.cmdfifo[0].base = (data & 0x3ff) << 12;
3454       v->fbi.cmdfifo[0].end = (((data >> 16) & 0x3ff) + 1) << 12;
3455       BX_UNLOCK(cmdfifo_mutex);
3456       break;
3457 
3458     case cmdFifoRdPtr:
3459       BX_LOCK(cmdfifo_mutex);
3460       v->fbi.cmdfifo[0].rdptr = data;
3461       BX_UNLOCK(cmdfifo_mutex);
3462       break;
3463 
3464     case cmdFifoAMin:
3465 /*  case colBufferAddr: -- Banshee */
3466       if (v->type == VOODOO_2 && (chips & 1)) {
3467         BX_LOCK(cmdfifo_mutex);
3468         v->fbi.cmdfifo[0].amin = data;
3469         BX_UNLOCK(cmdfifo_mutex);
3470       } else if (v->type >= VOODOO_BANSHEE && (chips & 1))
3471         v->fbi.rgboffs[1] = data & v->fbi.mask & ~0x0f;
3472       break;
3473 
3474     case cmdFifoAMax:
3475 /*  case colBufferStride: -- Banshee */
3476       if (v->type == VOODOO_2 && (chips & 1)) {
3477         BX_LOCK(cmdfifo_mutex);
3478         v->fbi.cmdfifo[0].amax = data;
3479         BX_UNLOCK(cmdfifo_mutex);
3480       } else if (v->type >= VOODOO_BANSHEE && (chips & 1)) {
3481         if (data & 0x8000)
3482           v->fbi.rowpixels = (data & 0x7f) << 6;
3483         else
3484           v->fbi.rowpixels = (data & 0x3fff) >> 1;
3485       }
3486       break;
3487 
3488     case cmdFifoDepth:
3489 /*  case auxBufferAddr: -- Banshee */
3490       if (v->type == VOODOO_2 && (chips & 1)) {
3491         BX_LOCK(cmdfifo_mutex);
3492         v->fbi.cmdfifo[0].depth = data & 0xffff;
3493         v->fbi.cmdfifo[0].depth_needed = BX_MAX_BIT32U;
3494         BX_UNLOCK(cmdfifo_mutex);
3495       } else if (v->type >= VOODOO_BANSHEE && (chips & 1)) {
3496         v->fbi.auxoffs = data & v->fbi.mask & ~0x0f;
3497       }
3498       break;
3499 
3500     case cmdFifoHoles:
3501 /*  case auxBufferStride: -- Banshee */
3502       if (v->type == VOODOO_2 && (chips & 1)) {
3503         BX_LOCK(cmdfifo_mutex);
3504         v->fbi.cmdfifo[0].holes = data;
3505         BX_UNLOCK(cmdfifo_mutex);
3506       } else if (v->type >= VOODOO_BANSHEE && (chips & 1)) {
3507         Bit32u rowpixels;
3508 
3509         if (data & 0x8000)
3510           rowpixels = (data & 0x7f) << 6;
3511         else
3512           rowpixels = (data & 0x3fff) >> 1;
3513         if (v->fbi.rowpixels != rowpixels)
3514           BX_PANIC(("aux buffer stride differs from color buffer stride"));
3515       }
3516       break;
3517 
3518     case intrCtrl:
3519       BX_ERROR(("Writing to register %s not supported yet", v->regnames[regnum]));
3520       break;
3521 
3522     default:
3523       if (fifo_add_common(FIFO_WR_REG | offset, data)) {
3524         BX_LOCK(fifo_mutex);
3525         if ((regnum == triangleCMD) || (regnum == ftriangleCMD) || (regnum == nopCMD) ||
3526             (regnum == fastfillCMD) || (regnum == swapbufferCMD)) {
3527           v->pci.op_pending++;
3528           if (regnum == swapbufferCMD) {
3529             v->fbi.swaps_pending++;
3530           }
3531           bx_set_sem(&fifo_wakeup);
3532         }
3533         BX_UNLOCK(fifo_mutex);
3534       } else {
3535         register_w(offset, data, 0);
3536       }
3537   }
3538 }
3539 
3540 
register_r(Bit32u offset)3541 Bit32u register_r(Bit32u offset)
3542 {
3543   Bit32u regnum  = (offset) & 0xff;
3544   Bit32u chips   = (offset>>8) & 0xf;
3545 
3546   if (!((voodoo_last_msg == regnum) && (regnum == status))) //show status reg only once
3547     BX_DEBUG(("read chip 0x%x reg 0x%x (%s)", chips, regnum<<2, v->regnames[regnum]));
3548   voodoo_last_msg = regnum;
3549 
3550   /* first make sure this register is readable */
3551   if (!(v->regaccess[regnum] & REGISTER_READ)) {
3552     BX_DEBUG(("Invalid attempt to read %s", v->regnames[regnum]));
3553     return 0;
3554   }
3555   if ((v->type == VOODOO_2) && v->fbi.cmdfifo[0].enabled && ((offset & 0x80000) > 0)) {
3556     BX_DEBUG(("Invalid attempt to read from CMDFIFO"));
3557     return 0;
3558   }
3559 
3560   Bit32u result;
3561 
3562   /* default result is the FBI register value */
3563   result = v->reg[regnum].u;
3564 
3565   /* some registers are dynamic; compute them */
3566   switch (regnum) {
3567     case status:
3568 
3569       /* start with a blank slate */
3570       result = 0;
3571 
3572       /* bits 5:0 are the PCI FIFO free space */
3573       if (fifo_empty_locked(&v->pci.fifo))
3574         result |= 0x3f << 0;
3575       else
3576       {
3577         BX_LOCK(fifo_mutex);
3578         int temp = fifo_space(&v->pci.fifo)/2;
3579         BX_UNLOCK(fifo_mutex);
3580         if (temp > 0x3f)
3581           temp = 0x3f;
3582         result |= temp << 0;
3583       }
3584 
3585       /* bit 6 is the vertical retrace */
3586       result |= (Voodoo_get_retrace(0) > 0) << 6;
3587 
3588       /* bit 7 is FBI graphics engine busy */
3589       if (v->pci.op_pending)
3590         result |= 1 << 7;
3591 
3592       /* bit 8 is TREX busy */
3593       if (v->pci.op_pending)
3594         result |= 1 << 8;
3595 
3596       /* bit 9 is overall busy */
3597       if (v->pci.op_pending)
3598         result |= 1 << 9;
3599 
3600       if (v->type == VOODOO_2) {
3601         if (v->fbi.cmdfifo[0].enabled && v->fbi.cmdfifo[0].depth > 0)
3602           result |= 7 << 7;
3603       }
3604       /* Banshee is different starting here */
3605       if (v->type < VOODOO_BANSHEE)
3606       {
3607         /* bits 11:10 specifies which buffer is visible */
3608         result |= v->fbi.frontbuf << 10;
3609 
3610         /* bits 27:12 indicate memory FIFO freespace */
3611         if (!v->fbi.fifo.enabled || fifo_empty_locked(&v->fbi.fifo))
3612           result |= 0xffff << 12;
3613         else
3614         {
3615           BX_LOCK(fifo_mutex);
3616           int temp = fifo_space(&v->fbi.fifo)/2;
3617           BX_UNLOCK(fifo_mutex);
3618           if (temp > 0xffff)
3619             temp = 0xffff;
3620           result |= temp << 12;
3621         }
3622       }
3623       else
3624       {
3625         /* bit 10 is 2D busy */
3626         if (v->banshee.blt.busy)
3627           result |= 3 << 9;
3628 
3629         /* bit 11 is cmd FIFO 0 busy */
3630         if (v->fbi.cmdfifo[0].enabled && v->fbi.cmdfifo[0].depth > 0)
3631           result |= 5 << 9;
3632 
3633         /* bit 12 is cmd FIFO 1 busy */
3634         if (v->fbi.cmdfifo[1].enabled && v->fbi.cmdfifo[1].depth > 0)
3635           result |= 9 << 9;
3636       }
3637 
3638       /* bits 30:28 are the number of pending swaps */
3639       if (v->fbi.swaps_pending > 7)
3640         result |= 7 << 28;
3641       else
3642         result |= v->fbi.swaps_pending << 28;
3643 
3644       /* bit 31 is not used */
3645 
3646       /* eat some cycles since people like polling here */
3647       cpu_eat_cycles(v->cpu, 1000);
3648       break;
3649 
3650     /* bit 2 of the initEnable register maps this to dacRead */
3651     case fbiInit2:
3652       if (INITEN_REMAP_INIT_TO_DAC(v->pci.init_enable))
3653         result = v->dac.read_result;
3654       break;
3655 
3656     case vRetrace:
3657       result = Voodoo_get_retrace(0) & 0x1fff;
3658       break;
3659 
3660     case hvRetrace:
3661       result = Voodoo_get_retrace(1);
3662       break;
3663 
3664     case cmdFifoBaseAddr:
3665       result = (v->fbi.cmdfifo[0].base >> 12) | ((v->fbi.cmdfifo[0].end >> 12) << 16);
3666       break;
3667 
3668     case cmdFifoRdPtr:
3669       result = v->fbi.cmdfifo[0].rdptr;
3670       break;
3671 
3672     case cmdFifoDepth:
3673       result = v->fbi.cmdfifo[0].depth;
3674       break;
3675 
3676     case cmdFifoAMin:
3677       result = v->fbi.cmdfifo[0].amin;
3678       break;
3679 
3680     case cmdFifoAMax:
3681       result = v->fbi.cmdfifo[0].amax;
3682       break;
3683   }
3684 
3685   return result;
3686 }
3687 
lfb_r(Bit32u offset)3688 Bit32u lfb_r(Bit32u offset)
3689 {
3690   Bit16u *buffer;
3691   Bit32u bufmax;
3692   Bit32u bufoffs;
3693   Bit32u data;
3694   bool forcefront=false;
3695   int x, y, scry;
3696   Bit32u destbuf;
3697 
3698   BX_DEBUG(("read LFB offset 0x%x", offset));
3699 
3700   /* compute X,Y */
3701   x = (offset << 1) & 0x3fe;
3702   y = (offset >> 9) & 0x7ff;
3703 
3704   /* select the target buffer */
3705   destbuf = (v->type >= VOODOO_BANSHEE) ? (!forcefront) : LFBMODE_READ_BUFFER_SELECT(v->reg[lfbMode].u);
3706   switch (destbuf)
3707   {
3708     case 0:     /* front buffer */
3709       buffer = (Bit16u *)(v->fbi.ram + v->fbi.rgboffs[v->fbi.frontbuf]);
3710       bufmax = (v->fbi.mask + 1 - v->fbi.rgboffs[v->fbi.frontbuf]) / 2;
3711       break;
3712 
3713     case 1:     /* back buffer */
3714       buffer = (Bit16u *)(v->fbi.ram + v->fbi.rgboffs[v->fbi.backbuf]);
3715       bufmax = (v->fbi.mask + 1 - v->fbi.rgboffs[v->fbi.backbuf]) / 2;
3716       break;
3717 
3718     case 2:     /* aux buffer */
3719       if (v->fbi.auxoffs == (Bit32u)~0)
3720         return 0xffffffff;
3721       buffer = (Bit16u *)(v->fbi.ram + v->fbi.auxoffs);
3722       bufmax = (v->fbi.mask + 1 - v->fbi.auxoffs) / 2;
3723       break;
3724 
3725     default:    /* reserved */
3726       return 0xffffffff;
3727   }
3728 
3729   /* determine the screen Y */
3730   scry = y;
3731   if (LFBMODE_Y_ORIGIN(v->reg[lfbMode].u))
3732     scry = (v->fbi.yorigin - y) & 0x3ff;
3733 
3734   /* advance pointers to the proper row */
3735   bufoffs = scry * v->fbi.rowpixels + x;
3736   if (bufoffs >= bufmax)
3737     return 0xffffffff;
3738 
3739   /* wait for any outstanding work to finish */
3740   poly_wait(v->poly, "LFB read");
3741 
3742   /* compute the data */
3743   data = buffer[bufoffs + 0] | (buffer[bufoffs + 1] << 16);
3744 
3745   /* word swapping */
3746   if (LFBMODE_WORD_SWAP_READS(v->reg[lfbMode].u))
3747     data = (data << 16) | (data >> 16);
3748 
3749   /* byte swizzling */
3750   if (LFBMODE_BYTE_SWIZZLE_READS(v->reg[lfbMode].u))
3751     data = bx_bswap32(data);
3752 
3753   if (LOG_LFB) BX_DEBUG(("VOODOO.%d.LFB:read (%d,%d) = %08X", v->index, x, y, data));
3754   return data;
3755 }
3756 
voodoo_w(Bit32u offset,Bit32u data,Bit32u mask)3757 void voodoo_w(Bit32u offset, Bit32u data, Bit32u mask)
3758 {
3759   Bit32u type;
3760 
3761   if ((offset & (0xc00000/4)) == 0)
3762     register_w_common(offset, data);
3763   else if (offset & (0x800000/4)) {
3764     if (!fifo_add_common(FIFO_WR_TEX | offset, data)) {
3765       texture_w(offset, data);
3766     }
3767   } else {
3768     if (mask == 0xffffffff) {
3769       type = FIFO_WR_FBI_32;
3770     } else if (mask & 1) {
3771       type = FIFO_WR_FBI_16L;
3772     } else {
3773       type = FIFO_WR_FBI_16H;
3774     }
3775 #if FBI_TRICK
3776     if (!fifo_add_fbi(type | offset, data)) {
3777 #else
3778     if (!fifo_add_common(type | offset, data)) {
3779 #endif
3780       lfb_w(offset, data, mask);
3781     }
3782   }
3783 }
3784 
3785 Bit32u voodoo_r(Bit32u offset)
3786 {
3787   if (!(offset & (0xc00000/4)))
3788     return register_r(offset);
3789   else
3790     return lfb_r(offset);
3791 
3792   return 0xffffffff;
3793 }
3794 
3795 void init_tmu(voodoo_state *v, tmu_state *t, voodoo_reg *reg, void *memory, int tmem)
3796 {
3797   /* allocate texture RAM */
3798   t->ram = (Bit8u *)memory;
3799   t->mask = tmem - 1;
3800   t->reg = reg;
3801   t->regdirty = 1;
3802   t->bilinear_mask = (v->type >= VOODOO_2) ? 0xff : 0xf0;
3803 
3804   /* mark the NCC tables dirty and configure their registers */
3805   t->ncc[0].dirty = t->ncc[1].dirty = 1;
3806   t->ncc[0].reg = &t->reg[nccTable+0];
3807   t->ncc[1].reg = &t->reg[nccTable+12];
3808 
3809   /* create pointers to all the tables */
3810   t->texel[0] = v->tmushare.rgb332;
3811   t->texel[1] = t->ncc[0].texel;
3812   t->texel[2] = v->tmushare.alpha8;
3813   t->texel[3] = v->tmushare.int8;
3814   t->texel[4] = v->tmushare.ai44;
3815   t->texel[5] = t->palette;
3816   t->texel[6] = (v->type >= VOODOO_2) ? t->palettea : NULL;
3817   t->texel[7] = NULL;
3818   t->texel[8] = v->tmushare.rgb332;
3819   t->texel[9] = t->ncc[0].texel;
3820   t->texel[10] = v->tmushare.rgb565;
3821   t->texel[11] = v->tmushare.argb1555;
3822   t->texel[12] = v->tmushare.argb4444;
3823   t->texel[13] = v->tmushare.int8;
3824   t->texel[14] = t->palette;
3825   t->texel[15] = NULL;
3826   t->lookup = t->texel[0];
3827 
3828   /* attach the palette to NCC table 0 */
3829   t->ncc[0].palette = t->palette;
3830   if (v->type >= VOODOO_2)
3831     t->ncc[0].palettea = t->palettea;
3832 
3833   /* set up texture address calculations */
3834   if (v->type <= VOODOO_2)
3835   {
3836     t->texaddr_mask = 0x0fffff;
3837     t->texaddr_shift = 3;
3838   } else {
3839     t->texaddr_mask = 0xfffff0;
3840     t->texaddr_shift = 0;
3841   }
3842 }
3843 
3844 void init_tmu_shared(tmu_shared_state *s)
3845 {
3846   int val;
3847 
3848   /* build static 8-bit texel tables */
3849   for (val = 0; val < 256; val++) {
3850     int r, g, b, a;
3851 
3852     /* 8-bit RGB (3-3-2) */
3853     EXTRACT_332_TO_888(val, r, g, b);
3854     s->rgb332[val] = MAKE_ARGB(0xff, r, g, b);
3855 
3856     /* 8-bit alpha */
3857     s->alpha8[val] = MAKE_ARGB(val, val, val, val);
3858 
3859     /* 8-bit intensity */
3860     s->int8[val] = MAKE_ARGB(0xff, val, val, val);
3861 
3862     /* 8-bit alpha, intensity */
3863     a = ((val >> 0) & 0xf0) | ((val >> 4) & 0x0f);
3864     r = ((val << 4) & 0xf0) | ((val << 0) & 0x0f);
3865     s->ai44[val] = MAKE_ARGB(a, r, r, r);
3866   }
3867 
3868   /* build static 16-bit texel tables */
3869   for (val = 0; val < 65536; val++) {
3870     int r, g, b, a;
3871 
3872     /* table 10 = 16-bit RGB (5-6-5) */
3873     EXTRACT_565_TO_888(val, r, g, b);
3874     s->rgb565[val] = MAKE_ARGB(0xff, r, g, b);
3875 
3876     /* table 11 = 16 ARGB (1-5-5-5) */
3877     EXTRACT_1555_TO_8888(val, a, r, g, b);
3878     s->argb1555[val] = MAKE_ARGB(a, r, g, b);
3879 
3880     /* table 12 = 16-bit ARGB (4-4-4-4) */
3881     EXTRACT_4444_TO_8888(val, a, r, g, b);
3882     s->argb4444[val] = MAKE_ARGB(a, r, g, b);
3883   }
3884 }
3885 
/*
 * Register the handlers for one raster operation code.
 *
 * num:   ROP code, used as index into the handler and flag tables.
 * name:  handler suffix; bitblt_rop_fwd_<name> is stored in row 0 and
 *        bitblt_rop_bkwd_<name> in row 1 of the handler table.
 * flags: value stored in rop_flags[num].
 *
 * Expands to a single statement without a trailing semicolon, so the caller
 * supplies the ';' and the macro is safe inside unbraced if/else.
 */
#define SETUP_BITBLT(num, name, flags) \
  do { \
    v->banshee.blt.rop_handler[0][(num)] = bitblt_rop_fwd_##name; \
    v->banshee.blt.rop_handler[1][(num)] = bitblt_rop_bkwd_##name; \
    v->banshee.blt.rop_flags[(num)] = (flags); \
  } while (0)
3892 
3893 void banshee_bitblt_init()
3894 {
3895   for (int i = 0; i < 0x100; i++) {
3896     SETUP_BITBLT(i, nop, BX_ROP_PATTERN);
3897   }
3898   SETUP_BITBLT(0x00, 0, 0);                              // 0
3899   SETUP_BITBLT(0x05, notsrc_and_notdst, BX_ROP_PATTERN); // PSan
3900   SETUP_BITBLT(0x0a, notsrc_and_dst, BX_ROP_PATTERN);    // DPna
3901   SETUP_BITBLT(0x0f, notsrc, BX_ROP_PATTERN);            // Pn
3902   SETUP_BITBLT(0x11, notsrc_and_notdst, 0);              // DSon
3903   SETUP_BITBLT(0x22, notsrc_and_dst, 0);                 // DSna
3904   SETUP_BITBLT(0x33, notsrc, 0);                         // Sn
3905   SETUP_BITBLT(0x44, src_and_notdst, 0);                 // SDna
3906   SETUP_BITBLT(0x50, src_and_notdst, 0);                 // PDna
3907   SETUP_BITBLT(0x55, notdst, 0);                         // Dn
3908   SETUP_BITBLT(0x5a, src_xor_dst, BX_ROP_PATTERN);       // DPx
3909   SETUP_BITBLT(0x5f, notsrc_or_notdst, BX_ROP_PATTERN);  // DSan
3910   SETUP_BITBLT(0x66, src_xor_dst, 0);                    // DSx
3911   SETUP_BITBLT(0x77, notsrc_or_notdst, 0);               // DSan
3912   SETUP_BITBLT(0x88, src_and_dst, 0);                    // DSa
3913   SETUP_BITBLT(0x99, src_notxor_dst, 0);                 // DSxn
3914   SETUP_BITBLT(0xaa, nop, 0);                            // D
3915   SETUP_BITBLT(0xad, src_and_dst, BX_ROP_PATTERN);       // DPa
3916   SETUP_BITBLT(0xaf, notsrc_or_dst, BX_ROP_PATTERN);     // DPno
3917   SETUP_BITBLT(0xbb, notsrc_or_dst, 0);                  // DSno
3918   SETUP_BITBLT(0xcc, src, 0);                            // S
3919   SETUP_BITBLT(0xdd, src_and_notdst, 0);                 // SDna
3920   SETUP_BITBLT(0xee, src_or_dst, 0);                     // DSo
3921   SETUP_BITBLT(0xf0, src, BX_ROP_PATTERN);               // P
3922   SETUP_BITBLT(0xf5, src_or_notdst, BX_ROP_PATTERN);     // PDno
3923   SETUP_BITBLT(0xfa, src_or_dst, BX_ROP_PATTERN);        // DPo
3924   SETUP_BITBLT(0xff, 1, 0);                              // 1
3925 }
3926 
3927 void voodoo_init(Bit8u _type)
3928 {
3929   int pen;
3930   int val;
3931 
3932   v->reg[lfbMode].u = 0;
3933   v->reg[fbiInit0].u = (1 << 4) | (0x10 << 6);
3934   v->reg[fbiInit1].u = (1 << 1) | (1 << 8) | (1 << 12) | (2 << 20);
3935   v->reg[fbiInit2].u = (1 << 6) | (0x100 << 23);
3936   v->reg[fbiInit3].u = (2 << 13) | (0xf << 17);
3937   v->reg[fbiInit4].u = (1 << 0);
3938   v->type = _type;
3939   v->chipmask = 0x01 | 0x02 | 0x04 | 0x08;
3940   switch (v->type) {
3941     case VOODOO_1:
3942       v->regaccess = voodoo_register_access;
3943       v->regnames = voodoo_reg_name;
3944       v->alt_regmap = 0;
3945       v->fbi.lfb_stride = 10;
3946       break;
3947 
3948     case VOODOO_2:
3949       v->regaccess = voodoo2_register_access;
3950       v->regnames = voodoo_reg_name;
3951       v->alt_regmap = 0;
3952       v->fbi.lfb_stride = 10;
3953       break;
3954 
3955     case VOODOO_BANSHEE:
3956       v->regaccess = banshee_register_access;
3957       v->regnames = banshee_reg_name;
3958       v->alt_regmap = 1;
3959       v->fbi.lfb_stride = 11;
3960       v->chipmask = 0x01 | 0x02;
3961       break;
3962 
3963     case VOODOO_3:
3964       v->regaccess = banshee_register_access;
3965       v->regnames = banshee_reg_name;
3966       v->alt_regmap = 1;
3967       v->fbi.lfb_stride = 11;
3968       v->chipmask = 0x01 | 0x02 | 0x04;
3969       break;
3970   }
3971   memset(v->dac.reg, 0, sizeof(v->dac.reg));
3972   v->dac.read_result = 0;
3973   v->dac.clk0_m = 0x37;
3974   v->dac.clk0_n = 0x02;
3975   v->dac.clk0_p = 0x03;
3976 
3977   /* set up the PCI FIFO */
3978   v->pci.fifo.base = v->pci.fifo_mem;
3979   v->pci.fifo.size = 64*2;
3980   v->pci.fifo.in = v->pci.fifo.out = 0;
3981 
3982   /* create a table of precomputed 1/n and log2(n) values */
3983   /* n ranges from 1.0000 to 2.0000 */
3984   for (val = 0; val <= (1 << RECIPLOG_LOOKUP_BITS); val++) {
3985     Bit32u value = (1 << RECIPLOG_LOOKUP_BITS) + val;
3986     voodoo_reciplog[val*2 + 0] = (1 << (RECIPLOG_LOOKUP_PREC + RECIPLOG_LOOKUP_BITS)) / value;
3987     voodoo_reciplog[val*2 + 1] = (Bit32u)(LOGB2((double)value / (double)(1 << RECIPLOG_LOOKUP_BITS)) * (double)(1 << RECIPLOG_LOOKUP_PREC));
3988   }
3989 
3990   /* create dithering tables */
3991   for (int val = 0; val < 256*16*2; val++) {
3992     int g = (val >> 0) & 1;
3993     int x = (val >> 1) & 3;
3994     int color = (val >> 3) & 0xff;
3995     int y = (val >> 11) & 3;
3996 
3997     if (!g) {
3998       dither4_lookup[val] = DITHER_RB(color, dither_matrix_4x4[y * 4 + x]) >> 3;
3999       dither2_lookup[val] = DITHER_RB(color, dither_matrix_2x2[y * 4 + x]) >> 3;
4000     } else {
4001       dither4_lookup[val] = DITHER_G(color, dither_matrix_4x4[y * 4 + x]) >> 2;
4002       dither2_lookup[val] = DITHER_G(color, dither_matrix_2x2[y * 4 + x]) >> 2;
4003     }
4004   }
4005 
4006   /* init the pens */
4007   v->fbi.clut_dirty = 1;
4008   if (v->type <= VOODOO_2) {
4009     for (pen = 0; pen < 32; pen++)
4010       v->fbi.clut[pen] = MAKE_ARGB(pen, pal5bit(pen), pal5bit(pen), pal5bit(pen));
4011     v->fbi.clut[32] = MAKE_ARGB(32,0xff,0xff,0xff);
4012   } else {
4013     for (pen = 0; pen < 512; pen++)
4014       v->fbi.clut[pen] = MAKE_RGB(pen,pen,pen);
4015   }
4016   if (v->type < VOODOO_BANSHEE) {
4017     v->fbi.ram = (Bit8u*)malloc(4<<20);
4018     v->fbi.mask = (4<<20)-1;
4019   } else {
4020     v->fbi.ram = (Bit8u*)malloc(16<<20);
4021     v->fbi.mask = (16<<20)-1;
4022   }
4023   v->fbi.frontbuf = 0;
4024   v->fbi.backbuf = 1;
4025   v->fbi.width = 640;
4026   v->fbi.height = 480;
4027   v->fbi.rowpixels = v->fbi.width;
4028   v->fbi.fogdelta_mask = (v->type < VOODOO_2) ? 0xff : 0xfc;
4029 
4030   /* build shared TMU tables */
4031   init_tmu_shared(&v->tmushare);
4032 
4033   init_tmu(v, &v->tmu[0], &v->reg[0x100], 0, 4 << 20);
4034   init_tmu(v, &v->tmu[1], &v->reg[0x200], 0, 4 << 20);
4035 
4036   v->tmu[0].reg = &v->reg[0x100];
4037   v->tmu[1].reg = &v->reg[0x200];
4038 
4039   if (v->type < VOODOO_BANSHEE) {
4040     v->tmu[0].ram = (Bit8u*)malloc(4<<20);
4041     v->tmu[1].ram = (Bit8u*)malloc(4<<20);
4042     v->tmu[0].mask = (4<<20)-1;
4043     v->tmu[1].mask = (4<<20)-1;
4044   } else {
4045     v->tmu[0].ram = v->fbi.ram;
4046     v->tmu[1].ram = v->fbi.ram;
4047     v->tmu[0].mask = (16<<20)-1;
4048     v->tmu[1].mask = (16<<20)-1;
4049   }
4050 
4051   v->tmu_config = 64;
4052 
4053   v->thread_stats = new stats_block[16];
4054 
4055   soft_reset(v);
4056 }
4057 
4058 void update_pens(void)
4059 {
4060   int x, y;
4061 
4062   /* if the CLUT is dirty, recompute the pens array */
4063   if (v->fbi.clut_dirty) {
4064     Bit8u rtable[32], gtable[64], btable[32];
4065 
4066     /* Voodoo/Voodoo-2 have an internal 33-entry CLUT */
4067     if (v->type <= VOODOO_2) {
4068       /* kludge: some of the Midway games write 0 to the last entry when they obviously mean FF */
4069       if ((v->fbi.clut[32] & 0xffffff) == 0 && (v->fbi.clut[31] & 0xffffff) != 0)
4070         v->fbi.clut[32] = 0x20ffffff;
4071 
4072       /* compute the R/G/B pens first */
4073       for (x = 0; x < 32; x++) {
4074         /* treat X as a 5-bit value, scale up to 8 bits, and linear interpolate for red/blue */
4075         y = (x << 3) | (x >> 2);
4076         rtable[x] = (RGB_RED(v->fbi.clut[y >> 3]) * (8 - (y & 7)) + RGB_RED(v->fbi.clut[(y >> 3) + 1]) * (y & 7)) >> 3;
4077         btable[x] = (RGB_BLUE(v->fbi.clut[y >> 3]) * (8 - (y & 7)) + RGB_BLUE(v->fbi.clut[(y >> 3) + 1]) * (y & 7)) >> 3;
4078 
4079         /* treat X as a 6-bit value with LSB=0, scale up to 8 bits, and linear interpolate */
4080         y = (x * 2) + 0;
4081         y = (y << 2) | (y >> 4);
4082         gtable[x*2+0] = (RGB_GREEN(v->fbi.clut[y >> 3]) * (8 - (y & 7)) + RGB_GREEN(v->fbi.clut[(y >> 3) + 1]) * (y & 7)) >> 3;
4083 
4084         /* treat X as a 6-bit value with LSB=1, scale up to 8 bits, and linear interpolate */
4085         y = (x * 2) + 1;
4086         y = (y << 2) | (y >> 4);
4087         gtable[x*2+1] = (RGB_GREEN(v->fbi.clut[y >> 3]) * (8 - (y & 7)) + RGB_GREEN(v->fbi.clut[(y >> 3) + 1]) * (y & 7)) >> 3;
4088       }
4089     }
4090 
4091     /* Banshee and later have a 512-entry CLUT that can be bypassed */
4092     else
4093     {
4094       int mode3d = (v->banshee.io[io_vidProcCfg] >> 8) & 1;
4095       int which = (v->banshee.io[io_vidProcCfg] >> (12 + mode3d)) & 1;
4096       int bypass = (v->banshee.io[io_vidProcCfg] >> (10 + mode3d)) & 1;
4097 
4098       /* compute R/G/B pens first */
4099       for (x = 0; x < 32; x++) {
4100         /* treat X as a 5-bit value, scale up to 8 bits */
4101         y = (x << 3) | (x >> 2);
4102         rtable[x] = bypass ? y : RGB_RED(v->fbi.clut[which * 256 + y]);
4103         btable[x] = bypass ? y : RGB_BLUE(v->fbi.clut[which * 256 + y]);
4104 
4105         /* treat X as a 6-bit value with LSB=0, scale up to 8 bits */
4106         y = (x * 2) + 0;
4107         y = (y << 2) | (y >> 4);
4108         gtable[x*2+0] = bypass ? y : RGB_GREEN(v->fbi.clut[which * 256 + y]);
4109 
4110         /* treat X as a 6-bit value with LSB=1, scale up to 8 bits, and linear interpolate */
4111         y = (x * 2) + 1;
4112         y = (y << 2) | (y >> 4);
4113         gtable[x*2+1] = bypass ? y : RGB_GREEN(v->fbi.clut[which * 256 + y]);
4114       }
4115     }
4116 
4117     /* now compute the actual pens array */
4118     for (x = 0; x < 65536; x++) {
4119       int r = rtable[(x >> 11) & 0x1f];
4120       int g = gtable[(x >> 5) & 0x3f];
4121       int b = btable[x & 0x1f];
4122       v->fbi.pen[x] = MAKE_RGB(r, g, b);
4123     }
4124     /* no longer dirty */
4125     v->fbi.clut_dirty = 0;
4126   }
4127 }
4128