1 /*
2  * Copyright 2005 Eric Anholt
3  * Copyright 2005 Benjamin Herrenschmidt
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice (including the next
14  * paragraph) shall be included in all copies or substantial portions of the
15  * Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23  * SOFTWARE.
24  *
25  * Authors:
26  *    Eric Anholt <anholt@FreeBSD.org>
27  *    Zack Rusin <zrusin@trolltech.com>
28  *    Benjamin Herrenschmidt <benh@kernel.crashing.org>
29  *    Alex Deucher <alexander.deucher@amd.com>
30  *
31  */
32 
33 struct blendinfo {
34     Bool dst_alpha;
35     Bool src_alpha;
36     uint32_t blend_cntl;
37 };
38 
39 static struct blendinfo RadeonBlendOp[] = {
40     /* Clear */
41     {0, 0, RADEON_SRC_BLEND_GL_ZERO	      | RADEON_DST_BLEND_GL_ZERO},
42     /* Src */
43     {0, 0, RADEON_SRC_BLEND_GL_ONE	      | RADEON_DST_BLEND_GL_ZERO},
44     /* Dst */
45     {0, 0, RADEON_SRC_BLEND_GL_ZERO	      | RADEON_DST_BLEND_GL_ONE},
46     /* Over */
47     {0, 1, RADEON_SRC_BLEND_GL_ONE	      | RADEON_DST_BLEND_GL_ONE_MINUS_SRC_ALPHA},
48     /* OverReverse */
49     {1, 0, RADEON_SRC_BLEND_GL_ONE_MINUS_DST_ALPHA | RADEON_DST_BLEND_GL_ONE},
50     /* In */
51     {1, 0, RADEON_SRC_BLEND_GL_DST_ALPHA     | RADEON_DST_BLEND_GL_ZERO},
52     /* InReverse */
53     {0, 1, RADEON_SRC_BLEND_GL_ZERO	      | RADEON_DST_BLEND_GL_SRC_ALPHA},
54     /* Out */
55     {1, 0, RADEON_SRC_BLEND_GL_ONE_MINUS_DST_ALPHA | RADEON_DST_BLEND_GL_ZERO},
56     /* OutReverse */
57     {0, 1, RADEON_SRC_BLEND_GL_ZERO	      | RADEON_DST_BLEND_GL_ONE_MINUS_SRC_ALPHA},
58     /* Atop */
59     {1, 1, RADEON_SRC_BLEND_GL_DST_ALPHA     | RADEON_DST_BLEND_GL_ONE_MINUS_SRC_ALPHA},
60     /* AtopReverse */
61     {1, 1, RADEON_SRC_BLEND_GL_ONE_MINUS_DST_ALPHA | RADEON_DST_BLEND_GL_SRC_ALPHA},
62     /* Xor */
63     {1, 1, RADEON_SRC_BLEND_GL_ONE_MINUS_DST_ALPHA | RADEON_DST_BLEND_GL_ONE_MINUS_SRC_ALPHA},
64     /* Add */
65     {0, 0, RADEON_SRC_BLEND_GL_ONE	      | RADEON_DST_BLEND_GL_ONE},
66 };
67 
/*
 * Pairs a Render picture format with the texture-format bits programmed into
 * the card for it.
 */
struct formatinfo {
    int fmt;			/* Render picture format (a PICT_* value) */
    uint32_t card_fmt;		/* matching hardware texture format bits */
};
72 
73 /* Note on texture formats:
74  * TXFORMAT_Y8 expands to (Y,Y,Y,1).  TXFORMAT_I8 expands to (I,I,I,I)
75  */
76 static struct formatinfo R100TexFormats[] = {
77 	{PICT_a8r8g8b8,	RADEON_TXFORMAT_ARGB8888 | RADEON_TXFORMAT_ALPHA_IN_MAP},
78 	{PICT_x8r8g8b8,	RADEON_TXFORMAT_ARGB8888},
79 	{PICT_r5g6b5,	RADEON_TXFORMAT_RGB565},
80 	{PICT_a1r5g5b5,	RADEON_TXFORMAT_ARGB1555 | RADEON_TXFORMAT_ALPHA_IN_MAP},
81 	{PICT_x1r5g5b5,	RADEON_TXFORMAT_ARGB1555},
82 	{PICT_a8,	RADEON_TXFORMAT_I8 | RADEON_TXFORMAT_ALPHA_IN_MAP},
83 };
84 
85 static struct formatinfo R200TexFormats[] = {
86     {PICT_a8r8g8b8,	R200_TXFORMAT_ARGB8888 | R200_TXFORMAT_ALPHA_IN_MAP},
87     {PICT_x8r8g8b8,	R200_TXFORMAT_ARGB8888},
88     {PICT_a8b8g8r8,	R200_TXFORMAT_ABGR8888 | R200_TXFORMAT_ALPHA_IN_MAP},
89     {PICT_x8b8g8r8,	R200_TXFORMAT_ABGR8888},
90     {PICT_r5g6b5,	R200_TXFORMAT_RGB565},
91     {PICT_a1r5g5b5,	R200_TXFORMAT_ARGB1555 | R200_TXFORMAT_ALPHA_IN_MAP},
92     {PICT_x1r5g5b5,	R200_TXFORMAT_ARGB1555},
93     {PICT_a8,		R200_TXFORMAT_I8 | R200_TXFORMAT_ALPHA_IN_MAP},
94 };
95 
96 static struct formatinfo R300TexFormats[] = {
97     {PICT_a8r8g8b8,	R300_EASY_TX_FORMAT(X, Y, Z, W, W8Z8Y8X8)},
98     {PICT_x8r8g8b8,	R300_EASY_TX_FORMAT(X, Y, Z, ONE, W8Z8Y8X8)},
99     {PICT_a8b8g8r8,	R300_EASY_TX_FORMAT(Z, Y, X, W, W8Z8Y8X8)},
100     {PICT_x8b8g8r8,	R300_EASY_TX_FORMAT(Z, Y, X, ONE, W8Z8Y8X8)},
101     {PICT_b8g8r8a8,	R300_EASY_TX_FORMAT(W, Z, Y, X, W8Z8Y8X8)},
102     {PICT_b8g8r8x8,	R300_EASY_TX_FORMAT(W, Z, Y, ONE, W8Z8Y8X8)},
103     {PICT_r5g6b5,	R300_EASY_TX_FORMAT(X, Y, Z, ONE, Z5Y6X5)},
104     {PICT_a1r5g5b5,	R300_EASY_TX_FORMAT(X, Y, Z, W, W1Z5Y5X5)},
105     {PICT_x1r5g5b5,	R300_EASY_TX_FORMAT(X, Y, Z, ONE, W1Z5Y5X5)},
106     {PICT_a8,		R300_EASY_TX_FORMAT(ZERO, ZERO, ZERO, X, X8)},
107 };
108 
109 /* Common Radeon setup code */
110 
RADEONGetDestFormat(PicturePtr pDstPicture,uint32_t * dst_format)111 static Bool RADEONGetDestFormat(PicturePtr pDstPicture, uint32_t *dst_format)
112 {
113     switch (pDstPicture->format) {
114     case PICT_a8r8g8b8:
115     case PICT_x8r8g8b8:
116 	*dst_format = RADEON_COLOR_FORMAT_ARGB8888;
117 	break;
118     case PICT_r5g6b5:
119 	*dst_format = RADEON_COLOR_FORMAT_RGB565;
120 	break;
121     case PICT_a1r5g5b5:
122     case PICT_x1r5g5b5:
123 	*dst_format = RADEON_COLOR_FORMAT_ARGB1555;
124 	break;
125     case PICT_a8:
126 	*dst_format = RADEON_COLOR_FORMAT_RGB8;
127 	break;
128     default:
129 	RADEON_FALLBACK(("Unsupported dest format 0x%x\n",
130 			(int)pDstPicture->format));
131     }
132 
133     return TRUE;
134 }
135 
R300GetDestFormat(PicturePtr pDstPicture,uint32_t * dst_format)136 static Bool R300GetDestFormat(PicturePtr pDstPicture, uint32_t *dst_format)
137 {
138     switch (pDstPicture->format) {
139     case PICT_a8r8g8b8:
140     case PICT_x8r8g8b8:
141     case PICT_a8b8g8r8:
142     case PICT_x8b8g8r8:
143     case PICT_b8g8r8a8:
144     case PICT_b8g8r8x8:
145 	*dst_format = R300_COLORFORMAT_ARGB8888;
146 	break;
147     case PICT_r5g6b5:
148 	*dst_format = R300_COLORFORMAT_RGB565;
149 	break;
150     case PICT_a1r5g5b5:
151     case PICT_x1r5g5b5:
152 	*dst_format = R300_COLORFORMAT_ARGB1555;
153 	break;
154     case PICT_a8:
155 	*dst_format = R300_COLORFORMAT_I8;
156 	break;
157     default:
158 	RADEON_FALLBACK(("Unsupported dest format 0x%x\n",
159 	       (int)pDstPicture->format));
160     }
161     return TRUE;
162 }
163 
/*
 * Build the blend-control value for a compositing operator.
 *
 * op:         operator number, used directly as an index into RadeonBlendOp.
 *             It is not range-checked here; callers must have validated it.
 * pMask:      mask picture, or NULL when compositing without a mask.
 * dst_format: Render format of the destination picture.
 *
 * Returns the source and destination blend factors OR-ed together.
 */
static uint32_t RADEONGetBlendCntl(int op, PicturePtr pMask, uint32_t dst_format)
{
    const struct blendinfo *entry = &RadeonBlendOp[op];
    uint32_t src_factor = entry->blend_cntl & RADEON_SRC_BLEND_MASK;
    uint32_t dst_factor = entry->blend_cntl & RADEON_DST_BLEND_MASK;

    /* A destination without an alpha channel is treated as having alpha 1
     * everywhere, so factors that would read destination alpha are replaced
     * by the constants they evaluate to.
     */
    if (entry->dst_alpha && PICT_FORMAT_A(dst_format) == 0) {
	if (src_factor == RADEON_SRC_BLEND_GL_DST_ALPHA)
	    src_factor = RADEON_SRC_BLEND_GL_ONE;
	else if (src_factor == RADEON_SRC_BLEND_GL_ONE_MINUS_DST_ALPHA)
	    src_factor = RADEON_SRC_BLEND_GL_ZERO;
    }

    /* With a component-alpha mask and an operator that uses source alpha, we
     * should only be in a case where the source blend factor is 0 and the
     * value being blended is the mask channels multiplied by the source
     * picture's alpha.  The destination factor therefore has to read that
     * per-channel value (SRC_COLOR) instead of a single SRC_ALPHA.
     */
    if (pMask && pMask->componentAlpha && entry->src_alpha) {
	if (dst_factor == RADEON_DST_BLEND_GL_SRC_ALPHA)
	    dst_factor = RADEON_DST_BLEND_GL_SRC_COLOR;
	else if (dst_factor == RADEON_DST_BLEND_GL_ONE_MINUS_SRC_ALPHA)
	    dst_factor = RADEON_DST_BLEND_GL_ONE_MINUS_SRC_COLOR;
    }

    return src_factor | dst_factor;
}
195 
/*
 * Overlay of a float and a 32-bit unsigned integer, giving access to the raw
 * bit pattern of a float value (and vice versa).
 */
union intfloat {
    float f;		/* value viewed as a float */
    uint32_t i;		/* the same storage viewed as 32 raw bits */
};
200 
201 /* Check if we need a software-fallback because of a repeating
202  *   non-power-of-two texture.
203  *
204  * canTile: whether we can emulate a repeat by drawing in tiles:
205  *   possible for the source, but not for the mask. (Actually
206  *   we could do tiling for the mask too, but dealing with the
207  *   combination of a tiled mask and a tiled source would be
208  *   a lot of complexity, so we handle only the most common
209  *   case of a repeating mask.)
210  */
RADEONCheckTexturePOT(PicturePtr pPict,Bool canTile)211 static Bool RADEONCheckTexturePOT(PicturePtr pPict, Bool canTile)
212 {
213     int w = pPict->pDrawable->width;
214     int h = pPict->pDrawable->height;
215     unsigned int repeatType = pPict->repeat ? pPict->repeatType : RepeatNone;
216 
217     if ((repeatType == RepeatNormal || repeatType == RepeatReflect) &&
218 	((w & (w - 1)) != 0 || (h & (h - 1)) != 0) &&
219 	!(repeatType == RepeatNormal && !pPict->transform && canTile))
220 	RADEON_FALLBACK(("NPOT repeating %s unsupported (%dx%d), transform=%d\n",
221 			 canTile ? "source" : "mask", w, h, pPict->transform != 0));
222 
223     return TRUE;
224 }
225 
226 /* Determine if the pitch of the pixmap meets the criteria for being
227  * used as a repeating texture: no padding or only a single line texture.
228  */
RADEONPitchMatches(PixmapPtr pPix)229 static Bool RADEONPitchMatches(PixmapPtr pPix)
230 {
231     int w = pPix->drawable.width;
232     int h = pPix->drawable.height;
233     uint32_t txpitch = exaGetPixmapPitch(pPix);
234 
235     if (h > 1 && (RADEON_ALIGN(w * pPix->drawable.bitsPerPixel / 8, 32)) != txpitch)
236 	return FALSE;
237 
238     return TRUE;
239 }
240 
241 /* We can't turn on repeats normally for a non-power-of-two dimension,
242  * but if the source isn't transformed, we can get the same effect
243  * by drawing the image in multiple tiles. (A common case that it's
244  * important to get right is drawing a strip of a NPOTxPOT texture
245  * repeating in the POT direction. With tiling, this ends up as a
246  * a single tile on R300 and newer, which is perfect.)
247  *
248  * canTile1d: On R300 and newer, we can repeat a texture that is NPOT in
249  *   one direction and POT in the other in the POT direction; on
250  *   older chips we can only repeat at all if the texture is POT in
251  *   both directions.
252  *
253  * needMatchingPitch: On R100/R200, we can only repeat horizontally if
254  *   there is no padding in the texture. Textures with small POT widths
255  *   (1,2,4,8) thus can't be tiled.
256  */
RADEONSetupSourceTile(PicturePtr pPict,PixmapPtr pPix,Bool canTile1d,Bool needMatchingPitch)257 static Bool RADEONSetupSourceTile(PicturePtr pPict,
258 				  PixmapPtr pPix,
259 				  Bool canTile1d,
260 				  Bool needMatchingPitch)
261 {
262     RINFO_FROM_SCREEN(pPix->drawable.pScreen);
263     unsigned int repeatType = pPict->repeat ? pPict->repeatType : RepeatNone;
264 
265     info->accel_state->need_src_tile_x = info->accel_state->need_src_tile_y = FALSE;
266     info->accel_state->src_tile_width = info->accel_state->src_tile_height = 65536; /* "infinite" */
267 
268     if (repeatType == RepeatNormal || repeatType == RepeatReflect) {
269 	Bool badPitch = needMatchingPitch && !RADEONPitchMatches(pPix);
270 
271 	int w = pPict->pDrawable ? pPict->pDrawable->width : 1;
272 	int h = pPict->pDrawable ? pPict->pDrawable->height : 1;
273 
274 	if (pPict->transform) {
275 	    if (badPitch)
276 		RADEON_FALLBACK(("Width %d and pitch %u not compatible for repeat\n",
277 				 w, (unsigned)exaGetPixmapPitch(pPix)));
278 	} else {
279 	    info->accel_state->need_src_tile_x = (w & (w - 1)) != 0 || badPitch;
280 	    info->accel_state->need_src_tile_y = (h & (h - 1)) != 0;
281 
282 	    if ((info->accel_state->need_src_tile_x ||
283 		 info->accel_state->need_src_tile_y) &&
284 		repeatType != RepeatNormal)
285 		RADEON_FALLBACK(("Can only tile RepeatNormal at this time\n"));
286 
287 	    if (!canTile1d)
288 		info->accel_state->need_src_tile_x =
289 		    info->accel_state->need_src_tile_y =
290 		    info->accel_state->need_src_tile_x || info->accel_state->need_src_tile_y;
291 	}
292 
293 	if (info->accel_state->need_src_tile_x)
294 	    info->accel_state->src_tile_width = w;
295 	if (info->accel_state->need_src_tile_y)
296 	    info->accel_state->src_tile_height = h;
297     }
298 
299     return TRUE;
300 }
301 
302 /* R100-specific code */
303 
R100CheckCompositeTexture(PicturePtr pPict,PicturePtr pDstPict,int op,int unit)304 static Bool R100CheckCompositeTexture(PicturePtr pPict,
305 				      PicturePtr pDstPict,
306 				      int op,
307 				      int unit)
308 {
309     unsigned int repeatType = pPict->repeat ? pPict->repeatType : RepeatNone;
310     int i;
311 
312     for (i = 0; i < sizeof(R100TexFormats) / sizeof(R100TexFormats[0]); i++) {
313 	if (R100TexFormats[i].fmt == pPict->format)
314 	    break;
315     }
316     if (i == sizeof(R100TexFormats) / sizeof(R100TexFormats[0]))
317 	RADEON_FALLBACK(("Unsupported picture format 0x%x\n",
318 			(int)pPict->format));
319 
320     if (pPict->pDrawable && !RADEONCheckTexturePOT(pPict, unit == 0))
321 	return FALSE;
322 
323     if (pPict->filter != PictFilterNearest &&
324 	pPict->filter != PictFilterBilinear)
325     {
326 	RADEON_FALLBACK(("Unsupported filter 0x%x\n", pPict->filter));
327     }
328 
329     /* for REPEAT_NONE, Render semantics are that sampling outside the source
330      * picture results in alpha=0 pixels. We can implement this with a border color
331      * *if* our source texture has an alpha channel, otherwise we need to fall
332      * back. If we're not transformed then we hope that upper layers have clipped
333      * rendering to the bounds of the source drawable, in which case it doesn't
334      * matter. I have not, however, verified that the X server always does such
335      * clipping.
336      */
337     if (pPict->transform != 0 && repeatType == RepeatNone && PICT_FORMAT_A(pPict->format) == 0) {
338 	if (!(((op == PictOpSrc) || (op == PictOpClear)) && (PICT_FORMAT_A(pDstPict->format) == 0)))
339 	    RADEON_FALLBACK(("REPEAT_NONE unsupported for transformed xRGB source\n"));
340     }
341 
342     if (!radeon_transform_is_affine_or_scaled(pPict->transform))
343 	RADEON_FALLBACK(("non-affine transforms not supported\n"));
344 
345     return TRUE;
346 }
347 
R100TextureSetup(PicturePtr pPict,PixmapPtr pPix,int unit)348 static Bool R100TextureSetup(PicturePtr pPict, PixmapPtr pPix,
349 					int unit)
350 {
351     RINFO_FROM_SCREEN(pPix->drawable.pScreen);
352     uint32_t txfilter, txformat, txoffset, txpitch;
353     unsigned int repeatType;
354     Bool repeat;
355     int i, w, h;
356     struct radeon_exa_pixmap_priv *driver_priv;
357 
358     if (pPict->pDrawable) {
359 	w = pPict->pDrawable->width;
360 	h = pPict->pDrawable->height;
361 	repeatType = pPict->repeat ? pPict->repeatType : RepeatNone;
362     } else {
363 	w = h = 1;
364 	repeatType = RepeatNormal;
365     }
366 
367     repeat = (repeatType == RepeatNormal || repeatType == RepeatReflect) &&
368 	!(unit == 0 && (info->accel_state->need_src_tile_x || info->accel_state->need_src_tile_y));
369 
370     txpitch = exaGetPixmapPitch(pPix);
371     txoffset = 0;
372 
373     if ((txpitch & 0x1f) != 0)
374 	RADEON_FALLBACK(("Bad texture pitch 0x%x\n", (int)txpitch));
375 
376     for (i = 0; i < sizeof(R100TexFormats) / sizeof(R100TexFormats[0]); i++)
377     {
378 	if (R100TexFormats[i].fmt == pPict->format)
379 	    break;
380     }
381     txformat = R100TexFormats[i].card_fmt;
382     if (RADEONPixmapIsColortiled(pPix))
383 	txoffset |= RADEON_TXO_MACRO_TILE;
384 
385     if (repeat) {
386 	if (!RADEONPitchMatches(pPix))
387 	    RADEON_FALLBACK(("Width %d and pitch %u not compatible for repeat\n",
388 			     w, (unsigned)txpitch));
389 
390 	txformat |= RADEONLog2(w) << RADEON_TXFORMAT_WIDTH_SHIFT;
391 	txformat |= RADEONLog2(h) << RADEON_TXFORMAT_HEIGHT_SHIFT;
392     } else
393 	txformat |= RADEON_TXFORMAT_NON_POWER2;
394     txformat |= unit << 24; /* RADEON_TXFORMAT_ST_ROUTE_STQX */
395 
396     info->accel_state->texW[unit] = w;
397     info->accel_state->texH[unit] = h;
398 
399     switch (pPict->filter) {
400     case PictFilterNearest:
401 	txfilter = (RADEON_MAG_FILTER_NEAREST | RADEON_MIN_FILTER_NEAREST);
402 	break;
403     case PictFilterBilinear:
404 	txfilter = (RADEON_MAG_FILTER_LINEAR | RADEON_MIN_FILTER_LINEAR);
405 	break;
406     default:
407 	RADEON_FALLBACK(("Bad filter 0x%x\n", pPict->filter));
408     }
409 
410     switch (repeatType) {
411 	case RepeatNormal:
412 	    if (txformat & RADEON_TXFORMAT_NON_POWER2)
413 		txfilter |= RADEON_CLAMP_S_CLAMP_LAST | RADEON_CLAMP_T_CLAMP_LAST;
414 	    else
415 	        txfilter |= RADEON_CLAMP_S_WRAP | RADEON_CLAMP_T_WRAP;
416 	    break;
417 	case RepeatPad:
418 	    txfilter |= RADEON_CLAMP_S_CLAMP_LAST | RADEON_CLAMP_T_CLAMP_LAST;
419 	    break;
420 	case RepeatReflect:
421 	    txfilter |= RADEON_CLAMP_S_MIRROR | RADEON_CLAMP_T_MIRROR;
422 	    break;
423 	case RepeatNone:
424 	    /* don't set an illegal clamp mode for rects */
425 	    if (txformat & RADEON_TXFORMAT_NON_POWER2)
426 		txfilter |= RADEON_CLAMP_S_CLAMP_LAST | RADEON_CLAMP_T_CLAMP_LAST;
427 	    break;
428     }
429 
430     BEGIN_ACCEL_RELOC(5, 1);
431     if (unit == 0) {
432 	OUT_RING_REG(RADEON_PP_TXFILTER_0, txfilter);
433 	OUT_RING_REG(RADEON_PP_TXFORMAT_0, txformat);
434 	OUT_RING_REG(RADEON_PP_TEX_SIZE_0,
435 	    (pPix->drawable.width - 1) |
436 	    ((pPix->drawable.height - 1) << RADEON_TEX_VSIZE_SHIFT));
437 	OUT_RING_REG(RADEON_PP_TEX_PITCH_0, txpitch - 32);
438 
439 	EMIT_READ_OFFSET(RADEON_PP_TXOFFSET_0, txoffset, pPix);
440 	/* emit a texture relocation */
441     } else {
442 	OUT_RING_REG(RADEON_PP_TXFILTER_1, txfilter);
443 	OUT_RING_REG(RADEON_PP_TXFORMAT_1, txformat);
444 
445 	OUT_RING_REG(RADEON_PP_TEX_SIZE_1,
446 	    (pPix->drawable.width - 1) |
447 	    ((pPix->drawable.height - 1) << RADEON_TEX_VSIZE_SHIFT));
448 	OUT_RING_REG(RADEON_PP_TEX_PITCH_1, txpitch - 32);
449 	EMIT_READ_OFFSET(RADEON_PP_TXOFFSET_1, txoffset, pPix);
450 	/* emit a texture relocation */
451     }
452     ADVANCE_RING();
453 
454     if (pPict->transform != 0) {
455 	info->accel_state->is_transform[unit] = TRUE;
456 	info->accel_state->transform[unit] = pPict->transform;
457     } else {
458 	info->accel_state->is_transform[unit] = FALSE;
459     }
460 
461     return TRUE;
462 }
463 
R100CheckComposite(int op,PicturePtr pSrcPicture,PicturePtr pMaskPicture,PicturePtr pDstPicture)464 static Bool R100CheckComposite(int op, PicturePtr pSrcPicture,
465 			       PicturePtr pMaskPicture, PicturePtr pDstPicture)
466 {
467     PixmapPtr pSrcPixmap, pDstPixmap;
468     uint32_t tmp1;
469 
470     /* Check for unsupported compositing operations. */
471     if (op >= sizeof(RadeonBlendOp) / sizeof(RadeonBlendOp[0]))
472 	RADEON_FALLBACK(("Unsupported Composite op 0x%x\n", op));
473 
474     pDstPixmap = RADEONGetDrawablePixmap(pDstPicture->pDrawable);
475 
476     if (pDstPixmap->drawable.width > 2048 ||
477 	pDstPixmap->drawable.height > 2048) {
478 	RADEON_FALLBACK(("Dest w/h too large (%d,%d).\n",
479 			 pDstPixmap->drawable.width,
480 			 pDstPixmap->drawable.height));
481     }
482 
483     if (pSrcPicture->pDrawable) {
484 	/* r100 limit should be 2048, there are issues with 2048
485 	 * see 197a62704742a4a19736c2637ac92d1dc5ab34ed
486 	 */
487 	pSrcPixmap = RADEONGetDrawablePixmap(pSrcPicture->pDrawable);
488 
489 	if (pSrcPixmap->drawable.width > 2048 ||
490 	    pSrcPixmap->drawable.height > 2048) {
491 	    RADEON_FALLBACK(("Source w/h too large (%d,%d).\n",
492 			     pSrcPixmap->drawable.width,
493 			     pSrcPixmap->drawable.height));
494 	}
495     } else if (pSrcPicture->pSourcePict->type != SourcePictTypeSolidFill)
496 	RADEON_FALLBACK(("Gradient pictures not supported yet\n"));
497 
498     if (pMaskPicture) {
499 	PixmapPtr pMaskPixmap;
500 
501 	if (pMaskPicture->pDrawable) {
502 	    pMaskPixmap = RADEONGetDrawablePixmap(pMaskPicture->pDrawable);
503 
504 	    if (pMaskPixmap->drawable.width > 2048 ||
505 		pMaskPixmap->drawable.height > 2048) {
506 		RADEON_FALLBACK(("Mask w/h too large (%d,%d).\n",
507 				 pMaskPixmap->drawable.width,
508 				 pMaskPixmap->drawable.height));
509 	    }
510 	} else if (pMaskPicture->pSourcePict->type != SourcePictTypeSolidFill)
511 	    RADEON_FALLBACK(("Gradient pictures not supported yet\n"));
512 
513 	if (pMaskPicture->componentAlpha) {
514 	    /* Check if it's component alpha that relies on a source alpha and
515 	     * on the source value.  We can only get one of those into the
516 	     * single source value that we get to blend with.
517 	     */
518 	    if (RadeonBlendOp[op].src_alpha &&
519 		(RadeonBlendOp[op].blend_cntl & RADEON_SRC_BLEND_MASK) !=
520 		RADEON_SRC_BLEND_GL_ZERO) {
521 		RADEON_FALLBACK(("Component alpha not supported with source "
522 				 "alpha and source value blending.\n"));
523 	    }
524 	}
525 
526 	if (!R100CheckCompositeTexture(pMaskPicture, pDstPicture, op, 1))
527 	    return FALSE;
528     }
529 
530     if (!R100CheckCompositeTexture(pSrcPicture, pDstPicture, op, 0))
531 	return FALSE;
532 
533     if (!RADEONGetDestFormat(pDstPicture, &tmp1))
534 	return FALSE;
535 
536     return TRUE;
537 }
538 
539 static Bool
RADEONPrepareCompositeCS(int op,PicturePtr pSrcPicture,PicturePtr pMaskPicture,PicturePtr pDstPicture,PixmapPtr pSrc,PixmapPtr pMask,PixmapPtr pDst)540 RADEONPrepareCompositeCS(int op, PicturePtr pSrcPicture, PicturePtr pMaskPicture,
541 			    PicturePtr pDstPicture, PixmapPtr pSrc, PixmapPtr pMask,
542 			    PixmapPtr pDst)
543 {
544     RINFO_FROM_SCREEN(pDst->drawable.pScreen);
545     int ret;
546 
547     info->accel_state->composite_op = op;
548     info->accel_state->dst_pic = pDstPicture;
549     info->accel_state->msk_pic = pMaskPicture;
550     info->accel_state->src_pic = pSrcPicture;
551     info->accel_state->dst_pix = pDst;
552     info->accel_state->msk_pix = pMask;
553     info->accel_state->src_pix = pSrc;
554 
555     radeon_cs_space_reset_bos(info->cs);
556 
557     radeon_add_pixmap(info->cs, pSrc,
558 		      RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0);
559 
560     if (pMask)
561 	radeon_add_pixmap(info->cs, pMask, RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0);
562 
563     radeon_add_pixmap(info->cs, pDst, 0, RADEON_GEM_DOMAIN_VRAM);
564 
565     ret = radeon_cs_space_check(info->cs);
566     if (ret)
567 	RADEON_FALLBACK(("Not enough RAM to hw accel composite operation\n"));
568 
569     return TRUE;
570 }
571 
R100PrepareComposite(int op,PicturePtr pSrcPicture,PicturePtr pMaskPicture,PicturePtr pDstPicture,PixmapPtr pSrc,PixmapPtr pMask,PixmapPtr pDst)572 static Bool R100PrepareComposite(int op,
573 					    PicturePtr pSrcPicture,
574 					    PicturePtr pMaskPicture,
575 					    PicturePtr pDstPicture,
576 					    PixmapPtr pSrc,
577 					    PixmapPtr pMask,
578 					    PixmapPtr pDst)
579 {
580     ScreenPtr pScreen = pDst->drawable.pScreen;
581     RINFO_FROM_SCREEN(pScreen);
582     uint32_t dst_format, dst_pitch, colorpitch;
583     uint32_t pp_cntl, blendcntl, cblend, ablend;
584     int pixel_shift;
585     struct radeon_exa_pixmap_priv *driver_priv;
586 
587     TRACE;
588 
589     if (!RADEONGetDestFormat(pDstPicture, &dst_format))
590 	return FALSE;
591 
592     if (pDstPicture->format == PICT_a8 && RadeonBlendOp[op].dst_alpha)
593 	RADEON_FALLBACK(("Can't dst alpha blend A8\n"));
594 
595     pixel_shift = pDst->drawable.bitsPerPixel >> 4;
596 
597     dst_pitch = exaGetPixmapPitch(pDst);
598     colorpitch = dst_pitch >> pixel_shift;
599     if (RADEONPixmapIsColortiled(pDst))
600 	colorpitch |= RADEON_COLOR_TILE_ENABLE;
601 
602     if (!pSrc) {
603 	pSrc = RADEONSolidPixmap(pScreen, cpu_to_le32(pSrcPicture->pSourcePict->solidFill.color));
604 	if (!pSrc)
605 	    RADEON_FALLBACK(("Failed to create solid scratch pixmap\n"));
606     }
607 
608     if (((dst_pitch >> pixel_shift) & 0x7) != 0)
609 	RADEON_FALLBACK(("Bad destination pitch 0x%x\n", (int)dst_pitch));
610 
611     if (!RADEONSetupSourceTile(pSrcPicture, pSrc, FALSE, TRUE))
612 	return FALSE;
613 
614     if (pMaskPicture && !pMask) {
615 	pMask = RADEONSolidPixmap(pScreen, cpu_to_le32(pMaskPicture->pSourcePict->solidFill.color));
616 	if (!pMask) {
617 	    if (!pSrcPicture->pDrawable)
618 		pScreen->DestroyPixmap(pSrc);
619 	    RADEON_FALLBACK(("Failed to create solid scratch pixmap\n"));
620 	}
621     }
622 
623     RADEONPrepareCompositeCS(op, pSrcPicture, pMaskPicture, pDstPicture,
624 			     pSrc, pMask, pDst);
625 
626     /* switch to 3D after doing buffer space checks as the latter may flush */
627     RADEON_SWITCH_TO_3D();
628 
629     if (!R100TextureSetup(pSrcPicture, pSrc, 0))
630 	return FALSE;
631     pp_cntl = RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE;
632 
633     if (pMask) {
634 	if (!R100TextureSetup(pMaskPicture, pMask, 1))
635 	    return FALSE;
636 	pp_cntl |= RADEON_TEX_1_ENABLE;
637     } else {
638 	info->accel_state->is_transform[1] = FALSE;
639     }
640 
641     BEGIN_ACCEL_RELOC(10, 2);
642     OUT_RING_REG(RADEON_PP_CNTL, pp_cntl);
643     OUT_RING_REG(RADEON_RB3D_CNTL, dst_format | RADEON_ALPHA_BLEND_ENABLE);
644     EMIT_WRITE_OFFSET(RADEON_RB3D_COLOROFFSET, 0, pDst);
645     EMIT_COLORPITCH(RADEON_RB3D_COLORPITCH, colorpitch, pDst);
646 
647     /* IN operator: Multiply src by mask components or mask alpha.
648      * BLEND_CTL_ADD is A * B + C.
649      * If a source is a8, we have to explicitly zero its color values.
650      * If the destination is a8, we have to route the alpha to red, I think.
651      * If we're doing component alpha where the source for blending is going to
652      * be the source alpha (and there's no source value used), we have to zero
653      * the source's color values.
654      */
655     cblend = RADEON_BLEND_CTL_ADD | RADEON_CLAMP_TX | RADEON_COLOR_ARG_C_ZERO;
656     ablend = RADEON_BLEND_CTL_ADD | RADEON_CLAMP_TX | RADEON_ALPHA_ARG_C_ZERO;
657 
658     if (pDstPicture->format == PICT_a8 ||
659 	(pMask && pMaskPicture->componentAlpha && RadeonBlendOp[op].src_alpha))
660     {
661 	cblend |= RADEON_COLOR_ARG_A_T0_ALPHA;
662     } else if (pSrcPicture->format == PICT_a8)
663 	cblend |= RADEON_COLOR_ARG_A_ZERO;
664     else
665 	cblend |= RADEON_COLOR_ARG_A_T0_COLOR;
666     ablend |= RADEON_ALPHA_ARG_A_T0_ALPHA;
667 
668     if (pMask) {
669 	if (pMaskPicture->componentAlpha &&
670 	    pDstPicture->format != PICT_a8)
671 	    cblend |= RADEON_COLOR_ARG_B_T1_COLOR;
672 	else
673 	    cblend |= RADEON_COLOR_ARG_B_T1_ALPHA;
674 	ablend |= RADEON_ALPHA_ARG_B_T1_ALPHA;
675     } else {
676 	cblend |= RADEON_COLOR_ARG_B_ZERO | RADEON_COMP_ARG_B;
677 	ablend |= RADEON_ALPHA_ARG_B_ZERO | RADEON_COMP_ARG_B;
678     }
679 
680     OUT_RING_REG(RADEON_PP_TXCBLEND_0, cblend);
681     OUT_RING_REG(RADEON_PP_TXABLEND_0, ablend);
682     if (pMask)
683 	OUT_RING_REG(RADEON_SE_VTX_FMT, (RADEON_SE_VTX_FMT_XY |
684 					  RADEON_SE_VTX_FMT_ST0 |
685 					  RADEON_SE_VTX_FMT_ST1));
686     else
687 	OUT_RING_REG(RADEON_SE_VTX_FMT, (RADEON_SE_VTX_FMT_XY |
688 					  RADEON_SE_VTX_FMT_ST0));
689     /* Op operator. */
690     blendcntl = RADEONGetBlendCntl(op, pMaskPicture, pDstPicture->format);
691 
692     OUT_RING_REG(RADEON_RB3D_BLENDCNTL, blendcntl);
693 
694     OUT_RING_REG(RADEON_RE_TOP_LEFT, 0);
695     OUT_RING_REG(RADEON_RE_WIDTH_HEIGHT, (((pDst->drawable.width - 1) << RADEON_RE_WIDTH_SHIFT) |
696 					   ((pDst->drawable.height - 1) << RADEON_RE_HEIGHT_SHIFT)));
697     ADVANCE_RING();
698 
699     return TRUE;
700 }
701 
R200CheckCompositeTexture(PicturePtr pPict,PicturePtr pDstPict,int op,int unit)702 static Bool R200CheckCompositeTexture(PicturePtr pPict,
703 				      PicturePtr pDstPict,
704 				      int op,
705 				      int unit)
706 {
707     unsigned int repeatType = pPict->repeat ? pPict->repeatType : RepeatNone;
708     int i;
709 
710     for (i = 0; i < sizeof(R200TexFormats) / sizeof(R200TexFormats[0]); i++)
711     {
712 	if (R200TexFormats[i].fmt == pPict->format)
713 	    break;
714     }
715     if (i == sizeof(R200TexFormats) / sizeof(R200TexFormats[0]))
716 	RADEON_FALLBACK(("Unsupported picture format 0x%x\n",
717 			 (int)pPict->format));
718 
719     if (pPict->pDrawable && !RADEONCheckTexturePOT(pPict, unit == 0))
720 	return FALSE;
721 
722     if (pPict->filter != PictFilterNearest &&
723 	pPict->filter != PictFilterBilinear)
724 	RADEON_FALLBACK(("Unsupported filter 0x%x\n", pPict->filter));
725 
726     /* for REPEAT_NONE, Render semantics are that sampling outside the source
727      * picture results in alpha=0 pixels. We can implement this with a border color
728      * *if* our source texture has an alpha channel, otherwise we need to fall
729      * back. If we're not transformed then we hope that upper layers have clipped
730      * rendering to the bounds of the source drawable, in which case it doesn't
731      * matter. I have not, however, verified that the X server always does such
732      * clipping.
733      */
734     if (pPict->transform != 0 && repeatType == RepeatNone && PICT_FORMAT_A(pPict->format) == 0) {
735 	if (!(((op == PictOpSrc) || (op == PictOpClear)) && (PICT_FORMAT_A(pDstPict->format) == 0)))
736 	    RADEON_FALLBACK(("REPEAT_NONE unsupported for transformed xRGB source\n"));
737     }
738 
739     if (!radeon_transform_is_affine_or_scaled(pPict->transform))
740 	RADEON_FALLBACK(("non-affine transforms not supported\n"));
741 
742     return TRUE;
743 }
744 
R200TextureSetup(PicturePtr pPict,PixmapPtr pPix,int unit)745 static Bool R200TextureSetup(PicturePtr pPict, PixmapPtr pPix,
746 					int unit)
747 {
748     RINFO_FROM_SCREEN(pPix->drawable.pScreen);
749     uint32_t txfilter, txformat, txoffset, txpitch;
750     unsigned int repeatType;
751     Bool repeat;
752     int i, w, h;
753     struct radeon_exa_pixmap_priv *driver_priv;
754 
755     if (pPict->pDrawable) {
756 	w = pPict->pDrawable->width;
757 	h = pPict->pDrawable->height;
758 	repeatType = pPict->repeat ? pPict->repeatType : RepeatNone;
759     } else {
760 	w = h = 1;
761 	repeatType = RepeatNormal;
762     }
763 
764     repeat = (repeatType == RepeatNormal || repeatType == RepeatReflect) &&
765 	!(unit == 0 && (info->accel_state->need_src_tile_x || info->accel_state->need_src_tile_y));
766 
767     txpitch = exaGetPixmapPitch(pPix);
768 
769     txoffset = 0;
770 
771     if ((txpitch & 0x1f) != 0)
772 	RADEON_FALLBACK(("Bad texture pitch 0x%x\n", (int)txpitch));
773 
774     for (i = 0; i < sizeof(R200TexFormats) / sizeof(R200TexFormats[0]); i++)
775     {
776 	if (R200TexFormats[i].fmt == pPict->format)
777 	    break;
778     }
779     txformat = R200TexFormats[i].card_fmt;
780     if (RADEONPixmapIsColortiled(pPix))
781 	txoffset |= R200_TXO_MACRO_TILE;
782 
783     if (repeat) {
784 	if (!RADEONPitchMatches(pPix))
785 	    RADEON_FALLBACK(("Width %d and pitch %u not compatible for repeat\n",
786 			     w, (unsigned)txpitch));
787 
788 	txformat |= RADEONLog2(w) << R200_TXFORMAT_WIDTH_SHIFT;
789 	txformat |= RADEONLog2(h) << R200_TXFORMAT_HEIGHT_SHIFT;
790     } else
791 	txformat |= R200_TXFORMAT_NON_POWER2;
792     txformat |= unit << R200_TXFORMAT_ST_ROUTE_SHIFT;
793 
794     info->accel_state->texW[unit] = w;
795     info->accel_state->texH[unit] = h;
796 
797     switch (pPict->filter) {
798     case PictFilterNearest:
799 	txfilter = (R200_MAG_FILTER_NEAREST |
800 		    R200_MIN_FILTER_NEAREST);
801 	break;
802     case PictFilterBilinear:
803 	txfilter = (R200_MAG_FILTER_LINEAR |
804 		    R200_MIN_FILTER_LINEAR);
805 	break;
806     default:
807 	RADEON_FALLBACK(("Bad filter 0x%x\n", pPict->filter));
808     }
809 
810     switch (repeatType) {
811 	case RepeatNormal:
812 	    if (txformat & R200_TXFORMAT_NON_POWER2)
813 		txfilter |= R200_CLAMP_S_CLAMP_LAST | R200_CLAMP_T_CLAMP_LAST;
814 	    else
815 	        txfilter |= R200_CLAMP_S_WRAP | R200_CLAMP_T_WRAP;
816 	    break;
817 	case RepeatPad:
818 	    txfilter |= R200_CLAMP_S_CLAMP_LAST | R200_CLAMP_T_CLAMP_LAST;
819 	    break;
820 	case RepeatReflect:
821 	    txfilter |= R200_CLAMP_S_MIRROR | R200_CLAMP_T_MIRROR;
822 	    break;
823 	case RepeatNone:
824 	    /* don't set an illegal clamp mode for rect textures */
825 	    if (txformat & R200_TXFORMAT_NON_POWER2)
826 		txfilter |= R200_CLAMP_S_CLAMP_LAST | R200_CLAMP_T_CLAMP_LAST;
827 	    break;
828     }
829 
830     BEGIN_ACCEL_RELOC(6, 1);
831     if (unit == 0) {
832 	OUT_RING_REG(R200_PP_TXFILTER_0, txfilter);
833 	OUT_RING_REG(R200_PP_TXFORMAT_0, txformat);
834 	OUT_RING_REG(R200_PP_TXFORMAT_X_0, 0);
835 	OUT_RING_REG(R200_PP_TXSIZE_0, (pPix->drawable.width - 1) |
836 		      ((pPix->drawable.height - 1) << RADEON_TEX_VSIZE_SHIFT));
837 	OUT_RING_REG(R200_PP_TXPITCH_0, txpitch - 32);
838 	EMIT_READ_OFFSET(R200_PP_TXOFFSET_0, txoffset, pPix);
839     } else {
840 	OUT_RING_REG(R200_PP_TXFILTER_1, txfilter);
841 	OUT_RING_REG(R200_PP_TXFORMAT_1, txformat);
842 	OUT_RING_REG(R200_PP_TXFORMAT_X_1, 0);
843 	OUT_RING_REG(R200_PP_TXSIZE_1, (pPix->drawable.width - 1) |
844 		      ((pPix->drawable.height - 1) << RADEON_TEX_VSIZE_SHIFT));
845 	OUT_RING_REG(R200_PP_TXPITCH_1, txpitch - 32);
846 	EMIT_READ_OFFSET(R200_PP_TXOFFSET_1, txoffset, pPix);
847 	/* emit a texture relocation */
848     }
849     ADVANCE_RING();
850 
851     if (pPict->transform != 0) {
852 	info->accel_state->is_transform[unit] = TRUE;
853 	info->accel_state->transform[unit] = pPict->transform;
854     } else {
855 	info->accel_state->is_transform[unit] = FALSE;
856     }
857 
858     return TRUE;
859 }
860 
R200CheckComposite(int op,PicturePtr pSrcPicture,PicturePtr pMaskPicture,PicturePtr pDstPicture)861 static Bool R200CheckComposite(int op, PicturePtr pSrcPicture, PicturePtr pMaskPicture,
862 			       PicturePtr pDstPicture)
863 {
864     PixmapPtr pSrcPixmap, pDstPixmap;
865     uint32_t tmp1;
866 
867     TRACE;
868 
869     /* Check for unsupported compositing operations. */
870     if (op >= sizeof(RadeonBlendOp) / sizeof(RadeonBlendOp[0]))
871 	RADEON_FALLBACK(("Unsupported Composite op 0x%x\n", op));
872 
873     pDstPixmap = RADEONGetDrawablePixmap(pDstPicture->pDrawable);
874 
875     if (pDstPixmap->drawable.width > 2048 ||
876 	pDstPixmap->drawable.height > 2048) {
877 	RADEON_FALLBACK(("Dest w/h too large (%d,%d).\n",
878 			 pDstPixmap->drawable.width,
879 			 pDstPixmap->drawable.height));
880     }
881 
882     if (pSrcPicture->pDrawable) {
883 	/* r200 limit should be 2048, there are issues with 2048
884 	 * see 197a62704742a4a19736c2637ac92d1dc5ab34ed
885 	 */
886 	pSrcPixmap = RADEONGetDrawablePixmap(pSrcPicture->pDrawable);
887 
888 	if (pSrcPixmap->drawable.width > 2048 ||
889 	    pSrcPixmap->drawable.height > 2048) {
890 	    RADEON_FALLBACK(("Source w/h too large (%d,%d).\n",
891 			     pSrcPixmap->drawable.width,
892 			     pSrcPixmap->drawable.height));
893 	}
894     } else if (pSrcPicture->pSourcePict->type != SourcePictTypeSolidFill)
895 	RADEON_FALLBACK(("Gradient pictures not supported yet\n"));
896 
897     if (pMaskPicture) {
898 	PixmapPtr pMaskPixmap;
899 
900 	if (pMaskPicture->pDrawable) {
901 	    pMaskPixmap = RADEONGetDrawablePixmap(pMaskPicture->pDrawable);
902 
903 	    if (pMaskPixmap->drawable.width > 2048 ||
904 		pMaskPixmap->drawable.height > 2048) {
905 		RADEON_FALLBACK(("Mask w/h too large (%d,%d).\n",
906 				 pMaskPixmap->drawable.width,
907 				 pMaskPixmap->drawable.height));
908 	    }
909 	} else if (pMaskPicture->pSourcePict->type != SourcePictTypeSolidFill)
910 	    RADEON_FALLBACK(("Gradient pictures not supported yet\n"));
911 
912 	if (pMaskPicture->componentAlpha) {
913 	    /* Check if it's component alpha that relies on a source alpha and
914 	     * on the source value.  We can only get one of those into the
915 	     * single source value that we get to blend with.
916 	     */
917 	    if (RadeonBlendOp[op].src_alpha &&
918 		(RadeonBlendOp[op].blend_cntl & RADEON_SRC_BLEND_MASK) !=
919 		RADEON_SRC_BLEND_GL_ZERO) {
920 		RADEON_FALLBACK(("Component alpha not supported with source "
921 				 "alpha and source value blending.\n"));
922 	    }
923 	}
924 
925 	if (!R200CheckCompositeTexture(pMaskPicture, pDstPicture, op, 1))
926 	    return FALSE;
927     }
928 
929     if (!R200CheckCompositeTexture(pSrcPicture, pDstPicture, op, 0))
930 	return FALSE;
931 
932     if (!RADEONGetDestFormat(pDstPicture, &tmp1))
933 	return FALSE;
934 
935     return TRUE;
936 }
937 
R200PrepareComposite(int op,PicturePtr pSrcPicture,PicturePtr pMaskPicture,PicturePtr pDstPicture,PixmapPtr pSrc,PixmapPtr pMask,PixmapPtr pDst)938 static Bool R200PrepareComposite(int op, PicturePtr pSrcPicture,
939 				PicturePtr pMaskPicture, PicturePtr pDstPicture,
940 				PixmapPtr pSrc, PixmapPtr pMask, PixmapPtr pDst)
941 {
942     ScreenPtr pScreen = pDst->drawable.pScreen;
943     RINFO_FROM_SCREEN(pScreen);
944     uint32_t dst_format, dst_pitch;
945     uint32_t pp_cntl, blendcntl, cblend, ablend, colorpitch;
946     int pixel_shift;
947     struct radeon_exa_pixmap_priv *driver_priv;
948 
949     TRACE;
950 
951     if (!RADEONGetDestFormat(pDstPicture, &dst_format))
952 	return FALSE;
953 
954     if (pDstPicture->format == PICT_a8 && RadeonBlendOp[op].dst_alpha)
955 	RADEON_FALLBACK(("Can't dst alpha blend A8\n"));
956 
957     pixel_shift = pDst->drawable.bitsPerPixel >> 4;
958 
959     dst_pitch = exaGetPixmapPitch(pDst);
960     colorpitch = dst_pitch >> pixel_shift;
961     if (RADEONPixmapIsColortiled(pDst))
962 	colorpitch |= RADEON_COLOR_TILE_ENABLE;
963 
964     if (((dst_pitch >> pixel_shift) & 0x7) != 0)
965 	RADEON_FALLBACK(("Bad destination pitch 0x%x\n", (int)dst_pitch));
966 
967     if (!pSrc) {
968 	pSrc = RADEONSolidPixmap(pScreen, cpu_to_le32(pSrcPicture->pSourcePict->solidFill.color));
969 	if (!pSrc)
970 	    RADEON_FALLBACK(("Failed to create solid scratch pixmap\n"));
971     }
972 
973     if (!RADEONSetupSourceTile(pSrcPicture, pSrc, FALSE, TRUE))
974 	return FALSE;
975 
976     if (pMaskPicture && !pMask) {
977 	pMask = RADEONSolidPixmap(pScreen, cpu_to_le32(pMaskPicture->pSourcePict->solidFill.color));
978 	if (!pMask) {
979 	    if (!pSrcPicture->pDrawable)
980 		pScreen->DestroyPixmap(pSrc);
981 	    RADEON_FALLBACK(("Failed to create solid scratch pixmap\n"));
982 	}
983     }
984 
985     RADEONPrepareCompositeCS(op, pSrcPicture, pMaskPicture, pDstPicture,
986 			     pSrc, pMask, pDst);
987 
988     /* switch to 3D after doing buffer space checks as it may flush */
989     RADEON_SWITCH_TO_3D();
990 
991     if (!R200TextureSetup(pSrcPicture, pSrc, 0))
992 	return FALSE;
993     pp_cntl = RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE;
994 
995     if (pMask) {
996 	if (!R200TextureSetup(pMaskPicture, pMask, 1))
997 	    return FALSE;
998 	pp_cntl |= RADEON_TEX_1_ENABLE;
999     } else {
1000 	info->accel_state->is_transform[1] = FALSE;
1001     }
1002 
1003     BEGIN_ACCEL_RELOC(12, 2);
1004 
1005     OUT_RING_REG(RADEON_PP_CNTL, pp_cntl);
1006     OUT_RING_REG(RADEON_RB3D_CNTL, dst_format | RADEON_ALPHA_BLEND_ENABLE);
1007 
1008     EMIT_WRITE_OFFSET(RADEON_RB3D_COLOROFFSET, 0, pDst);
1009     EMIT_COLORPITCH(RADEON_RB3D_COLORPITCH, colorpitch, pDst);
1010 
1011     OUT_RING_REG(R200_SE_VTX_FMT_0, R200_VTX_XY);
1012     if (pMask)
1013 	OUT_RING_REG(R200_SE_VTX_FMT_1,
1014 		      (2 << R200_VTX_TEX0_COMP_CNT_SHIFT) |
1015 		      (2 << R200_VTX_TEX1_COMP_CNT_SHIFT));
1016     else
1017 	OUT_RING_REG(R200_SE_VTX_FMT_1,
1018 		      (2 << R200_VTX_TEX0_COMP_CNT_SHIFT));
1019 
1020 
1021 
1022     /* IN operator: Multiply src by mask components or mask alpha.
1023      * BLEND_CTL_ADD is A * B + C.
1024      * If a picture is a8, we have to explicitly zero its color values.
1025      * If the destination is a8, we have to route the alpha to red, I think.
1026      * If we're doing component alpha where the source for blending is going to
1027      * be the source alpha (and there's no source value used), we have to zero
1028      * the source's color values.
1029      */
1030     cblend = R200_TXC_OP_MADD | R200_TXC_ARG_C_ZERO;
1031     ablend = R200_TXA_OP_MADD | R200_TXA_ARG_C_ZERO;
1032 
1033     if (pDstPicture->format == PICT_a8 ||
1034 	(pMask && pMaskPicture->componentAlpha && RadeonBlendOp[op].src_alpha))
1035     {
1036 	cblend |= R200_TXC_ARG_A_R0_ALPHA;
1037     } else if (pSrcPicture->format == PICT_a8)
1038 	cblend |= R200_TXC_ARG_A_ZERO;
1039     else
1040 	cblend |= R200_TXC_ARG_A_R0_COLOR;
1041     ablend |= R200_TXA_ARG_A_R0_ALPHA;
1042 
1043     if (pMask) {
1044 	if (pMaskPicture->componentAlpha &&
1045 	    pDstPicture->format != PICT_a8)
1046 	    cblend |= R200_TXC_ARG_B_R1_COLOR;
1047 	else
1048 	    cblend |= R200_TXC_ARG_B_R1_ALPHA;
1049 	ablend |= R200_TXA_ARG_B_R1_ALPHA;
1050     } else {
1051 	cblend |= R200_TXC_ARG_B_ZERO | R200_TXC_COMP_ARG_B;
1052 	ablend |= R200_TXA_ARG_B_ZERO | R200_TXA_COMP_ARG_B;
1053     }
1054 
1055     OUT_RING_REG(R200_PP_TXCBLEND_0, cblend);
1056     OUT_RING_REG(R200_PP_TXCBLEND2_0,
1057 	R200_TXC_CLAMP_0_1 | R200_TXC_OUTPUT_REG_R0);
1058     OUT_RING_REG(R200_PP_TXABLEND_0, ablend);
1059     OUT_RING_REG(R200_PP_TXABLEND2_0,
1060 	R200_TXA_CLAMP_0_1 | R200_TXA_OUTPUT_REG_R0);
1061 
1062     /* Op operator. */
1063     blendcntl = RADEONGetBlendCntl(op, pMaskPicture, pDstPicture->format);
1064     OUT_RING_REG(RADEON_RB3D_BLENDCNTL, blendcntl);
1065 
1066     OUT_RING_REG(RADEON_RE_WIDTH_HEIGHT, (((pDst->drawable.width - 1) << RADEON_RE_WIDTH_SHIFT) |
1067 					   ((pDst->drawable.height - 1) << RADEON_RE_HEIGHT_SHIFT)));
1068 
1069     ADVANCE_RING();
1070 
1071     return TRUE;
1072 }
1073 
R300CheckCompositeTexture(PicturePtr pPict,PicturePtr pDstPict,int op,int unit,Bool is_r500)1074 static Bool R300CheckCompositeTexture(PicturePtr pPict,
1075 				      PicturePtr pDstPict,
1076 				      int op,
1077 				      int unit,
1078 				      Bool is_r500)
1079 {
1080     unsigned int repeatType = pPict->repeat ? pPict->repeatType : RepeatNone;
1081     int i;
1082 
1083     for (i = 0; i < sizeof(R300TexFormats) / sizeof(R300TexFormats[0]); i++)
1084     {
1085 	if (R300TexFormats[i].fmt == pPict->format)
1086 	    break;
1087     }
1088     if (i == sizeof(R300TexFormats) / sizeof(R300TexFormats[0]))
1089 	RADEON_FALLBACK(("Unsupported picture format 0x%x\n",
1090 			 (int)pPict->format));
1091 
1092     if (pPict->pDrawable && !RADEONCheckTexturePOT(pPict, unit == 0)) {
1093 #if 0
1094 	      		struct radeon_exa_pixmap_priv *driver_priv;
1095 		PixmapPtr pPix;
1096 
1097     		pPix = RADEONGetDrawablePixmap(pPict->pDrawable);
1098 		driver_priv = exaGetPixmapDriverPrivate(pPix);
1099 		//TODOradeon_bufmgr_gem_force_gtt(driver_priv->bo);
1100 #endif
1101 	return FALSE;
1102     }
1103 
1104     if (pPict->filter != PictFilterNearest &&
1105 	pPict->filter != PictFilterBilinear)
1106 	RADEON_FALLBACK(("Unsupported filter 0x%x\n", pPict->filter));
1107 
1108     /* for REPEAT_NONE, Render semantics are that sampling outside the source
1109      * picture results in alpha=0 pixels. We can implement this with a border color
1110      * *if* our source texture has an alpha channel, otherwise we need to fall
1111      * back. If we're not transformed then we hope that upper layers have clipped
1112      * rendering to the bounds of the source drawable, in which case it doesn't
1113      * matter. I have not, however, verified that the X server always does such
1114      * clipping.
1115      */
1116     if (pPict->transform != 0 && repeatType == RepeatNone && PICT_FORMAT_A(pPict->format) == 0) {
1117 	if (!(((op == PictOpSrc) || (op == PictOpClear)) && (PICT_FORMAT_A(pDstPict->format) == 0)))
1118 	    RADEON_FALLBACK(("REPEAT_NONE unsupported for transformed xRGB source\n"));
1119     }
1120 
1121     if (!radeon_transform_is_affine_or_scaled(pPict->transform))
1122 	RADEON_FALLBACK(("non-affine transforms not supported\n"));
1123 
1124     return TRUE;
1125 }
1126 
R300TextureSetup(PicturePtr pPict,PixmapPtr pPix,int unit)1127 static Bool R300TextureSetup(PicturePtr pPict, PixmapPtr pPix,
1128 					int unit)
1129 {
1130     RINFO_FROM_SCREEN(pPix->drawable.pScreen);
1131     uint32_t txfilter, txformat0, txformat1, txoffset, txpitch, us_format = 0;
1132     int w, h;
1133     int i, pixel_shift, out_size = 6;
1134     unsigned int repeatType;
1135     struct radeon_exa_pixmap_priv *driver_priv;
1136 
1137     TRACE;
1138 
1139     if (pPict->pDrawable) {
1140 	w = pPict->pDrawable->width;
1141 	h = pPict->pDrawable->height;
1142 	repeatType = pPict->repeat ? pPict->repeatType : RepeatNone;
1143     } else {
1144 	w = h = 1;
1145 	repeatType = RepeatNormal;
1146     }
1147 
1148     txpitch = exaGetPixmapPitch(pPix);
1149     txoffset = 0;
1150 
1151     if ((txpitch & 0x1f) != 0)
1152 	RADEON_FALLBACK(("Bad texture pitch 0x%x\n", (int)txpitch));
1153 
1154     /* TXPITCH = pixels (texels) per line - 1 */
1155     pixel_shift = pPix->drawable.bitsPerPixel >> 4;
1156     txpitch >>= pixel_shift;
1157     txpitch -= 1;
1158 
1159     if (RADEONPixmapIsColortiled(pPix))
1160 	txoffset |= R300_MACRO_TILE;
1161 
1162     for (i = 0; i < sizeof(R300TexFormats) / sizeof(R300TexFormats[0]); i++)
1163     {
1164 	if (R300TexFormats[i].fmt == pPict->format)
1165 	    break;
1166     }
1167 
1168     txformat1 = R300TexFormats[i].card_fmt;
1169 
1170     if (IS_R300_3D) {
1171 	if ((unit == 0) && info->accel_state->msk_pic)
1172 	    txformat1 |= R300_TX_FORMAT_CACHE_HALF_REGION_0;
1173 	else if (unit == 1)
1174 	    txformat1 |= R300_TX_FORMAT_CACHE_HALF_REGION_1;
1175     }
1176 
1177     txformat0 = ((((w - 1) & 0x7ff) << R300_TXWIDTH_SHIFT) |
1178 		 (((h - 1) & 0x7ff) << R300_TXHEIGHT_SHIFT));
1179 
1180     if (IS_R500_3D && ((w - 1) & 0x800))
1181 	txpitch |= R500_TXWIDTH_11;
1182 
1183     if (IS_R500_3D && ((h - 1) & 0x800))
1184 	txpitch |= R500_TXHEIGHT_11;
1185 
1186     if (info->ChipFamily == CHIP_FAMILY_R520) {
1187 	unsigned us_width = (w - 1) & 0x7ff;
1188 	unsigned us_height = (h - 1) & 0x7ff;
1189 	unsigned us_depth = 0;
1190 
1191 	if (w > 2048) {
1192 	    us_width = (0x7ff + us_width) >> 1;
1193 	    us_depth |= 0x0d;
1194 	}
1195 	if (h > 2048) {
1196 	    us_height = (0x7ff + us_height) >> 1;
1197 	    us_depth |= 0x0e;
1198 	}
1199 
1200 	us_format = (us_width << R300_TXWIDTH_SHIFT) |
1201 		    (us_height << R300_TXHEIGHT_SHIFT) |
1202 		    (us_depth << R300_TXDEPTH_SHIFT);
1203 	out_size++;
1204     }
1205 
1206     /* Use TXPITCH instead of TXWIDTH for address computations: we could
1207      * omit this if there is no padding, but there is no apparent advantage
1208      * in doing so.
1209      */
1210     txformat0 |= R300_TXPITCH_EN;
1211 
1212     txfilter = (unit << R300_TX_ID_SHIFT);
1213 
1214     switch (repeatType) {
1215     case RepeatNormal:
1216 	if (unit != 0 || !info->accel_state->need_src_tile_x)
1217 	    txfilter |= R300_TX_CLAMP_S(R300_TX_CLAMP_WRAP);
1218 	else
1219 	    txfilter |= R300_TX_CLAMP_S(R300_TX_CLAMP_CLAMP_GL);
1220 
1221 	if (unit != 0 || !info->accel_state->need_src_tile_y)
1222 	    txfilter |= R300_TX_CLAMP_T(R300_TX_CLAMP_WRAP);
1223 	else
1224 	    txfilter |= R300_TX_CLAMP_T(R300_TX_CLAMP_CLAMP_GL);
1225 
1226 	break;
1227     case RepeatPad:
1228 	txfilter |= R300_TX_CLAMP_S(R300_TX_CLAMP_CLAMP_LAST) |
1229 	    R300_TX_CLAMP_T(R300_TX_CLAMP_CLAMP_LAST);
1230 	break;
1231     case RepeatReflect:
1232 	txfilter |= R300_TX_CLAMP_S(R300_TX_CLAMP_MIRROR) |
1233 	    R300_TX_CLAMP_T(R300_TX_CLAMP_MIRROR);
1234 	break;
1235     case RepeatNone:
1236 	txfilter |= R300_TX_CLAMP_S(R300_TX_CLAMP_CLAMP_GL) |
1237 	    R300_TX_CLAMP_T(R300_TX_CLAMP_CLAMP_GL);
1238 	break;
1239     }
1240 
1241     switch (pPict->filter) {
1242     case PictFilterNearest:
1243 	txfilter |= (R300_TX_MAG_FILTER_NEAREST | R300_TX_MIN_FILTER_NEAREST);
1244 	break;
1245     case PictFilterBilinear:
1246 	txfilter |= (R300_TX_MAG_FILTER_LINEAR | R300_TX_MIN_FILTER_LINEAR);
1247 	break;
1248     default:
1249 	RADEON_FALLBACK(("Bad filter 0x%x\n", pPict->filter));
1250     }
1251 
1252     if (repeatType == RepeatNone)
1253 	out_size++;
1254     BEGIN_ACCEL_RELOC(out_size, 1);
1255     OUT_RING_REG(R300_TX_FILTER0_0 + (unit * 4), txfilter);
1256     OUT_RING_REG(R300_TX_FILTER1_0 + (unit * 4), 0);
1257     OUT_RING_REG(R300_TX_FORMAT0_0 + (unit * 4), txformat0);
1258     OUT_RING_REG(R300_TX_FORMAT1_0 + (unit * 4), txformat1);
1259     OUT_RING_REG(R300_TX_FORMAT2_0 + (unit * 4), txpitch);
1260 
1261     EMIT_READ_OFFSET((R300_TX_OFFSET_0 + (unit * 4)), txoffset, pPix);
1262 
1263     if (repeatType == RepeatNone)
1264 	OUT_RING_REG(R300_TX_BORDER_COLOR_0 + (unit * 4), 0);
1265     if (info->ChipFamily == CHIP_FAMILY_R520)
1266 	OUT_RING_REG(R500_US_FORMAT0_0 + (unit * 4), us_format);
1267     ADVANCE_RING();
1268 
1269     if (pPict->transform != 0) {
1270 	info->accel_state->is_transform[unit] = TRUE;
1271 	info->accel_state->transform[unit] = pPict->transform;
1272 
1273 	/* setup the PVS consts */
1274 	if (info->accel_state->has_tcl) {
1275 	    info->accel_state->texW[unit] = 1;
1276 	    info->accel_state->texH[unit] = 1;
1277 	    BEGIN_RING(2*9);
1278 	    if (IS_R300_3D)
1279 		OUT_RING_REG(R300_VAP_PVS_VECTOR_INDX_REG, R300_PVS_VECTOR_CONST_INDEX(unit * 2));
1280 	    else
1281 		OUT_RING_REG(R300_VAP_PVS_VECTOR_INDX_REG, R500_PVS_VECTOR_CONST_INDEX(unit * 2));
1282 
1283 	    OUT_RING_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(xFixedToFloat(pPict->transform->matrix[0][0])));
1284 	    OUT_RING_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(xFixedToFloat(pPict->transform->matrix[0][1])));
1285 	    OUT_RING_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(xFixedToFloat(pPict->transform->matrix[0][2])));
1286 	    OUT_RING_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(1.0/w));
1287 
1288 	    OUT_RING_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(xFixedToFloat(pPict->transform->matrix[1][0])));
1289 	    OUT_RING_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(xFixedToFloat(pPict->transform->matrix[1][1])));
1290 	    OUT_RING_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(xFixedToFloat(pPict->transform->matrix[1][2])));
1291 	    OUT_RING_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(1.0/h));
1292 
1293 	    ADVANCE_RING();
1294 	} else {
1295 	    info->accel_state->texW[unit] = w;
1296 	    info->accel_state->texH[unit] = h;
1297 	}
1298     } else {
1299 	info->accel_state->is_transform[unit] = FALSE;
1300 
1301 	/* setup the PVS consts */
1302 	if (info->accel_state->has_tcl) {
1303 	    info->accel_state->texW[unit] = 1;
1304 	    info->accel_state->texH[unit] = 1;
1305 
1306 	    BEGIN_RING(2*9);
1307 	    if (IS_R300_3D)
1308 		OUT_RING_REG(R300_VAP_PVS_VECTOR_INDX_REG, R300_PVS_VECTOR_CONST_INDEX(unit * 2));
1309 	    else
1310 		OUT_RING_REG(R300_VAP_PVS_VECTOR_INDX_REG, R500_PVS_VECTOR_CONST_INDEX(unit * 2));
1311 
1312 	    OUT_RING_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(1.0));
1313 	    OUT_RING_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(0.0));
1314 	    OUT_RING_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(0.0));
1315 	    OUT_RING_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(1.0/w));
1316 
1317 	    OUT_RING_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(0.0));
1318 	    OUT_RING_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(1.0));
1319 	    OUT_RING_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(0.0));
1320 	    OUT_RING_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(1.0/h));
1321 
1322 	    ADVANCE_RING();
1323 	} else {
1324 	    info->accel_state->texW[unit] = w;
1325 	    info->accel_state->texH[unit] = h;
1326 	}
1327     }
1328 
1329     return TRUE;
1330 }
1331 
R300CheckComposite(int op,PicturePtr pSrcPicture,PicturePtr pMaskPicture,PicturePtr pDstPicture)1332 static Bool R300CheckComposite(int op, PicturePtr pSrcPicture, PicturePtr pMaskPicture,
1333 			       PicturePtr pDstPicture)
1334 {
1335     uint32_t tmp1;
1336     ScreenPtr pScreen = pDstPicture->pDrawable->pScreen;
1337     PixmapPtr pSrcPixmap, pDstPixmap;
1338     ScrnInfoPtr pScrn = xf86ScreenToScrn(pScreen);
1339     RADEONInfoPtr info = RADEONPTR(pScrn);
1340     int max_tex_w, max_tex_h, max_dst_w, max_dst_h;
1341 
1342     TRACE;
1343 
1344     /* Check for unsupported compositing operations. */
1345     if (op >= sizeof(RadeonBlendOp) / sizeof(RadeonBlendOp[0]))
1346 	RADEON_FALLBACK(("Unsupported Composite op 0x%x\n", op));
1347 
1348     if (IS_R500_3D) {
1349 	max_tex_w = 4096;
1350 	max_tex_h = 4096;
1351 	max_dst_w = 4096;
1352 	max_dst_h = 4096;
1353     } else {
1354 	max_tex_w = 2048;
1355 	max_tex_h = 2048;
1356 	if (IS_R400_3D) {
1357 	    max_dst_w = 4021;
1358 	    max_dst_h = 4021;
1359 	} else {
1360 	    max_dst_w = 2560;
1361 	    max_dst_h = 2560;
1362 	}
1363     }
1364 
1365     pDstPixmap = RADEONGetDrawablePixmap(pDstPicture->pDrawable);
1366 
1367     if (pDstPixmap->drawable.width > max_dst_w ||
1368 	pDstPixmap->drawable.height > max_dst_h) {
1369 	RADEON_FALLBACK(("Dest w/h too large (%d,%d).\n",
1370 			 pDstPixmap->drawable.width,
1371 			 pDstPixmap->drawable.height));
1372     }
1373 
1374     if (pSrcPicture->pDrawable) {
1375 	pSrcPixmap = RADEONGetDrawablePixmap(pSrcPicture->pDrawable);
1376 
1377 	if (pSrcPixmap->drawable.width > max_tex_w ||
1378 	    pSrcPixmap->drawable.height > max_tex_h) {
1379 	    RADEON_FALLBACK(("Source w/h too large (%d,%d).\n",
1380 			     pSrcPixmap->drawable.width,
1381 			     pSrcPixmap->drawable.height));
1382 	}
1383     } else if (pSrcPicture->pSourcePict->type != SourcePictTypeSolidFill)
1384 	RADEON_FALLBACK(("Gradient pictures not supported yet\n"));
1385 
1386     if (pMaskPicture) {
1387 	PixmapPtr pMaskPixmap;
1388 
1389 	if (pMaskPicture->pDrawable) {
1390 	    pMaskPixmap = RADEONGetDrawablePixmap(pMaskPicture->pDrawable);
1391 
1392 	    if (pMaskPixmap->drawable.width > max_tex_w ||
1393 		pMaskPixmap->drawable.height > max_tex_h) {
1394 	      RADEON_FALLBACK(("Mask w/h too large (%d,%d).\n",
1395 			       pMaskPixmap->drawable.width,
1396 			       pMaskPixmap->drawable.height));
1397 	    }
1398 	} else if (pMaskPicture->pSourcePict->type != SourcePictTypeSolidFill)
1399 	    RADEON_FALLBACK(("Gradient pictures not supported yet\n"));
1400 
1401 	if (pMaskPicture->componentAlpha) {
1402 	    /* Check if it's component alpha that relies on a source alpha and
1403 	     * on the source value.  We can only get one of those into the
1404 	     * single source value that we get to blend with.
1405 	     */
1406 	    if (RadeonBlendOp[op].src_alpha &&
1407 		(RadeonBlendOp[op].blend_cntl & RADEON_SRC_BLEND_MASK) !=
1408 		RADEON_SRC_BLEND_GL_ZERO) {
1409 		RADEON_FALLBACK(("Component alpha not supported with source "
1410 				 "alpha and source value blending.\n"));
1411 	    }
1412 	}
1413 
1414 	if (!R300CheckCompositeTexture(pMaskPicture, pDstPicture, op, 1, IS_R500_3D))
1415 	    return FALSE;
1416     }
1417 
1418     if (!R300CheckCompositeTexture(pSrcPicture, pDstPicture, op, 0, IS_R500_3D))
1419 	return FALSE;
1420 
1421     if (!R300GetDestFormat(pDstPicture, &tmp1))
1422 	return FALSE;
1423 
1424     return TRUE;
1425 
1426 }
1427 
R300PrepareComposite(int op,PicturePtr pSrcPicture,PicturePtr pMaskPicture,PicturePtr pDstPicture,PixmapPtr pSrc,PixmapPtr pMask,PixmapPtr pDst)1428 static Bool R300PrepareComposite(int op, PicturePtr pSrcPicture,
1429 				PicturePtr pMaskPicture, PicturePtr pDstPicture,
1430 				PixmapPtr pSrc, PixmapPtr pMask, PixmapPtr pDst)
1431 {
1432     ScreenPtr pScreen = pDst->drawable.pScreen;
1433     RINFO_FROM_SCREEN(pScreen);
1434     uint32_t dst_format, dst_pitch;
1435     uint32_t txenable, colorpitch;
1436     uint32_t blendcntl, output_fmt;
1437     uint32_t src_color, src_alpha;
1438     uint32_t mask_color, mask_alpha;
1439     int pixel_shift;
1440     struct radeon_exa_pixmap_priv *driver_priv;
1441     TRACE;
1442 
1443     if (!R300GetDestFormat(pDstPicture, &dst_format))
1444 	return FALSE;
1445 
1446     pixel_shift = pDst->drawable.bitsPerPixel >> 4;
1447 
1448     dst_pitch = exaGetPixmapPitch(pDst);
1449     colorpitch = dst_pitch >> pixel_shift;
1450 
1451     if (RADEONPixmapIsColortiled(pDst))
1452 	colorpitch |= R300_COLORTILE;
1453 
1454     colorpitch |= dst_format;
1455 
1456     if (((dst_pitch >> pixel_shift) & 0x7) != 0)
1457 	RADEON_FALLBACK(("Bad destination pitch 0x%x\n", (int)dst_pitch));
1458 
1459     if (!pSrc) {
1460 	pSrc = RADEONSolidPixmap(pScreen, cpu_to_le32(pSrcPicture->pSourcePict->solidFill.color));
1461 	if (!pSrc)
1462 	    RADEON_FALLBACK(("Failed to create solid scratch pixmap\n"));
1463     }
1464 
1465     if (!RADEONSetupSourceTile(pSrcPicture, pSrc, TRUE, FALSE))
1466 	return FALSE;
1467 
1468     if (pMaskPicture && !pMask) {
1469 	pMask = RADEONSolidPixmap(pScreen, cpu_to_le32(pMaskPicture->pSourcePict->solidFill.color));
1470 	if (!pMask) {
1471 	    if (!pSrcPicture->pDrawable)
1472 		pScreen->DestroyPixmap(pSrc);
1473 	    RADEON_FALLBACK(("Failed to create solid scratch pixmap\n"));
1474 	}
1475     }
1476 
1477     RADEONPrepareCompositeCS(op, pSrcPicture, pMaskPicture, pDstPicture,
1478 			     pSrc, pMask, pDst);
1479 
1480     /* have to execute switch after doing buffer sizing check as the latter flushes */
1481     RADEON_SWITCH_TO_3D();
1482 
1483     if (!R300TextureSetup(pSrcPicture, pSrc, 0))
1484 	return FALSE;
1485     txenable = R300_TEX_0_ENABLE;
1486 
1487     if (pMask) {
1488 	if (!R300TextureSetup(pMaskPicture, pMask, 1))
1489 	    return FALSE;
1490 	txenable |= R300_TEX_1_ENABLE;
1491     } else {
1492 	info->accel_state->is_transform[1] = FALSE;
1493     }
1494 
1495     /* setup the VAP */
1496     if (info->accel_state->has_tcl) {
1497 	if (pMask)
1498 	    BEGIN_RING(2*10);
1499 	else
1500 	    BEGIN_RING(2*9);
1501 	OUT_RING_REG(R300_VAP_PVS_STATE_FLUSH_REG, 0);
1502     } else {
1503 	if (pMask)
1504 	    BEGIN_RING(2*6);
1505 	else
1506 	    BEGIN_RING(2*5);
1507     }
1508 
    /* These registers define the number, type, and location of data submitted
     * to the PVS unit or GA input (when PVS is disabled)
     * DST_VEC_LOC is the slot in the PVS input vector memory when PVS/TCL is
     * enabled.  This memory provides the inputs to the vertex shader program
     * and ordering is not important.  When PVS/TCL is disabled, this field maps
     * directly to the GA input memory and the order is significant.  In
     * PVS_BYPASS mode the order is as follows:
     * Position
     * Point Size
     * Color 0-3
     * Textures 0-7
     * Fog
     */
1522     if (pMask) {
1523 	OUT_RING_REG(R300_VAP_PROG_STREAM_CNTL_0,
1524 		      ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_0_SHIFT) |
1525 		       (0 << R300_SKIP_DWORDS_0_SHIFT) |
1526 		       (0 << R300_DST_VEC_LOC_0_SHIFT) |
1527 		       R300_SIGNED_0 |
1528 		       (R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_1_SHIFT) |
1529 		       (0 << R300_SKIP_DWORDS_1_SHIFT) |
1530 		       (6 << R300_DST_VEC_LOC_1_SHIFT) |
1531 		       R300_SIGNED_1));
1532 	OUT_RING_REG(R300_VAP_PROG_STREAM_CNTL_1,
1533 		      ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_2_SHIFT) |
1534 		       (0 << R300_SKIP_DWORDS_2_SHIFT) |
1535 		       (7 << R300_DST_VEC_LOC_2_SHIFT) |
1536 		       R300_LAST_VEC_2 |
1537 		       R300_SIGNED_2));
1538     } else
1539 	OUT_RING_REG(R300_VAP_PROG_STREAM_CNTL_0,
1540 		      ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_0_SHIFT) |
1541 		       (0 << R300_SKIP_DWORDS_0_SHIFT) |
1542 		       (0 << R300_DST_VEC_LOC_0_SHIFT) |
1543 		       R300_SIGNED_0 |
1544 		       (R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_1_SHIFT) |
1545 		       (0 << R300_SKIP_DWORDS_1_SHIFT) |
1546 		       (6 << R300_DST_VEC_LOC_1_SHIFT) |
1547 		       R300_LAST_VEC_1 |
1548 		       R300_SIGNED_1));
1549 
1550     /* load the vertex shader
1551      * We pre-load vertex programs in RADEONInit3DEngine():
1552      * - exa
1553      * - Xv
1554      * - Xv bicubic
1555      * Here we select the offset of the vertex program we want to use
1556      */
1557     if (info->accel_state->has_tcl) {
1558 	if (pMask) {
1559 	    /* consts used by vertex shaders */
1560 	    OUT_RING_REG(R300_VAP_PVS_CONST_CNTL, (R300_PVS_CONST_BASE_OFFSET(0) |
1561 						    R300_PVS_MAX_CONST_ADDR(3)));
1562 	    OUT_RING_REG(R300_VAP_PVS_CODE_CNTL_0,
1563 			  ((0 << R300_PVS_FIRST_INST_SHIFT) |
1564 			   (8 << R300_PVS_XYZW_VALID_INST_SHIFT) |
1565 			   (8 << R300_PVS_LAST_INST_SHIFT)));
1566 	    OUT_RING_REG(R300_VAP_PVS_CODE_CNTL_1,
1567 			  (8 << R300_PVS_LAST_VTX_SRC_INST_SHIFT));
1568 	} else {
1569 	    /* consts used by vertex shaders */
1570 	    OUT_RING_REG(R300_VAP_PVS_CONST_CNTL, (R300_PVS_CONST_BASE_OFFSET(0) |
1571 						    R300_PVS_MAX_CONST_ADDR(3)));
1572 	    OUT_RING_REG(R300_VAP_PVS_CODE_CNTL_0,
1573 			  ((0 << R300_PVS_FIRST_INST_SHIFT) |
1574 			   (4 << R300_PVS_XYZW_VALID_INST_SHIFT) |
1575 			   (4 << R300_PVS_LAST_INST_SHIFT)));
1576 	    OUT_RING_REG(R300_VAP_PVS_CODE_CNTL_1,
1577 			  (4 << R300_PVS_LAST_VTX_SRC_INST_SHIFT));
1578 	}
1579     }
1580 
1581     /* Position and one or two sets of 2 texture coordinates */
1582     OUT_RING_REG(R300_VAP_OUT_VTX_FMT_0, R300_VTX_POS_PRESENT);
1583     if (pMask)
1584 	OUT_RING_REG(R300_VAP_OUT_VTX_FMT_1,
1585 		      ((2 << R300_TEX_0_COMP_CNT_SHIFT) |
1586 		       (2 << R300_TEX_1_COMP_CNT_SHIFT)));
1587     else
1588 	OUT_RING_REG(R300_VAP_OUT_VTX_FMT_1,
1589 		      (2 << R300_TEX_0_COMP_CNT_SHIFT));
1590 
1591     OUT_RING_REG(R300_TX_INVALTAGS, 0x0);
1592     OUT_RING_REG(R300_TX_ENABLE, txenable);
1593     ADVANCE_RING();
1594 
1595     /* shader output swizzling */
1596     switch (pDstPicture->format) {
1597     case PICT_a8r8g8b8:
1598     case PICT_x8r8g8b8:
1599     default:
1600 	output_fmt = (R300_OUT_FMT_C4_8 |
1601 		      R300_OUT_FMT_C0_SEL_BLUE |
1602 		      R300_OUT_FMT_C1_SEL_GREEN |
1603 		      R300_OUT_FMT_C2_SEL_RED |
1604 		      R300_OUT_FMT_C3_SEL_ALPHA);
1605 	break;
1606     case PICT_a8b8g8r8:
1607     case PICT_x8b8g8r8:
1608 	output_fmt = (R300_OUT_FMT_C4_8 |
1609 		      R300_OUT_FMT_C0_SEL_RED |
1610 		      R300_OUT_FMT_C1_SEL_GREEN |
1611 		      R300_OUT_FMT_C2_SEL_BLUE |
1612 		      R300_OUT_FMT_C3_SEL_ALPHA);
1613 	break;
1614     case PICT_b8g8r8a8:
1615     case PICT_b8g8r8x8:
1616 	output_fmt = (R300_OUT_FMT_C4_8 |
1617 		      R300_OUT_FMT_C0_SEL_ALPHA |
1618 		      R300_OUT_FMT_C1_SEL_RED |
1619 		      R300_OUT_FMT_C2_SEL_GREEN |
1620 		      R300_OUT_FMT_C3_SEL_BLUE);
1621 	break;
1622     case PICT_a8:
1623 	output_fmt = (R300_OUT_FMT_C4_8 |
1624 		      R300_OUT_FMT_C0_SEL_ALPHA);
1625 	break;
1626     }
1627 
1628     /* setup pixel shader */
1629     if (IS_R300_3D) {
1630 	if (PICT_FORMAT_RGB(pSrcPicture->format) == 0)
1631 	    src_color = R300_ALU_RGB_0_0;
1632 	else
1633 	    src_color = R300_ALU_RGB_SRC0_RGB;
1634 
1635 	if (PICT_FORMAT_A(pSrcPicture->format) == 0)
1636 	    src_alpha = R300_ALU_ALPHA_1_0;
1637 	else
1638 	    src_alpha = R300_ALU_ALPHA_SRC0_A;
1639 
1640 	if (pMask) {
1641 	    if (pMaskPicture->componentAlpha) {
1642 		if (RadeonBlendOp[op].src_alpha) {
1643 		    if (PICT_FORMAT_A(pSrcPicture->format) == 0)
1644 			src_color = R300_ALU_RGB_1_0;
1645 		    else
1646 			src_color = R300_ALU_RGB_SRC0_AAA;
1647 		} else
1648 		    src_color = R300_ALU_RGB_SRC0_RGB;
1649 		mask_color = R300_ALU_RGB_SRC1_RGB;
1650 	    } else {
1651 		if (PICT_FORMAT_A(pMaskPicture->format) == 0)
1652 		    mask_color = R300_ALU_RGB_1_0;
1653 		else
1654 		    mask_color = R300_ALU_RGB_SRC1_AAA;
1655 	    }
1656 	    if (PICT_FORMAT_A(pMaskPicture->format) == 0)
1657 		mask_alpha = R300_ALU_ALPHA_1_0;
1658 	    else
1659 		mask_alpha = R300_ALU_ALPHA_SRC1_A;
1660 	} else {
1661 	    mask_color = R300_ALU_RGB_1_0;
1662 	    mask_alpha = R300_ALU_ALPHA_1_0;
1663 	}
1664 
1665 	/* setup the rasterizer, load FS */
1666 	if (pMask) {
1667 	    BEGIN_RING(2*16);
1668 	    /* 4 components: 2 for tex0, 2 for tex1 */
1669 	    OUT_RING_REG(R300_RS_COUNT,
1670 			  ((4 << R300_RS_COUNT_IT_COUNT_SHIFT) |
1671 			   R300_RS_COUNT_HIRES_EN));
1672 
1673 	    /* R300_INST_COUNT_RS - highest RS instruction used */
1674 	    OUT_RING_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(1));
1675 
1676 	    OUT_RING_REG(R300_US_CODE_OFFSET, (R300_ALU_CODE_OFFSET(0) |
1677 						R300_ALU_CODE_SIZE(0) |
1678 						R300_TEX_CODE_OFFSET(0) |
1679 						R300_TEX_CODE_SIZE(1)));
1680 
1681 	    OUT_RING_REG(R300_US_CODE_ADDR_3,
1682 			  (R300_ALU_START(0) |
1683 			   R300_ALU_SIZE(0) |
1684 			   R300_TEX_START(0) |
1685 			   R300_TEX_SIZE(1) |
1686 			   R300_RGBA_OUT));
1687 
1688 
1689 	} else {
1690 	    BEGIN_RING(2*15);
1691 	    /* 2 components: 2 for tex0 */
1692 	    OUT_RING_REG(R300_RS_COUNT,
1693 			  ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) |
1694 			   R300_RS_COUNT_HIRES_EN));
1695 
1696 	    OUT_RING_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0));
1697 
1698 	    OUT_RING_REG(R300_US_CODE_OFFSET, (R300_ALU_CODE_OFFSET(0) |
1699 						R300_ALU_CODE_SIZE(0) |
1700 						R300_TEX_CODE_OFFSET(0) |
1701 						R300_TEX_CODE_SIZE(0)));
1702 
1703 	    OUT_RING_REG(R300_US_CODE_ADDR_3,
1704 			  (R300_ALU_START(0) |
1705 			   R300_ALU_SIZE(0) |
1706 			   R300_TEX_START(0) |
1707 			   R300_TEX_SIZE(0) |
1708 			   R300_RGBA_OUT));
1709 
1710 	}
1711 
1712 	OUT_RING_REG(R300_US_CONFIG, (0 << R300_NLEVEL_SHIFT) | R300_FIRST_TEX);
1713 	OUT_RING_REG(R300_US_CODE_ADDR_0,
1714 		      (R300_ALU_START(0) |
1715 		       R300_ALU_SIZE(0) |
1716 		       R300_TEX_START(0) |
1717 		       R300_TEX_SIZE(0)));
1718 	OUT_RING_REG(R300_US_CODE_ADDR_1,
1719 		      (R300_ALU_START(0) |
1720 		       R300_ALU_SIZE(0) |
1721 		       R300_TEX_START(0) |
1722 		       R300_TEX_SIZE(0)));
1723 	OUT_RING_REG(R300_US_CODE_ADDR_2,
1724 		      (R300_ALU_START(0) |
1725 		       R300_ALU_SIZE(0) |
1726 		       R300_TEX_START(0) |
1727 		       R300_TEX_SIZE(0)));
1728 
1729 	OUT_RING_REG(R300_US_PIXSIZE, 1); /* highest temp used */
1730 	/* shader output swizzling */
1731 	OUT_RING_REG(R300_US_OUT_FMT_0, output_fmt);
1732 
1733 	/* tex inst for src texture */
1734 	OUT_RING_REG(R300_US_TEX_INST(0),
1735 		      (R300_TEX_SRC_ADDR(0) |
1736 		       R300_TEX_DST_ADDR(0) |
1737 		       R300_TEX_ID(0) |
1738 		       R300_TEX_INST(R300_TEX_INST_LD)));
1739 
1740 	if (pMask) {
1741 	    /* tex inst for mask texture */
1742 	    OUT_RING_REG(R300_US_TEX_INST(1),
1743 			  (R300_TEX_SRC_ADDR(1) |
1744 			   R300_TEX_DST_ADDR(1) |
1745 			   R300_TEX_ID(1) |
1746 			   R300_TEX_INST(R300_TEX_INST_LD)));
1747 	}
1748 
1749 	/* RGB inst
1750 	 * temp addresses for texture inputs
1751 	 * ALU_RGB_ADDR0 is src tex (temp 0)
1752 	 * ALU_RGB_ADDR1 is mask tex (temp 1)
1753 	 * R300_ALU_RGB_OMASK - output components to write
1754 	 * R300_ALU_RGB_TARGET_A - render target
1755 	 */
1756 	OUT_RING_REG(R300_US_ALU_RGB_ADDR(0),
1757 		      (R300_ALU_RGB_ADDR0(0) |
1758 		       R300_ALU_RGB_ADDR1(1) |
1759 		       R300_ALU_RGB_ADDR2(0) |
1760 		       R300_ALU_RGB_ADDRD(0) |
1761 		       R300_ALU_RGB_OMASK((R300_ALU_RGB_MASK_R |
1762 					   R300_ALU_RGB_MASK_G |
1763 					   R300_ALU_RGB_MASK_B)) |
1764 		       R300_ALU_RGB_TARGET_A));
1765 	/* RGB inst
1766 	 * ALU operation
1767 	 */
1768 	OUT_RING_REG(R300_US_ALU_RGB_INST(0),
1769 		      (R300_ALU_RGB_SEL_A(src_color) |
1770 		       R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) |
1771 		       R300_ALU_RGB_SEL_B(mask_color) |
1772 		       R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) |
1773 		       R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0) |
1774 		       R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) |
1775 		       R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
1776 		       R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE) |
1777 		       R300_ALU_RGB_CLAMP));
1778 	/* Alpha inst
1779 	 * temp addresses for texture inputs
1780 	 * ALU_ALPHA_ADDR0 is src tex (0)
1781 	 * ALU_ALPHA_ADDR1 is mask tex (1)
1782 	 * R300_ALU_ALPHA_OMASK - output components to write
1783 	 * R300_ALU_ALPHA_TARGET_A - render target
1784 	 */
1785 	OUT_RING_REG(R300_US_ALU_ALPHA_ADDR(0),
1786 		      (R300_ALU_ALPHA_ADDR0(0) |
1787 		       R300_ALU_ALPHA_ADDR1(1) |
1788 		       R300_ALU_ALPHA_ADDR2(0) |
1789 		       R300_ALU_ALPHA_ADDRD(0) |
1790 		       R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_A) |
1791 		       R300_ALU_ALPHA_TARGET_A |
1792 		       R300_ALU_ALPHA_OMASK_W(R300_ALU_ALPHA_MASK_NONE)));
1793 	/* Alpha inst
1794 	 * ALU operation
1795 	 */
1796 	OUT_RING_REG(R300_US_ALU_ALPHA_INST(0),
1797 		      (R300_ALU_ALPHA_SEL_A(src_alpha) |
1798 		       R300_ALU_ALPHA_MOD_A(R300_ALU_ALPHA_MOD_NOP) |
1799 		       R300_ALU_ALPHA_SEL_B(mask_alpha) |
1800 		       R300_ALU_ALPHA_MOD_B(R300_ALU_ALPHA_MOD_NOP) |
1801 		       R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0) |
1802 		       R300_ALU_ALPHA_MOD_C(R300_ALU_ALPHA_MOD_NOP) |
1803 		       R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
1804 		       R300_ALU_ALPHA_OMOD(R300_ALU_ALPHA_OMOD_NONE) |
1805 		       R300_ALU_ALPHA_CLAMP));
1806 	ADVANCE_RING();
1807     } else {
1808 	if (PICT_FORMAT_RGB(pSrcPicture->format) == 0)
1809 	    src_color = (R500_ALU_RGB_R_SWIZ_A_0 |
1810 			 R500_ALU_RGB_G_SWIZ_A_0 |
1811 			 R500_ALU_RGB_B_SWIZ_A_0);
1812 	else
1813 	    src_color = (R500_ALU_RGB_R_SWIZ_A_R |
1814 			 R500_ALU_RGB_G_SWIZ_A_G |
1815 			 R500_ALU_RGB_B_SWIZ_A_B);
1816 
1817 	if (PICT_FORMAT_A(pSrcPicture->format) == 0)
1818 	    src_alpha = R500_ALPHA_SWIZ_A_1;
1819 	else
1820 	    src_alpha = R500_ALPHA_SWIZ_A_A;
1821 
1822 	if (pMask) {
1823 	    if (pMaskPicture->componentAlpha) {
1824 		if (RadeonBlendOp[op].src_alpha) {
1825 		    if (PICT_FORMAT_A(pSrcPicture->format) == 0)
1826 			src_color = (R500_ALU_RGB_R_SWIZ_A_1 |
1827 				     R500_ALU_RGB_G_SWIZ_A_1 |
1828 				     R500_ALU_RGB_B_SWIZ_A_1);
1829 		    else
1830 			src_color = (R500_ALU_RGB_R_SWIZ_A_A |
1831 				     R500_ALU_RGB_G_SWIZ_A_A |
1832 				     R500_ALU_RGB_B_SWIZ_A_A);
1833 		} else
1834 		    src_color = (R500_ALU_RGB_R_SWIZ_A_R |
1835 				 R500_ALU_RGB_G_SWIZ_A_G |
1836 				 R500_ALU_RGB_B_SWIZ_A_B);
1837 
1838 		mask_color = (R500_ALU_RGB_R_SWIZ_B_R |
1839 			      R500_ALU_RGB_G_SWIZ_B_G |
1840 			      R500_ALU_RGB_B_SWIZ_B_B);
1841 	    } else {
1842 		if (PICT_FORMAT_A(pMaskPicture->format) == 0)
1843 		    mask_color = (R500_ALU_RGB_R_SWIZ_B_1 |
1844 				  R500_ALU_RGB_G_SWIZ_B_1 |
1845 				  R500_ALU_RGB_B_SWIZ_B_1);
1846 		else
1847 		    mask_color = (R500_ALU_RGB_R_SWIZ_B_A |
1848 				  R500_ALU_RGB_G_SWIZ_B_A |
1849 				  R500_ALU_RGB_B_SWIZ_B_A);
1850 	    }
1851 	    if (PICT_FORMAT_A(pMaskPicture->format) == 0)
1852 		mask_alpha = R500_ALPHA_SWIZ_B_1;
1853 	    else
1854 		mask_alpha = R500_ALPHA_SWIZ_B_A;
1855 	} else {
1856 	    mask_color = (R500_ALU_RGB_R_SWIZ_B_1 |
1857 			  R500_ALU_RGB_G_SWIZ_B_1 |
1858 			  R500_ALU_RGB_B_SWIZ_B_1);
1859 	    mask_alpha = R500_ALPHA_SWIZ_B_1;
1860 	}
1861 
1862 	BEGIN_RING(2*7);
1863 	if (pMask) {
1864 	    /* 4 components: 2 for tex0, 2 for tex1 */
1865 	    OUT_RING_REG(R300_RS_COUNT,
1866 			  ((4 << R300_RS_COUNT_IT_COUNT_SHIFT) |
1867 			   R300_RS_COUNT_HIRES_EN));
1868 
1869 	    /* 2 RS instructions: 1 for tex0 (src), 1 for tex1 (mask) */
1870 	    OUT_RING_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(1));
1871 
1872 	    OUT_RING_REG(R500_US_CODE_ADDR, (R500_US_CODE_START_ADDR(0) |
1873 					      R500_US_CODE_END_ADDR(2)));
1874 	    OUT_RING_REG(R500_US_CODE_RANGE, (R500_US_CODE_RANGE_ADDR(0) |
1875 					       R500_US_CODE_RANGE_SIZE(2)));
1876 	    OUT_RING_REG(R500_US_CODE_OFFSET, 0);
1877 	} else {
1878 	    OUT_RING_REG(R300_RS_COUNT,
1879 			  ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) |
1880 			   R300_RS_COUNT_HIRES_EN));
1881 
1882 	    OUT_RING_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0));
1883 
1884 	    OUT_RING_REG(R500_US_CODE_ADDR, (R500_US_CODE_START_ADDR(0) |
1885 					      R500_US_CODE_END_ADDR(1)));
1886 	    OUT_RING_REG(R500_US_CODE_RANGE, (R500_US_CODE_RANGE_ADDR(0) |
1887 					       R500_US_CODE_RANGE_SIZE(1)));
1888 	    OUT_RING_REG(R500_US_CODE_OFFSET, 0);
1889 	}
1890 
1891 	OUT_RING_REG(R300_US_PIXSIZE, 1); /* highest temp used */
1892 	OUT_RING_REG(R300_US_OUT_FMT_0, output_fmt);
1893 	ADVANCE_RING();
1894 
1895 	if (pMask) {
1896 	    BEGIN_RING(2*19);
1897 	    OUT_RING_REG(R500_GA_US_VECTOR_INDEX, R500_US_VECTOR_INST_INDEX(0));
1898 	    /* tex inst for src texture */
1899 	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX |
1900 						   R500_INST_RGB_WMASK_R |
1901 						   R500_INST_RGB_WMASK_G |
1902 						   R500_INST_RGB_WMASK_B |
1903 						   R500_INST_ALPHA_WMASK |
1904 						   R500_INST_RGB_CLAMP |
1905 						   R500_INST_ALPHA_CLAMP));
1906 
1907 	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) |
1908 						   R500_TEX_INST_LD |
1909 						   R500_TEX_IGNORE_UNCOVERED));
1910 
1911 	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) |
1912 						   R500_TEX_SRC_S_SWIZ_R |
1913 						   R500_TEX_SRC_T_SWIZ_G |
1914 						   R500_TEX_DST_ADDR(0) |
1915 						   R500_TEX_DST_R_SWIZ_R |
1916 						   R500_TEX_DST_G_SWIZ_G |
1917 						   R500_TEX_DST_B_SWIZ_B |
1918 						   R500_TEX_DST_A_SWIZ_A));
1919 	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_DX_ADDR(0) |
1920 						   R500_DX_S_SWIZ_R |
1921 						   R500_DX_T_SWIZ_R |
1922 						   R500_DX_R_SWIZ_R |
1923 						   R500_DX_Q_SWIZ_R |
1924 						   R500_DY_ADDR(0) |
1925 						   R500_DY_S_SWIZ_R |
1926 						   R500_DY_T_SWIZ_R |
1927 						   R500_DY_R_SWIZ_R |
1928 						   R500_DY_Q_SWIZ_R));
1929 	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
1930 	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
1931 
1932 	    /* tex inst for mask texture */
1933 	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX |
1934 						   R500_INST_TEX_SEM_WAIT |
1935 						   R500_INST_RGB_WMASK_R |
1936 						   R500_INST_RGB_WMASK_G |
1937 						   R500_INST_RGB_WMASK_B |
1938 						   R500_INST_ALPHA_WMASK |
1939 						   R500_INST_RGB_CLAMP |
1940 						   R500_INST_ALPHA_CLAMP));
1941 
1942 	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(1) |
1943 						   R500_TEX_INST_LD |
1944 						   R500_TEX_SEM_ACQUIRE |
1945 						   R500_TEX_IGNORE_UNCOVERED));
1946 
1947 	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(1) |
1948 						   R500_TEX_SRC_S_SWIZ_R |
1949 						   R500_TEX_SRC_T_SWIZ_G |
1950 						   R500_TEX_DST_ADDR(1) |
1951 						   R500_TEX_DST_R_SWIZ_R |
1952 						   R500_TEX_DST_G_SWIZ_G |
1953 						   R500_TEX_DST_B_SWIZ_B |
1954 						   R500_TEX_DST_A_SWIZ_A));
1955 	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_DX_ADDR(1) |
1956 						   R500_DX_S_SWIZ_R |
1957 						   R500_DX_T_SWIZ_R |
1958 						   R500_DX_R_SWIZ_R |
1959 						   R500_DX_Q_SWIZ_R |
1960 						   R500_DY_ADDR(1) |
1961 						   R500_DY_S_SWIZ_R |
1962 						   R500_DY_T_SWIZ_R |
1963 						   R500_DY_R_SWIZ_R |
1964 						   R500_DY_Q_SWIZ_R));
1965 	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
1966 	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
1967 	} else {
1968 	    BEGIN_RING(2*13);
1969 	    OUT_RING_REG(R500_GA_US_VECTOR_INDEX, R500_US_VECTOR_INST_INDEX(0));
1970 	    /* tex inst for src texture */
1971 	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX |
1972 						   R500_INST_TEX_SEM_WAIT |
1973 						   R500_INST_RGB_WMASK_R |
1974 						   R500_INST_RGB_WMASK_G |
1975 						   R500_INST_RGB_WMASK_B |
1976 						   R500_INST_ALPHA_WMASK |
1977 						   R500_INST_RGB_CLAMP |
1978 						   R500_INST_ALPHA_CLAMP));
1979 
1980 	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) |
1981 						   R500_TEX_INST_LD |
1982 						   R500_TEX_SEM_ACQUIRE |
1983 						   R500_TEX_IGNORE_UNCOVERED));
1984 
1985 	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) |
1986 						   R500_TEX_SRC_S_SWIZ_R |
1987 						   R500_TEX_SRC_T_SWIZ_G |
1988 						   R500_TEX_DST_ADDR(0) |
1989 						   R500_TEX_DST_R_SWIZ_R |
1990 						   R500_TEX_DST_G_SWIZ_G |
1991 						   R500_TEX_DST_B_SWIZ_B |
1992 						   R500_TEX_DST_A_SWIZ_A));
1993 	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_DX_ADDR(0) |
1994 						   R500_DX_S_SWIZ_R |
1995 						   R500_DX_T_SWIZ_R |
1996 						   R500_DX_R_SWIZ_R |
1997 						   R500_DX_Q_SWIZ_R |
1998 						   R500_DY_ADDR(0) |
1999 						   R500_DY_S_SWIZ_R |
2000 						   R500_DY_T_SWIZ_R |
2001 						   R500_DY_R_SWIZ_R |
2002 						   R500_DY_Q_SWIZ_R));
2003 	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
2004 	    OUT_RING_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
2005 	}
2006 
2007 	/* ALU inst */
2008 	/* *_OMASK* - output component write mask */
2009 	OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_OUT |
2010 					       R500_INST_TEX_SEM_WAIT |
2011 					       R500_INST_LAST |
2012 					       R500_INST_RGB_OMASK_R |
2013 					       R500_INST_RGB_OMASK_G |
2014 					       R500_INST_RGB_OMASK_B |
2015 					       R500_INST_ALPHA_OMASK |
2016 					       R500_INST_RGB_CLAMP |
2017 					       R500_INST_ALPHA_CLAMP));
2018 	/* ALU inst
2019 	 * temp addresses for texture inputs
2020 	 * RGB_ADDR0 is src tex (temp 0)
2021 	 * RGB_ADDR1 is mask tex (temp 1)
2022 	 */
2023 	OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) |
2024 					       R500_RGB_ADDR1(1) |
2025 					       R500_RGB_ADDR2(0)));
2026 	/* ALU inst
2027 	 * temp addresses for texture inputs
2028 	 * ALPHA_ADDR0 is src tex (temp 0)
2029 	 * ALPHA_ADDR1 is mask tex (temp 1)
2030 	 */
2031 	OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) |
2032 					       R500_ALPHA_ADDR1(1) |
2033 					       R500_ALPHA_ADDR2(0)));
2034 
2035 	/* R500_ALU_RGB_TARGET - RGB render target */
2036 	OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 |
2037 					       src_color |
2038 					       R500_ALU_RGB_SEL_B_SRC1 |
2039 					       mask_color |
2040 					       R500_ALU_RGB_TARGET(0)));
2041 
2042 	/* R500_ALPHA_RGB_TARGET - alpha render target */
2043 	OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD |
2044 					       R500_ALPHA_ADDRD(0) |
2045 					       R500_ALPHA_SEL_A_SRC0 |
2046 					       src_alpha |
2047 					       R500_ALPHA_SEL_B_SRC1 |
2048 					       mask_alpha |
2049 					       R500_ALPHA_TARGET(0)));
2050 
2051 	OUT_RING_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_OP_MAD |
2052 					       R500_ALU_RGBA_ADDRD(0) |
2053 					       R500_ALU_RGBA_R_SWIZ_0 |
2054 					       R500_ALU_RGBA_G_SWIZ_0 |
2055 					       R500_ALU_RGBA_B_SWIZ_0 |
2056 					       R500_ALU_RGBA_A_SWIZ_0));
2057 	ADVANCE_RING();
2058     }
2059 
2060     /* Clear out scissoring */
2061     BEGIN_RING(2*2);
2062     if (IS_R300_3D) {
2063 	OUT_RING_REG(R300_SC_SCISSOR0, ((1440 << R300_SCISSOR_X_SHIFT) |
2064 					 (1440 << R300_SCISSOR_Y_SHIFT)));
2065 	OUT_RING_REG(R300_SC_SCISSOR1, (((pDst->drawable.width + 1440 - 1) << R300_SCISSOR_X_SHIFT) |
2066 					 ((pDst->drawable.height + 1440 - 1) << R300_SCISSOR_Y_SHIFT)));
2067 
2068     } else {
2069 	OUT_RING_REG(R300_SC_SCISSOR0, ((0 << R300_SCISSOR_X_SHIFT) |
2070 					 (0 << R300_SCISSOR_Y_SHIFT)));
2071 	OUT_RING_REG(R300_SC_SCISSOR1, (((pDst->drawable.width - 1) << R300_SCISSOR_X_SHIFT) |
2072 					 ((pDst->drawable.height - 1) << R300_SCISSOR_Y_SHIFT)));
2073     }
2074     ADVANCE_RING();
2075 
2076 
2077     BEGIN_ACCEL_RELOC(3, 2);
2078     EMIT_WRITE_OFFSET(R300_RB3D_COLOROFFSET0, 0, pDst);
2079     EMIT_COLORPITCH(R300_RB3D_COLORPITCH0, colorpitch, pDst);
2080 
2081     blendcntl = RADEONGetBlendCntl(op, pMaskPicture, pDstPicture->format);
2082     OUT_RING_REG(R300_RB3D_BLENDCNTL, blendcntl | R300_ALPHA_BLEND_ENABLE | R300_READ_ENABLE);
2083 
2084     ADVANCE_RING();
2085 
2086     BEGIN_RING(2*1);
2087     if (pMask)
2088 	OUT_RING_REG(R300_VAP_VTX_SIZE, 6);
2089     else
2090 	OUT_RING_REG(R300_VAP_VTX_SIZE, 4);
2091     ADVANCE_RING();
2092 
2093     return TRUE;
2094 }
2095 
/*
 * Close the batch of composite rectangles that is currently being built.
 *
 * If a draw packet header is still open (accel_state->draw_header is set),
 * its placeholder words are rewritten with the final size, computed from
 * accel_state->num_vtx and accel_state->vtx_count, and the header pointer
 * is cleared.  Afterwards a WAIT_UNTIL is queued; on R300/R500 class
 * hardware the clip rule and a destination cache flush are queued first.
 *
 * pDst is only used to reach the screen (and through it the driver state).
 */
static void RadeonFinishComposite(PixmapPtr pDst)
{
    RINFO_FROM_SCREEN(pDst->drawable.pScreen);

    ENTER_DRAW(0);

    if (info->accel_state->draw_header != NULL) {
	if (info->ChipFamily >= CHIP_FAMILY_R200) {
	    /*
	     * R200 and newer share the two-word IMMD_2 header; only the
	     * primitive type differs (quads on R300/R500, rects otherwise).
	     */
	    uint32_t prim_type;

	    if (IS_R300_3D || IS_R500_3D)
		prim_type = RADEON_CP_VC_CNTL_PRIM_TYPE_QUAD_LIST;
	    else
		prim_type = RADEON_CP_VC_CNTL_PRIM_TYPE_RECT_LIST;

	    info->accel_state->draw_header[0] =
		CP_PACKET3(R200_CP_PACKET3_3D_DRAW_IMMD_2,
			   info->accel_state->num_vtx *
			   info->accel_state->vtx_count);
	    info->accel_state->draw_header[1] =
		(prim_type |
		 RADEON_CP_VC_CNTL_PRIM_WALK_RING |
		 (info->accel_state->num_vtx << RADEON_CP_VC_CNTL_NUM_SHIFT));
	} else {
	    /*
	     * Pre-R200 header is three words long: word 1 (the vertex
	     * format) is left as written, so only words 0 and 2 are patched.
	     */
	    info->accel_state->draw_header[0] =
		CP_PACKET3(RADEON_CP_PACKET3_3D_DRAW_IMMD,
			   info->accel_state->num_vtx *
			   info->accel_state->vtx_count + 1);
	    info->accel_state->draw_header[2] =
		(RADEON_CP_VC_CNTL_PRIM_TYPE_RECT_LIST |
		 RADEON_CP_VC_CNTL_PRIM_WALK_RING |
		 RADEON_CP_VC_CNTL_MAOS_ENABLE |
		 RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE |
		 (info->accel_state->num_vtx << RADEON_CP_VC_CNTL_NUM_SHIFT));
	}

	/* The batch is closed; the next tile must open a new header. */
	info->accel_state->draw_header = NULL;
    }

    if (!(IS_R300_3D || IS_R500_3D)) {
	/* Only the trailing WAIT_UNTIL register write is needed. */
	BEGIN_RING(2 * 1);
    } else {
	/* Clip rule + destination cache flush + WAIT_UNTIL. */
	BEGIN_RING(2 * 3);
	OUT_RING_REG(R300_SC_CLIP_RULE, 0xAAAA);
	OUT_RING_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_RB3D_DC_FLUSH_ALL);
    }
    OUT_RING_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN);
    ADVANCE_RING();

    LEAVE_DRAW(0);
}
2141 
/*
 * End a composite operation on pDst.
 *
 * Finishes the pending batch via RadeonFinishComposite(), then destroys the
 * source and mask pixmaps recorded in the acceleration state when their
 * pictures have no backing drawable.
 * NOTE(review): presumably those pixmaps were created by the driver itself
 * for drawable-less pictures during prepare -- confirm against the prepare
 * path.
 */
static void RadeonDoneComposite(PixmapPtr pDst)
{
    ScreenPtr screen = pDst->drawable.pScreen;
    RINFO_FROM_SCREEN(screen);
    struct radeon_accel_state *accel = info->accel_state;

    RadeonFinishComposite(pDst);

    /* Source picture without a drawable: release its pixmap. */
    if (accel->src_pic->pDrawable == NULL) {
	screen->DestroyPixmap(accel->src_pix);
    }

    /* The mask is optional; apply the same rule only when one is set. */
    if (accel->msk_pic != NULL && accel->msk_pic->pDrawable == NULL) {
	screen->DestroyPixmap(accel->msk_pix);
    }
}
2156 
/*
 * Vertex emitters used by RadeonCompositeTile().
 *
 * VTX_OUT writes one vertex as four ring words: destination x/y followed by
 * source texture s/t.  VTX_OUT_MASK writes the same four words and then two
 * more for the mask texture s/t.  Every coordinate goes through F_TO_DW()
 * before being handed to OUT_RING().
 */
#define VTX_OUT(_dstX, _dstY, _srcX, _srcY)				\
do {									\
    OUT_RING(F_TO_DW(_dstX));						\
    OUT_RING(F_TO_DW(_dstY));						\
    OUT_RING(F_TO_DW(_srcX));						\
    OUT_RING(F_TO_DW(_srcY));						\
} while (0)

#define VTX_OUT_MASK(_dstX, _dstY, _srcX, _srcY, _maskX, _maskY)	\
do {									\
    VTX_OUT(_dstX, _dstY, _srcX, _srcY);				\
    OUT_RING(F_TO_DW(_maskX));						\
    OUT_RING(F_TO_DW(_maskY));						\
} while (0)
2174 
/*
 * Apply a picture transform to a fixed-point 2D point, in place.
 *
 * The point is widened to a three-component vector whose last component is
 * xFixed1, passed to PictureTransformPoint(), and the first two components
 * of the result are written back to *point.  Whatever PictureTransformPoint()
 * reports back is not inspected here.
 */
static inline void transformPoint(PictTransform *transform, xPointFixed *point)
{
    PictVector vec;

    /* Build the (x, y, 1) input vector. */
    vec.vector[2] = xFixed1;
    vec.vector[1] = point->y;
    vec.vector[0] = point->x;

    PictureTransformPoint(transform, &vec);

    /* Only x and y are carried back to the caller. */
    point->y = vec.vector[1];
    point->x = vec.vector[0];
}
2185 
/*
 * Queue the vertices for one composite rectangle.
 *
 * pScrn, info   - screen and driver state (also used implicitly by the
 *                 ring macros)
 * pDst          - destination pixmap (passed on to the vline wait)
 * srcX, srcY    - top-left corner in the source picture
 * maskX, maskY  - top-left corner in the mask picture (used only when the
 *                 acceleration state has a mask picture)
 * dstX, dstY    - top-left corner in the destination
 * w, h          - rectangle size
 *
 * The first rectangle of a batch also emits a draw packet header whose
 * location is remembered in accel_state->draw_header; later rectangles only
 * append vertices and bump accel_state->num_vtx.  RadeonFinishComposite()
 * patches the header with the final counts.
 */
static void RadeonCompositeTile(ScrnInfoPtr pScrn,
				RADEONInfoPtr info,
				PixmapPtr pDst,
				int srcX, int srcY,
				int maskX, int maskY,
				int dstX, int dstY,
				int w, int h)
{
    int vtx_count;	/* ring words per vertex: 4 without mask, 6 with */
    int prim_verts;	/* vertices emitted per rectangle: 3 or 4 */
    xPointFixed srcTopLeft, srcTopRight, srcBottomLeft, srcBottomRight;
    /*
     * Per-call scratch only: filled in and read solely when a mask picture
     * is present.  Zero-initialised automatic storage (rather than function
     * statics) keeps the function free of hidden persistent state.
     */
    xPointFixed maskTopLeft = { 0 }, maskTopRight = { 0 };
    xPointFixed maskBottomLeft = { 0 }, maskBottomRight = { 0 };

    ENTER_DRAW(0);

    /* ErrorF("RadeonComposite (%d,%d) (%d,%d) (%d,%d) (%d,%d)\n",
       srcX, srcY, maskX, maskY,dstX, dstY, w, h); */

    /*
     * When CS_FULL() reports the command stream as full: close the current
     * batch, flush, and run PrepareComposite again with the saved operands
     * before continuing with this rectangle.
     */
    if (CS_FULL(info->cs)) {
	RadeonFinishComposite(info->accel_state->dst_pix);
	radeon_cs_flush_indirect(pScrn);
	info->accel_state->exa->PrepareComposite(info->accel_state->composite_op,
						 info->accel_state->src_pic,
						 info->accel_state->msk_pic,
						 info->accel_state->dst_pic,
						 info->accel_state->src_pix,
						 info->accel_state->msk_pix,
						 info->accel_state->dst_pix);
    }

    /* Source rectangle corners in fixed point. */
    srcTopLeft.x     = IntToxFixed(srcX);
    srcTopLeft.y     = IntToxFixed(srcY);
    srcTopRight.x    = IntToxFixed(srcX + w);
    srcTopRight.y    = IntToxFixed(srcY);
    srcBottomLeft.x  = IntToxFixed(srcX);
    srcBottomLeft.y  = IntToxFixed(srcY + h);
    srcBottomRight.x = IntToxFixed(srcX + w);
    srcBottomRight.y = IntToxFixed(srcY + h);

    /*
     * The source transform is applied here only on pre-R300 chips or when
     * has_tcl is not set.
     * NOTE(review): presumably the hardware applies it otherwise -- confirm.
     */
    if (info->accel_state->is_transform[0]) {
	if ((info->ChipFamily < CHIP_FAMILY_R300) || !info->accel_state->has_tcl) {
	    transformPoint(info->accel_state->transform[0], &srcTopLeft);
	    transformPoint(info->accel_state->transform[0], &srcTopRight);
	    transformPoint(info->accel_state->transform[0], &srcBottomLeft);
	    transformPoint(info->accel_state->transform[0], &srcBottomRight);
	}
    }

    if (info->accel_state->msk_pic) {
	/* Mask rectangle corners, handled exactly like the source ones. */
	maskTopLeft.x     = IntToxFixed(maskX);
	maskTopLeft.y     = IntToxFixed(maskY);
	maskTopRight.x    = IntToxFixed(maskX + w);
	maskTopRight.y    = IntToxFixed(maskY);
	maskBottomLeft.x  = IntToxFixed(maskX);
	maskBottomLeft.y  = IntToxFixed(maskY + h);
	maskBottomRight.x = IntToxFixed(maskX + w);
	maskBottomRight.y = IntToxFixed(maskY + h);

	if (info->accel_state->is_transform[1]) {
	    if ((info->ChipFamily < CHIP_FAMILY_R300) || !info->accel_state->has_tcl) {
		transformPoint(info->accel_state->transform[1], &maskTopLeft);
		transformPoint(info->accel_state->transform[1], &maskTopRight);
		transformPoint(info->accel_state->transform[1], &maskBottomLeft);
		transformPoint(info->accel_state->transform[1], &maskBottomRight);
	    }
	}

	vtx_count = 6;
    } else
	vtx_count = 4;

    if (info->accel_state->vsync)
        RADEONWaitForVLine(pScrn, pDst,
			   radeon_pick_best_crtc(pScrn, FALSE, dstX, dstX + w, dstY, dstY + h),
			   dstY, dstY + h);

    /*
     * Open a draw packet if none is pending.  The header layout depends on
     * the chip family; the counts written here describe a single rectangle
     * and are rewritten by RadeonFinishComposite().
     */
    if (info->ChipFamily < CHIP_FAMILY_R200) {
	prim_verts = 3;
	if (!info->accel_state->draw_header) {
	    BEGIN_RING(3);

	    info->accel_state->draw_header = info->cs->packets + info->cs->cdw;
	    info->accel_state->num_vtx = 0;
	    info->accel_state->vtx_count = vtx_count;

	    OUT_RING(CP_PACKET3(RADEON_CP_PACKET3_3D_DRAW_IMMD,
				prim_verts * vtx_count + 1));
	    if (info->accel_state->msk_pic)
		OUT_RING(RADEON_CP_VC_FRMT_XY |
			 RADEON_CP_VC_FRMT_ST0 |
			 RADEON_CP_VC_FRMT_ST1);
	    else
		OUT_RING(RADEON_CP_VC_FRMT_XY |
			 RADEON_CP_VC_FRMT_ST0);
	    OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_RECT_LIST |
		     RADEON_CP_VC_CNTL_PRIM_WALK_RING |
		     RADEON_CP_VC_CNTL_MAOS_ENABLE |
		     RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE |
		     (prim_verts << RADEON_CP_VC_CNTL_NUM_SHIFT));
	    ADVANCE_RING();
	}
    } else if (IS_R300_3D || IS_R500_3D) {
	prim_verts = 4;
	if (!info->accel_state->draw_header) {
	    BEGIN_RING(2);

	    info->accel_state->draw_header = info->cs->packets + info->cs->cdw;
	    info->accel_state->num_vtx = 0;
	    info->accel_state->vtx_count = vtx_count;

	    OUT_RING(CP_PACKET3(R200_CP_PACKET3_3D_DRAW_IMMD_2,
				prim_verts * vtx_count));
	    OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_QUAD_LIST |
		     RADEON_CP_VC_CNTL_PRIM_WALK_RING |
		     (prim_verts << RADEON_CP_VC_CNTL_NUM_SHIFT));
	    ADVANCE_RING();
	}
    } else {
	prim_verts = 3;
	if (!info->accel_state->draw_header) {
	    BEGIN_RING(2);

	    info->accel_state->draw_header = info->cs->packets + info->cs->cdw;
	    info->accel_state->num_vtx = 0;
	    info->accel_state->vtx_count = vtx_count;

	    OUT_RING(CP_PACKET3(R200_CP_PACKET3_3D_DRAW_IMMD_2,
				prim_verts * vtx_count));
	    OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_RECT_LIST |
		     RADEON_CP_VC_CNTL_PRIM_WALK_RING |
		     (prim_verts << RADEON_CP_VC_CNTL_NUM_SHIFT));
	    ADVANCE_RING();
	}
    }

    /* Account for this rectangle and reserve room for its vertices. */
    info->accel_state->num_vtx += prim_verts;
    BEGIN_RING(prim_verts * vtx_count);

    /*
     * Vertex order: (top-left, quads only), bottom-left, bottom-right,
     * top-right.  Texture coordinates are divided by texW/texH of the
     * respective texture unit.
     */
    if (info->accel_state->msk_pic) {
	if (IS_R300_3D || IS_R500_3D) {
	    VTX_OUT_MASK((float)dstX,                                      (float)dstY,
			 xFixedToFloat(srcTopLeft.x) / info->accel_state->texW[0],      xFixedToFloat(srcTopLeft.y) / info->accel_state->texH[0],
			 xFixedToFloat(maskTopLeft.x) / info->accel_state->texW[1],     xFixedToFloat(maskTopLeft.y) / info->accel_state->texH[1]);
	}
	VTX_OUT_MASK((float)dstX,                                      (float)(dstY + h),
		xFixedToFloat(srcBottomLeft.x) / info->accel_state->texW[0],   xFixedToFloat(srcBottomLeft.y) / info->accel_state->texH[0],
		xFixedToFloat(maskBottomLeft.x) / info->accel_state->texW[1],  xFixedToFloat(maskBottomLeft.y) / info->accel_state->texH[1]);
	VTX_OUT_MASK((float)(dstX + w),                                (float)(dstY + h),
		xFixedToFloat(srcBottomRight.x) / info->accel_state->texW[0],  xFixedToFloat(srcBottomRight.y) / info->accel_state->texH[0],
		xFixedToFloat(maskBottomRight.x) / info->accel_state->texW[1], xFixedToFloat(maskBottomRight.y) / info->accel_state->texH[1]);
	VTX_OUT_MASK((float)(dstX + w),                                (float)dstY,
		xFixedToFloat(srcTopRight.x) / info->accel_state->texW[0],     xFixedToFloat(srcTopRight.y) / info->accel_state->texH[0],
		xFixedToFloat(maskTopRight.x) / info->accel_state->texW[1],    xFixedToFloat(maskTopRight.y) / info->accel_state->texH[1]);
    } else {
	if (IS_R300_3D || IS_R500_3D) {
	    VTX_OUT((float)dstX,                                      (float)dstY,
		    xFixedToFloat(srcTopLeft.x) / info->accel_state->texW[0],      xFixedToFloat(srcTopLeft.y) / info->accel_state->texH[0]);
	}
	VTX_OUT((float)dstX,                                      (float)(dstY + h),
		xFixedToFloat(srcBottomLeft.x) / info->accel_state->texW[0],   xFixedToFloat(srcBottomLeft.y) / info->accel_state->texH[0]);
	VTX_OUT((float)(dstX + w),                                (float)(dstY + h),
		xFixedToFloat(srcBottomRight.x) / info->accel_state->texW[0],  xFixedToFloat(srcBottomRight.y) / info->accel_state->texH[0]);
	VTX_OUT((float)(dstX + w),                                (float)dstY,
		xFixedToFloat(srcTopRight.x) / info->accel_state->texW[0],     xFixedToFloat(srcTopRight.y) / info->accel_state->texH[0]);
    }

    ADVANCE_RING();

    LEAVE_DRAW(0);
}
2358 #undef VTX_OUT
2359 #undef VTX_OUT_MASK
2360 
/*
 * Composite a width x height rectangle onto pDst.
 *
 * When the acceleration state requests no source tiling in either direction
 * the whole rectangle is handed to RadeonCompositeTile() in one call.
 * Otherwise the rectangle is cut into pieces that each stay inside one
 * repetition of the source tile (src_tile_width x src_tile_height): the
 * source coordinates are wrapped into the tile, while mask and destination
 * coordinates advance linearly.
 */
static void RadeonComposite(PixmapPtr pDst,
			    int srcX, int srcY,
			    int maskX, int maskY,
			    int dstX, int dstY,
			    int width, int height)
{
    int rowSrcY, rowMaskY, rowDstY;
    int rowsLeft;
    RINFO_FROM_SCREEN(pDst->drawable.pScreen);

    /* Fast path: nothing to tile, emit the rectangle as is. */
    if (!(info->accel_state->need_src_tile_x || info->accel_state->need_src_tile_y)) {
	RadeonCompositeTile(pScrn, info, pDst,
			    srcX, srcY,
			    maskX, maskY,
			    dstX, dstY,
			    width, height);
	return;
    }

    /* Tiling logic borrowed from exaFillRegionTiled */

    /* Only the first row of pieces may start part-way into the tile. */
    modulus(srcY, info->accel_state->src_tile_height, rowSrcY);
    rowMaskY = maskY;
    rowDstY = dstY;

    for (rowsLeft = height; rowsLeft > 0; ) {
	int colSrcX, colMaskX, colDstX;
	int colsLeft;
	int pieceH = info->accel_state->src_tile_height - rowSrcY;

	/* Clamp the piece to what is left of the rectangle. */
	pieceH = (pieceH > rowsLeft) ? rowsLeft : pieceH;
	rowsLeft -= pieceH;

	/* Likewise, only the first piece of a row may start mid-tile. */
	modulus(srcX, info->accel_state->src_tile_width, colSrcX);
	colMaskX = maskX;
	colDstX = dstX;

	for (colsLeft = width; colsLeft > 0; ) {
	    int pieceW = info->accel_state->src_tile_width - colSrcX;

	    pieceW = (pieceW > colsLeft) ? colsLeft : pieceW;
	    colsLeft -= pieceW;

	    RadeonCompositeTile(pScrn, info, pDst,
				colSrcX, rowSrcY,
				colMaskX, rowMaskY,
				colDstX, rowDstY,
				pieceW, pieceH);

	    /* Later pieces in this row start at the tile's left edge. */
	    colSrcX = 0;
	    colMaskX += pieceW;
	    colDstX += pieceW;
	}

	/* Later rows start at the tile's top edge. */
	rowSrcY = 0;
	rowMaskY += pieceH;
	rowDstY += pieceH;
    }
}
2425 
2426