1 /*
2 * Copyright 2008 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Author: Alex Deucher <alexander.deucher@amd.com>
24 *
25 */
26
27 #ifdef HAVE_CONFIG_H
28 #include "config.h"
29 #endif
30
31 #include "xf86.h"
32
33 #include "exa.h"
34
35 #include "radeon.h"
36 #include "radeon_reg.h"
37 #include "r600_shader.h"
38 #include "r600_reg.h"
39 #include "r600_state.h"
40 #include "radeon_exa_shared.h"
41 #include "radeon_vbo.h"
42
43 /* #define SHOW_VERTEXES */
44
45 Bool
R600SetAccelState(ScrnInfoPtr pScrn,struct r600_accel_object * src0,struct r600_accel_object * src1,struct r600_accel_object * dst,uint32_t vs_offset,uint32_t ps_offset,int rop,Pixel planemask)46 R600SetAccelState(ScrnInfoPtr pScrn,
47 struct r600_accel_object *src0,
48 struct r600_accel_object *src1,
49 struct r600_accel_object *dst,
50 uint32_t vs_offset, uint32_t ps_offset,
51 int rop, Pixel planemask)
52 {
53 RADEONInfoPtr info = RADEONPTR(pScrn);
54 struct radeon_accel_state *accel_state = info->accel_state;
55 uint32_t pitch_align = 0x7;
56 int ret;
57
58 if (src0) {
59 memcpy(&accel_state->src_obj[0], src0, sizeof(struct r600_accel_object));
60 accel_state->src_size[0] = src0->pitch * src0->height * (src0->bpp/8);
61 if (src0->surface)
62 accel_state->src_size[0] = src0->surface->bo_size;
63
64 /* bad pitch */
65 if (accel_state->src_obj[0].pitch & pitch_align)
66 RADEON_FALLBACK(("Bad src pitch 0x%08x\n", accel_state->src_obj[0].pitch));
67
68 } else {
69 memset(&accel_state->src_obj[0], 0, sizeof(struct r600_accel_object));
70 accel_state->src_size[0] = 0;
71 }
72
73 if (src1) {
74 memcpy(&accel_state->src_obj[1], src1, sizeof(struct r600_accel_object));
75 accel_state->src_size[1] = src1->pitch * src1->height * (src1->bpp/8);
76 if (src1->surface) {
77 accel_state->src_size[1] = src1->surface->bo_size;
78 }
79
80 /* bad pitch */
81 if (accel_state->src_obj[1].pitch & pitch_align)
82 RADEON_FALLBACK(("Bad src pitch 0x%08x\n", accel_state->src_obj[1].pitch));
83
84 } else {
85 memset(&accel_state->src_obj[1], 0, sizeof(struct r600_accel_object));
86 accel_state->src_size[1] = 0;
87 }
88
89 if (dst) {
90 memcpy(&accel_state->dst_obj, dst, sizeof(struct r600_accel_object));
91 accel_state->dst_size = dst->pitch * dst->height * (dst->bpp/8);
92 if (dst->surface) {
93 accel_state->dst_size = dst->surface->bo_size;
94 } else
95 {
96 accel_state->dst_obj.tiling_flags = 0;
97 }
98 if (accel_state->dst_obj.pitch & pitch_align)
99 RADEON_FALLBACK(("Bad dst pitch 0x%08x\n", accel_state->dst_obj.pitch));
100
101 } else {
102 memset(&accel_state->dst_obj, 0, sizeof(struct r600_accel_object));
103 accel_state->dst_size = 0;
104 }
105
106 if (CS_FULL(info->cs))
107 radeon_cs_flush_indirect(pScrn);
108
109 accel_state->rop = rop;
110 accel_state->planemask = planemask;
111
112 accel_state->vs_size = 512;
113 accel_state->ps_size = 512;
114 accel_state->vs_mc_addr = vs_offset;
115 accel_state->ps_mc_addr = ps_offset;
116
117 radeon_cs_space_reset_bos(info->cs);
118 radeon_cs_space_add_persistent_bo(info->cs, accel_state->shaders_bo,
119 RADEON_GEM_DOMAIN_VRAM, 0);
120 if (accel_state->src_obj[0].bo)
121 radeon_cs_space_add_persistent_bo(info->cs, accel_state->src_obj[0].bo,
122 accel_state->src_obj[0].domain, 0);
123 if (accel_state->src_obj[1].bo)
124 radeon_cs_space_add_persistent_bo(info->cs, accel_state->src_obj[1].bo,
125 accel_state->src_obj[1].domain, 0);
126 if (accel_state->dst_obj.bo)
127 radeon_cs_space_add_persistent_bo(info->cs, accel_state->dst_obj.bo,
128 0, accel_state->dst_obj.domain);
129 ret = radeon_cs_space_check(info->cs);
130 if (ret)
131 RADEON_FALLBACK(("Not enough RAM to hw accel operation\n"));
132
133 return TRUE;
134 }
135
136 static Bool
R600PrepareSolid(PixmapPtr pPix,int alu,Pixel pm,Pixel fg)137 R600PrepareSolid(PixmapPtr pPix, int alu, Pixel pm, Pixel fg)
138 {
139 ScrnInfoPtr pScrn = xf86ScreenToScrn(pPix->drawable.pScreen);
140 RADEONInfoPtr info = RADEONPTR(pScrn);
141 struct radeon_accel_state *accel_state = info->accel_state;
142 cb_config_t cb_conf;
143 shader_config_t vs_conf, ps_conf;
144 uint32_t a, r, g, b;
145 float ps_alu_consts[4];
146 struct r600_accel_object dst;
147
148 if (!RADEONCheckBPP(pPix->drawable.bitsPerPixel))
149 RADEON_FALLBACK(("R600CheckDatatype failed\n"));
150 if (!RADEONValidPM(pm, pPix->drawable.bitsPerPixel))
151 RADEON_FALLBACK(("invalid planemask\n"));
152
153 dst.bo = radeon_get_pixmap_bo(pPix)->bo.radeon;
154 dst.tiling_flags = radeon_get_pixmap_tiling(pPix);
155 dst.surface = radeon_get_pixmap_surface(pPix);
156
157 dst.pitch = exaGetPixmapPitch(pPix) / (pPix->drawable.bitsPerPixel / 8);
158 dst.width = pPix->drawable.width;
159 dst.height = pPix->drawable.height;
160 dst.bpp = pPix->drawable.bitsPerPixel;
161 dst.domain = RADEON_GEM_DOMAIN_VRAM;
162
163 if (!R600SetAccelState(pScrn,
164 NULL,
165 NULL,
166 &dst,
167 accel_state->solid_vs_offset, accel_state->solid_ps_offset,
168 alu, pm))
169 return FALSE;
170
171 CLEAR (cb_conf);
172 CLEAR (vs_conf);
173 CLEAR (ps_conf);
174
175 radeon_vbo_check(pScrn, &accel_state->vbo, 16);
176 radeon_cp_start(pScrn);
177
178 r600_set_default_state(pScrn);
179
180 r600_set_generic_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
181 r600_set_screen_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
182 r600_set_window_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
183
184 /* Shader */
185 vs_conf.shader_addr = accel_state->vs_mc_addr;
186 vs_conf.shader_size = accel_state->vs_size;
187 vs_conf.num_gprs = 2;
188 vs_conf.stack_size = 0;
189 vs_conf.bo = accel_state->shaders_bo;
190 r600_vs_setup(pScrn, &vs_conf, RADEON_GEM_DOMAIN_VRAM);
191
192 ps_conf.shader_addr = accel_state->ps_mc_addr;
193 ps_conf.shader_size = accel_state->ps_size;
194 ps_conf.num_gprs = 1;
195 ps_conf.stack_size = 0;
196 ps_conf.uncached_first_inst = 1;
197 ps_conf.clamp_consts = 0;
198 ps_conf.export_mode = 2;
199 ps_conf.bo = accel_state->shaders_bo;
200 r600_ps_setup(pScrn, &ps_conf, RADEON_GEM_DOMAIN_VRAM);
201
202 cb_conf.id = 0;
203 cb_conf.w = accel_state->dst_obj.pitch;
204 cb_conf.h = accel_state->dst_obj.height;
205 cb_conf.base = 0;
206 cb_conf.bo = accel_state->dst_obj.bo;
207 cb_conf.surface = accel_state->dst_obj.surface;
208
209 if (accel_state->dst_obj.bpp == 8) {
210 cb_conf.format = COLOR_8;
211 cb_conf.comp_swap = 3; /* A */
212 } else if (accel_state->dst_obj.bpp == 16) {
213 cb_conf.format = COLOR_5_6_5;
214 cb_conf.comp_swap = 2; /* RGB */
215 #if X_BYTE_ORDER == X_BIG_ENDIAN
216 cb_conf.endian = ENDIAN_8IN16;
217 #endif
218 } else {
219 cb_conf.format = COLOR_8_8_8_8;
220 cb_conf.comp_swap = 1; /* ARGB */
221 #if X_BYTE_ORDER == X_BIG_ENDIAN
222 cb_conf.endian = ENDIAN_8IN32;
223 #endif
224 }
225 cb_conf.source_format = 1;
226 cb_conf.blend_clamp = 1;
227 /* Render setup */
228 if (accel_state->planemask & 0x000000ff)
229 cb_conf.pmask |= 4; /* B */
230 if (accel_state->planemask & 0x0000ff00)
231 cb_conf.pmask |= 2; /* G */
232 if (accel_state->planemask & 0x00ff0000)
233 cb_conf.pmask |= 1; /* R */
234 if (accel_state->planemask & 0xff000000)
235 cb_conf.pmask |= 8; /* A */
236 cb_conf.rop = accel_state->rop;
237 if (accel_state->dst_obj.tiling_flags == 0)
238 cb_conf.array_mode = 0;
239 r600_set_render_target(pScrn, &cb_conf, accel_state->dst_obj.domain);
240
241 r600_set_spi(pScrn, 0, 0);
242
243 /* PS alu constants */
244 if (accel_state->dst_obj.bpp == 16) {
245 r = (fg >> 11) & 0x1f;
246 g = (fg >> 5) & 0x3f;
247 b = (fg >> 0) & 0x1f;
248 ps_alu_consts[0] = (float)r / 31; /* R */
249 ps_alu_consts[1] = (float)g / 63; /* G */
250 ps_alu_consts[2] = (float)b / 31; /* B */
251 ps_alu_consts[3] = 1.0; /* A */
252 } else if (accel_state->dst_obj.bpp == 8) {
253 a = (fg >> 0) & 0xff;
254 ps_alu_consts[0] = 0.0; /* R */
255 ps_alu_consts[1] = 0.0; /* G */
256 ps_alu_consts[2] = 0.0; /* B */
257 ps_alu_consts[3] = (float)a / 255; /* A */
258 } else {
259 a = (fg >> 24) & 0xff;
260 r = (fg >> 16) & 0xff;
261 g = (fg >> 8) & 0xff;
262 b = (fg >> 0) & 0xff;
263 ps_alu_consts[0] = (float)r / 255; /* R */
264 ps_alu_consts[1] = (float)g / 255; /* G */
265 ps_alu_consts[2] = (float)b / 255; /* B */
266 ps_alu_consts[3] = (float)a / 255; /* A */
267 }
268 r600_set_alu_consts(pScrn, SQ_ALU_CONSTANT_ps,
269 sizeof(ps_alu_consts) / SQ_ALU_CONSTANT_offset, ps_alu_consts);
270
271 if (accel_state->vsync)
272 RADEONVlineHelperClear(pScrn);
273
274 accel_state->dst_pix = pPix;
275 accel_state->fg = fg;
276
277 return TRUE;
278 }
279
280 static void
R600DoneSolid(PixmapPtr pPix)281 R600DoneSolid(PixmapPtr pPix)
282 {
283 ScrnInfoPtr pScrn = xf86ScreenToScrn(pPix->drawable.pScreen);
284 RADEONInfoPtr info = RADEONPTR(pScrn);
285 struct radeon_accel_state *accel_state = info->accel_state;
286
287 if (accel_state->vsync)
288 r600_cp_wait_vline_sync(pScrn, pPix,
289 accel_state->vline_crtc,
290 accel_state->vline_y1,
291 accel_state->vline_y2);
292
293 r600_finish_op(pScrn, 8);
294 }
295
296 static void
R600Solid(PixmapPtr pPix,int x1,int y1,int x2,int y2)297 R600Solid(PixmapPtr pPix, int x1, int y1, int x2, int y2)
298 {
299 ScrnInfoPtr pScrn = xf86ScreenToScrn(pPix->drawable.pScreen);
300 RADEONInfoPtr info = RADEONPTR(pScrn);
301 struct radeon_accel_state *accel_state = info->accel_state;
302 float *vb;
303
304 if (CS_FULL(info->cs)) {
305 R600DoneSolid(info->accel_state->dst_pix);
306 radeon_cs_flush_indirect(pScrn);
307 R600PrepareSolid(accel_state->dst_pix,
308 accel_state->rop,
309 accel_state->planemask,
310 accel_state->fg);
311 }
312
313 if (accel_state->vsync)
314 RADEONVlineHelperSet(pScrn, x1, y1, x2, y2);
315
316 vb = radeon_vbo_space(pScrn, &accel_state->vbo, 8);
317
318 vb[0] = (float)x1;
319 vb[1] = (float)y1;
320
321 vb[2] = (float)x1;
322 vb[3] = (float)y2;
323
324 vb[4] = (float)x2;
325 vb[5] = (float)y2;
326
327 radeon_vbo_commit(pScrn, &accel_state->vbo);
328 }
329
330 static void
R600DoPrepareCopy(ScrnInfoPtr pScrn)331 R600DoPrepareCopy(ScrnInfoPtr pScrn)
332 {
333 RADEONInfoPtr info = RADEONPTR(pScrn);
334 struct radeon_accel_state *accel_state = info->accel_state;
335 cb_config_t cb_conf;
336 tex_resource_t tex_res;
337 tex_sampler_t tex_samp;
338 shader_config_t vs_conf, ps_conf;
339
340 CLEAR (cb_conf);
341 CLEAR (tex_res);
342 CLEAR (tex_samp);
343 CLEAR (vs_conf);
344 CLEAR (ps_conf);
345
346 radeon_vbo_check(pScrn, &accel_state->vbo, 16);
347 radeon_cp_start(pScrn);
348
349 r600_set_default_state(pScrn);
350
351 r600_set_generic_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
352 r600_set_screen_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
353 r600_set_window_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
354
355 /* Shader */
356 vs_conf.shader_addr = accel_state->vs_mc_addr;
357 vs_conf.shader_size = accel_state->vs_size;
358 vs_conf.num_gprs = 2;
359 vs_conf.stack_size = 0;
360 vs_conf.bo = accel_state->shaders_bo;
361 r600_vs_setup(pScrn, &vs_conf, RADEON_GEM_DOMAIN_VRAM);
362
363 ps_conf.shader_addr = accel_state->ps_mc_addr;
364 ps_conf.shader_size = accel_state->ps_size;
365 ps_conf.num_gprs = 1;
366 ps_conf.stack_size = 0;
367 ps_conf.uncached_first_inst = 1;
368 ps_conf.clamp_consts = 0;
369 ps_conf.export_mode = 2;
370 ps_conf.bo = accel_state->shaders_bo;
371 r600_ps_setup(pScrn, &ps_conf, RADEON_GEM_DOMAIN_VRAM);
372
373 /* Texture */
374 tex_res.id = 0;
375 tex_res.w = accel_state->src_obj[0].width;
376 tex_res.h = accel_state->src_obj[0].height;
377 tex_res.pitch = accel_state->src_obj[0].pitch;
378 tex_res.depth = 0;
379 tex_res.dim = SQ_TEX_DIM_2D;
380 tex_res.base = 0;
381 tex_res.mip_base = 0;
382 tex_res.size = accel_state->src_size[0];
383 tex_res.bo = accel_state->src_obj[0].bo;
384 tex_res.mip_bo = accel_state->src_obj[0].bo;
385 tex_res.surface = accel_state->src_obj[0].surface;
386 if (accel_state->src_obj[0].bpp == 8) {
387 tex_res.format = FMT_8;
388 tex_res.dst_sel_x = SQ_SEL_1; /* R */
389 tex_res.dst_sel_y = SQ_SEL_1; /* G */
390 tex_res.dst_sel_z = SQ_SEL_1; /* B */
391 tex_res.dst_sel_w = SQ_SEL_X; /* A */
392 } else if (accel_state->src_obj[0].bpp == 16) {
393 tex_res.format = FMT_5_6_5;
394 tex_res.dst_sel_x = SQ_SEL_Z; /* R */
395 tex_res.dst_sel_y = SQ_SEL_Y; /* G */
396 tex_res.dst_sel_z = SQ_SEL_X; /* B */
397 tex_res.dst_sel_w = SQ_SEL_1; /* A */
398 } else {
399 tex_res.format = FMT_8_8_8_8;
400 tex_res.dst_sel_x = SQ_SEL_Z; /* R */
401 tex_res.dst_sel_y = SQ_SEL_Y; /* G */
402 tex_res.dst_sel_z = SQ_SEL_X; /* B */
403 tex_res.dst_sel_w = SQ_SEL_W; /* A */
404 }
405
406 tex_res.request_size = 1;
407 tex_res.base_level = 0;
408 tex_res.last_level = 0;
409 tex_res.perf_modulation = 0;
410 if (accel_state->src_obj[0].tiling_flags == 0)
411 tex_res.tile_mode = 1;
412 r600_set_tex_resource(pScrn, &tex_res, accel_state->src_obj[0].domain);
413
414 tex_samp.id = 0;
415 tex_samp.clamp_x = SQ_TEX_CLAMP_LAST_TEXEL;
416 tex_samp.clamp_y = SQ_TEX_CLAMP_LAST_TEXEL;
417 tex_samp.clamp_z = SQ_TEX_WRAP;
418 tex_samp.xy_mag_filter = SQ_TEX_XY_FILTER_POINT;
419 tex_samp.xy_min_filter = SQ_TEX_XY_FILTER_POINT;
420 tex_samp.mc_coord_truncate = 1;
421 tex_samp.z_filter = SQ_TEX_Z_FILTER_NONE;
422 tex_samp.mip_filter = 0; /* no mipmap */
423 r600_set_tex_sampler(pScrn, &tex_samp);
424
425 cb_conf.id = 0;
426 cb_conf.w = accel_state->dst_obj.pitch;
427 cb_conf.h = accel_state->dst_obj.height;
428 cb_conf.base = 0;
429 cb_conf.bo = accel_state->dst_obj.bo;
430 cb_conf.surface = accel_state->dst_obj.surface;
431 if (accel_state->dst_obj.bpp == 8) {
432 cb_conf.format = COLOR_8;
433 cb_conf.comp_swap = 3; /* A */
434 } else if (accel_state->dst_obj.bpp == 16) {
435 cb_conf.format = COLOR_5_6_5;
436 cb_conf.comp_swap = 2; /* RGB */
437 } else {
438 cb_conf.format = COLOR_8_8_8_8;
439 cb_conf.comp_swap = 1; /* ARGB */
440 }
441 cb_conf.source_format = 1;
442 cb_conf.blend_clamp = 1;
443
444 /* Render setup */
445 if (accel_state->planemask & 0x000000ff)
446 cb_conf.pmask |= 4; /* B */
447 if (accel_state->planemask & 0x0000ff00)
448 cb_conf.pmask |= 2; /* G */
449 if (accel_state->planemask & 0x00ff0000)
450 cb_conf.pmask |= 1; /* R */
451 if (accel_state->planemask & 0xff000000)
452 cb_conf.pmask |= 8; /* A */
453 cb_conf.rop = accel_state->rop;
454 if (accel_state->dst_obj.tiling_flags == 0)
455 cb_conf.array_mode = 0;
456 r600_set_render_target(pScrn, &cb_conf, accel_state->dst_obj.domain);
457
458 r600_set_spi(pScrn, (1 - 1), 1);
459
460 }
461
462 static void
R600DoCopy(ScrnInfoPtr pScrn)463 R600DoCopy(ScrnInfoPtr pScrn)
464 {
465 r600_finish_op(pScrn, 16);
466 }
467
468 static void
R600DoCopyVline(PixmapPtr pPix)469 R600DoCopyVline(PixmapPtr pPix)
470 {
471 ScrnInfoPtr pScrn = xf86ScreenToScrn(pPix->drawable.pScreen);
472 RADEONInfoPtr info = RADEONPTR(pScrn);
473 struct radeon_accel_state *accel_state = info->accel_state;
474
475 if (accel_state->vsync)
476 r600_cp_wait_vline_sync(pScrn, pPix,
477 accel_state->vline_crtc,
478 accel_state->vline_y1,
479 accel_state->vline_y2);
480
481 r600_finish_op(pScrn, 16);
482 }
483
484 static void
R600AppendCopyVertex(ScrnInfoPtr pScrn,int srcX,int srcY,int dstX,int dstY,int w,int h)485 R600AppendCopyVertex(ScrnInfoPtr pScrn,
486 int srcX, int srcY,
487 int dstX, int dstY,
488 int w, int h)
489 {
490 RADEONInfoPtr info = RADEONPTR(pScrn);
491 struct radeon_accel_state *accel_state = info->accel_state;
492 float *vb;
493
494 vb = radeon_vbo_space(pScrn, &accel_state->vbo, 16);
495
496 vb[0] = (float)dstX;
497 vb[1] = (float)dstY;
498 vb[2] = (float)srcX;
499 vb[3] = (float)srcY;
500
501 vb[4] = (float)dstX;
502 vb[5] = (float)(dstY + h);
503 vb[6] = (float)srcX;
504 vb[7] = (float)(srcY + h);
505
506 vb[8] = (float)(dstX + w);
507 vb[9] = (float)(dstY + h);
508 vb[10] = (float)(srcX + w);
509 vb[11] = (float)(srcY + h);
510
511 radeon_vbo_commit(pScrn, &accel_state->vbo);
512 }
513
514 static Bool
R600PrepareCopy(PixmapPtr pSrc,PixmapPtr pDst,int xdir,int ydir,int rop,Pixel planemask)515 R600PrepareCopy(PixmapPtr pSrc, PixmapPtr pDst,
516 int xdir, int ydir,
517 int rop,
518 Pixel planemask)
519 {
520 ScrnInfoPtr pScrn = xf86ScreenToScrn(pDst->drawable.pScreen);
521 RADEONInfoPtr info = RADEONPTR(pScrn);
522 struct radeon_accel_state *accel_state = info->accel_state;
523 struct r600_accel_object src_obj, dst_obj;
524
525 if (!RADEONCheckBPP(pSrc->drawable.bitsPerPixel))
526 RADEON_FALLBACK(("R600CheckDatatype src failed\n"));
527 if (!RADEONCheckBPP(pDst->drawable.bitsPerPixel))
528 RADEON_FALLBACK(("R600CheckDatatype dst failed\n"));
529 if (!RADEONValidPM(planemask, pDst->drawable.bitsPerPixel))
530 RADEON_FALLBACK(("Invalid planemask\n"));
531
532 dst_obj.pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8);
533 src_obj.pitch = exaGetPixmapPitch(pSrc) / (pSrc->drawable.bitsPerPixel / 8);
534
535 accel_state->same_surface = FALSE;
536
537 src_obj.bo = radeon_get_pixmap_bo(pSrc)->bo.radeon;
538 dst_obj.bo = radeon_get_pixmap_bo(pDst)->bo.radeon;
539 dst_obj.tiling_flags = radeon_get_pixmap_tiling(pDst);
540 src_obj.tiling_flags = radeon_get_pixmap_tiling(pSrc);
541 src_obj.surface = radeon_get_pixmap_surface(pSrc);
542 dst_obj.surface = radeon_get_pixmap_surface(pDst);
543 if (src_obj.bo == dst_obj.bo)
544 accel_state->same_surface = TRUE;
545
546 src_obj.width = pSrc->drawable.width;
547 src_obj.height = pSrc->drawable.height;
548 src_obj.bpp = pSrc->drawable.bitsPerPixel;
549 src_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
550
551 dst_obj.width = pDst->drawable.width;
552 dst_obj.height = pDst->drawable.height;
553 dst_obj.bpp = pDst->drawable.bitsPerPixel;
554 if (radeon_get_pixmap_shared(pDst) == TRUE) {
555 dst_obj.domain = RADEON_GEM_DOMAIN_GTT;
556 } else
557 dst_obj.domain = RADEON_GEM_DOMAIN_VRAM;
558
559 if (!R600SetAccelState(pScrn,
560 &src_obj,
561 NULL,
562 &dst_obj,
563 accel_state->copy_vs_offset, accel_state->copy_ps_offset,
564 rop, planemask))
565 return FALSE;
566
567 if (accel_state->same_surface == TRUE) {
568 unsigned long size = accel_state->dst_obj.surface->bo_size;
569 unsigned long align = accel_state->dst_obj.surface->bo_alignment;
570
571 if (accel_state->copy_area_bo) {
572 radeon_bo_unref(accel_state->copy_area_bo);
573 accel_state->copy_area_bo = NULL;
574 }
575 accel_state->copy_area_bo = radeon_bo_open(info->bufmgr, 0, size, align,
576 RADEON_GEM_DOMAIN_VRAM,
577 0);
578 if (!accel_state->copy_area_bo)
579 RADEON_FALLBACK(("temp copy surface alloc failed\n"));
580
581 radeon_cs_space_add_persistent_bo(info->cs, accel_state->copy_area_bo,
582 0, RADEON_GEM_DOMAIN_VRAM);
583 if (radeon_cs_space_check(info->cs)) {
584 radeon_bo_unref(accel_state->copy_area_bo);
585 accel_state->copy_area_bo = NULL;
586 return FALSE;
587 }
588 accel_state->copy_area = (void*)accel_state->copy_area_bo;
589 } else
590 R600DoPrepareCopy(pScrn);
591
592 if (accel_state->vsync)
593 RADEONVlineHelperClear(pScrn);
594
595 accel_state->dst_pix = pDst;
596 accel_state->src_pix = pSrc;
597 accel_state->xdir = xdir;
598 accel_state->ydir = ydir;
599
600 return TRUE;
601 }
602
603 static void
R600DoneCopy(PixmapPtr pDst)604 R600DoneCopy(PixmapPtr pDst)
605 {
606 ScrnInfoPtr pScrn = xf86ScreenToScrn(pDst->drawable.pScreen);
607 RADEONInfoPtr info = RADEONPTR(pScrn);
608 struct radeon_accel_state *accel_state = info->accel_state;
609
610 if (!accel_state->same_surface)
611 R600DoCopyVline(pDst);
612
613 if (accel_state->copy_area) {
614 accel_state->copy_area = NULL;
615 }
616
617 }
618
619 static void
R600Copy(PixmapPtr pDst,int srcX,int srcY,int dstX,int dstY,int w,int h)620 R600Copy(PixmapPtr pDst,
621 int srcX, int srcY,
622 int dstX, int dstY,
623 int w, int h)
624 {
625 ScrnInfoPtr pScrn = xf86ScreenToScrn(pDst->drawable.pScreen);
626 RADEONInfoPtr info = RADEONPTR(pScrn);
627 struct radeon_accel_state *accel_state = info->accel_state;
628
629 if (accel_state->same_surface && (srcX == dstX) && (srcY == dstY))
630 return;
631
632 if (CS_FULL(info->cs)) {
633 R600DoneCopy(info->accel_state->dst_pix);
634 radeon_cs_flush_indirect(pScrn);
635 R600PrepareCopy(accel_state->src_pix,
636 accel_state->dst_pix,
637 accel_state->xdir,
638 accel_state->ydir,
639 accel_state->rop,
640 accel_state->planemask);
641 }
642
643 if (accel_state->vsync)
644 RADEONVlineHelperSet(pScrn, dstX, dstY, dstX + w, dstY + h);
645
646 if (accel_state->same_surface &&
647 (srcX + w <= dstX || dstX + w <= srcX || srcY + h <= dstY || dstY + h <= srcY)) {
648 R600DoPrepareCopy(pScrn);
649 R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, h);
650 R600DoCopyVline(pDst);
651 } else if (accel_state->same_surface && accel_state->copy_area) {
652 uint32_t orig_dst_domain = accel_state->dst_obj.domain;
653 uint32_t orig_src_domain = accel_state->src_obj[0].domain;
654 uint32_t orig_src_tiling_flags = accel_state->src_obj[0].tiling_flags;
655 uint32_t orig_dst_tiling_flags = accel_state->dst_obj.tiling_flags;
656 struct radeon_bo *orig_bo = accel_state->dst_obj.bo;
657 int orig_rop = accel_state->rop;
658
659 /* src to tmp */
660 accel_state->dst_obj.domain = RADEON_GEM_DOMAIN_VRAM;
661 accel_state->dst_obj.bo = accel_state->copy_area_bo;
662 accel_state->dst_obj.tiling_flags = 0;
663 accel_state->rop = 3;
664 R600DoPrepareCopy(pScrn);
665 R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, h);
666 R600DoCopy(pScrn);
667
668 /* tmp to dst */
669 accel_state->src_obj[0].domain = RADEON_GEM_DOMAIN_VRAM;
670 accel_state->src_obj[0].bo = accel_state->copy_area_bo;
671 accel_state->src_obj[0].tiling_flags = 0;
672 accel_state->dst_obj.domain = orig_dst_domain;
673 accel_state->dst_obj.bo = orig_bo;
674 accel_state->dst_obj.tiling_flags = orig_dst_tiling_flags;
675 accel_state->rop = orig_rop;
676 R600DoPrepareCopy(pScrn);
677 R600AppendCopyVertex(pScrn, dstX, dstY, dstX, dstY, w, h);
678 R600DoCopyVline(pDst);
679
680 /* restore state */
681 accel_state->src_obj[0].domain = orig_src_domain;
682 accel_state->src_obj[0].bo = orig_bo;
683 accel_state->src_obj[0].tiling_flags = orig_src_tiling_flags;
684 } else
685 R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, h);
686
687 }
688
689 struct blendinfo {
690 Bool dst_alpha;
691 Bool src_alpha;
692 uint32_t blend_cntl;
693 };
694
695 static struct blendinfo R600BlendOp[] = {
696 /* Clear */
697 {0, 0, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)},
698 /* Src */
699 {0, 0, (BLEND_ONE << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)},
700 /* Dst */
701 {0, 0, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_ONE << COLOR_DESTBLEND_shift)},
702 /* Over */
703 {0, 1, (BLEND_ONE << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)},
704 /* OverReverse */
705 {1, 0, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ONE << COLOR_DESTBLEND_shift)},
706 /* In */
707 {1, 0, (BLEND_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)},
708 /* InReverse */
709 {0, 1, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_SRC_ALPHA << COLOR_DESTBLEND_shift)},
710 /* Out */
711 {1, 0, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)},
712 /* OutReverse */
713 {0, 1, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)},
714 /* Atop */
715 {1, 1, (BLEND_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)},
716 /* AtopReverse */
717 {1, 1, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_SRC_ALPHA << COLOR_DESTBLEND_shift)},
718 /* Xor */
719 {1, 1, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)},
720 /* Add */
721 {0, 0, (BLEND_ONE << COLOR_SRCBLEND_shift) | (BLEND_ONE << COLOR_DESTBLEND_shift)},
722 };
723
/* Maps one picture format onto the hardware texture format used for it. */
struct formatinfo {
    unsigned int fmt;  /* picture format (PICT_*) */
    uint32_t card_fmt; /* hardware texture format (FMT_*) */
};
728
729 static struct formatinfo R600TexFormats[] = {
730 {PICT_a2r10g10b10, FMT_2_10_10_10},
731 {PICT_x2r10g10b10, FMT_2_10_10_10},
732 {PICT_a2b10g10r10, FMT_2_10_10_10},
733 {PICT_x2b10g10r10, FMT_2_10_10_10},
734 {PICT_a8r8g8b8, FMT_8_8_8_8},
735 {PICT_x8r8g8b8, FMT_8_8_8_8},
736 {PICT_a8b8g8r8, FMT_8_8_8_8},
737 {PICT_x8b8g8r8, FMT_8_8_8_8},
738 {PICT_b8g8r8a8, FMT_8_8_8_8},
739 {PICT_b8g8r8x8, FMT_8_8_8_8},
740 {PICT_r5g6b5, FMT_5_6_5},
741 {PICT_a1r5g5b5, FMT_1_5_5_5},
742 {PICT_x1r5g5b5, FMT_1_5_5_5},
743 {PICT_a8, FMT_8},
744 };
745
/*
 * Build the blend control word for compositing operator `op`.
 *
 * op:         index into R600BlendOp; not range-checked here, so the caller
 *             must pass a value inside the table.
 * pMask:      mask picture, may be NULL.
 * dst_format: picture format of the destination.
 *
 * Returns the table's source and destination blend factors, adjusted for a
 * destination without alpha and for component-alpha masks.
 */
static uint32_t R600GetBlendCntl(int op, PicturePtr pMask, uint32_t dst_format)
{
    const uint32_t table_cntl = R600BlendOp[op].blend_cntl;
    uint32_t src_factor = table_cntl & COLOR_SRCBLEND_mask;
    uint32_t dst_factor = table_cntl & COLOR_DESTBLEND_mask;

    /*
     * A destination without an alpha channel is treated as alpha == 1:
     * DST_ALPHA becomes ONE and ONE_MINUS_DST_ALPHA becomes ZERO.
     */
    if (R600BlendOp[op].dst_alpha && PICT_FORMAT_A(dst_format) == 0) {
        if (src_factor == (BLEND_DST_ALPHA << COLOR_SRCBLEND_shift))
            src_factor = (BLEND_ONE << COLOR_SRCBLEND_shift);
        else if (src_factor == (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift))
            src_factor = (BLEND_ZERO << COLOR_SRCBLEND_shift);
    }

    /* The remaining adjustments apply to component-alpha masks only. */
    if (!pMask || !pMask->componentAlpha || !R600BlendOp[op].src_alpha)
        return src_factor | dst_factor;

    /* The destination factor uses the source colour in place of its alpha. */
    if (dst_factor == (BLEND_SRC_ALPHA << COLOR_DESTBLEND_shift))
        dst_factor = (BLEND_SRC_COLOR << COLOR_DESTBLEND_shift);
    else if (dst_factor == (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift))
        dst_factor = (BLEND_ONE_MINUS_SRC_COLOR << COLOR_DESTBLEND_shift);

    /*
     * With some tricks, PictOpOver with a solid source can still be
     * accelerated.  This is commonly used for text rendering, so it is
     * worth the extra effort: a source factor of ONE is replaced by the
     * blend constant colour.
     */
    if (src_factor == (BLEND_ONE << COLOR_SRCBLEND_shift))
        src_factor = (BLEND_CONSTANT_COLOR << COLOR_SRCBLEND_shift);

    return src_factor | dst_factor;
}
785
R600GetDestFormat(PicturePtr pDstPicture,uint32_t * dst_format)786 static Bool R600GetDestFormat(PicturePtr pDstPicture, uint32_t *dst_format)
787 {
788 switch (pDstPicture->format) {
789 case PICT_a2r10g10b10:
790 case PICT_x2r10g10b10:
791 case PICT_a2b10g10r10:
792 case PICT_x2b10g10r10:
793 *dst_format = COLOR_2_10_10_10;
794 break;
795 case PICT_a8r8g8b8:
796 case PICT_x8r8g8b8:
797 case PICT_a8b8g8r8:
798 case PICT_x8b8g8r8:
799 case PICT_b8g8r8a8:
800 case PICT_b8g8r8x8:
801 *dst_format = COLOR_8_8_8_8;
802 break;
803 case PICT_r5g6b5:
804 *dst_format = COLOR_5_6_5;
805 break;
806 case PICT_a1r5g5b5:
807 case PICT_x1r5g5b5:
808 *dst_format = COLOR_1_5_5_5;
809 break;
810 case PICT_a8:
811 *dst_format = COLOR_8;
812 break;
813 default:
814 RADEON_FALLBACK(("Unsupported dest format 0x%x\n",
815 (int)pDstPicture->format));
816 }
817 return TRUE;
818 }
819
R600CheckCompositeTexture(PicturePtr pPict,PicturePtr pDstPict,int op,int unit)820 static Bool R600CheckCompositeTexture(PicturePtr pPict,
821 PicturePtr pDstPict,
822 int op,
823 int unit)
824 {
825 unsigned int repeatType = pPict->repeat ? pPict->repeatType : RepeatNone;
826 unsigned int i;
827
828 for (i = 0; i < sizeof(R600TexFormats) / sizeof(R600TexFormats[0]); i++) {
829 if (R600TexFormats[i].fmt == pPict->format)
830 break;
831 }
832 if (i == sizeof(R600TexFormats) / sizeof(R600TexFormats[0]))
833 RADEON_FALLBACK(("Unsupported picture format 0x%x\n",
834 (int)pPict->format));
835
836 if (pPict->filter != PictFilterNearest &&
837 pPict->filter != PictFilterBilinear)
838 RADEON_FALLBACK(("Unsupported filter 0x%x\n", pPict->filter));
839
840 /* for REPEAT_NONE, Render semantics are that sampling outside the source
841 * picture results in alpha=0 pixels. We can implement this with a border color
842 * *if* our source texture has an alpha channel, otherwise we need to fall
843 * back. If we're not transformed then we hope that upper layers have clipped
844 * rendering to the bounds of the source drawable, in which case it doesn't
845 * matter. I have not, however, verified that the X server always does such
846 * clipping.
847 */
848 /* FIXME R6xx */
849 if (pPict->transform != 0 && repeatType == RepeatNone && PICT_FORMAT_A(pPict->format) == 0) {
850 if (!(((op == PictOpSrc) || (op == PictOpClear)) && (PICT_FORMAT_A(pDstPict->format) == 0)))
851 RADEON_FALLBACK(("REPEAT_NONE unsupported for transformed xRGB source\n"));
852 }
853
854 if (!radeon_transform_is_affine_or_scaled(pPict->transform))
855 RADEON_FALLBACK(("non-affine transforms not supported\n"));
856
857 return TRUE;
858 }
859
R600TextureSetup(PicturePtr pPict,PixmapPtr pPix,int unit)860 static Bool R600TextureSetup(PicturePtr pPict, PixmapPtr pPix,
861 int unit)
862 {
863 ScrnInfoPtr pScrn = xf86ScreenToScrn(pPix->drawable.pScreen);
864 RADEONInfoPtr info = RADEONPTR(pScrn);
865 struct radeon_accel_state *accel_state = info->accel_state;
866 unsigned int repeatType;
867 unsigned int i;
868 tex_resource_t tex_res;
869 tex_sampler_t tex_samp;
870 int pix_r, pix_g, pix_b, pix_a;
871 float vs_alu_consts[8];
872
873 CLEAR (tex_res);
874 CLEAR (tex_samp);
875
876 for (i = 0; i < sizeof(R600TexFormats) / sizeof(R600TexFormats[0]); i++) {
877 if (R600TexFormats[i].fmt == pPict->format)
878 break;
879 }
880
881 /* Texture */
882 if (pPict->pDrawable) {
883 tex_res.w = pPict->pDrawable->width;
884 tex_res.h = pPict->pDrawable->height;
885 repeatType = pPict->repeat ? pPict->repeatType : RepeatNone;
886 } else {
887 tex_res.w = 1;
888 tex_res.h = 1;
889 repeatType = RepeatNormal;
890 }
891 tex_res.id = unit;
892 tex_res.pitch = accel_state->src_obj[unit].pitch;
893 tex_res.depth = 0;
894 tex_res.dim = SQ_TEX_DIM_2D;
895 tex_res.base = 0;
896 tex_res.mip_base = 0;
897 tex_res.size = accel_state->src_size[unit];
898 tex_res.format = R600TexFormats[i].card_fmt;
899 tex_res.bo = accel_state->src_obj[unit].bo;
900 tex_res.mip_bo = accel_state->src_obj[unit].bo;
901 tex_res.surface = accel_state->src_obj[unit].surface;
902 tex_res.request_size = 1;
903
904 #if X_BYTE_ORDER == X_BIG_ENDIAN
905 switch (accel_state->src_obj[unit].bpp) {
906 case 16:
907 tex_res.endian = SQ_ENDIAN_8IN16;
908 break;
909 case 32:
910 tex_res.endian = SQ_ENDIAN_8IN32;
911 break;
912 default :
913 break;
914 }
915 #endif
916
917 /* component swizzles */
918 switch (pPict->format) {
919 case PICT_a2r10g10b10:
920 case PICT_a1r5g5b5:
921 case PICT_a8r8g8b8:
922 pix_r = SQ_SEL_Z; /* R */
923 pix_g = SQ_SEL_Y; /* G */
924 pix_b = SQ_SEL_X; /* B */
925 pix_a = SQ_SEL_W; /* A */
926 break;
927 case PICT_a2b10g10r10:
928 case PICT_a8b8g8r8:
929 pix_r = SQ_SEL_X; /* R */
930 pix_g = SQ_SEL_Y; /* G */
931 pix_b = SQ_SEL_Z; /* B */
932 pix_a = SQ_SEL_W; /* A */
933 break;
934 case PICT_x2b10g10r10:
935 case PICT_x8b8g8r8:
936 pix_r = SQ_SEL_X; /* R */
937 pix_g = SQ_SEL_Y; /* G */
938 pix_b = SQ_SEL_Z; /* B */
939 pix_a = SQ_SEL_1; /* A */
940 break;
941 case PICT_b8g8r8a8:
942 pix_r = SQ_SEL_Y; /* R */
943 pix_g = SQ_SEL_Z; /* G */
944 pix_b = SQ_SEL_W; /* B */
945 pix_a = SQ_SEL_X; /* A */
946 break;
947 case PICT_b8g8r8x8:
948 pix_r = SQ_SEL_Y; /* R */
949 pix_g = SQ_SEL_Z; /* G */
950 pix_b = SQ_SEL_W; /* B */
951 pix_a = SQ_SEL_1; /* A */
952 break;
953 case PICT_x2r10g10b10:
954 case PICT_x1r5g5b5:
955 case PICT_x8r8g8b8:
956 case PICT_r5g6b5:
957 pix_r = SQ_SEL_Z; /* R */
958 pix_g = SQ_SEL_Y; /* G */
959 pix_b = SQ_SEL_X; /* B */
960 pix_a = SQ_SEL_1; /* A */
961 break;
962 case PICT_a8:
963 pix_r = SQ_SEL_0; /* R */
964 pix_g = SQ_SEL_0; /* G */
965 pix_b = SQ_SEL_0; /* B */
966 pix_a = SQ_SEL_X; /* A */
967 break;
968 default:
969 RADEON_FALLBACK(("Bad format 0x%x\n", pPict->format));
970 }
971
972 if (unit == 0) {
973 if (!accel_state->msk_pic) {
974 if (PICT_FORMAT_RGB(pPict->format) == 0) {
975 pix_r = SQ_SEL_0;
976 pix_g = SQ_SEL_0;
977 pix_b = SQ_SEL_0;
978 }
979
980 if (PICT_FORMAT_A(pPict->format) == 0)
981 pix_a = SQ_SEL_1;
982 } else {
983 if (accel_state->component_alpha) {
984 if (accel_state->src_alpha) {
985 if (PICT_FORMAT_A(pPict->format) == 0) {
986 pix_r = SQ_SEL_1;
987 pix_g = SQ_SEL_1;
988 pix_b = SQ_SEL_1;
989 pix_a = SQ_SEL_1;
990 } else {
991 pix_r = pix_a;
992 pix_g = pix_a;
993 pix_b = pix_a;
994 }
995 } else {
996 if (PICT_FORMAT_A(pPict->format) == 0)
997 pix_a = SQ_SEL_1;
998 }
999 } else {
1000 if (PICT_FORMAT_RGB(pPict->format) == 0) {
1001 pix_r = SQ_SEL_0;
1002 pix_g = SQ_SEL_0;
1003 pix_b = SQ_SEL_0;
1004 }
1005
1006 if (PICT_FORMAT_A(pPict->format) == 0)
1007 pix_a = SQ_SEL_1;
1008 }
1009 }
1010 } else {
1011 if (accel_state->component_alpha) {
1012 if (PICT_FORMAT_A(pPict->format) == 0)
1013 pix_a = SQ_SEL_1;
1014 } else {
1015 if (PICT_FORMAT_A(pPict->format) == 0) {
1016 pix_r = SQ_SEL_1;
1017 pix_g = SQ_SEL_1;
1018 pix_b = SQ_SEL_1;
1019 pix_a = SQ_SEL_1;
1020 } else {
1021 pix_r = pix_a;
1022 pix_g = pix_a;
1023 pix_b = pix_a;
1024 }
1025 }
1026 }
1027
1028 tex_res.dst_sel_x = pix_r; /* R */
1029 tex_res.dst_sel_y = pix_g; /* G */
1030 tex_res.dst_sel_z = pix_b; /* B */
1031 tex_res.dst_sel_w = pix_a; /* A */
1032
1033 tex_res.base_level = 0;
1034 tex_res.last_level = 0;
1035 tex_res.perf_modulation = 0;
1036 if (accel_state->src_obj[unit].tiling_flags == 0)
1037 tex_res.tile_mode = 1;
1038 r600_set_tex_resource(pScrn, &tex_res, accel_state->src_obj[unit].domain);
1039
1040 tex_samp.id = unit;
1041 tex_samp.border_color = SQ_TEX_BORDER_COLOR_TRANS_BLACK;
1042
1043 switch (repeatType) {
1044 case RepeatNormal:
1045 tex_samp.clamp_x = SQ_TEX_WRAP;
1046 tex_samp.clamp_y = SQ_TEX_WRAP;
1047 break;
1048 case RepeatPad:
1049 tex_samp.clamp_x = SQ_TEX_CLAMP_LAST_TEXEL;
1050 tex_samp.clamp_y = SQ_TEX_CLAMP_LAST_TEXEL;
1051 break;
1052 case RepeatReflect:
1053 tex_samp.clamp_x = SQ_TEX_MIRROR;
1054 tex_samp.clamp_y = SQ_TEX_MIRROR;
1055 break;
1056 case RepeatNone:
1057 tex_samp.clamp_x = SQ_TEX_CLAMP_BORDER;
1058 tex_samp.clamp_y = SQ_TEX_CLAMP_BORDER;
1059 break;
1060 default:
1061 RADEON_FALLBACK(("Bad repeat 0x%x\n", repeatType));
1062 }
1063
1064 switch (pPict->filter) {
1065 case PictFilterNearest:
1066 tex_samp.xy_mag_filter = SQ_TEX_XY_FILTER_POINT;
1067 tex_samp.xy_min_filter = SQ_TEX_XY_FILTER_POINT;
1068 tex_samp.mc_coord_truncate = 1;
1069 break;
1070 case PictFilterBilinear:
1071 tex_samp.xy_mag_filter = SQ_TEX_XY_FILTER_BILINEAR;
1072 tex_samp.xy_min_filter = SQ_TEX_XY_FILTER_BILINEAR;
1073 break;
1074 default:
1075 RADEON_FALLBACK(("Bad filter 0x%x\n", pPict->filter));
1076 }
1077
1078 tex_samp.clamp_z = SQ_TEX_WRAP;
1079 tex_samp.z_filter = SQ_TEX_Z_FILTER_NONE;
1080 tex_samp.mip_filter = 0; /* no mipmap */
1081 r600_set_tex_sampler(pScrn, &tex_samp);
1082
1083 if (pPict->transform != 0) {
1084 accel_state->is_transform[unit] = TRUE;
1085 accel_state->transform[unit] = pPict->transform;
1086
1087 vs_alu_consts[0] = xFixedToFloat(pPict->transform->matrix[0][0]);
1088 vs_alu_consts[1] = xFixedToFloat(pPict->transform->matrix[0][1]);
1089 vs_alu_consts[2] = xFixedToFloat(pPict->transform->matrix[0][2]);
1090 vs_alu_consts[3] = 1.0 / tex_res.w;
1091
1092 vs_alu_consts[4] = xFixedToFloat(pPict->transform->matrix[1][0]);
1093 vs_alu_consts[5] = xFixedToFloat(pPict->transform->matrix[1][1]);
1094 vs_alu_consts[6] = xFixedToFloat(pPict->transform->matrix[1][2]);
1095 vs_alu_consts[7] = 1.0 / tex_res.h;
1096 } else {
1097 accel_state->is_transform[unit] = FALSE;
1098
1099 vs_alu_consts[0] = 1.0;
1100 vs_alu_consts[1] = 0.0;
1101 vs_alu_consts[2] = 0.0;
1102 vs_alu_consts[3] = 1.0 / tex_res.w;
1103
1104 vs_alu_consts[4] = 0.0;
1105 vs_alu_consts[5] = 1.0;
1106 vs_alu_consts[6] = 0.0;
1107 vs_alu_consts[7] = 1.0 / tex_res.h;
1108 }
1109
1110 /* VS alu constants */
1111 r600_set_alu_consts(pScrn, SQ_ALU_CONSTANT_vs + (unit * 2),
1112 sizeof(vs_alu_consts) / SQ_ALU_CONSTANT_offset, vs_alu_consts);
1113
1114 return TRUE;
1115 }
1116
R600CheckComposite(int op,PicturePtr pSrcPicture,PicturePtr pMaskPicture,PicturePtr pDstPicture)1117 static Bool R600CheckComposite(int op, PicturePtr pSrcPicture, PicturePtr pMaskPicture,
1118 PicturePtr pDstPicture)
1119 {
1120 uint32_t tmp1;
1121 PixmapPtr pSrcPixmap, pDstPixmap;
1122
1123 /* Check for unsupported compositing operations. */
1124 if (op >= (int) (sizeof(R600BlendOp) / sizeof(R600BlendOp[0])))
1125 RADEON_FALLBACK(("Unsupported Composite op 0x%x\n", op));
1126
1127 if (pSrcPicture->pDrawable) {
1128 pSrcPixmap = RADEONGetDrawablePixmap(pSrcPicture->pDrawable);
1129
1130 if (pSrcPixmap->drawable.width >= 8192 ||
1131 pSrcPixmap->drawable.height >= 8192) {
1132 RADEON_FALLBACK(("Source w/h too large (%d,%d).\n",
1133 pSrcPixmap->drawable.width,
1134 pSrcPixmap->drawable.height));
1135 }
1136
1137 if (!R600CheckCompositeTexture(pSrcPicture, pDstPicture, op, 0))
1138 return FALSE;
1139 } else if (pSrcPicture->pSourcePict->type != SourcePictTypeSolidFill)
1140 RADEON_FALLBACK(("Gradient pictures not supported yet\n"));
1141
1142 pDstPixmap = RADEONGetDrawablePixmap(pDstPicture->pDrawable);
1143
1144 if (pDstPixmap->drawable.width >= 8192 ||
1145 pDstPixmap->drawable.height >= 8192) {
1146 RADEON_FALLBACK(("Dest w/h too large (%d,%d).\n",
1147 pDstPixmap->drawable.width,
1148 pDstPixmap->drawable.height));
1149 }
1150
1151 if (pMaskPicture) {
1152 PixmapPtr pMaskPixmap;
1153
1154 if (pMaskPicture->pDrawable) {
1155 pMaskPixmap = RADEONGetDrawablePixmap(pMaskPicture->pDrawable);
1156
1157 if (pMaskPixmap->drawable.width >= 8192 ||
1158 pMaskPixmap->drawable.height >= 8192) {
1159 RADEON_FALLBACK(("Mask w/h too large (%d,%d).\n",
1160 pMaskPixmap->drawable.width,
1161 pMaskPixmap->drawable.height));
1162 }
1163
1164 if (pMaskPicture->componentAlpha) {
1165 /* Check if it's component alpha that relies on a source alpha and
1166 * on the source value. We can only get one of those into the
1167 * single source value that we get to blend with.
1168 *
1169 * We can cheat a bit if the src is solid, though. PictOpOver
1170 * can use the constant blend color to sneak a second blend
1171 * source in.
1172 */
1173 if (R600BlendOp[op].src_alpha &&
1174 (R600BlendOp[op].blend_cntl & COLOR_SRCBLEND_mask) !=
1175 (BLEND_ZERO << COLOR_SRCBLEND_shift)) {
1176 if (pSrcPicture->pDrawable || op != PictOpOver)
1177 RADEON_FALLBACK(("Component alpha not supported with source "
1178 "alpha and source value blending.\n"));
1179 }
1180 }
1181
1182 if (!R600CheckCompositeTexture(pMaskPicture, pDstPicture, op, 1))
1183 return FALSE;
1184 } else if (pMaskPicture->pSourcePict->type != SourcePictTypeSolidFill)
1185 RADEON_FALLBACK(("Gradient pictures not supported yet\n"));
1186 }
1187
1188 if (!R600GetDestFormat(pDstPicture, &tmp1))
1189 return FALSE;
1190
1191 return TRUE;
1192
1193 }
1194
/*
 * Convert a packed solid color into the four float constants consumed by the
 * composite pixel shader.
 *
 * pScrn  - screen; used to reach the accel state and to set the blend color.
 * buf    - output, receives { R, G, B, A } as floats in buf[0..3].
 * format - PICT_* format describing the channel layout of fg.
 * fg     - packed color, four 8-bit channels.
 * unit   - 0 for the source, anything else is treated as the mask.
 *
 * The channel adjustments after the format switch depend on unit and on the
 * accel state's msk_pic / component_alpha / src_alpha fields.
 */
static void R600SetSolidConsts(ScrnInfoPtr pScrn, float *buf, int format, uint32_t fg, int unit)
{
    RADEONInfoPtr info = RADEONPTR(pScrn);
    struct radeon_accel_state *accel_state = info->accel_state;
    float pix_r = 0, pix_g = 0, pix_b = 0, pix_a = 0;

    /* Split fg into bytes: x = bits 0-7, y = 8-15, z = 16-23, w = 24-31. */
    uint32_t w = (fg >> 24) & 0xff;
    uint32_t z = (fg >> 16) & 0xff;
    uint32_t y = (fg >> 8) & 0xff;
    uint32_t x = (fg >> 0) & 0xff;
    float xf = (float)x / 255; /* R */
    float yf = (float)y / 255; /* G */
    float zf = (float)z / 255; /* B */
    float wf = (float)w / 255; /* A */

    /* component swizzles */
    switch (format) {
    case PICT_a1r5g5b5:
    case PICT_a8r8g8b8:
        pix_r = zf; /* R */
        pix_g = yf; /* G */
        pix_b = xf; /* B */
        pix_a = wf; /* A */
        break;
    case PICT_a8b8g8r8:
        pix_r = xf; /* R */
        pix_g = yf; /* G */
        pix_b = zf; /* B */
        pix_a = wf; /* A */
        break;
    case PICT_x8b8g8r8:
        pix_r = xf; /* R */
        pix_g = yf; /* G */
        pix_b = zf; /* B */
        pix_a = 1.0; /* A */
        break;
    case PICT_b8g8r8a8:
        pix_r = yf; /* R */
        pix_g = zf; /* G */
        pix_b = wf; /* B */
        pix_a = xf; /* A */
        break;
    case PICT_b8g8r8x8:
        pix_r = yf; /* R */
        pix_g = zf; /* G */
        pix_b = wf; /* B */
        pix_a = 1.0; /* A */
        break;
    case PICT_x1r5g5b5:
    case PICT_x8r8g8b8:
    case PICT_r5g6b5:
        pix_r = zf; /* R */
        pix_g = yf; /* G */
        pix_b = xf; /* B */
        pix_a = 1.0; /* A */
        break;
    case PICT_a8:
        pix_r = 0.0; /* R */
        pix_g = 0.0; /* G */
        pix_b = 0.0; /* B */
        pix_a = xf; /* A */
        break;
    default:
        /* Unknown layout: report it and keep the all-zero defaults. */
        ErrorF("Bad format 0x%x\n", format);
    }

    if (unit == 0) {
        if (!accel_state->msk_pic) {
            if (PICT_FORMAT_RGB(format) == 0) {
                pix_r = 0.0;
                pix_g = 0.0;
                pix_b = 0.0;
            }

            if (PICT_FORMAT_A(format) == 0)
                pix_a = 1.0;
        } else {
            if (accel_state->component_alpha) {
                if (accel_state->src_alpha) {
                    /*
                     * required for PictOpOver: the constant blend color is
                     * the color divided by its alpha.  A fully transparent
                     * color would divide by zero and put NaN/Inf into the
                     * blend color, so use zero in that case; the shader
                     * output below is the alpha (zero) in every channel
                     * anyway.
                     */
                    float cblend[4] = { 0.0, 0.0, 0.0, 0.0 };

                    if (pix_a != 0.0) {
                        cblend[0] = pix_r / pix_a;
                        cblend[1] = pix_g / pix_a;
                        cblend[2] = pix_b / pix_a;
                        cblend[3] = pix_a / pix_a;
                    }
                    r600_set_blend_color(pScrn, cblend);

                    if (PICT_FORMAT_A(format) == 0) {
                        pix_r = 1.0;
                        pix_g = 1.0;
                        pix_b = 1.0;
                        pix_a = 1.0;
                    } else {
                        /* Replicate alpha into the color channels. */
                        pix_r = pix_a;
                        pix_g = pix_a;
                        pix_b = pix_a;
                    }
                } else {
                    if (PICT_FORMAT_A(format) == 0)
                        pix_a = 1.0;
                }
            } else {
                if (PICT_FORMAT_RGB(format) == 0) {
                    pix_r = 0;
                    pix_g = 0;
                    pix_b = 0;
                }

                if (PICT_FORMAT_A(format) == 0)
                    pix_a = 1.0;
            }
        }
    } else {
        if (accel_state->component_alpha) {
            if (PICT_FORMAT_A(format) == 0)
                pix_a = 1.0;
        } else {
            /* Non-component-alpha mask: only its alpha is used. */
            if (PICT_FORMAT_A(format) == 0) {
                pix_r = 1.0;
                pix_g = 1.0;
                pix_b = 1.0;
                pix_a = 1.0;
            } else {
                pix_r = pix_a;
                pix_g = pix_a;
                pix_b = pix_a;
            }
        }
    }

    buf[0] = pix_r;
    buf[1] = pix_g;
    buf[2] = pix_b;
    buf[3] = pix_a;
}
1327
R600PrepareComposite(int op,PicturePtr pSrcPicture,PicturePtr pMaskPicture,PicturePtr pDstPicture,PixmapPtr pSrc,PixmapPtr pMask,PixmapPtr pDst)1328 static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture,
1329 PicturePtr pMaskPicture, PicturePtr pDstPicture,
1330 PixmapPtr pSrc, PixmapPtr pMask, PixmapPtr pDst)
1331 {
1332 ScreenPtr pScreen = pDst->drawable.pScreen;
1333 ScrnInfoPtr pScrn = xf86ScreenToScrn(pScreen);
1334 RADEONInfoPtr info = RADEONPTR(pScrn);
1335 struct radeon_accel_state *accel_state = info->accel_state;
1336 uint32_t dst_format;
1337 cb_config_t cb_conf;
1338 shader_config_t vs_conf, ps_conf;
1339 struct r600_accel_object src_obj, mask_obj, dst_obj;
1340 uint32_t ps_bool_consts = 0;
1341 float ps_alu_consts[8];
1342
1343 if (pDst->drawable.bitsPerPixel < 8 || (pSrc && pSrc->drawable.bitsPerPixel < 8))
1344 return FALSE;
1345
1346 if (pSrc) {
1347 src_obj.bo = radeon_get_pixmap_bo(pSrc)->bo.radeon;
1348 src_obj.tiling_flags = radeon_get_pixmap_tiling(pSrc);
1349 src_obj.surface = radeon_get_pixmap_surface(pSrc);
1350 src_obj.pitch = exaGetPixmapPitch(pSrc) / (pSrc->drawable.bitsPerPixel / 8);
1351 src_obj.width = pSrc->drawable.width;
1352 src_obj.height = pSrc->drawable.height;
1353 src_obj.bpp = pSrc->drawable.bitsPerPixel;
1354 src_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
1355 }
1356
1357 dst_obj.bo = radeon_get_pixmap_bo(pDst)->bo.radeon;
1358 dst_obj.tiling_flags = radeon_get_pixmap_tiling(pDst);
1359 dst_obj.surface = radeon_get_pixmap_surface(pDst);
1360 dst_obj.pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8);
1361 dst_obj.width = pDst->drawable.width;
1362 dst_obj.height = pDst->drawable.height;
1363 dst_obj.bpp = pDst->drawable.bitsPerPixel;
1364 if (radeon_get_pixmap_shared(pDst) == TRUE)
1365 dst_obj.domain = RADEON_GEM_DOMAIN_GTT;
1366 else
1367 dst_obj.domain = RADEON_GEM_DOMAIN_VRAM;
1368
1369 if (pMaskPicture) {
1370 if (pMask) {
1371 mask_obj.bo = radeon_get_pixmap_bo(pMask)->bo.radeon;
1372 mask_obj.tiling_flags = radeon_get_pixmap_tiling(pMask);
1373 mask_obj.surface = radeon_get_pixmap_surface(pMask);
1374 mask_obj.pitch = exaGetPixmapPitch(pMask) / (pMask->drawable.bitsPerPixel / 8);
1375 mask_obj.width = pMask->drawable.width;
1376 mask_obj.height = pMask->drawable.height;
1377 mask_obj.bpp = pMask->drawable.bitsPerPixel;
1378 mask_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
1379 }
1380
1381 accel_state->msk_pic = pMaskPicture;
1382 if (pMaskPicture->componentAlpha) {
1383 accel_state->component_alpha = TRUE;
1384 if (R600BlendOp[op].src_alpha)
1385 accel_state->src_alpha = TRUE;
1386 else
1387 accel_state->src_alpha = FALSE;
1388 } else {
1389 accel_state->component_alpha = FALSE;
1390 accel_state->src_alpha = FALSE;
1391 }
1392 } else {
1393 accel_state->msk_pic = NULL;
1394 accel_state->component_alpha = FALSE;
1395 accel_state->src_alpha = FALSE;
1396 }
1397
1398 if (!R600SetAccelState(pScrn,
1399 pSrc ? &src_obj : NULL,
1400 (pMaskPicture && pMask) ? &mask_obj : NULL,
1401 &dst_obj,
1402 accel_state->comp_vs_offset, accel_state->comp_ps_offset,
1403 3, 0xffffffff))
1404 return FALSE;
1405
1406 if (!R600GetDestFormat(pDstPicture, &dst_format))
1407 return FALSE;
1408
1409 CLEAR (cb_conf);
1410 CLEAR (vs_conf);
1411 CLEAR (ps_conf);
1412
1413 if (pMask)
1414 radeon_vbo_check(pScrn, &accel_state->vbo, 24);
1415 else
1416 radeon_vbo_check(pScrn, &accel_state->vbo, 16);
1417
1418 radeon_cp_start(pScrn);
1419
1420 r600_set_default_state(pScrn);
1421
1422 r600_set_generic_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
1423 r600_set_screen_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
1424 r600_set_window_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
1425
1426 if (pSrc) {
1427 if (!R600TextureSetup(pSrcPicture, pSrc, 0)) {
1428 R600IBDiscard(pScrn);
1429 return FALSE;
1430 }
1431 } else
1432 accel_state->is_transform[0] = FALSE;
1433
1434 if (pMask) {
1435 if (!R600TextureSetup(pMaskPicture, pMask, 1)) {
1436 R600IBDiscard(pScrn);
1437 return FALSE;
1438 }
1439 } else
1440 accel_state->is_transform[1] = FALSE;
1441
1442 if (pSrc)
1443 ps_bool_consts |= (1 << 0);
1444 if (pMask)
1445 ps_bool_consts |= (1 << 1);
1446 r600_set_bool_consts(pScrn, SQ_BOOL_CONST_ps, ps_bool_consts);
1447
1448 if (pMask) {
1449 r600_set_bool_consts(pScrn, SQ_BOOL_CONST_vs, (1 << 0));
1450 } else {
1451 r600_set_bool_consts(pScrn, SQ_BOOL_CONST_vs, (0 << 0));
1452 }
1453
1454 /* Shader */
1455 vs_conf.shader_addr = accel_state->vs_mc_addr;
1456 vs_conf.shader_size = accel_state->vs_size;
1457 vs_conf.num_gprs = 5;
1458 vs_conf.stack_size = 1;
1459 vs_conf.bo = accel_state->shaders_bo;
1460 r600_vs_setup(pScrn, &vs_conf, RADEON_GEM_DOMAIN_VRAM);
1461
1462 ps_conf.shader_addr = accel_state->ps_mc_addr;
1463 ps_conf.shader_size = accel_state->ps_size;
1464 ps_conf.num_gprs = 2;
1465 ps_conf.stack_size = 1;
1466 ps_conf.uncached_first_inst = 1;
1467 ps_conf.clamp_consts = 0;
1468 ps_conf.export_mode = 2;
1469 ps_conf.bo = accel_state->shaders_bo;
1470 r600_ps_setup(pScrn, &ps_conf, RADEON_GEM_DOMAIN_VRAM);
1471
1472 cb_conf.id = 0;
1473 cb_conf.w = accel_state->dst_obj.pitch;
1474 cb_conf.h = accel_state->dst_obj.height;
1475 cb_conf.base = 0;
1476 cb_conf.format = dst_format;
1477 cb_conf.bo = accel_state->dst_obj.bo;
1478 cb_conf.surface = accel_state->dst_obj.surface;
1479
1480 switch (pDstPicture->format) {
1481 case PICT_a2r10g10b10:
1482 case PICT_x2r10g10b10:
1483 case PICT_a8r8g8b8:
1484 case PICT_x8r8g8b8:
1485 case PICT_a1r5g5b5:
1486 case PICT_x1r5g5b5:
1487 default:
1488 cb_conf.comp_swap = 1; /* ARGB */
1489 break;
1490 case PICT_a2b10g10r10:
1491 case PICT_x2b10g10r10:
1492 case PICT_a8b8g8r8:
1493 case PICT_x8b8g8r8:
1494 cb_conf.comp_swap = 0; /* ABGR */
1495 break;
1496 case PICT_b8g8r8a8:
1497 case PICT_b8g8r8x8:
1498 cb_conf.comp_swap = 3; /* BGRA */
1499 break;
1500 case PICT_r5g6b5:
1501 cb_conf.comp_swap = 2; /* RGB */
1502 break;
1503 case PICT_a8:
1504 cb_conf.comp_swap = 3; /* A */
1505 break;
1506 }
1507 cb_conf.source_format = 1;
1508 cb_conf.blend_clamp = 1;
1509 cb_conf.blendcntl = R600GetBlendCntl(op, pMaskPicture, pDstPicture->format);
1510 cb_conf.blend_enable = 1;
1511 cb_conf.pmask = 0xf;
1512 cb_conf.rop = 3;
1513 if (accel_state->dst_obj.tiling_flags == 0)
1514 cb_conf.array_mode = 0;
1515 #if X_BYTE_ORDER == X_BIG_ENDIAN
1516 switch (dst_obj.bpp) {
1517 case 16:
1518 cb_conf.endian = ENDIAN_8IN16;
1519 break;
1520 case 32:
1521 cb_conf.endian = ENDIAN_8IN32;
1522 break;
1523 default:
1524 break;
1525 }
1526 #endif
1527 r600_set_render_target(pScrn, &cb_conf, accel_state->dst_obj.domain);
1528
1529 if (pMask)
1530 r600_set_spi(pScrn, (2 - 1), 2);
1531 else
1532 r600_set_spi(pScrn, (1 - 1), 1);
1533
1534 if (!pSrc) {
1535 /* solid src color */
1536 R600SetSolidConsts(pScrn, &ps_alu_consts[0], pSrcPicture->format,
1537 pSrcPicture->pSourcePict->solidFill.color, 0);
1538 }
1539
1540 if (!pMaskPicture) {
1541 /* use identity constant if there is no mask */
1542 ps_alu_consts[4] = 1.0;
1543 ps_alu_consts[5] = 1.0;
1544 ps_alu_consts[6] = 1.0;
1545 ps_alu_consts[7] = 1.0;
1546 } else if (!pMask) {
1547 /* solid mask color */
1548 R600SetSolidConsts(pScrn, &ps_alu_consts[4], pMaskPicture->format,
1549 pMaskPicture->pSourcePict->solidFill.color, 1);
1550 }
1551
1552 r600_set_alu_consts(pScrn, SQ_ALU_CONSTANT_ps,
1553 sizeof(ps_alu_consts) / SQ_ALU_CONSTANT_offset, ps_alu_consts);
1554
1555 if (accel_state->vsync)
1556 RADEONVlineHelperClear(pScrn);
1557
1558 accel_state->composite_op = op;
1559 accel_state->dst_pic = pDstPicture;
1560 accel_state->src_pic = pSrcPicture;
1561 accel_state->dst_pix = pDst;
1562 accel_state->msk_pix = pMask;
1563 accel_state->src_pix = pSrc;
1564
1565 return TRUE;
1566 }
1567
/*
 * Close the current composite batch: wait for the recorded vline range when
 * vsync is enabled, then finish the operation with the vertex size that
 * matches the batch (24 with a mask pixmap, 16 without).
 */
static void R600FinishComposite(ScrnInfoPtr pScrn, PixmapPtr pDst,
                                struct radeon_accel_state *accel_state)
{
    int vertex_size = 16;

    if (accel_state->vsync) {
        r600_cp_wait_vline_sync(pScrn, pDst,
                                accel_state->vline_crtc,
                                accel_state->vline_y1,
                                accel_state->vline_y2);
    }

    if (accel_state->msk_pix)
        vertex_size = 24;

    r600_finish_op(pScrn, vertex_size);
}
1583
/*
 * Composite "done" hook: look up the accel state for the screen that owns
 * pDst and finish the pending composite batch.
 */
static void R600DoneComposite(PixmapPtr pDst)
{
    ScrnInfoPtr pScrn = xf86ScreenToScrn(pDst->drawable.pScreen);
    RADEONInfoPtr info = RADEONPTR(pScrn);

    R600FinishComposite(pScrn, pDst, info->accel_state);
}
1593
/*
 * Queue one composite rectangle.
 *
 * Three vertices are written per rectangle: top-left, bottom-left and
 * bottom-right.  Each vertex holds the destination position and the source
 * coordinate, followed by the mask coordinate when a mask pixmap is in use.
 *
 * When the command stream is full the current batch is finished and
 * flushed, and the composite state is programmed again from the values
 * saved in the accel state before the rectangle is appended.
 */
static void R600Composite(PixmapPtr pDst,
                          int srcX, int srcY,
                          int maskX, int maskY,
                          int dstX, int dstY,
                          int w, int h)
{
    ScrnInfoPtr pScrn = xf86ScreenToScrn(pDst->drawable.pScreen);
    RADEONInfoPtr info = RADEONPTR(pScrn);
    struct radeon_accel_state *accel_state = info->accel_state;
    /* Corner coordinates shared by both vertex layouts. */
    const float dst_x0 = (float)dstX;
    const float dst_y0 = (float)dstY;
    const float dst_x1 = (float)(dstX + w);
    const float dst_y1 = (float)(dstY + h);
    const float src_x0 = (float)srcX;
    const float src_y0 = (float)srcY;
    const float src_x1 = (float)(srcX + w);
    const float src_y1 = (float)(srcY + h);
    float *out;

    if (CS_FULL(info->cs)) {
        R600FinishComposite(pScrn, pDst, info->accel_state);
        radeon_cs_flush_indirect(pScrn);
        R600PrepareComposite(info->accel_state->composite_op,
                             info->accel_state->src_pic,
                             info->accel_state->msk_pic,
                             info->accel_state->dst_pic,
                             info->accel_state->src_pix,
                             info->accel_state->msk_pix,
                             info->accel_state->dst_pix);
    }

    if (accel_state->vsync)
        RADEONVlineHelperSet(pScrn, dstX, dstY, dstX + w, dstY + h);

    if (accel_state->msk_pix) {
        const float msk_x0 = (float)maskX;
        const float msk_y0 = (float)maskY;
        const float msk_x1 = (float)(maskX + w);
        const float msk_y1 = (float)(maskY + h);

        out = radeon_vbo_space(pScrn, &accel_state->vbo, 24);

        /* top-left */
        *out++ = dst_x0;
        *out++ = dst_y0;
        *out++ = src_x0;
        *out++ = src_y0;
        *out++ = msk_x0;
        *out++ = msk_y0;

        /* bottom-left */
        *out++ = dst_x0;
        *out++ = dst_y1;
        *out++ = src_x0;
        *out++ = src_y1;
        *out++ = msk_x0;
        *out++ = msk_y1;

        /* bottom-right */
        *out++ = dst_x1;
        *out++ = dst_y1;
        *out++ = src_x1;
        *out++ = src_y1;
        *out++ = msk_x1;
        *out++ = msk_y1;
    } else {
        out = radeon_vbo_space(pScrn, &accel_state->vbo, 16);

        /* top-left */
        *out++ = dst_x0;
        *out++ = dst_y0;
        *out++ = src_x0;
        *out++ = src_y0;

        /* bottom-left */
        *out++ = dst_x0;
        *out++ = dst_y1;
        *out++ = src_x0;
        *out++ = src_y1;

        /* bottom-right */
        *out++ = dst_x1;
        *out++ = dst_y1;
        *out++ = src_x1;
        *out++ = src_y1;
    }

    radeon_vbo_commit(pScrn, &accel_state->vbo);
}
1674
1675 static Bool
R600UploadToScreenCS(PixmapPtr pDst,int x,int y,int w,int h,char * src,int src_pitch)1676 R600UploadToScreenCS(PixmapPtr pDst, int x, int y, int w, int h,
1677 char *src, int src_pitch)
1678 {
1679 ScrnInfoPtr pScrn = xf86ScreenToScrn(pDst->drawable.pScreen);
1680 RADEONInfoPtr info = RADEONPTR(pScrn);
1681 struct radeon_accel_state *accel_state = info->accel_state;
1682 struct radeon_exa_pixmap_priv *driver_priv;
1683 struct radeon_bo *scratch = NULL;
1684 struct radeon_bo *copy_dst;
1685 unsigned char *dst;
1686 unsigned size;
1687 uint32_t dst_domain;
1688 int bpp = pDst->drawable.bitsPerPixel;
1689 uint32_t scratch_pitch;
1690 uint32_t copy_pitch;
1691 uint32_t dst_pitch_hw = exaGetPixmapPitch(pDst) / (bpp / 8);
1692 int ret;
1693 Bool flush = TRUE;
1694 Bool r;
1695 int i;
1696 struct r600_accel_object src_obj, dst_obj;
1697 uint32_t height, base_align;
1698
1699 if (bpp < 8)
1700 return FALSE;
1701
1702 driver_priv = exaGetPixmapDriverPrivate(pDst);
1703 if (!driver_priv || !driver_priv->bo->bo.radeon)
1704 return FALSE;
1705
1706 /* If we know the BO won't be busy / in VRAM, don't bother with a scratch */
1707 copy_dst = driver_priv->bo->bo.radeon;
1708 copy_pitch = pDst->devKind;
1709 if (!(driver_priv->tiling_flags & (RADEON_TILING_MACRO | RADEON_TILING_MICRO))) {
1710 if (!radeon_bo_is_referenced_by_cs(driver_priv->bo->bo.radeon, info->cs)) {
1711 flush = FALSE;
1712 if (!radeon_bo_is_busy(driver_priv->bo->bo.radeon, &dst_domain) &&
1713 !(dst_domain & RADEON_GEM_DOMAIN_VRAM))
1714 goto copy;
1715 }
1716 /* use cpu copy for fast fb access */
1717 if (info->is_fast_fb)
1718 goto copy;
1719 }
1720
1721 scratch_pitch = RADEON_ALIGN(w, drmmode_get_pitch_align(pScrn, (bpp / 8), 0));
1722 height = RADEON_ALIGN(h, drmmode_get_height_align(pScrn, 0));
1723 base_align = drmmode_get_base_align(pScrn, (bpp / 8), 0);
1724 size = scratch_pitch * height * (bpp / 8);
1725 scratch = radeon_bo_open(info->bufmgr, 0, size, base_align, RADEON_GEM_DOMAIN_GTT, 0);
1726 if (!scratch) {
1727 goto copy;
1728 }
1729
1730 src_obj.pitch = scratch_pitch;
1731 src_obj.width = w;
1732 src_obj.height = h;
1733 src_obj.bpp = bpp;
1734 src_obj.domain = RADEON_GEM_DOMAIN_GTT;
1735 src_obj.bo = scratch;
1736 src_obj.tiling_flags = 0;
1737 src_obj.surface = NULL;
1738
1739 dst_obj.pitch = dst_pitch_hw;
1740 dst_obj.width = pDst->drawable.width;
1741 dst_obj.height = pDst->drawable.height;
1742 dst_obj.bpp = bpp;
1743 dst_obj.domain = RADEON_GEM_DOMAIN_VRAM;
1744 dst_obj.bo = radeon_get_pixmap_bo(pDst)->bo.radeon;
1745 dst_obj.tiling_flags = radeon_get_pixmap_tiling(pDst);
1746 dst_obj.surface = radeon_get_pixmap_surface(pDst);
1747
1748 if (!R600SetAccelState(pScrn,
1749 &src_obj,
1750 NULL,
1751 &dst_obj,
1752 accel_state->copy_vs_offset, accel_state->copy_ps_offset,
1753 3, 0xffffffff)) {
1754 goto copy;
1755 }
1756 copy_dst = scratch;
1757 copy_pitch = scratch_pitch * (bpp / 8);
1758 flush = FALSE;
1759
1760 copy:
1761 if (flush)
1762 radeon_cs_flush_indirect(pScrn);
1763
1764 ret = radeon_bo_map(copy_dst, 0);
1765 if (ret) {
1766 r = FALSE;
1767 goto out;
1768 }
1769 r = TRUE;
1770 size = w * bpp / 8;
1771 dst = copy_dst->ptr;
1772 if (copy_dst == driver_priv->bo->bo.radeon)
1773 dst += y * copy_pitch + x * bpp / 8;
1774 for (i = 0; i < h; i++) {
1775 memcpy(dst + i * copy_pitch, src, size);
1776 src += src_pitch;
1777 }
1778 radeon_bo_unmap(copy_dst);
1779
1780 if (copy_dst == scratch) {
1781 if (info->accel_state->vsync)
1782 RADEONVlineHelperSet(pScrn, x, y, x + w, y + h);
1783
1784 /* blit from gart to vram */
1785 R600DoPrepareCopy(pScrn);
1786 R600AppendCopyVertex(pScrn, 0, 0, x, y, w, h);
1787 R600DoCopyVline(pDst);
1788 }
1789
1790 out:
1791 if (scratch)
1792 radeon_bo_unref(scratch);
1793 return r;
1794 }
1795
1796 static Bool
R600DownloadFromScreenCS(PixmapPtr pSrc,int x,int y,int w,int h,char * dst,int dst_pitch)1797 R600DownloadFromScreenCS(PixmapPtr pSrc, int x, int y, int w,
1798 int h, char *dst, int dst_pitch)
1799 {
1800 ScrnInfoPtr pScrn = xf86ScreenToScrn(pSrc->drawable.pScreen);
1801 RADEONInfoPtr info = RADEONPTR(pScrn);
1802 struct radeon_accel_state *accel_state = info->accel_state;
1803 struct radeon_exa_pixmap_priv *driver_priv;
1804 struct radeon_bo *scratch = NULL;
1805 struct radeon_bo *copy_src;
1806 unsigned size;
1807 uint32_t src_domain = 0;
1808 int bpp = pSrc->drawable.bitsPerPixel;
1809 uint32_t scratch_pitch;
1810 uint32_t copy_pitch;
1811 uint32_t src_pitch_hw = exaGetPixmapPitch(pSrc) / (bpp / 8);
1812 int ret;
1813 Bool flush = FALSE;
1814 Bool r;
1815 struct r600_accel_object src_obj, dst_obj;
1816 uint32_t height, base_align;
1817
1818 if (bpp < 8)
1819 return FALSE;
1820
1821 driver_priv = exaGetPixmapDriverPrivate(pSrc);
1822 if (!driver_priv || !driver_priv->bo->bo.radeon)
1823 return FALSE;
1824
1825 /* If we know the BO won't end up in VRAM anyway, don't bother with a scratch */
1826 copy_src = driver_priv->bo->bo.radeon;
1827 copy_pitch = pSrc->devKind;
1828 if (!(driver_priv->tiling_flags & (RADEON_TILING_MACRO | RADEON_TILING_MICRO))) {
1829 if (radeon_bo_is_referenced_by_cs(driver_priv->bo->bo.radeon, info->cs)) {
1830 src_domain = radeon_bo_get_src_domain(driver_priv->bo->bo.radeon);
1831 if ((src_domain & (RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM)) ==
1832 (RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM))
1833 src_domain = 0;
1834 else /* A write may be scheduled */
1835 flush = TRUE;
1836 }
1837
1838 if (!src_domain)
1839 radeon_bo_is_busy(driver_priv->bo->bo.radeon, &src_domain);
1840
1841 if (src_domain & ~(uint32_t)RADEON_GEM_DOMAIN_VRAM)
1842 goto copy;
1843 }
1844
1845 scratch_pitch = RADEON_ALIGN(w, drmmode_get_pitch_align(pScrn, (bpp / 8), 0));
1846 height = RADEON_ALIGN(h, drmmode_get_height_align(pScrn, 0));
1847 base_align = drmmode_get_base_align(pScrn, (bpp / 8), 0);
1848 size = scratch_pitch * height * (bpp / 8);
1849 scratch = radeon_bo_open(info->bufmgr, 0, size, base_align, RADEON_GEM_DOMAIN_GTT, 0);
1850 if (!scratch) {
1851 goto copy;
1852 }
1853 radeon_cs_space_reset_bos(info->cs);
1854 radeon_cs_space_add_persistent_bo(info->cs, info->accel_state->shaders_bo,
1855 RADEON_GEM_DOMAIN_VRAM, 0);
1856 accel_state->src_obj[0].domain = RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM;
1857 radeon_add_pixmap(info->cs, pSrc, info->accel_state->src_obj[0].domain, 0);
1858 accel_state->dst_obj.domain = RADEON_GEM_DOMAIN_GTT;
1859 radeon_cs_space_add_persistent_bo(info->cs, scratch, 0, accel_state->dst_obj.domain);
1860 ret = radeon_cs_space_check(info->cs);
1861 if (ret) {
1862 goto copy;
1863 }
1864
1865 src_obj.pitch = src_pitch_hw;
1866 src_obj.width = pSrc->drawable.width;
1867 src_obj.height = pSrc->drawable.height;
1868 src_obj.bpp = bpp;
1869 src_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
1870 src_obj.bo = radeon_get_pixmap_bo(pSrc)->bo.radeon;
1871 src_obj.tiling_flags = radeon_get_pixmap_tiling(pSrc);
1872 src_obj.surface = radeon_get_pixmap_surface(pSrc);
1873
1874 dst_obj.pitch = scratch_pitch;
1875 dst_obj.width = w;
1876 dst_obj.height = h;
1877 dst_obj.bo = scratch;
1878 dst_obj.bpp = bpp;
1879 dst_obj.domain = RADEON_GEM_DOMAIN_GTT;
1880 dst_obj.tiling_flags = 0;
1881 dst_obj.surface = NULL;
1882
1883 if (!R600SetAccelState(pScrn,
1884 &src_obj,
1885 NULL,
1886 &dst_obj,
1887 accel_state->copy_vs_offset, accel_state->copy_ps_offset,
1888 3, 0xffffffff)) {
1889 goto copy;
1890 }
1891
1892 /* blit from vram to gart */
1893 R600DoPrepareCopy(pScrn);
1894 R600AppendCopyVertex(pScrn, x, y, 0, 0, w, h);
1895 R600DoCopy(pScrn);
1896 copy_src = scratch;
1897 copy_pitch = scratch_pitch * (bpp / 8);
1898 flush = TRUE;
1899
1900 copy:
1901 if (flush)
1902 radeon_cs_flush_indirect(pScrn);
1903
1904 ret = radeon_bo_map(copy_src, 0);
1905 if (ret) {
1906 ErrorF("failed to map pixmap: %d\n", ret);
1907 r = FALSE;
1908 goto out;
1909 }
1910 r = TRUE;
1911 w *= bpp / 8;
1912 if (copy_src == driver_priv->bo->bo.radeon)
1913 size = y * copy_pitch + x * bpp / 8;
1914 else
1915 size = 0;
1916 while (h--) {
1917 memcpy(dst, copy_src->ptr + size, w);
1918 size += copy_pitch;
1919 dst += dst_pitch;
1920 }
1921 radeon_bo_unmap(copy_src);
1922 out:
1923 if (scratch)
1924 radeon_bo_unref(scratch);
1925 return r;
1926 }
1927
1928 static int
R600MarkSync(ScreenPtr pScreen)1929 R600MarkSync(ScreenPtr pScreen)
1930 {
1931 ScrnInfoPtr pScrn = xf86ScreenToScrn(pScreen);
1932 RADEONInfoPtr info = RADEONPTR(pScrn);
1933 struct radeon_accel_state *accel_state = info->accel_state;
1934
1935 return ++accel_state->exaSyncMarker;
1936
1937 }
1938
1939 static void
R600Sync(ScreenPtr pScreen,int marker)1940 R600Sync(ScreenPtr pScreen, int marker)
1941 {
1942 ScrnInfoPtr pScrn = xf86ScreenToScrn(pScreen);
1943 RADEONInfoPtr info = RADEONPTR(pScrn);
1944 struct radeon_accel_state *accel_state = info->accel_state;
1945
1946 if (accel_state->exaMarkerSynced != marker) {
1947 accel_state->exaMarkerSynced = marker;
1948 }
1949
1950 }
1951
1952 static Bool
R600AllocShaders(ScrnInfoPtr pScrn,ScreenPtr pScreen)1953 R600AllocShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
1954 {
1955 RADEONInfoPtr info = RADEONPTR(pScrn);
1956 struct radeon_accel_state *accel_state = info->accel_state;
1957
1958 /* 512 bytes per shader for now */
1959 int size = 512 * 9;
1960
1961 accel_state->shaders_bo = radeon_bo_open(info->bufmgr, 0, size, 0,
1962 RADEON_GEM_DOMAIN_VRAM, 0);
1963 if (!accel_state->shaders_bo) {
1964 ErrorF("Allocating shader failed\n");
1965 return FALSE;
1966 }
1967 return TRUE;
1968 }
1969
1970 Bool
R600LoadShaders(ScrnInfoPtr pScrn)1971 R600LoadShaders(ScrnInfoPtr pScrn)
1972 {
1973 RADEONInfoPtr info = RADEONPTR(pScrn);
1974 struct radeon_accel_state *accel_state = info->accel_state;
1975 RADEONChipFamily ChipSet = info->ChipFamily;
1976 uint32_t *shader;
1977 int ret;
1978
1979 ret = radeon_bo_map(accel_state->shaders_bo, 1);
1980 if (ret) {
1981 FatalError("failed to map shader %d\n", ret);
1982 return FALSE;
1983 }
1984 shader = accel_state->shaders_bo->ptr;
1985
1986 /* solid vs --------------------------------------- */
1987 accel_state->solid_vs_offset = 0;
1988 R600_solid_vs(ChipSet, shader + accel_state->solid_vs_offset / 4);
1989
1990 /* solid ps --------------------------------------- */
1991 accel_state->solid_ps_offset = 512;
1992 R600_solid_ps(ChipSet, shader + accel_state->solid_ps_offset / 4);
1993
1994 /* copy vs --------------------------------------- */
1995 accel_state->copy_vs_offset = 1024;
1996 R600_copy_vs(ChipSet, shader + accel_state->copy_vs_offset / 4);
1997
1998 /* copy ps --------------------------------------- */
1999 accel_state->copy_ps_offset = 1536;
2000 R600_copy_ps(ChipSet, shader + accel_state->copy_ps_offset / 4);
2001
2002 /* comp vs --------------------------------------- */
2003 accel_state->comp_vs_offset = 2048;
2004 R600_comp_vs(ChipSet, shader + accel_state->comp_vs_offset / 4);
2005
2006 /* comp ps --------------------------------------- */
2007 accel_state->comp_ps_offset = 2560;
2008 R600_comp_ps(ChipSet, shader + accel_state->comp_ps_offset / 4);
2009
2010 /* xv vs --------------------------------------- */
2011 accel_state->xv_vs_offset = 3072;
2012 R600_xv_vs(ChipSet, shader + accel_state->xv_vs_offset / 4);
2013
2014 /* xv ps --------------------------------------- */
2015 accel_state->xv_ps_offset = 3584;
2016 R600_xv_ps(ChipSet, shader + accel_state->xv_ps_offset / 4);
2017
2018 radeon_bo_unmap(accel_state->shaders_bo);
2019 return TRUE;
2020 }
2021
2022 Bool
R600DrawInit(ScreenPtr pScreen)2023 R600DrawInit(ScreenPtr pScreen)
2024 {
2025 ScrnInfoPtr pScrn = xf86ScreenToScrn(pScreen);
2026 RADEONInfoPtr info = RADEONPTR(pScrn);
2027
2028 if (!info->accel_state->exa) {
2029 xf86DrvMsg(pScreen->myNum, X_ERROR, "Memory map not set up\n");
2030 return FALSE;
2031 }
2032
2033 info->accel_state->exa->exa_major = EXA_VERSION_MAJOR;
2034 info->accel_state->exa->exa_minor = EXA_VERSION_MINOR;
2035
2036 info->accel_state->exa->PrepareSolid = R600PrepareSolid;
2037 info->accel_state->exa->Solid = R600Solid;
2038 info->accel_state->exa->DoneSolid = R600DoneSolid;
2039
2040 info->accel_state->exa->PrepareCopy = R600PrepareCopy;
2041 info->accel_state->exa->Copy = R600Copy;
2042 info->accel_state->exa->DoneCopy = R600DoneCopy;
2043
2044 info->accel_state->exa->MarkSync = R600MarkSync;
2045 info->accel_state->exa->WaitMarker = R600Sync;
2046
2047 info->accel_state->exa->DestroyPixmap = RADEONEXADestroyPixmap;
2048 info->accel_state->exa->PixmapIsOffscreen = RADEONEXAPixmapIsOffscreen;
2049 info->accel_state->exa->PrepareAccess = RADEONPrepareAccess_CS;
2050 info->accel_state->exa->FinishAccess = RADEONFinishAccess_CS;
2051 info->accel_state->exa->UploadToScreen = R600UploadToScreenCS;
2052 info->accel_state->exa->DownloadFromScreen = R600DownloadFromScreenCS;
2053 info->accel_state->exa->CreatePixmap2 = RADEONEXACreatePixmap2;
2054 info->accel_state->exa->SharePixmapBacking = RADEONEXASharePixmapBacking;
2055 info->accel_state->exa->SetSharedPixmapBacking = RADEONEXASetSharedPixmapBacking;
2056 info->accel_state->exa->flags = EXA_OFFSCREEN_PIXMAPS | EXA_SUPPORTS_PREPARE_AUX |
2057 EXA_HANDLES_PIXMAPS | EXA_MIXED_PIXMAPS;
2058 info->accel_state->exa->pixmapOffsetAlign = 256;
2059 info->accel_state->exa->pixmapPitchAlign = 256;
2060
2061 info->accel_state->exa->CheckComposite = R600CheckComposite;
2062 info->accel_state->exa->PrepareComposite = R600PrepareComposite;
2063 info->accel_state->exa->Composite = R600Composite;
2064 info->accel_state->exa->DoneComposite = R600DoneComposite;
2065
2066 info->accel_state->exa->maxPitchBytes = 32768;
2067 info->accel_state->exa->maxX = 8192;
2068 info->accel_state->exa->maxY = 8192;
2069
2070 /* not supported yet */
2071 if (xf86ReturnOptValBool(info->Options, OPTION_EXA_VSYNC, FALSE)) {
2072 xf86DrvMsg(pScrn->scrnIndex, X_INFO, "EXA VSync enabled\n");
2073 info->accel_state->vsync = TRUE;
2074 } else
2075 info->accel_state->vsync = FALSE;
2076
2077 if (!exaDriverInit(pScreen, info->accel_state->exa)) {
2078 free(info->accel_state->exa);
2079 return FALSE;
2080 }
2081
2082 info->accel_state->XInited3D = FALSE;
2083 info->accel_state->src_obj[0].bo = NULL;
2084 info->accel_state->src_obj[1].bo = NULL;
2085 info->accel_state->dst_obj.bo = NULL;
2086 info->accel_state->copy_area_bo = NULL;
2087 info->accel_state->vbo.vb_start_op = -1;
2088 info->accel_state->finish_op = r600_finish_op;
2089 info->accel_state->vbo.verts_per_op = 3;
2090 RADEONVlineHelperClear(pScrn);
2091
2092 radeon_vbo_init_lists(pScrn);
2093
2094 if (!R600AllocShaders(pScrn, pScreen))
2095 return FALSE;
2096
2097 if (!R600LoadShaders(pScrn))
2098 return FALSE;
2099
2100 exaMarkSync(pScreen);
2101
2102 return TRUE;
2103
2104 }
2105
2106