/*
 * Copyright © 2006,2008,2011 Intel Corporation
 * Copyright © 2007 Red Hat, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Wang Zhenyu <zhenyu.z.wang@sna.com>
 *    Eric Anholt <eric@anholt.net>
 *    Carl Worth <cworth@redhat.com>
 *    Keith Packard <keithp@keithp.com>
 *    Chris Wilson <chris@chris-wilson.co.uk>
 *
 */

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include "sna.h"
#include "sna_reg.h"
#include "sna_render.h"
#include "sna_render_inline.h"
#include "sna_video.h"

#include "brw/brw.h"
#include "gen5_render.h"
#include "gen4_common.h"
#include "gen4_source.h"
#include "gen4_vertex.h"

#define NO_COMPOSITE 0
#define NO_COMPOSITE_SPANS 0

#define PREFER_BLT_FILL 1

#define DBG_NO_STATE_CACHE 0
#define DBG_NO_SURFACE_CACHE 0

#define ALWAYS_FLUSH 0

#define MAX_3D_SIZE 8192

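/* GRF allocation is encoded as the number of 16-register blocks, minus
 * one; round the kernel's register count up to a whole block.
 */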
#define GEN5_GRF_BLOCKS(nreg) ((nreg + 15) / 16 - 1)

/* Set up a default static partitioning of the URB, which is supposed to
 * allow anything we would want to do, at potentially lower performance.
 */
#define URB_CS_ENTRY_SIZE 1
#define URB_CS_ENTRIES 0

#define URB_VS_ENTRY_SIZE 1
#define URB_VS_ENTRIES 256 /* minimum of 8 */

#define URB_GS_ENTRY_SIZE 0
#define URB_GS_ENTRIES 0

#define URB_CLIP_ENTRY_SIZE 0
#define URB_CLIP_ENTRIES 0

#define URB_SF_ENTRY_SIZE 2
#define URB_SF_ENTRIES 64

/*
 * this program computes dA/dx and dA/dy for the texture coordinates along
 * with the base texture coordinate. It was extracted from the Mesa driver
 */

#define SF_KERNEL_NUM_GRF 16
#define SF_MAX_THREADS 48

#define PS_KERNEL_NUM_GRF 32
#define PS_MAX_THREADS 72

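/* The video pixel shaders below are precompiled Gen5 EU binaries: each
 * .g5b file holds the raw instruction dwords assembled offline from the
 * corresponding EXA wm shader sources.
 */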
static const uint32_t ps_kernel_packed_bt601_static[][4] = {
#include "exa_wm_xy.g5b"
#include "exa_wm_src_affine.g5b"
#include "exa_wm_src_sample_argb.g5b"
#include "exa_wm_yuv_rgb_bt601.g5b"
#include "exa_wm_write.g5b"
};

static const uint32_t ps_kernel_planar_bt601_static[][4] = {
#include "exa_wm_xy.g5b"
#include "exa_wm_src_affine.g5b"
#include "exa_wm_src_sample_planar.g5b"
#include "exa_wm_yuv_rgb_bt601.g5b"
#include "exa_wm_write.g5b"
};

static const uint32_t ps_kernel_nv12_bt601_static[][4] = {
#include "exa_wm_xy.g5b"
#include "exa_wm_src_affine.g5b"
#include "exa_wm_src_sample_nv12.g5b"
#include "exa_wm_yuv_rgb_bt601.g5b"
#include "exa_wm_write.g5b"
};

static const uint32_t ps_kernel_packed_bt709_static[][4] = {
#include "exa_wm_xy.g5b"
#include "exa_wm_src_affine.g5b"
#include "exa_wm_src_sample_argb.g5b"
#include "exa_wm_yuv_rgb_bt709.g5b"
#include "exa_wm_write.g5b"
};

static const uint32_t ps_kernel_planar_bt709_static[][4] = {
#include "exa_wm_xy.g5b"
#include "exa_wm_src_affine.g5b"
#include "exa_wm_src_sample_planar.g5b"
#include "exa_wm_yuv_rgb_bt709.g5b"
#include "exa_wm_write.g5b"
};

static const uint32_t ps_kernel_nv12_bt709_static[][4] = {
#include "exa_wm_xy.g5b"
#include "exa_wm_src_affine.g5b"
#include "exa_wm_src_sample_nv12.g5b"
#include "exa_wm_yuv_rgb_bt709.g5b"
#include "exa_wm_write.g5b"
};

#define NOKERNEL(kernel_enum, func, masked) \
        [kernel_enum] = {func, 0, masked}
#define KERNEL(kernel_enum, kernel, masked) \
        [kernel_enum] = {&kernel, sizeof(kernel), masked}
static const struct wm_kernel_info {
        const void *data;
        unsigned int size;
        bool has_mask;
} wm_kernels[] = {
        NOKERNEL(WM_KERNEL, brw_wm_kernel__affine, false),
        NOKERNEL(WM_KERNEL_P, brw_wm_kernel__projective, false),

        NOKERNEL(WM_KERNEL_MASK, brw_wm_kernel__affine_mask, true),
        NOKERNEL(WM_KERNEL_MASK_P, brw_wm_kernel__projective_mask, true),

        NOKERNEL(WM_KERNEL_MASKCA, brw_wm_kernel__affine_mask_ca, true),
        NOKERNEL(WM_KERNEL_MASKCA_P, brw_wm_kernel__projective_mask_ca, true),

        NOKERNEL(WM_KERNEL_MASKSA, brw_wm_kernel__affine_mask_sa, true),
        NOKERNEL(WM_KERNEL_MASKSA_P, brw_wm_kernel__projective_mask_sa, true),

        NOKERNEL(WM_KERNEL_OPACITY, brw_wm_kernel__affine_opacity, true),
        NOKERNEL(WM_KERNEL_OPACITY_P, brw_wm_kernel__projective_opacity, true),

        KERNEL(WM_KERNEL_VIDEO_PLANAR_BT601, ps_kernel_planar_bt601_static, false),
        KERNEL(WM_KERNEL_VIDEO_NV12_BT601, ps_kernel_nv12_bt601_static, false),
        KERNEL(WM_KERNEL_VIDEO_PACKED_BT601, ps_kernel_packed_bt601_static, false),

        KERNEL(WM_KERNEL_VIDEO_PLANAR_BT709, ps_kernel_planar_bt709_static, false),
        KERNEL(WM_KERNEL_VIDEO_NV12_BT709, ps_kernel_nv12_bt709_static, false),
        KERNEL(WM_KERNEL_VIDEO_PACKED_BT709, ps_kernel_packed_bt709_static, false),
};
#undef KERNEL

static const struct blendinfo {
        bool src_alpha;
        uint32_t src_blend;
        uint32_t dst_blend;
} gen5_blend_op[] = {
        /* Clear */ {0, GEN5_BLENDFACTOR_ZERO, GEN5_BLENDFACTOR_ZERO},
        /* Src */ {0, GEN5_BLENDFACTOR_ONE, GEN5_BLENDFACTOR_ZERO},
        /* Dst */ {0, GEN5_BLENDFACTOR_ZERO, GEN5_BLENDFACTOR_ONE},
        /* Over */ {1, GEN5_BLENDFACTOR_ONE, GEN5_BLENDFACTOR_INV_SRC_ALPHA},
        /* OverReverse */ {0, GEN5_BLENDFACTOR_INV_DST_ALPHA, GEN5_BLENDFACTOR_ONE},
        /* In */ {0, GEN5_BLENDFACTOR_DST_ALPHA, GEN5_BLENDFACTOR_ZERO},
        /* InReverse */ {1, GEN5_BLENDFACTOR_ZERO, GEN5_BLENDFACTOR_SRC_ALPHA},
        /* Out */ {0, GEN5_BLENDFACTOR_INV_DST_ALPHA, GEN5_BLENDFACTOR_ZERO},
        /* OutReverse */ {1, GEN5_BLENDFACTOR_ZERO, GEN5_BLENDFACTOR_INV_SRC_ALPHA},
        /* Atop */ {1, GEN5_BLENDFACTOR_DST_ALPHA, GEN5_BLENDFACTOR_INV_SRC_ALPHA},
        /* AtopReverse */ {1, GEN5_BLENDFACTOR_INV_DST_ALPHA, GEN5_BLENDFACTOR_SRC_ALPHA},
        /* Xor */ {1, GEN5_BLENDFACTOR_INV_DST_ALPHA, GEN5_BLENDFACTOR_INV_SRC_ALPHA},
        /* Add */ {0, GEN5_BLENDFACTOR_ONE, GEN5_BLENDFACTOR_ONE},
};

/**
 * Highest-valued BLENDFACTOR used in gen5_blend_op.
 *
 * This leaves out GEN5_BLENDFACTOR_INV_DST_COLOR,
 * GEN5_BLENDFACTOR_INV_CONST_{COLOR,ALPHA},
 * GEN5_BLENDFACTOR_INV_SRC1_{COLOR,ALPHA}
 */
#define GEN5_BLENDFACTOR_COUNT (GEN5_BLENDFACTOR_INV_DST_ALPHA + 1)

#define BLEND_OFFSET(s, d) \
        (((s) * GEN5_BLENDFACTOR_COUNT + (d)) * 64)

#define SAMPLER_OFFSET(sf, se, mf, me, k) \
        ((((((sf) * EXTEND_COUNT + (se)) * FILTER_COUNT + (mf)) * EXTEND_COUNT + (me)) * KERNEL_COUNT + (k)) * 64)
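/* The CC (blend) and sampler states are laid out in the static state
 * buffer as dense grids of 64-byte entries, one per combination, so a
 * state change reduces to computing an offset rather than re-emitting
 * state.
 */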

static bool
gen5_emit_pipelined_pointers(struct sna *sna,
                             const struct sna_composite_op *op,
                             int blend, int kernel);

#define OUT_BATCH(v) batch_emit(sna, v)
#define OUT_VERTEX(x,y) vertex_emit_2s(sna, x,y)
#define OUT_VERTEX_F(v) vertex_emit(sna, v)

static inline bool too_large(int width, int height)
{
        return width > MAX_3D_SIZE || height > MAX_3D_SIZE;
}

static int
gen5_choose_composite_kernel(int op, bool has_mask, bool is_ca, bool is_affine)
{
        int base;

        if (has_mask) {
                if (is_ca) {
                        if (gen5_blend_op[op].src_alpha)
                                base = WM_KERNEL_MASKSA;
                        else
                                base = WM_KERNEL_MASKCA;
                } else
                        base = WM_KERNEL_MASK;
        } else
                base = WM_KERNEL;

        return base + !is_affine;
}

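/* A Render operator with component-alpha needs a second hardware pass
 * when it uses source alpha: the fixed-function blender cannot apply
 * both the per-channel mask and the source alpha at once, so the
 * rectangles just emitted are replayed with a PictOpAdd kernel and
 * blend state to complete the composite.
 */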
static bool gen5_magic_ca_pass(struct sna *sna,
                               const struct sna_composite_op *op)
{
        struct gen5_render_state *state = &sna->render_state.gen5;

        if (!op->need_magic_ca_pass)
                return false;

        assert(sna->render.vertex_index > sna->render.vertex_start);

        DBG(("%s: CA fixup\n", __FUNCTION__));
        assert(op->mask.bo != NULL);
        assert(op->has_component_alpha);

        gen5_emit_pipelined_pointers
                (sna, op, PictOpAdd,
                 gen5_choose_composite_kernel(PictOpAdd,
                                              true, true, op->is_affine));

        OUT_BATCH(GEN5_3DPRIMITIVE |
                  GEN5_3DPRIMITIVE_VERTEX_SEQUENTIAL |
                  (_3DPRIM_RECTLIST << GEN5_3DPRIMITIVE_TOPOLOGY_SHIFT) |
                  (0 << 9) |
                  4);
        OUT_BATCH(sna->render.vertex_index - sna->render.vertex_start);
        OUT_BATCH(sna->render.vertex_start);
        OUT_BATCH(1); /* single instance */
        OUT_BATCH(0); /* start instance location */
        OUT_BATCH(0); /* index buffer offset, ignored */

        state->last_primitive = sna->kgem.nbatch;
        return true;
}

static uint32_t gen5_get_blend(int op,
                               bool has_component_alpha,
                               uint32_t dst_format)
{
        uint32_t src, dst;

        src = gen5_blend_op[op].src_blend;
        dst = gen5_blend_op[op].dst_blend;

        /* If there's no dst alpha channel, adjust the blend op so that we'll treat
         * it as always 1.
         */
        if (PICT_FORMAT_A(dst_format) == 0) {
                if (src == GEN5_BLENDFACTOR_DST_ALPHA)
                        src = GEN5_BLENDFACTOR_ONE;
                else if (src == GEN5_BLENDFACTOR_INV_DST_ALPHA)
                        src = GEN5_BLENDFACTOR_ZERO;
        }

        /* If the source alpha is being used, then we should only be in a
         * case where the source blend factor is 0, and the source blend
         * value is the mask channels multiplied by the source picture's alpha.
         */
        if (has_component_alpha && gen5_blend_op[op].src_alpha) {
                if (dst == GEN5_BLENDFACTOR_SRC_ALPHA)
                        dst = GEN5_BLENDFACTOR_SRC_COLOR;
                else if (dst == GEN5_BLENDFACTOR_INV_SRC_ALPHA)
                        dst = GEN5_BLENDFACTOR_INV_SRC_COLOR;
        }

        DBG(("blend op=%d, dst=%x [A=%d] => src=%d, dst=%d => offset=%x\n",
             op, dst_format, PICT_FORMAT_A(dst_format),
             src, dst, BLEND_OFFSET(src, dst)));
        return BLEND_OFFSET(src, dst);
}

static uint32_t gen5_get_card_format(PictFormat format)
{
        switch (format) {
        default:
                return -1;
        case PICT_a8r8g8b8:
                return GEN5_SURFACEFORMAT_B8G8R8A8_UNORM;
        case PICT_x8r8g8b8:
                return GEN5_SURFACEFORMAT_B8G8R8X8_UNORM;
        case PICT_a8b8g8r8:
                return GEN5_SURFACEFORMAT_R8G8B8A8_UNORM;
        case PICT_x8b8g8r8:
                return GEN5_SURFACEFORMAT_R8G8B8X8_UNORM;
#if XORG_VERSION_CURRENT >= XORG_VERSION_NUMERIC(1,6,99,900,0)
        case PICT_a2r10g10b10:
                return GEN5_SURFACEFORMAT_B10G10R10A2_UNORM;
        case PICT_x2r10g10b10:
                return GEN5_SURFACEFORMAT_B10G10R10X2_UNORM;
#endif
        case PICT_r8g8b8:
                return GEN5_SURFACEFORMAT_R8G8B8_UNORM;
        case PICT_r5g6b5:
                return GEN5_SURFACEFORMAT_B5G6R5_UNORM;
        case PICT_a1r5g5b5:
                return GEN5_SURFACEFORMAT_B5G5R5A1_UNORM;
        case PICT_a8:
                return GEN5_SURFACEFORMAT_A8_UNORM;
        case PICT_a4r4g4b4:
                return GEN5_SURFACEFORMAT_B4G4R4A4_UNORM;
        }
}

static uint32_t gen5_get_dest_format(PictFormat format)
{
        switch (format) {
        default:
                return -1;
        case PICT_a8r8g8b8:
        case PICT_x8r8g8b8:
                return GEN5_SURFACEFORMAT_B8G8R8A8_UNORM;
        case PICT_a8b8g8r8:
        case PICT_x8b8g8r8:
                return GEN5_SURFACEFORMAT_R8G8B8A8_UNORM;
#if XORG_VERSION_CURRENT >= XORG_VERSION_NUMERIC(1,6,99,900,0)
        case PICT_a2r10g10b10:
        case PICT_x2r10g10b10:
                return GEN5_SURFACEFORMAT_B10G10R10A2_UNORM;
#endif
        case PICT_r5g6b5:
                return GEN5_SURFACEFORMAT_B5G6R5_UNORM;
        case PICT_x1r5g5b5:
        case PICT_a1r5g5b5:
                return GEN5_SURFACEFORMAT_B5G5R5A1_UNORM;
        case PICT_a8:
                return GEN5_SURFACEFORMAT_A8_UNORM;
        case PICT_a4r4g4b4:
        case PICT_x4r4g4b4:
                return GEN5_SURFACEFORMAT_B4G4R4A4_UNORM;
        }
}

static bool gen5_check_dst_format(PictFormat format)
{
        if (gen5_get_dest_format(format) != -1)
                return true;

        DBG(("%s: unhandled format: %x\n", __FUNCTION__, (int)format));
        return false;
}

static bool gen5_check_format(uint32_t format)
{
        if (gen5_get_card_format(format) != -1)
                return true;

        DBG(("%s: unhandled format: %x\n", __FUNCTION__, (int)format));
        return false;
}

typedef struct gen5_surface_state_padded {
        struct gen5_surface_state state;
        char pad[32 - sizeof(struct gen5_surface_state)];
} gen5_surface_state_padded;

static void null_create(struct sna_static_stream *stream)
{
        /* A bunch of zeros useful for legacy border color and depth-stencil */
        sna_static_stream_map(stream, 64, 64);
}

static void
sampler_state_init(struct gen5_sampler_state *sampler_state,
                   sampler_filter_t filter,
                   sampler_extend_t extend)
{
        sampler_state->ss0.lod_preclamp = 1; /* GL mode */

        /* We use the legacy mode to get the semantics specified by
         * the Render extension. */
        sampler_state->ss0.border_color_mode = GEN5_BORDER_COLOR_MODE_LEGACY;

        switch (filter) {
        default:
        case SAMPLER_FILTER_NEAREST:
                sampler_state->ss0.min_filter = GEN5_MAPFILTER_NEAREST;
                sampler_state->ss0.mag_filter = GEN5_MAPFILTER_NEAREST;
                break;
        case SAMPLER_FILTER_BILINEAR:
                sampler_state->ss0.min_filter = GEN5_MAPFILTER_LINEAR;
                sampler_state->ss0.mag_filter = GEN5_MAPFILTER_LINEAR;
                break;
        }

        switch (extend) {
        default:
        case SAMPLER_EXTEND_NONE:
                sampler_state->ss1.r_wrap_mode = GEN5_TEXCOORDMODE_CLAMP_BORDER;
                sampler_state->ss1.s_wrap_mode = GEN5_TEXCOORDMODE_CLAMP_BORDER;
                sampler_state->ss1.t_wrap_mode = GEN5_TEXCOORDMODE_CLAMP_BORDER;
                break;
        case SAMPLER_EXTEND_REPEAT:
                sampler_state->ss1.r_wrap_mode = GEN5_TEXCOORDMODE_WRAP;
                sampler_state->ss1.s_wrap_mode = GEN5_TEXCOORDMODE_WRAP;
                sampler_state->ss1.t_wrap_mode = GEN5_TEXCOORDMODE_WRAP;
                break;
        case SAMPLER_EXTEND_PAD:
                sampler_state->ss1.r_wrap_mode = GEN5_TEXCOORDMODE_CLAMP;
                sampler_state->ss1.s_wrap_mode = GEN5_TEXCOORDMODE_CLAMP;
                sampler_state->ss1.t_wrap_mode = GEN5_TEXCOORDMODE_CLAMP;
                break;
        case SAMPLER_EXTEND_REFLECT:
                sampler_state->ss1.r_wrap_mode = GEN5_TEXCOORDMODE_MIRROR;
                sampler_state->ss1.s_wrap_mode = GEN5_TEXCOORDMODE_MIRROR;
                sampler_state->ss1.t_wrap_mode = GEN5_TEXCOORDMODE_MIRROR;
                break;
        }
}

static uint32_t gen5_filter(uint32_t filter)
{
        switch (filter) {
        default:
                assert(0);
        case PictFilterNearest:
                return SAMPLER_FILTER_NEAREST;
        case PictFilterBilinear:
                return SAMPLER_FILTER_BILINEAR;
        }
}

static uint32_t gen5_check_filter(PicturePtr picture)
{
        switch (picture->filter) {
        case PictFilterNearest:
        case PictFilterBilinear:
                return true;
        default:
                DBG(("%s: unknown filter: %x\n", __FUNCTION__, picture->filter));
                return false;
        }
}

static uint32_t gen5_repeat(uint32_t repeat)
{
        switch (repeat) {
        default:
                assert(0);
        case RepeatNone:
                return SAMPLER_EXTEND_NONE;
        case RepeatNormal:
                return SAMPLER_EXTEND_REPEAT;
        case RepeatPad:
                return SAMPLER_EXTEND_PAD;
        case RepeatReflect:
                return SAMPLER_EXTEND_REFLECT;
        }
}

static bool gen5_check_repeat(PicturePtr picture)
{
        if (!picture->repeat)
                return true;

        switch (picture->repeatType) {
        case RepeatNone:
        case RepeatNormal:
        case RepeatPad:
        case RepeatReflect:
                return true;
        default:
                DBG(("%s: unknown repeat: %x\n",
                     __FUNCTION__, picture->repeatType));
                return false;
        }
}

static uint32_t
gen5_tiling_bits(uint32_t tiling)
{
        switch (tiling) {
        default: assert(0);
        case I915_TILING_NONE: return 0;
        case I915_TILING_X: return GEN5_SURFACE_TILED;
        case I915_TILING_Y: return GEN5_SURFACE_TILED | GEN5_SURFACE_TILED_Y;
        }
}

/**
 * Sets up the common fields of a surface state buffer for the given
 * picture.
 */
static uint32_t
gen5_bind_bo(struct sna *sna,
             struct kgem_bo *bo,
             uint32_t width,
             uint32_t height,
             uint32_t format,
             bool is_dst)
{
        uint32_t domains;
        uint16_t offset;
        uint32_t *ss;

        /* After the first bind, we manage the cache domains within the batch */
        if (!DBG_NO_SURFACE_CACHE) {
                offset = kgem_bo_get_binding(bo, format | is_dst << 31);
                if (offset) {
                        if (is_dst)
                                kgem_bo_mark_dirty(bo);
                        assert(offset >= sna->kgem.surface);
                        return offset * sizeof(uint32_t);
                }
        }

        offset = sna->kgem.surface -=
                sizeof(struct gen5_surface_state_padded) / sizeof(uint32_t);
        ss = sna->kgem.batch + offset;

        ss[0] = (GEN5_SURFACE_2D << GEN5_SURFACE_TYPE_SHIFT |
                 GEN5_SURFACE_BLEND_ENABLED |
                 format << GEN5_SURFACE_FORMAT_SHIFT);

        if (is_dst) {
                ss[0] |= GEN5_SURFACE_RC_READ_WRITE;
                domains = I915_GEM_DOMAIN_RENDER << 16 | I915_GEM_DOMAIN_RENDER;
        } else
                domains = I915_GEM_DOMAIN_SAMPLER << 16;
        ss[1] = kgem_add_reloc(&sna->kgem, offset + 1, bo, domains, 0);

        ss[2] = ((width - 1) << GEN5_SURFACE_WIDTH_SHIFT |
                 (height - 1) << GEN5_SURFACE_HEIGHT_SHIFT);
        ss[3] = (gen5_tiling_bits(bo->tiling) |
                 (bo->pitch - 1) << GEN5_SURFACE_PITCH_SHIFT);
        ss[4] = 0;
        ss[5] = 0;

        kgem_bo_set_binding(bo, format | is_dst << 31, offset);

        DBG(("[%x] bind bo(handle=%d, addr=%d), format=%d, width=%d, height=%d, pitch=%d, tiling=%d -> %s\n",
             offset, bo->handle, ss[1],
             format, width, height, bo->pitch, bo->tiling,
             domains & 0xffff ? "render" : "sampler"));

        return offset * sizeof(uint32_t);
}

static void gen5_emit_vertex_buffer(struct sna *sna,
                                    const struct sna_composite_op *op)
{
        int id = op->u.gen5.ve_id;

        assert((sna->render.vb_id & (1 << id)) == 0);

        OUT_BATCH(GEN5_3DSTATE_VERTEX_BUFFERS | 3);
        OUT_BATCH(id << VB0_BUFFER_INDEX_SHIFT | VB0_VERTEXDATA |
                  (4*op->floats_per_vertex << VB0_BUFFER_PITCH_SHIFT));
        assert(sna->render.nvertex_reloc < ARRAY_SIZE(sna->render.vertex_reloc));
        sna->render.vertex_reloc[sna->render.nvertex_reloc++] = sna->kgem.nbatch;
        OUT_BATCH(0);
        OUT_BATCH(~0); /* max address: disabled */
        OUT_BATCH(0);

        sna->render.vb_id |= 1 << id;
}

static void gen5_emit_primitive(struct sna *sna)
{
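        /* If nothing else has been written to the batch since the last
         * 3DPRIMITIVE, rewind vertex_offset into that packet so the new
         * rectangles simply extend the previous primitive.
         */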
        if (sna->kgem.nbatch == sna->render_state.gen5.last_primitive) {
                sna->render.vertex_offset = sna->kgem.nbatch - 5;
                return;
        }

        OUT_BATCH(GEN5_3DPRIMITIVE |
                  GEN5_3DPRIMITIVE_VERTEX_SEQUENTIAL |
                  (_3DPRIM_RECTLIST << GEN5_3DPRIMITIVE_TOPOLOGY_SHIFT) |
                  (0 << 9) |
                  4);
        sna->render.vertex_offset = sna->kgem.nbatch;
        OUT_BATCH(0); /* vertex count, to be filled in later */
        OUT_BATCH(sna->render.vertex_index);
        OUT_BATCH(1); /* single instance */
        OUT_BATCH(0); /* start instance location */
        OUT_BATCH(0); /* index buffer offset, ignored */
        sna->render.vertex_start = sna->render.vertex_index;

        sna->render_state.gen5.last_primitive = sna->kgem.nbatch;
}

static bool gen5_rectangle_begin(struct sna *sna,
                                 const struct sna_composite_op *op)
{
        int id = op->u.gen5.ve_id;
        int ndwords;

        if (sna_vertex_wait__locked(&sna->render) && sna->render.vertex_offset)
                return true;

        ndwords = op->need_magic_ca_pass ? 20 : 6;
        if ((sna->render.vb_id & (1 << id)) == 0)
                ndwords += 5;

        if (!kgem_check_batch(&sna->kgem, ndwords))
                return false;

        if ((sna->render.vb_id & (1 << id)) == 0)
                gen5_emit_vertex_buffer(sna, op);
        if (sna->render.vertex_offset == 0)
                gen5_emit_primitive(sna);

        return true;
}

static int gen5_get_rectangles__flush(struct sna *sna,
                                      const struct sna_composite_op *op)
{
        /* Prevent discarding the new vbo after lock contention */
        if (sna_vertex_wait__locked(&sna->render)) {
                int rem = vertex_space(sna);
                if (rem > op->floats_per_rect)
                        return rem;
        }

        if (!kgem_check_batch(&sna->kgem, op->need_magic_ca_pass ? 40 : 6))
                return 0;
        if (!kgem_check_reloc_and_exec(&sna->kgem, 2))
                return 0;

        if (sna->render.vertex_offset) {
                gen4_vertex_flush(sna);
                if (gen5_magic_ca_pass(sna, op))
                        gen5_emit_pipelined_pointers(sna, op, op->op,
                                                     op->u.gen5.wm_kernel);
        }

        return gen4_vertex_finish(sna);
}

inline static int gen5_get_rectangles(struct sna *sna,
                                      const struct sna_composite_op *op,
                                      int want,
                                      void (*emit_state)(struct sna *sna,
                                                         const struct sna_composite_op *op))
{
        int rem;

        assert(want);

start:
        rem = vertex_space(sna);
        if (unlikely(rem < op->floats_per_rect)) {
                DBG(("flushing vbo for %s: %d < %d\n",
                     __FUNCTION__, rem, op->floats_per_rect));
                rem = gen5_get_rectangles__flush(sna, op);
                if (unlikely (rem == 0))
                        goto flush;
        }

        if (unlikely(sna->render.vertex_offset == 0)) {
                if (!gen5_rectangle_begin(sna, op))
                        goto flush;
                else
                        goto start;
        }

        assert(rem <= vertex_space(sna));
        assert(op->floats_per_rect <= rem);
        if (want > 1 && want * op->floats_per_rect > rem)
                want = rem / op->floats_per_rect;

        sna->render.vertex_index += 3*want;
        return want;

flush:
        if (sna->render.vertex_offset) {
                gen4_vertex_flush(sna);
                gen5_magic_ca_pass(sna, op);
        }
        sna_vertex_wait__locked(&sna->render);
        _kgem_submit(&sna->kgem);
        emit_state(sna, op);
        goto start;
}

static uint32_t *
gen5_composite_get_binding_table(struct sna *sna,
                                 uint16_t *offset)
{
        sna->kgem.surface -=
                sizeof(struct gen5_surface_state_padded) / sizeof(uint32_t);

        DBG(("%s(%x)\n", __FUNCTION__, 4*sna->kgem.surface));

        /* Clear all surplus entries to zero in case of prefetch */
        *offset = sna->kgem.surface;
        return memset(sna->kgem.batch + sna->kgem.surface,
                      0, sizeof(struct gen5_surface_state_padded));
}

static void
gen5_emit_urb(struct sna *sna)
{
        int urb_vs_start, urb_vs_size;
        int urb_gs_start, urb_gs_size;
        int urb_clip_start, urb_clip_size;
        int urb_sf_start, urb_sf_size;
        int urb_cs_start, urb_cs_size;

        urb_vs_start = 0;
        urb_vs_size = URB_VS_ENTRIES * URB_VS_ENTRY_SIZE;
        urb_gs_start = urb_vs_start + urb_vs_size;
        urb_gs_size = URB_GS_ENTRIES * URB_GS_ENTRY_SIZE;
        urb_clip_start = urb_gs_start + urb_gs_size;
        urb_clip_size = URB_CLIP_ENTRIES * URB_CLIP_ENTRY_SIZE;
        urb_sf_start = urb_clip_start + urb_clip_size;
        urb_sf_size = URB_SF_ENTRIES * URB_SF_ENTRY_SIZE;
        urb_cs_start = urb_sf_start + urb_sf_size;
        urb_cs_size = URB_CS_ENTRIES * URB_CS_ENTRY_SIZE;

        OUT_BATCH(GEN5_URB_FENCE |
                  UF0_CS_REALLOC |
                  UF0_SF_REALLOC |
                  UF0_CLIP_REALLOC |
                  UF0_GS_REALLOC |
                  UF0_VS_REALLOC |
                  1);
        OUT_BATCH(((urb_clip_start + urb_clip_size) << UF1_CLIP_FENCE_SHIFT) |
                  ((urb_gs_start + urb_gs_size) << UF1_GS_FENCE_SHIFT) |
                  ((urb_vs_start + urb_vs_size) << UF1_VS_FENCE_SHIFT));
        OUT_BATCH(((urb_cs_start + urb_cs_size) << UF2_CS_FENCE_SHIFT) |
                  ((urb_sf_start + urb_sf_size) << UF2_SF_FENCE_SHIFT));

        /* Constant buffer state */
        OUT_BATCH(GEN5_CS_URB_STATE | 0);
        OUT_BATCH((URB_CS_ENTRY_SIZE - 1) << 4 | URB_CS_ENTRIES << 0);
}

static void
gen5_emit_state_base_address(struct sna *sna)
{
        assert(sna->render_state.gen5.general_bo->proxy == NULL);
        OUT_BATCH(GEN5_STATE_BASE_ADDRESS | 6);
        OUT_BATCH(kgem_add_reloc(&sna->kgem, /* general */
                                 sna->kgem.nbatch,
                                 sna->render_state.gen5.general_bo,
                                 I915_GEM_DOMAIN_INSTRUCTION << 16,
                                 BASE_ADDRESS_MODIFY));
        OUT_BATCH(kgem_add_reloc(&sna->kgem, /* surface */
                                 sna->kgem.nbatch,
                                 NULL,
                                 I915_GEM_DOMAIN_INSTRUCTION << 16,
                                 BASE_ADDRESS_MODIFY));
        OUT_BATCH(0); /* media */
        OUT_BATCH(kgem_add_reloc(&sna->kgem, /* instruction */
                                 sna->kgem.nbatch,
                                 sna->render_state.gen5.general_bo,
                                 I915_GEM_DOMAIN_INSTRUCTION << 16,
                                 BASE_ADDRESS_MODIFY));

        /* upper bounds, all disabled */
        OUT_BATCH(BASE_ADDRESS_MODIFY);
        OUT_BATCH(0);
        OUT_BATCH(BASE_ADDRESS_MODIFY);
}

static void
gen5_emit_invariant(struct sna *sna)
{
        /* Ironlake errata workaround: Before disabling the clipper,
         * you have to MI_FLUSH to get the pipeline idle.
         *
         * However, the kernel flushes the pipeline between batches,
         * so we should be safe....
         *
         * On the other hand, after using BLT we must use a non-pipelined
         * operation...
         */
        if (sna->kgem.nreloc)
                OUT_BATCH(MI_FLUSH | MI_INHIBIT_RENDER_CACHE_FLUSH);

        OUT_BATCH(GEN5_PIPELINE_SELECT | PIPELINE_SELECT_3D);

        gen5_emit_state_base_address(sna);

        sna->render_state.gen5.needs_invariant = false;
}

static void
gen5_get_batch(struct sna *sna, const struct sna_composite_op *op)
{
        kgem_set_mode(&sna->kgem, KGEM_RENDER, op->dst.bo);

        if (!kgem_check_batch_with_surfaces(&sna->kgem, 150, 4)) {
                DBG(("%s: flushing batch: %d < %d+%d\n",
                     __FUNCTION__, sna->kgem.surface - sna->kgem.nbatch,
                     150, 4*8));
                kgem_submit(&sna->kgem);
                _kgem_set_mode(&sna->kgem, KGEM_RENDER);
        }

        if (sna->render_state.gen5.needs_invariant)
                gen5_emit_invariant(sna);
}

static void
gen5_align_vertex(struct sna *sna, const struct sna_composite_op *op)
{
        assert(op->floats_per_rect == 3*op->floats_per_vertex);
        if (op->floats_per_vertex != sna->render_state.gen5.floats_per_vertex) {
                DBG(("aligning vertex: was %d, now %d floats per vertex\n",
                     sna->render_state.gen5.floats_per_vertex,
                     op->floats_per_vertex));
                gen4_vertex_align(sna, op);
                sna->render_state.gen5.floats_per_vertex = op->floats_per_vertex;
        }
}

static void
gen5_emit_binding_table(struct sna *sna, uint16_t offset)
{
        if (!DBG_NO_STATE_CACHE &&
            sna->render_state.gen5.surface_table == offset)
                return;

        sna->render_state.gen5.surface_table = offset;

        /* Binding table pointers */
        OUT_BATCH(GEN5_3DSTATE_BINDING_TABLE_POINTERS | 4);
        OUT_BATCH(0); /* vs */
        OUT_BATCH(0); /* gs */
        OUT_BATCH(0); /* clip */
        OUT_BATCH(0); /* sf */
        /* Only the PS uses the binding table */
        OUT_BATCH(offset*4);
}

static bool
gen5_emit_pipelined_pointers(struct sna *sna,
                             const struct sna_composite_op *op,
                             int blend, int kernel)
{
        uint16_t sp, bp;
        uint32_t key;

        DBG(("%s: has_mask=%d, src=(%d, %d), mask=(%d, %d), kernel=%d, blend=%d, ca=%d, format=%x\n",
             __FUNCTION__, op->u.gen5.ve_id & 2,
             op->src.filter, op->src.repeat,
             op->mask.filter, op->mask.repeat,
             kernel, blend, op->has_component_alpha, (int)op->dst.format));

        sp = SAMPLER_OFFSET(op->src.filter, op->src.repeat,
                            op->mask.filter, op->mask.repeat,
                            kernel);
        bp = gen5_get_blend(blend, op->has_component_alpha, op->dst.format);

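        /* Fold the sampler offset, blend offset and mask presence into a
         * single key so that redundant 3DSTATE_PIPELINED_POINTERS packets
         * can be skipped.
         */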
        key = sp | (uint32_t)bp << 16 | (op->mask.bo != NULL) << 31;
        DBG(("%s: sp=%d, bp=%d, key=%08x (current sp=%d, bp=%d, key=%08x)\n",
             __FUNCTION__, sp, bp, key,
             sna->render_state.gen5.last_pipelined_pointers & 0xffff,
             (sna->render_state.gen5.last_pipelined_pointers >> 16) & 0x7fff,
             sna->render_state.gen5.last_pipelined_pointers));
        if (key == sna->render_state.gen5.last_pipelined_pointers)
                return false;

        OUT_BATCH(GEN5_3DSTATE_PIPELINED_POINTERS | 5);
        OUT_BATCH(sna->render_state.gen5.vs);
        OUT_BATCH(GEN5_GS_DISABLE); /* passthrough */
        OUT_BATCH(GEN5_CLIP_DISABLE); /* passthrough */
        OUT_BATCH(sna->render_state.gen5.sf[op->mask.bo != NULL]);
        OUT_BATCH(sna->render_state.gen5.wm + sp);
        OUT_BATCH(sna->render_state.gen5.cc + bp);

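        /* Report whether the blend (CC) state itself changed; the caller
         * may then need to flush the render cache before reusing the
         * destination.
         */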
        bp = (sna->render_state.gen5.last_pipelined_pointers & 0x7fff0000) != ((uint32_t)bp << 16);
        sna->render_state.gen5.last_pipelined_pointers = key;

        gen5_emit_urb(sna);

        return bp;
}

static bool
gen5_emit_drawing_rectangle(struct sna *sna, const struct sna_composite_op *op)
{
        uint32_t limit = (op->dst.height - 1) << 16 | (op->dst.width - 1);
        uint32_t offset = (uint16_t)op->dst.y << 16 | (uint16_t)op->dst.x;

        assert(!too_large(abs(op->dst.x), abs(op->dst.y)));
        assert(!too_large(op->dst.width, op->dst.height));

        if (!DBG_NO_STATE_CACHE &&
            sna->render_state.gen5.drawrect_limit == limit &&
            sna->render_state.gen5.drawrect_offset == offset)
                return false;

        sna->render_state.gen5.drawrect_offset = offset;
        sna->render_state.gen5.drawrect_limit = limit;

        OUT_BATCH(GEN5_3DSTATE_DRAWING_RECTANGLE | (4 - 2));
        OUT_BATCH(0x00000000);
        OUT_BATCH(limit);
        OUT_BATCH(offset);
        return true;
}

static void
gen5_emit_vertex_elements(struct sna *sna,
                          const struct sna_composite_op *op)
{
        /*
         * vertex data in vertex buffer
         *    position: (x, y)
         *    texture coordinate 0: (u0, v0) if (is_affine is true) else (u0, v0, w0)
         *    texture coordinate 1 if (has_mask is true): same as above
         */
        struct gen5_render_state *render = &sna->render_state.gen5;
        int id = op->u.gen5.ve_id;
        bool has_mask = id >> 2;
        uint32_t format, dw;

        if (!DBG_NO_STATE_CACHE && render->ve_id == id)
                return;

        DBG(("%s: changing %d -> %d\n", __FUNCTION__, render->ve_id, id));
        render->ve_id = id;

        /* The VUE layout
         * dword 0-3: pad (0.0, 0.0, 0.0, 0.0)
         * dword 4-7: position (x, y, 1.0, 1.0),
         * dword 8-11: texture coordinate 0 (u0, v0, w0, 1.0)
         * dword 12-15: texture coordinate 1 (u1, v1, w1, 1.0)
         *
         * dword 4-15 are fetched from vertex buffer
         */
        OUT_BATCH(GEN5_3DSTATE_VERTEX_ELEMENTS |
                  ((2 * (has_mask ? 4 : 3)) + 1 - 2));

        OUT_BATCH((id << VE0_VERTEX_BUFFER_INDEX_SHIFT) | VE0_VALID |
                  (GEN5_SURFACEFORMAT_R32G32B32A32_FLOAT << VE0_FORMAT_SHIFT) |
                  (0 << VE0_OFFSET_SHIFT));
        OUT_BATCH((VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_0_SHIFT) |
                  (VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT) |
                  (VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT) |
                  (VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_3_SHIFT));

        /* x,y */
        OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
                  GEN5_SURFACEFORMAT_R16G16_SSCALED << VE0_FORMAT_SHIFT |
                  0 << VE0_OFFSET_SHIFT);
        OUT_BATCH(VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT |
                  VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT |
                  VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT |
                  VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT);

        /* u0, v0, w0 */
        DBG(("%s: id=%d, first channel %d floats, offset=4b\n", __FUNCTION__,
             id, id & 3));
        dw = VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT;
        switch (id & 3) {
        default:
                assert(0);
        case 0:
                format = GEN5_SURFACEFORMAT_R16G16_SSCALED << VE0_FORMAT_SHIFT;
                dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
                dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
                dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT;
                break;
        case 1:
                format = GEN5_SURFACEFORMAT_R32_FLOAT << VE0_FORMAT_SHIFT;
                dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
                dw |= VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT;
                dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT;
                break;
        case 2:
                format = GEN5_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT;
                dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
                dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
                dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT;
                break;
        case 3:
                format = GEN5_SURFACEFORMAT_R32G32B32_FLOAT << VE0_FORMAT_SHIFT;
                dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
                dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
                dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_2_SHIFT;
                break;
        }
        OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
                  format | 4 << VE0_OFFSET_SHIFT);
        OUT_BATCH(dw);

        /* u1, v1, w1 */
        if (has_mask) {
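                /* The mask channel follows the 4-byte (x,y) position and
                 * the source channel; a source of packed 16-bit coords
                 * (id & 3 == 0) still occupies one float's worth, hence
                 * the GNU ?: extension mapping 0 to 1.
                 */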
                unsigned offset = 4 + ((id & 3) ?: 1) * sizeof(float);
                DBG(("%s: id=%x, second channel %d floats, offset=%db\n", __FUNCTION__,
                     id, id >> 2, offset));
                dw = VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT;
                switch (id >> 2) {
                case 1:
                        format = GEN5_SURFACEFORMAT_R32_FLOAT << VE0_FORMAT_SHIFT;
                        dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
                        dw |= VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT;
                        dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT;
                        break;
                default:
                        assert(0);
                case 2:
                        format = GEN5_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT;
                        dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
                        dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
                        dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT;
                        break;
                case 3:
                        format = GEN5_SURFACEFORMAT_R32G32B32_FLOAT << VE0_FORMAT_SHIFT;
                        dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
                        dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
                        dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_2_SHIFT;
                        break;
                }
                OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
                          format | offset << VE0_OFFSET_SHIFT);
                OUT_BATCH(dw);
        }
}

inline static void
gen5_emit_pipe_flush(struct sna *sna)
{
#if 1
        OUT_BATCH(GEN5_PIPE_CONTROL |
                  GEN5_PIPE_CONTROL_WC_FLUSH |
                  (4 - 2));
        OUT_BATCH(0);
        OUT_BATCH(0);
        OUT_BATCH(0);
#else
        OUT_BATCH(MI_FLUSH | MI_INHIBIT_RENDER_CACHE_FLUSH);
#endif
}

static void
gen5_emit_state(struct sna *sna,
                const struct sna_composite_op *op,
                uint16_t offset)
{
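        /* Bit 0 of the binding-table offset carries the destination-dirty
         * flag from the caller; it is stripped before use and, combined
         * with a blend-state change on a non-Src op, forces a pipe flush.
         */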
        bool flush = false;

        assert(op->dst.bo->exec);

        /* drawrect must be first for Ironlake BLT workaround */
        if (gen5_emit_drawing_rectangle(sna, op))
                offset &= ~1;
        gen5_emit_binding_table(sna, offset & ~1);
        if (gen5_emit_pipelined_pointers(sna, op, op->op, op->u.gen5.wm_kernel)) {
                DBG(("%s: changed blend state, flush required? %d\n",
                     __FUNCTION__, (offset & 1) && op->op > PictOpSrc));
                flush = (offset & 1) && op->op > PictOpSrc;
        }
        gen5_emit_vertex_elements(sna, op);

        if (ALWAYS_FLUSH || kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) {
                DBG(("%s: flushing dirty (%d, %d)\n", __FUNCTION__,
                     kgem_bo_is_dirty(op->src.bo),
                     kgem_bo_is_dirty(op->mask.bo)));
                OUT_BATCH(MI_FLUSH);
                kgem_clear_dirty(&sna->kgem);
                kgem_bo_mark_dirty(op->dst.bo);
                flush = false;
        }
        if (flush) {
                DBG(("%s: forcing flush\n", __FUNCTION__));
                gen5_emit_pipe_flush(sna);
        }
}

static void gen5_bind_surfaces(struct sna *sna,
                               const struct sna_composite_op *op)
{
        bool dirty = kgem_bo_is_dirty(op->dst.bo);
        uint32_t *binding_table;
        uint16_t offset;

        gen5_get_batch(sna, op);

        binding_table = gen5_composite_get_binding_table(sna, &offset);

        binding_table[0] =
                gen5_bind_bo(sna,
                             op->dst.bo, op->dst.width, op->dst.height,
                             gen5_get_dest_format(op->dst.format),
                             true);
        binding_table[1] =
                gen5_bind_bo(sna,
                             op->src.bo, op->src.width, op->src.height,
                             op->src.card_format,
                             false);
        if (op->mask.bo) {
                assert(op->u.gen5.ve_id >> 2);
                binding_table[2] =
                        gen5_bind_bo(sna,
                                     op->mask.bo,
                                     op->mask.width,
                                     op->mask.height,
                                     op->mask.card_format,
                                     false);
        }

        if (sna->kgem.surface == offset &&
            *(uint64_t *)(sna->kgem.batch + sna->render_state.gen5.surface_table) == *(uint64_t*)binding_table &&
            (op->mask.bo == NULL ||
             sna->kgem.batch[sna->render_state.gen5.surface_table+2] == binding_table[2])) {
                sna->kgem.surface += sizeof(struct gen5_surface_state_padded) / sizeof(uint32_t);
                offset = sna->render_state.gen5.surface_table;
        }

        gen5_emit_state(sna, op, offset | dirty);
}

fastcall static void
gen5_render_composite_blt(struct sna *sna,
                          const struct sna_composite_op *op,
                          const struct sna_composite_rectangles *r)
{
        DBG(("%s: src=(%d, %d)+(%d, %d), mask=(%d, %d)+(%d, %d), dst=(%d, %d)+(%d, %d), size=(%d, %d)\n",
             __FUNCTION__,
             r->src.x, r->src.y, op->src.offset[0], op->src.offset[1],
             r->mask.x, r->mask.y, op->mask.offset[0], op->mask.offset[1],
             r->dst.x, r->dst.y, op->dst.x, op->dst.y,
             r->width, r->height));

        gen5_get_rectangles(sna, op, 1, gen5_bind_surfaces);
        op->prim_emit(sna, op, r);
}

fastcall static void
gen5_render_composite_box(struct sna *sna,
                          const struct sna_composite_op *op,
                          const BoxRec *box)
{
        struct sna_composite_rectangles r;

        DBG((" %s: (%d, %d), (%d, %d)\n",
             __FUNCTION__,
             box->x1, box->y1, box->x2, box->y2));

        gen5_get_rectangles(sna, op, 1, gen5_bind_surfaces);

        r.dst.x = box->x1;
        r.dst.y = box->y1;
        r.width = box->x2 - box->x1;
        r.height = box->y2 - box->y1;
        r.mask = r.src = r.dst;

        op->prim_emit(sna, op, &r);
}

static void
gen5_render_composite_boxes__blt(struct sna *sna,
                                 const struct sna_composite_op *op,
                                 const BoxRec *box, int nbox)
{
        DBG(("%s(%d) delta=(%d, %d), src=(%d, %d)/(%d, %d), mask=(%d, %d)/(%d, %d)\n",
             __FUNCTION__, nbox, op->dst.x, op->dst.y,
             op->src.offset[0], op->src.offset[1],
             op->src.width, op->src.height,
             op->mask.offset[0], op->mask.offset[1],
             op->mask.width, op->mask.height));

        do {
                int nbox_this_time;

                nbox_this_time = gen5_get_rectangles(sna, op, nbox,
                                                     gen5_bind_surfaces);
                nbox -= nbox_this_time;

                do {
                        struct sna_composite_rectangles r;

                        DBG((" %s: (%d, %d), (%d, %d)\n",
                             __FUNCTION__,
                             box->x1, box->y1, box->x2, box->y2));

                        r.dst.x = box->x1;
                        r.dst.y = box->y1;
                        r.width = box->x2 - box->x1;
                        r.height = box->y2 - box->y1;
                        r.mask = r.src = r.dst;
                        op->prim_emit(sna, op, &r);
                        box++;
                } while (--nbox_this_time);
        } while (nbox);
}

static void
gen5_render_composite_boxes(struct sna *sna,
                            const struct sna_composite_op *op,
                            const BoxRec *box, int nbox)
{
        DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));

        do {
                int nbox_this_time;
                float *v;

                nbox_this_time = gen5_get_rectangles(sna, op, nbox,
                                                     gen5_bind_surfaces);
                assert(nbox_this_time);
                nbox -= nbox_this_time;

                v = sna->render.vertices + sna->render.vertex_used;
                sna->render.vertex_used += nbox_this_time * op->floats_per_rect;

                op->emit_boxes(op, box, nbox_this_time, v);
                box += nbox_this_time;
        } while (nbox);
}

static void
gen5_render_composite_boxes__thread(struct sna *sna,
                                    const struct sna_composite_op *op,
                                    const BoxRec *box, int nbox)
{
        DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));

        sna_vertex_lock(&sna->render);
        do {
                int nbox_this_time;
                float *v;

                nbox_this_time = gen5_get_rectangles(sna, op, nbox,
                                                     gen5_bind_surfaces);
                assert(nbox_this_time);
                nbox -= nbox_this_time;

                v = sna->render.vertices + sna->render.vertex_used;
                sna->render.vertex_used += nbox_this_time * op->floats_per_rect;

                sna_vertex_acquire__locked(&sna->render);
                sna_vertex_unlock(&sna->render);

                op->emit_boxes(op, box, nbox_this_time, v);
                box += nbox_this_time;

                sna_vertex_lock(&sna->render);
                sna_vertex_release__locked(&sna->render);
        } while (nbox);
        sna_vertex_unlock(&sna->render);
}

#ifndef MAX
#define MAX(a,b) ((a) > (b) ? (a) : (b))
#endif

static uint32_t gen5_bind_video_source(struct sna *sna,
                                       struct kgem_bo *src_bo,
                                       uint32_t src_offset,
                                       int src_width,
                                       int src_height,
                                       int src_pitch,
                                       uint32_t src_surf_format)
{
        struct gen5_surface_state *ss;

        sna->kgem.surface -= sizeof(struct gen5_surface_state_padded) / sizeof(uint32_t);

        ss = memset(sna->kgem.batch + sna->kgem.surface, 0, sizeof(*ss));
        ss->ss0.surface_type = GEN5_SURFACE_2D;
        ss->ss0.surface_format = src_surf_format;
        ss->ss0.color_blend = 1;

        ss->ss1.base_addr =
                kgem_add_reloc(&sna->kgem,
                               sna->kgem.surface + 1,
                               src_bo,
                               I915_GEM_DOMAIN_SAMPLER << 16,
                               src_offset);

        ss->ss2.width = src_width - 1;
        ss->ss2.height = src_height - 1;
        ss->ss3.pitch = src_pitch - 1;

        return sna->kgem.surface * sizeof(uint32_t);
}

static void gen5_video_bind_surfaces(struct sna *sna,
                                     const struct sna_composite_op *op)
{
        bool dirty = kgem_bo_is_dirty(op->dst.bo);
        struct sna_video_frame *frame = op->priv;
        uint32_t src_surf_format[6];
        uint32_t src_surf_base[6];
        int src_width[6];
        int src_height[6];
        int src_pitch[6];
        uint32_t *binding_table;
        uint16_t offset;
        int n_src, n;

        src_surf_base[0] = 0;
        src_surf_base[1] = 0;
        src_surf_base[2] = frame->VBufOffset;
        src_surf_base[3] = frame->VBufOffset;
        src_surf_base[4] = frame->UBufOffset;
        src_surf_base[5] = frame->UBufOffset;

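        /* Planar frames are bound as six surfaces, each plane twice
         * (Y, Y, V, V, U, U), to match the sampler slots expected by the
         * planar and NV12 WM kernels.
         */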
        if (is_planar_fourcc(frame->id)) {
                for (n = 0; n < 2; n++) {
                        src_surf_format[n] = GEN5_SURFACEFORMAT_R8_UNORM;
                        src_width[n] = frame->width;
                        src_height[n] = frame->height;
                        src_pitch[n] = frame->pitch[1];
                }
                for (; n < 6; n++) {
                        if (is_nv12_fourcc(frame->id))
                                src_surf_format[n] = GEN5_SURFACEFORMAT_R8G8_UNORM;
                        else
                                src_surf_format[n] = GEN5_SURFACEFORMAT_R8_UNORM;
                        src_width[n] = frame->width / 2;
                        src_height[n] = frame->height / 2;
                        src_pitch[n] = frame->pitch[0];
                }
                n_src = 6;
        } else {
                if (frame->id == FOURCC_UYVY)
                        src_surf_format[0] = GEN5_SURFACEFORMAT_YCRCB_SWAPY;
                else
                        src_surf_format[0] = GEN5_SURFACEFORMAT_YCRCB_NORMAL;

                src_width[0] = frame->width;
                src_height[0] = frame->height;
                src_pitch[0] = frame->pitch[0];
                n_src = 1;
        }

        gen5_get_batch(sna, op);

        binding_table = gen5_composite_get_binding_table(sna, &offset);
        binding_table[0] =
                gen5_bind_bo(sna,
                             op->dst.bo, op->dst.width, op->dst.height,
                             gen5_get_dest_format(op->dst.format),
                             true);
        for (n = 0; n < n_src; n++) {
                binding_table[1+n] =
                        gen5_bind_video_source(sna,
                                               frame->bo,
                                               src_surf_base[n],
                                               src_width[n],
                                               src_height[n],
                                               src_pitch[n],
                                               src_surf_format[n]);
        }

        gen5_emit_state(sna, op, offset | dirty);
}

static unsigned select_video_kernel(const struct sna_video *video,
                                    const struct sna_video_frame *frame)
{
        switch (frame->id) {
        case FOURCC_YV12:
        case FOURCC_I420:
        case FOURCC_XVMC:
                return video->colorspace ?
                        WM_KERNEL_VIDEO_PLANAR_BT709 :
                        WM_KERNEL_VIDEO_PLANAR_BT601;

        case FOURCC_NV12:
                return video->colorspace ?
                        WM_KERNEL_VIDEO_NV12_BT709 :
                        WM_KERNEL_VIDEO_NV12_BT601;

        default:
                return video->colorspace ?
                        WM_KERNEL_VIDEO_PACKED_BT709 :
                        WM_KERNEL_VIDEO_PACKED_BT601;
        }
}

static bool
gen5_render_video(struct sna *sna,
                  struct sna_video *video,
                  struct sna_video_frame *frame,
                  RegionPtr dstRegion,
                  PixmapPtr pixmap)
{
        struct sna_composite_op tmp;
        struct sna_pixmap *priv = sna_pixmap(pixmap);
        int dst_width = dstRegion->extents.x2 - dstRegion->extents.x1;
        int dst_height = dstRegion->extents.y2 - dstRegion->extents.y1;
        int src_width = frame->src.x2 - frame->src.x1;
        int src_height = frame->src.y2 - frame->src.y1;
        float src_offset_x, src_offset_y;
        float src_scale_x, src_scale_y;
        const BoxRec *box;
        int nbox;

        DBG(("%s: %dx%d -> %dx%d\n", __FUNCTION__,
             src_width, src_height, dst_width, dst_height));

        assert(priv->gpu_bo);
        memset(&tmp, 0, sizeof(tmp));

        tmp.op = PictOpSrc;
        tmp.dst.pixmap = pixmap;
        tmp.dst.width = pixmap->drawable.width;
        tmp.dst.height = pixmap->drawable.height;
        tmp.dst.format = sna_format_for_depth(pixmap->drawable.depth);
        tmp.dst.bo = priv->gpu_bo;

        if (src_width == dst_width && src_height == dst_height)
                tmp.src.filter = SAMPLER_FILTER_NEAREST;
        else
                tmp.src.filter = SAMPLER_FILTER_BILINEAR;
        tmp.src.repeat = SAMPLER_EXTEND_PAD;
        tmp.src.bo = frame->bo;
        tmp.mask.bo = NULL;
        tmp.u.gen5.wm_kernel = select_video_kernel(video, frame);
        tmp.u.gen5.ve_id = 2;
        tmp.is_affine = true;
        tmp.floats_per_vertex = 3;
        tmp.floats_per_rect = 9;
        tmp.priv = frame;

        if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL)) {
                kgem_submit(&sna->kgem);
                if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL))
                        return false;
        }

        gen5_align_vertex(sna, &tmp);
        gen5_video_bind_surfaces(sna, &tmp);

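        /* Map destination pixel coordinates into the frame's normalised
         * texture space: scale by the src:dst ratio over the frame size
         * and offset by the source origin.
         */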
1462 src_scale_x = (float)src_width / dst_width / frame->width;
1463 src_offset_x = (float)frame->src.x1 / frame->width - dstRegion->extents.x1 * src_scale_x;
1464
1465 src_scale_y = (float)src_height / dst_height / frame->height;
1466 src_offset_y = (float)frame->src.y1 / frame->height - dstRegion->extents.y1 * src_scale_y;
1467
1468 box = region_rects(dstRegion);
1469 nbox = region_num_rects(dstRegion);
1470 while (nbox--) {
1471 gen5_get_rectangles(sna, &tmp, 1, gen5_video_bind_surfaces);
1472
1473 OUT_VERTEX(box->x2, box->y2);
1474 OUT_VERTEX_F(box->x2 * src_scale_x + src_offset_x);
1475 OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y);
1476
1477 OUT_VERTEX(box->x1, box->y2);
1478 OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x);
1479 OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y);
1480
1481 OUT_VERTEX(box->x1, box->y1);
1482 OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x);
1483 OUT_VERTEX_F(box->y1 * src_scale_y + src_offset_y);
1484
1485 box++;
1486 }
1487 gen4_vertex_flush(sna);
1488
1489 if (!DAMAGE_IS_ALL(priv->gpu_damage))
1490 sna_damage_add(&priv->gpu_damage, dstRegion);
1491
1492 return true;
1493 }
1494
1495 static int
gen5_composite_picture(struct sna * sna,PicturePtr picture,struct sna_composite_channel * channel,int x,int y,int w,int h,int dst_x,int dst_y,bool precise)1496 gen5_composite_picture(struct sna *sna,
1497 PicturePtr picture,
1498 struct sna_composite_channel *channel,
1499 int x, int y,
1500 int w, int h,
1501 int dst_x, int dst_y,
1502 bool precise)
1503 {
1504 PixmapPtr pixmap;
1505 uint32_t color;
1506 int16_t dx, dy;
1507
1508 DBG(("%s: (%d, %d)x(%d, %d), dst=(%d, %d)\n",
1509 __FUNCTION__, x, y, w, h, dst_x, dst_y));
1510
1511 channel->is_solid = false;
1512 channel->card_format = -1;
1513
1514 if (sna_picture_is_solid(picture, &color))
1515 return gen4_channel_init_solid(sna, channel, color);
1516
1517 if (picture->pDrawable == NULL) {
1518 int ret;
1519
1520 if (picture->pSourcePict->type == SourcePictTypeLinear)
1521 return gen4_channel_init_linear(sna, picture, channel,
1522 x, y,
1523 w, h,
1524 dst_x, dst_y);
1525
1526 DBG(("%s -- fixup, gradient\n", __FUNCTION__));
1527 ret = -1;
1528 if (!precise)
1529 ret = sna_render_picture_approximate_gradient(sna, picture, channel,
1530 x, y, w, h, dst_x, dst_y);
1531 if (ret == -1)
1532 ret = sna_render_picture_fixup(sna, picture, channel,
1533 x, y, w, h, dst_x, dst_y);
1534 return ret;
1535 }
1536
1537 if (picture->alphaMap) {
1538 DBG(("%s -- fallback, alphamap\n", __FUNCTION__));
1539 return sna_render_picture_fixup(sna, picture, channel,
1540 x, y, w, h, dst_x, dst_y);
1541 }
1542
1543 if (!gen5_check_repeat(picture))
1544 return sna_render_picture_fixup(sna, picture, channel,
1545 x, y, w, h, dst_x, dst_y);
1546
1547 if (!gen5_check_filter(picture))
1548 return sna_render_picture_fixup(sna, picture, channel,
1549 x, y, w, h, dst_x, dst_y);
1550
1551 channel->repeat = picture->repeat ? picture->repeatType : RepeatNone;
1552 channel->filter = picture->filter;
1553
1554 pixmap = get_drawable_pixmap(picture->pDrawable);
1555 get_drawable_deltas(picture->pDrawable, pixmap, &dx, &dy);
1556
1557 x += dx + picture->pDrawable->x;
1558 y += dy + picture->pDrawable->y;
1559
1560 channel->is_affine = sna_transform_is_affine(picture->transform);
1561 if (sna_transform_is_imprecise_integer_translation(picture->transform, picture->filter, precise, &dx, &dy)) {
1562 DBG(("%s: integer translation (%d, %d), removing\n",
1563 __FUNCTION__, dx, dy));
1564 x += dx;
1565 y += dy;
1566 channel->transform = NULL;
1567 channel->filter = PictFilterNearest;
1568
1569 if (channel->repeat ||
1570 (x >= 0 &&
1571 y >= 0 &&
1572 x + w <= pixmap->drawable.width &&
1573 y + h <= pixmap->drawable.height)) {
1574 struct sna_pixmap *priv = sna_pixmap(pixmap);
1575 if (priv && priv->clear) {
1576 DBG(("%s: converting large pixmap source into solid [%08x]\n", __FUNCTION__, priv->clear_color));
1577 return gen4_channel_init_solid(sna, channel, solid_color(picture->format, priv->clear_color));
1578 }
1579 }
1580 } else
1581 channel->transform = picture->transform;
1582
1583 channel->pict_format = picture->format;
1584 channel->card_format = gen5_get_card_format(picture->format);
1585 if (channel->card_format == -1)
1586 return sna_render_picture_convert(sna, picture, channel, pixmap,
1587 x, y, w, h, dst_x, dst_y,
1588 false);
1589
1590 if (too_large(pixmap->drawable.width, pixmap->drawable.height))
1591 return sna_render_picture_extract(sna, picture, channel,
1592 x, y, w, h, dst_x, dst_y);
1593
1594 DBG(("%s: pixmap, repeat=%d, filter=%d, transform?=%d [affine? %d], format=%08x\n",
1595 __FUNCTION__,
1596 channel->repeat, channel->filter,
1597 channel->transform != NULL, channel->is_affine,
1598 channel->pict_format));
1599 if (channel->transform) {
1600 DBG(("%s: transform=[%f %f %f, %f %f %f, %f %f %f]\n",
1601 __FUNCTION__,
1602 channel->transform->matrix[0][0] / 65536.,
1603 channel->transform->matrix[0][1] / 65536.,
1604 channel->transform->matrix[0][2] / 65536.,
1605 channel->transform->matrix[1][0] / 65536.,
1606 channel->transform->matrix[1][1] / 65536.,
1607 channel->transform->matrix[1][2] / 65536.,
1608 channel->transform->matrix[2][0] / 65536.,
1609 channel->transform->matrix[2][1] / 65536.,
1610 channel->transform->matrix[2][2] / 65536.));
1611 }
1612
1613 return sna_render_pixmap_bo(sna, channel, pixmap,
1614 x, y, w, h, dst_x, dst_y);
1615 }
1616
gen5_composite_channel_convert(struct sna_composite_channel * channel)1617 static void gen5_composite_channel_convert(struct sna_composite_channel *channel)
1618 {
1619 channel->repeat = gen5_repeat(channel->repeat);
1620 channel->filter = gen5_filter(channel->filter);
1621 if (channel->card_format == (unsigned)-1)
1622 channel->card_format = gen5_get_card_format(channel->pict_format);
1623 }
1624
1625 static void
gen5_render_composite_done(struct sna * sna,const struct sna_composite_op * op)1626 gen5_render_composite_done(struct sna *sna,
1627 const struct sna_composite_op *op)
1628 {
1629 if (sna->render.vertex_offset) {
1630 gen4_vertex_flush(sna);
1631 gen5_magic_ca_pass(sna,op);
1632 }
1633
1634 DBG(("%s()\n", __FUNCTION__));
1635
1636 if (op->mask.bo)
1637 kgem_bo_destroy(&sna->kgem, op->mask.bo);
1638 if (op->src.bo)
1639 kgem_bo_destroy(&sna->kgem, op->src.bo);
1640
1641 sna_render_composite_redirect_done(sna, op);
1642 }
1643
1644 static bool
gen5_composite_set_target(struct sna * sna,struct sna_composite_op * op,PicturePtr dst,int x,int y,int w,int h,bool partial)1645 gen5_composite_set_target(struct sna *sna,
1646 struct sna_composite_op *op,
1647 PicturePtr dst,
1648 int x, int y, int w, int h,
1649 bool partial)
1650 {
1651 BoxRec box;
1652 unsigned hint;
1653
1654 op->dst.pixmap = get_drawable_pixmap(dst->pDrawable);
1655 op->dst.width = op->dst.pixmap->drawable.width;
1656 op->dst.height = op->dst.pixmap->drawable.height;
1657 op->dst.format = dst->format;
1658 if (w && h) {
1659 box.x1 = x;
1660 box.y1 = y;
1661 box.x2 = x + w;
1662 box.y2 = y + h;
1663 } else
1664 sna_render_picture_extents(dst, &box);
1665
1666 hint = PREFER_GPU | RENDER_GPU;
1667 if (!need_tiling(sna, op->dst.width, op->dst.height))
1668 hint |= FORCE_GPU;
1669 if (!partial) {
1670 hint |= IGNORE_DAMAGE;
1671 if (w == op->dst.width && h == op->dst.height)
1672 hint |= REPLACES;
1673 }
1674
1675 op->dst.bo = sna_drawable_use_bo(dst->pDrawable, hint, &box, &op->damage);
1676 if (op->dst.bo == NULL)
1677 return false;
1678
1679 if (hint & REPLACES) {
1680 struct sna_pixmap *priv = sna_pixmap(op->dst.pixmap);
1681 kgem_bo_pair_undo(&sna->kgem, priv->gpu_bo, priv->cpu_bo);
1682 }
1683
1684 get_drawable_deltas(dst->pDrawable, op->dst.pixmap,
1685 &op->dst.x, &op->dst.y);
1686
1687 DBG(("%s: pixmap=%ld, format=%08x, size=%dx%d, pitch=%d, delta=(%d,%d),damage=%p\n",
1688 __FUNCTION__,
1689 op->dst.pixmap->drawable.serialNumber, (int)op->dst.format,
1690 op->dst.width, op->dst.height,
1691 op->dst.bo->pitch,
1692 op->dst.x, op->dst.y,
1693 op->damage ? *op->damage : (void *)-1));
1694
1695 assert(op->dst.bo->proxy == NULL);
1696
1697 if (too_large(op->dst.width, op->dst.height) &&
1698 !sna_render_composite_redirect(sna, op, x, y, w, h, partial))
1699 return false;
1700
1701 return true;
1702 }
1703
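/* The following predicates decide whether a source picture can be handled
 * by the 3D pipeline or is better rendered on the CPU: unhandled gradient
 * types, alpha maps and untransformed system-memory pixmaps all force a
 * fallback.
 */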
1704 static bool
1705 is_gradient(PicturePtr picture, bool precise)
1706 {
1707 if (picture->pDrawable)
1708 return false;
1709
1710 switch (picture->pSourcePict->type) {
1711 case SourcePictTypeSolidFill:
1712 case SourcePictTypeLinear:
1713 return false;
1714 default:
1715 return precise;
1716 }
1717 }
1718
1719 static bool
1720 has_alphamap(PicturePtr p)
1721 {
1722 return p->alphaMap != NULL;
1723 }
1724
1725 static bool
1726 need_upload(struct sna *sna, PicturePtr p)
1727 {
1728 return p->pDrawable && untransformed(p) &&
1729 !is_gpu(sna, p->pDrawable, PREFER_GPU_RENDER);
1730 }
1731
1732 static bool
1733 source_is_busy(PixmapPtr pixmap)
1734 {
1735 struct sna_pixmap *priv = sna_pixmap(pixmap);
1736 if (priv == NULL)
1737 return false;
1738
1739 if (priv->clear)
1740 return false;
1741
1742 if (priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo))
1743 return true;
1744
1745 if (priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo))
1746 return true;
1747
1748 return priv->gpu_damage && !priv->cpu_damage;
1749 }
1750
1751 static bool
1752 source_fallback(struct sna *sna, PicturePtr p, PixmapPtr pixmap, bool precise)
1753 {
1754 if (sna_picture_is_solid(p, NULL))
1755 return false;
1756
1757 if (is_gradient(p, precise) ||
1758 !gen5_check_repeat(p) ||
1759 !gen5_check_format(p->format))
1760 return true;
1761
1762 if (pixmap && source_is_busy(pixmap))
1763 return false;
1764
1765 return has_alphamap(p) || !gen5_check_filter(p) || need_upload(sna, p);
1766 }
1767
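/* Decide whether to composite with the GPU at all: if the destination or
 * either source is already busy on the GPU, keep the operation there;
 * otherwise migrating a CPU-bound source is judged more expensive than
 * performing the whole composite on the CPU (heuristic).
 */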
1768 static bool
1769 gen5_composite_fallback(struct sna *sna,
1770 PicturePtr src,
1771 PicturePtr mask,
1772 PicturePtr dst)
1773 {
1774 PixmapPtr src_pixmap;
1775 PixmapPtr mask_pixmap;
1776 PixmapPtr dst_pixmap;
1777 bool src_fallback, mask_fallback;
1778
1779 if (!gen5_check_dst_format(dst->format)) {
1780 DBG(("%s: unknown destination format: %d\n",
1781 __FUNCTION__, dst->format));
1782 return true;
1783 }
1784
1785 dst_pixmap = get_drawable_pixmap(dst->pDrawable);
1786
1787 src_pixmap = src->pDrawable ? get_drawable_pixmap(src->pDrawable) : NULL;
1788 src_fallback = source_fallback(sna, src, src_pixmap,
1789 dst->polyMode == PolyModePrecise);
1790
1791 if (mask) {
1792 mask_pixmap = mask->pDrawable ? get_drawable_pixmap(mask->pDrawable) : NULL;
1793 mask_fallback = source_fallback(sna, mask, mask_pixmap,
1794 dst->polyMode == PolyModePrecise);
1795 } else {
1796 mask_pixmap = NULL;
1797 mask_fallback = false;
1798 }
1799
1800 /* If we are using the destination as a source and need to
1801 * read back in order to upload the source, do it all
1802 * on the CPU.
1803 */
1804 if (src_pixmap == dst_pixmap && src_fallback) {
1805 DBG(("%s: src is dst and will fallback\n",__FUNCTION__));
1806 return true;
1807 }
1808 if (mask_pixmap == dst_pixmap && mask_fallback) {
1809 DBG(("%s: mask is dst and will fallback\n",__FUNCTION__));
1810 return true;
1811 }
1812
1813 /* If anything is on the GPU, push everything out to the GPU */
1814 if (dst_use_gpu(dst_pixmap)) {
1815 DBG(("%s: dst is already on the GPU, try to use GPU\n",
1816 __FUNCTION__));
1817 return false;
1818 }
1819
1820 if (src_pixmap && !src_fallback) {
1821 DBG(("%s: src is already on the GPU, try to use GPU\n",
1822 __FUNCTION__));
1823 return false;
1824 }
1825 if (mask_pixmap && !mask_fallback) {
1826 DBG(("%s: mask is already on the GPU, try to use GPU\n",
1827 __FUNCTION__));
1828 return false;
1829 }
1830
1831 /* However, if the dst is not on the GPU and we need to
1832 * render one of the sources using the CPU, we may
1833 * as well do the entire operation in place on the CPU.
1834 */
1835 if (src_fallback) {
1836 DBG(("%s: dst is on the CPU and src will fallback\n",
1837 __FUNCTION__));
1838 return true;
1839 }
1840
1841 if (mask_fallback) {
1842 DBG(("%s: dst is on the CPU and mask will fallback\n",
1843 __FUNCTION__));
1844 return true;
1845 }
1846
1847 if (too_large(dst_pixmap->drawable.width,
1848 dst_pixmap->drawable.height) &&
1849 dst_is_cpu(dst_pixmap)) {
1850 DBG(("%s: dst is on the CPU and too large\n", __FUNCTION__));
1851 return true;
1852 }
1853
1854 DBG(("%s: dst is not on the GPU and the operation should not fallback\n",
1855 __FUNCTION__));
1856 return dst_use_cpu(dst_pixmap);
1857 }
1858
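/* If the mask refers to the same picture or drawable as the source, with
 * identical transform and alpha map, reuse the source channel and simply
 * take another reference on its bo instead of binding the pixmap twice.
 */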
1859 static bool
1860 reuse_source(struct sna *sna,
1861 PicturePtr src, struct sna_composite_channel *sc, int src_x, int src_y,
1862 PicturePtr mask, struct sna_composite_channel *mc, int msk_x, int msk_y)
1863 {
1864 uint32_t color;
1865
1866 if (src_x != msk_x || src_y != msk_y)
1867 return false;
1868
1869 if (src == mask) {
1870 DBG(("%s: mask is source\n", __FUNCTION__));
1871 *mc = *sc;
1872 mc->bo = kgem_bo_reference(mc->bo);
1873 return true;
1874 }
1875
1876 if (sna_picture_is_solid(mask, &color))
1877 return gen4_channel_init_solid(sna, mc, color);
1878
1879 if (sc->is_solid)
1880 return false;
1881
1882 if (src->pDrawable == NULL || mask->pDrawable != src->pDrawable)
1883 return false;
1884
1885 DBG(("%s: mask reuses source drawable\n", __FUNCTION__));
1886
1887 if (!sna_transform_equal(src->transform, mask->transform))
1888 return false;
1889
1890 if (!sna_picture_alphamap_equal(src, mask))
1891 return false;
1892
1893 if (!gen5_check_repeat(mask))
1894 return false;
1895
1896 if (!gen5_check_filter(mask))
1897 return false;
1898
1899 if (!gen5_check_format(mask->format))
1900 return false;
1901
1902 DBG(("%s: reusing source channel for mask with a twist\n",
1903 __FUNCTION__));
1904
1905 *mc = *sc;
1906 mc->repeat = gen5_repeat(mask->repeat ? mask->repeatType : RepeatNone);
1907 mc->filter = gen5_filter(mask->filter);
1908 mc->pict_format = mask->format;
1909 mc->card_format = gen5_get_card_format(mask->format);
1910 mc->bo = kgem_bo_reference(mc->bo);
1911 return true;
1912 }
1913
1914 static bool
1915 gen5_render_composite(struct sna *sna,
1916 uint8_t op,
1917 PicturePtr src,
1918 PicturePtr mask,
1919 PicturePtr dst,
1920 int16_t src_x, int16_t src_y,
1921 int16_t msk_x, int16_t msk_y,
1922 int16_t dst_x, int16_t dst_y,
1923 int16_t width, int16_t height,
1924 unsigned flags,
1925 struct sna_composite_op *tmp)
1926 {
1927 DBG(("%s: %dx%d, current mode=%d\n", __FUNCTION__,
1928 width, height, sna->kgem.mode));
1929
1930 if (op >= ARRAY_SIZE(gen5_blend_op)) {
1931 DBG(("%s: unhandled blend op %d\n", __FUNCTION__, op));
1932 return false;
1933 }
1934
1935 if (mask == NULL &&
1936 sna_blt_composite(sna, op,
1937 src, dst,
1938 src_x, src_y,
1939 dst_x, dst_y,
1940 width, height,
1941 flags, tmp))
1942 return true;
1943
1944 if (gen5_composite_fallback(sna, src, mask, dst))
1945 goto fallback;
1946
1947 if (need_tiling(sna, width, height))
1948 return sna_tiling_composite(op, src, mask, dst,
1949 src_x, src_y,
1950 msk_x, msk_y,
1951 dst_x, dst_y,
1952 width, height,
1953 tmp);
1954
1955 if (!gen5_composite_set_target(sna, tmp, dst,
1956 dst_x, dst_y, width, height,
1957 flags & COMPOSITE_PARTIAL || op > PictOpSrc)) {
1958 DBG(("%s: failed to set composite target\n", __FUNCTION__));
1959 goto fallback;
1960 }
1961
1962 DBG(("%s: preparing source\n", __FUNCTION__));
1963 tmp->op = op;
1964 switch (gen5_composite_picture(sna, src, &tmp->src,
1965 src_x, src_y,
1966 width, height,
1967 dst_x, dst_y,
1968 dst->polyMode == PolyModePrecise)) {
1969 case -1:
1970 DBG(("%s: failed to prepare source picture\n", __FUNCTION__));
1971 goto cleanup_dst;
1972 case 0:
1973 if (!gen4_channel_init_solid(sna, &tmp->src, 0))
1974 goto cleanup_dst;
1975 /* fall through */
1976 case 1:
1977 if (mask == NULL &&
1978 sna_blt_composite__convert(sna,
1979 dst_x, dst_y, width, height,
1980 tmp))
1981 return true;
1982
1983 gen5_composite_channel_convert(&tmp->src);
1984 break;
1985 }
1986
1987 tmp->is_affine = tmp->src.is_affine;
1988 tmp->has_component_alpha = false;
1989 tmp->need_magic_ca_pass = false;
1990
1991 if (mask) {
1992 if (mask->componentAlpha && PICT_FORMAT_RGB(mask->format)) {
1993 tmp->has_component_alpha = true;
1994
1995 /* Check if it's component alpha that relies on a source alpha and on
1996 * the source value. We can only get one of those into the single
1997 * source value that we get to blend with.
1998 */
1999 if (gen5_blend_op[op].src_alpha &&
2000 (gen5_blend_op[op].src_blend != GEN5_BLENDFACTOR_ZERO)) {
2001 if (op != PictOpOver) {
2002 DBG(("%s: unhandled CA blend op %d\n", __FUNCTION__, op));
2003 goto cleanup_src;
2004 }
2005
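/* PictOpOver with component alpha is split into two passes: a first
 * pass using OutReverse to knock out the destination, then
 * gen5_magic_ca_pass replays the rectangles with an additive blend
 * to accumulate the source values.
 */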
2006 tmp->need_magic_ca_pass = true;
2007 tmp->op = PictOpOutReverse;
2008 }
2009 }
2010
2011 if (!reuse_source(sna,
2012 src, &tmp->src, src_x, src_y,
2013 mask, &tmp->mask, msk_x, msk_y)) {
2014 DBG(("%s: preparing mask\n", __FUNCTION__));
2015 switch (gen5_composite_picture(sna, mask, &tmp->mask,
2016 msk_x, msk_y,
2017 width, height,
2018 dst_x, dst_y,
2019 dst->polyMode == PolyModePrecise)) {
2020 case -1:
2021 DBG(("%s: failed to prepare mask picture\n", __FUNCTION__));
2022 goto cleanup_src;
2023 case 0:
2024 if (!gen4_channel_init_solid(sna, &tmp->mask, 0))
2025 goto cleanup_src;
2026 /* fall through */
2027 case 1:
2028 gen5_composite_channel_convert(&tmp->mask);
2029 break;
2030 }
2031 }
2032
2033 tmp->is_affine &= tmp->mask.is_affine;
2034 }
2035
2036 tmp->u.gen5.wm_kernel =
2037 gen5_choose_composite_kernel(tmp->op,
2038 tmp->mask.bo != NULL,
2039 tmp->has_component_alpha,
2040 tmp->is_affine);
2041 tmp->u.gen5.ve_id = gen4_choose_composite_emitter(sna, tmp);
2042
2043 tmp->blt = gen5_render_composite_blt;
2044 tmp->box = gen5_render_composite_box;
2045 tmp->boxes = gen5_render_composite_boxes__blt;
2046 if (tmp->emit_boxes) {
2047 tmp->boxes = gen5_render_composite_boxes;
2048 tmp->thread_boxes = gen5_render_composite_boxes__thread;
2049 }
2050 tmp->done = gen5_render_composite_done;
2051
2052 if (!kgem_check_bo(&sna->kgem,
2053 tmp->dst.bo, tmp->src.bo, tmp->mask.bo, NULL)) {
2054 kgem_submit(&sna->kgem);
2055 if (!kgem_check_bo(&sna->kgem,
2056 tmp->dst.bo, tmp->src.bo, tmp->mask.bo, NULL))
2057 goto cleanup_mask;
2058 }
2059
2060 gen5_align_vertex(sna, tmp);
2061 gen5_bind_surfaces(sna, tmp);
2062 return true;
2063
2064 cleanup_mask:
2065 if (tmp->mask.bo) {
2066 kgem_bo_destroy(&sna->kgem, tmp->mask.bo);
2067 tmp->mask.bo = NULL;
2068 }
2069 cleanup_src:
2070 if (tmp->src.bo) {
2071 kgem_bo_destroy(&sna->kgem, tmp->src.bo);
2072 tmp->src.bo = NULL;
2073 }
2074 cleanup_dst:
2075 if (tmp->redirect.real_bo) {
2076 kgem_bo_destroy(&sna->kgem, tmp->dst.bo);
2077 tmp->redirect.real_bo = NULL;
2078 }
2079 fallback:
2080 return (mask == NULL &&
2081 sna_blt_composite(sna, op,
2082 src, dst,
2083 src_x, src_y,
2084 dst_x, dst_y,
2085 width, height,
2086 flags | COMPOSITE_FALLBACK, tmp));
2087 }
2088
2089 #if !NO_COMPOSITE_SPANS
2090 fastcall static void
2091 gen5_render_composite_spans_box(struct sna *sna,
2092 const struct sna_composite_spans_op *op,
2093 const BoxRec *box, float opacity)
2094 {
2095 DBG(("%s: src=+(%d, %d), opacity=%f, dst=+(%d, %d), box=(%d, %d) x (%d, %d)\n",
2096 __FUNCTION__,
2097 op->base.src.offset[0], op->base.src.offset[1],
2098 opacity,
2099 op->base.dst.x, op->base.dst.y,
2100 box->x1, box->y1,
2101 box->x2 - box->x1,
2102 box->y2 - box->y1));
2103
2104 gen5_get_rectangles(sna, &op->base, 1, gen5_bind_surfaces);
2105 op->prim_emit(sna, op, box, opacity);
2106 }
2107
2108 static void
2109 gen5_render_composite_spans_boxes(struct sna *sna,
2110 const struct sna_composite_spans_op *op,
2111 const BoxRec *box, int nbox,
2112 float opacity)
2113 {
2114 DBG(("%s: nbox=%d, src=+(%d, %d), opacity=%f, dst=+(%d, %d)\n",
2115 __FUNCTION__, nbox,
2116 op->base.src.offset[0], op->base.src.offset[1],
2117 opacity,
2118 op->base.dst.x, op->base.dst.y));
2119
2120 do {
2121 int nbox_this_time;
2122
2123 nbox_this_time = gen5_get_rectangles(sna, &op->base, nbox,
2124 gen5_bind_surfaces);
2125 nbox -= nbox_this_time;
2126
2127 do {
2128 DBG((" %s: (%d, %d) x (%d, %d)\n", __FUNCTION__,
2129 box->x1, box->y1,
2130 box->x2 - box->x1,
2131 box->y2 - box->y1));
2132
2133 op->prim_emit(sna, op, box++, opacity);
2134 } while (--nbox_this_time);
2135 } while (nbox);
2136 }
2137
2138 fastcall static void
2139 gen5_render_composite_spans_boxes__thread(struct sna *sna,
2140 const struct sna_composite_spans_op *op,
2141 const struct sna_opacity_box *box,
2142 int nbox)
2143 {
2144 DBG(("%s: nbox=%d, src=+(%d, %d), dst=+(%d, %d)\n",
2145 __FUNCTION__, nbox,
2146 op->base.src.offset[0], op->base.src.offset[1],
2147 op->base.dst.x, op->base.dst.y));
2148
2149 sna_vertex_lock(&sna->render);
2150 do {
2151 int nbox_this_time;
2152 float *v;
2153
2154 nbox_this_time = gen5_get_rectangles(sna, &op->base, nbox,
2155 gen5_bind_surfaces);
2156 assert(nbox_this_time);
2157 nbox -= nbox_this_time;
2158
2159 v = sna->render.vertices + sna->render.vertex_used;
2160 sna->render.vertex_used += nbox_this_time * op->base.floats_per_rect;
2161
2162 sna_vertex_acquire__locked(&sna->render);
2163 sna_vertex_unlock(&sna->render);
2164
2165 op->emit_boxes(op, box, nbox_this_time, v);
2166 box += nbox_this_time;
2167
2168 sna_vertex_lock(&sna->render);
2169 sna_vertex_release__locked(&sna->render);
2170 } while (nbox);
2171 sna_vertex_unlock(&sna->render);
2172 }
2173
2174 fastcall static void
2175 gen5_render_composite_spans_done(struct sna *sna,
2176 const struct sna_composite_spans_op *op)
2177 {
2178 if (sna->render.vertex_offset)
2179 gen4_vertex_flush(sna);
2180
2181 DBG(("%s()\n", __FUNCTION__));
2182
2183 kgem_bo_destroy(&sna->kgem, op->base.src.bo);
2184 sna_render_composite_redirect_done(sna, &op->base);
2185 }
2186
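/* Span compositing on the 3D pipe is only worthwhile if the target is
 * already busy on the GPU; for idle, non-rectilinear spans the CPU
 * rasteriser is usually faster, hence the heuristics below.
 */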
2187 static bool
2188 gen5_check_composite_spans(struct sna *sna,
2189 uint8_t op, PicturePtr src, PicturePtr dst,
2190 int16_t width, int16_t height,
2191 unsigned flags)
2192 {
2193 DBG(("%s: op=%d, width=%d, height=%d, flags=%x\n",
2194 __FUNCTION__, op, width, height, flags));
2195
2196 if (op >= ARRAY_SIZE(gen5_blend_op))
2197 return false;
2198
2199 if (gen5_composite_fallback(sna, src, NULL, dst)) {
2200 DBG(("%s: operation would fallback\n", __FUNCTION__));
2201 return false;
2202 }
2203
2204 if (need_tiling(sna, width, height) &&
2205 !is_gpu(sna, dst->pDrawable, PREFER_GPU_SPANS)) {
2206 DBG(("%s: fallback, tiled operation not on GPU\n",
2207 __FUNCTION__));
2208 return false;
2209 }
2210
2211 if ((flags & COMPOSITE_SPANS_RECTILINEAR) == 0) {
2212 struct sna_pixmap *priv = sna_pixmap_from_drawable(dst->pDrawable);
2213 assert(priv);
2214
2215 if (priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo))
2216 return true;
2217
2218 if (flags & COMPOSITE_SPANS_INPLACE_HINT)
2219 return false;
2220
2221 if ((sna->render.prefer_gpu & PREFER_GPU_SPANS) == 0 &&
2222 dst->format == PICT_a8)
2223 return false;
2224
2225 return priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo);
2226 }
2227
2228 return true;
2229 }
2230
2231 static bool
2232 gen5_render_composite_spans(struct sna *sna,
2233 uint8_t op,
2234 PicturePtr src,
2235 PicturePtr dst,
2236 int16_t src_x, int16_t src_y,
2237 int16_t dst_x, int16_t dst_y,
2238 int16_t width, int16_t height,
2239 unsigned flags,
2240 struct sna_composite_spans_op *tmp)
2241 {
2242 DBG(("%s: %dx%d with flags=%x, current mode=%d\n", __FUNCTION__,
2243 width, height, flags, sna->kgem.ring));
2244
2245 assert(gen5_check_composite_spans(sna, op, src, dst, width, height, flags));
2246
2247 if (need_tiling(sna, width, height)) {
2248 DBG(("%s: tiling, operation (%dx%d) too wide for pipeline\n",
2249 __FUNCTION__, width, height));
2250 return sna_tiling_composite_spans(op, src, dst,
2251 src_x, src_y, dst_x, dst_y,
2252 width, height, flags, tmp);
2253 }
2254
2255 tmp->base.op = op;
2256 if (!gen5_composite_set_target(sna, &tmp->base, dst,
2257 dst_x, dst_y, width, height,
2258 true))
2259 return false;
2260
2261 switch (gen5_composite_picture(sna, src, &tmp->base.src,
2262 src_x, src_y,
2263 width, height,
2264 dst_x, dst_y,
2265 dst->polyMode == PolyModePrecise)) {
2266 case -1:
2267 goto cleanup_dst;
2268 case 0:
2269 if (!gen4_channel_init_solid(sna, &tmp->base.src, 0))
2270 goto cleanup_dst;
2271 /* fall through */
2272 case 1:
2273 gen5_composite_channel_convert(&tmp->base.src);
2274 break;
2275 }
2276
2277 tmp->base.mask.bo = NULL;
2278
2279 tmp->base.is_affine = tmp->base.src.is_affine;
2280 tmp->base.has_component_alpha = false;
2281 tmp->base.need_magic_ca_pass = false;
2282
2283 tmp->base.u.gen5.ve_id = gen4_choose_spans_emitter(sna, tmp);
2284 tmp->base.u.gen5.wm_kernel = WM_KERNEL_OPACITY | !tmp->base.is_affine;
2285
2286 tmp->box = gen5_render_composite_spans_box;
2287 tmp->boxes = gen5_render_composite_spans_boxes;
2288 if (tmp->emit_boxes)
2289 tmp->thread_boxes = gen5_render_composite_spans_boxes__thread;
2290 tmp->done = gen5_render_composite_spans_done;
2291
2292 if (!kgem_check_bo(&sna->kgem,
2293 tmp->base.dst.bo, tmp->base.src.bo,
2294 NULL)) {
2295 kgem_submit(&sna->kgem);
2296 if (!kgem_check_bo(&sna->kgem,
2297 tmp->base.dst.bo, tmp->base.src.bo,
2298 NULL))
2299 goto cleanup_src;
2300 }
2301
2302 gen5_align_vertex(sna, &tmp->base);
2303 gen5_bind_surfaces(sna, &tmp->base);
2304 return true;
2305
2306 cleanup_src:
2307 if (tmp->base.src.bo)
2308 kgem_bo_destroy(&sna->kgem, tmp->base.src.bo);
2309 cleanup_dst:
2310 if (tmp->base.redirect.real_bo)
2311 kgem_bo_destroy(&sna->kgem, tmp->base.dst.bo);
2312 return false;
2313 }
2314 #endif
2315
2316 static void
2317 gen5_copy_bind_surfaces(struct sna *sna,
2318 const struct sna_composite_op *op)
2319 {
2320 bool dirty = kgem_bo_is_dirty(op->dst.bo);
2321 uint32_t *binding_table;
2322 uint16_t offset;
2323
2324 gen5_get_batch(sna, op);
2325
2326 binding_table = gen5_composite_get_binding_table(sna, &offset);
2327
2328 binding_table[0] =
2329 gen5_bind_bo(sna,
2330 op->dst.bo, op->dst.width, op->dst.height,
2331 gen5_get_dest_format(op->dst.format),
2332 true);
2333 binding_table[1] =
2334 gen5_bind_bo(sna,
2335 op->src.bo, op->src.width, op->src.height,
2336 op->src.card_format,
2337 false);
2338
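/* If this binding table matches the one last emitted, rewind the surface
 * allocation and reuse the previous table to save batch space.
 */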
2339 if (sna->kgem.surface == offset &&
2340 *(uint64_t *)(sna->kgem.batch + sna->render_state.gen5.surface_table) == *(uint64_t*)binding_table) {
2341 sna->kgem.surface += sizeof(struct gen5_surface_state_padded) / sizeof(uint32_t);
2342 offset = sna->render_state.gen5.surface_table;
2343 }
2344
2345 gen5_emit_state(sna, op, offset | dirty);
2346 }
2347
2348 static bool
2349 gen5_render_copy_boxes(struct sna *sna, uint8_t alu,
2350 const DrawableRec *src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
2351 const DrawableRec *dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
2352 const BoxRec *box, int n, unsigned flags)
2353 {
2354 struct sna_composite_op tmp;
2355
2356 DBG(("%s alu=%d, src=%ld:handle=%d, dst=%ld:handle=%d boxes=%d x [((%d, %d), (%d, %d))...], flags=%x\n",
2357 __FUNCTION__, alu,
2358 src->serialNumber, src_bo->handle,
2359 dst->serialNumber, dst_bo->handle,
2360 n, box->x1, box->y1, box->x2, box->y2,
2361 flags));
2362
2363 if (sna_blt_compare_depth(src, dst) &&
2364 sna_blt_copy_boxes(sna, alu,
2365 src_bo, src_dx, src_dy,
2366 dst_bo, dst_dx, dst_dy,
2367 dst->bitsPerPixel,
2368 box, n))
2369 return true;
2370
2371 if (!(alu == GXcopy || alu == GXclear) || src_bo == dst_bo) {
2372 fallback_blt:
2373 if (!sna_blt_compare_depth(src, dst))
2374 return false;
2375
2376 return sna_blt_copy_boxes_fallback(sna, alu,
2377 src, src_bo, src_dx, src_dy,
2378 dst, dst_bo, dst_dx, dst_dy,
2379 box, n);
2380 }
2381
2382 memset(&tmp, 0, sizeof(tmp));
2383
2384 if (dst->depth == src->depth) {
2385 tmp.dst.format = sna_render_format_for_depth(dst->depth);
2386 tmp.src.pict_format = tmp.dst.format;
2387 } else {
2388 tmp.dst.format = sna_format_for_depth(dst->depth);
2389 tmp.src.pict_format = sna_format_for_depth(src->depth);
2390 }
2391 if (!gen5_check_format(tmp.src.pict_format)) {
2392 DBG(("%s: unsupported source format, %x, use BLT\n",
2393 __FUNCTION__, tmp.src.pict_format));
2394 goto fallback_blt;
2395 }
2396
2397 DBG(("%s (%d, %d)->(%d, %d) x %d\n",
2398 __FUNCTION__, src_dx, src_dy, dst_dx, dst_dy, n));
2399
2400 tmp.op = alu == GXcopy ? PictOpSrc : PictOpClear;
2401
2402 tmp.dst.pixmap = (PixmapPtr)dst;
2403 tmp.dst.width = dst->width;
2404 tmp.dst.height = dst->height;
2405 tmp.dst.x = tmp.dst.y = 0;
2406 tmp.dst.bo = dst_bo;
2407 tmp.damage = NULL;
2408
2409 sna_render_composite_redirect_init(&tmp);
2410 if (too_large(tmp.dst.width, tmp.dst.height)) {
2411 BoxRec extents = box[0];
2412 int i;
2413
2414 for (i = 1; i < n; i++) {
2415 if (box[i].x1 < extents.x1)
2416 extents.x1 = box[i].x1;
2417 if (box[i].y1 < extents.y1)
2418 extents.y1 = box[i].y1;
2419
2420 if (box[i].x2 > extents.x2)
2421 extents.x2 = box[i].x2;
2422 if (box[i].y2 > extents.y2)
2423 extents.y2 = box[i].y2;
2424 }
2425 if (!sna_render_composite_redirect(sna, &tmp,
2426 extents.x1 + dst_dx,
2427 extents.y1 + dst_dy,
2428 extents.x2 - extents.x1,
2429 extents.y2 - extents.y1,
2430 n > 1))
2431 goto fallback_tiled;
2432 }
2433
2434 tmp.src.filter = SAMPLER_FILTER_NEAREST;
2435 tmp.src.repeat = SAMPLER_EXTEND_NONE;
2436 tmp.src.card_format = gen5_get_card_format(tmp.src.pict_format);
2437 if (too_large(src->width, src->height)) {
2438 BoxRec extents = box[0];
2439 int i;
2440
2441 for (i = 1; i < n; i++) {
2442 if (box[i].x1 < extents.x1)
2443 extents.x1 = box[i].x1;
2444 if (box[i].y1 < extents.y1)
2445 extents.y1 = box[i].y1;
2446
2447 if (box[i].x2 > extents.x2)
2448 extents.x2 = box[i].x2;
2449 if (box[i].y2 > extents.y2)
2450 extents.y2 = box[i].y2;
2451 }
2452
2453 if (!sna_render_pixmap_partial(sna, src, src_bo, &tmp.src,
2454 extents.x1 + src_dx,
2455 extents.y1 + src_dy,
2456 extents.x2 - extents.x1,
2457 extents.y2 - extents.y1))
2458 goto fallback_tiled_dst;
2459 } else {
2460 tmp.src.bo = kgem_bo_reference(src_bo);
2461 tmp.src.width = src->width;
2462 tmp.src.height = src->height;
2463 tmp.src.offset[0] = tmp.src.offset[1] = 0;
2464 tmp.src.scale[0] = 1.f/src->width;
2465 tmp.src.scale[1] = 1.f/src->height;
2466 }
2467
2468 tmp.is_affine = true;
2469 tmp.floats_per_vertex = 3;
2470 tmp.floats_per_rect = 9;
2471 tmp.u.gen5.wm_kernel = WM_KERNEL;
2472 tmp.u.gen5.ve_id = 2;
2473
2474 if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) {
2475 kgem_submit(&sna->kgem);
2476 if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) {
2477 DBG(("%s: aperture check failed\n", __FUNCTION__));
2478 kgem_bo_destroy(&sna->kgem, tmp.src.bo);
2479 if (tmp.redirect.real_bo)
2480 kgem_bo_destroy(&sna->kgem, tmp.dst.bo);
2481
2482 goto fallback_blt;
2483 }
2484 }
2485
2486 dst_dx += tmp.dst.x;
2487 dst_dy += tmp.dst.y;
2488 tmp.dst.x = tmp.dst.y = 0;
2489
2490 src_dx += tmp.src.offset[0];
2491 src_dy += tmp.src.offset[1];
2492
2493 gen5_align_vertex(sna, &tmp);
2494 gen5_copy_bind_surfaces(sna, &tmp);
2495
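/* Each box is emitted as three vertices of a RECTLIST primitive; the
 * hardware infers the fourth corner. Texture coordinates are normalised
 * by the reciprocal source dimensions computed above.
 */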
2496 do {
2497 int n_this_time;
2498
2499 n_this_time = gen5_get_rectangles(sna, &tmp, n,
2500 gen5_copy_bind_surfaces);
2501 n -= n_this_time;
2502
2503 do {
2504 DBG((" (%d, %d) -> (%d, %d) + (%d, %d)\n",
2505 box->x1 + src_dx, box->y1 + src_dy,
2506 box->x1 + dst_dx, box->y1 + dst_dy,
2507 box->x2 - box->x1, box->y2 - box->y1));
2508 OUT_VERTEX(box->x2 + dst_dx, box->y2 + dst_dy);
2509 OUT_VERTEX_F((box->x2 + src_dx) * tmp.src.scale[0]);
2510 OUT_VERTEX_F((box->y2 + src_dy) * tmp.src.scale[1]);
2511
2512 OUT_VERTEX(box->x1 + dst_dx, box->y2 + dst_dy);
2513 OUT_VERTEX_F((box->x1 + src_dx) * tmp.src.scale[0]);
2514 OUT_VERTEX_F((box->y2 + src_dy) * tmp.src.scale[1]);
2515
2516 OUT_VERTEX(box->x1 + dst_dx, box->y1 + dst_dy);
2517 OUT_VERTEX_F((box->x1 + src_dx) * tmp.src.scale[0]);
2518 OUT_VERTEX_F((box->y1 + src_dy) * tmp.src.scale[1]);
2519
2520 box++;
2521 } while (--n_this_time);
2522 } while (n);
2523
2524 gen4_vertex_flush(sna);
2525 sna_render_composite_redirect_done(sna, &tmp);
2526 kgem_bo_destroy(&sna->kgem, tmp.src.bo);
2527 return true;
2528
2529 fallback_tiled_dst:
2530 if (tmp.redirect.real_bo)
2531 kgem_bo_destroy(&sna->kgem, tmp.dst.bo);
2532 fallback_tiled:
2533 if (sna_blt_compare_depth(src, dst) &&
2534 sna_blt_copy_boxes(sna, alu,
2535 src_bo, src_dx, src_dy,
2536 dst_bo, dst_dx, dst_dy,
2537 dst->bitsPerPixel,
2538 box, n))
2539 return true;
2540
2541 DBG(("%s: tiled fallback\n", __FUNCTION__));
2542 return sna_tiling_copy_boxes(sna, alu,
2543 src, src_bo, src_dx, src_dy,
2544 dst, dst_bo, dst_dx, dst_dy,
2545 box, n);
2546 }
2547
2548 static void
2549 gen5_render_copy_blt(struct sna *sna,
2550 const struct sna_copy_op *op,
2551 int16_t sx, int16_t sy,
2552 int16_t w, int16_t h,
2553 int16_t dx, int16_t dy)
2554 {
2555 DBG(("%s: src=(%d, %d), dst=(%d, %d), size=(%d, %d)\n", __FUNCTION__,
2556 sx, sy, dx, dy, w, h));
2557
2558 gen5_get_rectangles(sna, &op->base, 1, gen5_copy_bind_surfaces);
2559
2560 OUT_VERTEX(dx+w, dy+h);
2561 OUT_VERTEX_F((sx+w)*op->base.src.scale[0]);
2562 OUT_VERTEX_F((sy+h)*op->base.src.scale[1]);
2563
2564 OUT_VERTEX(dx, dy+h);
2565 OUT_VERTEX_F(sx*op->base.src.scale[0]);
2566 OUT_VERTEX_F((sy+h)*op->base.src.scale[1]);
2567
2568 OUT_VERTEX(dx, dy);
2569 OUT_VERTEX_F(sx*op->base.src.scale[0]);
2570 OUT_VERTEX_F(sy*op->base.src.scale[1]);
2571 }
2572
2573 static void
2574 gen5_render_copy_done(struct sna *sna,
2575 const struct sna_copy_op *op)
2576 {
2577 if (sna->render.vertex_offset)
2578 gen4_vertex_flush(sna);
2579
2580 DBG(("%s()\n", __FUNCTION__));
2581 }
2582
2583 static bool
2584 gen5_render_copy(struct sna *sna, uint8_t alu,
2585 PixmapPtr src, struct kgem_bo *src_bo,
2586 PixmapPtr dst, struct kgem_bo *dst_bo,
2587 struct sna_copy_op *op)
2588 {
2589 DBG(("%s (alu=%d)\n", __FUNCTION__, alu));
2590
2591 if (sna_blt_compare_depth(&src->drawable, &dst->drawable) &&
2592 sna_blt_copy(sna, alu,
2593 src_bo, dst_bo,
2594 dst->drawable.bitsPerPixel,
2595 op))
2596 return true;
2597
2598 if (!(alu == GXcopy || alu == GXclear) || src_bo == dst_bo ||
2599 too_large(src->drawable.width, src->drawable.height) ||
2600 too_large(dst->drawable.width, dst->drawable.height)) {
2601 fallback:
2602 if (!sna_blt_compare_depth(&src->drawable, &dst->drawable))
2603 return false;
2604
2605 return sna_blt_copy(sna, alu, src_bo, dst_bo,
2606 dst->drawable.bitsPerPixel,
2607 op);
2608 }
2609
2610 if (dst->drawable.depth == src->drawable.depth) {
2611 op->base.dst.format = sna_render_format_for_depth(dst->drawable.depth);
2612 op->base.src.pict_format = op->base.dst.format;
2613 } else {
2614 op->base.dst.format = sna_format_for_depth(dst->drawable.depth);
2615 op->base.src.pict_format = sna_format_for_depth(src->drawable.depth);
2616 }
2617 if (!gen5_check_format(op->base.src.pict_format))
2618 goto fallback;
2619
2620 op->base.op = alu == GXcopy ? PictOpSrc : PictOpClear;
2621
2622 op->base.dst.pixmap = dst;
2623 op->base.dst.width = dst->drawable.width;
2624 op->base.dst.height = dst->drawable.height;
2625 op->base.dst.bo = dst_bo;
2626
2627 op->base.src.bo = src_bo;
2628 op->base.src.card_format =
2629 gen5_get_card_format(op->base.src.pict_format);
2630 op->base.src.width = src->drawable.width;
2631 op->base.src.height = src->drawable.height;
2632 op->base.src.scale[0] = 1.f/src->drawable.width;
2633 op->base.src.scale[1] = 1.f/src->drawable.height;
2634 op->base.src.filter = SAMPLER_FILTER_NEAREST;
2635 op->base.src.repeat = SAMPLER_EXTEND_NONE;
2636
2637 op->base.is_affine = true;
2638 op->base.floats_per_vertex = 3;
2639 op->base.floats_per_rect = 9;
2640 op->base.u.gen5.wm_kernel = WM_KERNEL;
2641 op->base.u.gen5.ve_id = 2;
2642
2643 if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) {
2644 kgem_submit(&sna->kgem);
2645 if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL))
2646 goto fallback;
2647 }
2648
2649 if (kgem_bo_is_dirty(src_bo)) {
2650 if (sna_blt_compare_depth(&src->drawable, &dst->drawable) &&
2651 sna_blt_copy(sna, alu,
2652 src_bo, dst_bo,
2653 dst->drawable.bitsPerPixel,
2654 op))
2655 return true;
2656 }
2657
2658 gen5_align_vertex(sna, &op->base);
2659 gen5_copy_bind_surfaces(sna, &op->base);
2660
2661 op->blt = gen5_render_copy_blt;
2662 op->done = gen5_render_copy_done;
2663 return true;
2664 }
2665
2666 static void
2667 gen5_fill_bind_surfaces(struct sna *sna,
2668 const struct sna_composite_op *op)
2669 {
2670 bool dirty = kgem_bo_is_dirty(op->dst.bo);
2671 uint32_t *binding_table;
2672 uint16_t offset;
2673
2674 gen5_get_batch(sna, op);
2675
2676 binding_table = gen5_composite_get_binding_table(sna, &offset);
2677
2678 binding_table[0] =
2679 gen5_bind_bo(sna,
2680 op->dst.bo, op->dst.width, op->dst.height,
2681 gen5_get_dest_format(op->dst.format),
2682 true);
2683 binding_table[1] =
2684 gen5_bind_bo(sna,
2685 op->src.bo, 1, 1,
2686 GEN5_SURFACEFORMAT_B8G8R8A8_UNORM,
2687 false);
2688
2689 if (sna->kgem.surface == offset &&
2690 *(uint64_t *)(sna->kgem.batch + sna->render_state.gen5.surface_table) == *(uint64_t*)binding_table) {
2691 sna->kgem.surface +=
2692 sizeof(struct gen5_surface_state_padded)/sizeof(uint32_t);
2693 offset = sna->render_state.gen5.surface_table;
2694 }
2695
2696 gen5_emit_state(sna, op, offset | dirty);
2697 }
2698
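/* Solid fills are usually faster on the BLT engine; PREFER_BLT_FILL makes
 * that choice unconditional, otherwise the BLT is used only when the 3D
 * pipeline is not already active.
 */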
2699 static inline bool prefer_blt_fill(struct sna *sna)
2700 {
2701 #if PREFER_BLT_FILL
2702 return true;
2703 #else
2704 return sna->kgem.mode != KGEM_RENDER;
2705 #endif
2706 }
2707
2708 static bool
2709 gen5_render_fill_boxes(struct sna *sna,
2710 CARD8 op,
2711 PictFormat format,
2712 const xRenderColor *color,
2713 const DrawableRec *dst, struct kgem_bo *dst_bo,
2714 const BoxRec *box, int n)
2715 {
2716 struct sna_composite_op tmp;
2717 uint32_t pixel;
2718
2719 DBG(("%s op=%x, color=(%04x,%04x,%04x,%04x), boxes=%d x [((%d, %d), (%d, %d))...]\n",
2720 __FUNCTION__, op,
2721 color->red, color->green, color->blue, color->alpha,
2722 n, box->x1, box->y1, box->x2, box->y2));
2723
2724 if (op >= ARRAY_SIZE(gen5_blend_op)) {
2725 DBG(("%s: fallback due to unhandled blend op: %d\n",
2726 __FUNCTION__, op));
2727 return false;
2728 }
2729
2730 if (op <= PictOpSrc &&
2731 (prefer_blt_fill(sna) ||
2732 too_large(dst->width, dst->height) ||
2733 !gen5_check_dst_format(format))) {
2734 uint8_t alu = GXinvalid;
2735
2736 pixel = 0;
2737 if (op == PictOpClear)
2738 alu = GXclear;
2739 else if (sna_get_pixel_from_rgba(&pixel,
2740 color->red,
2741 color->green,
2742 color->blue,
2743 color->alpha,
2744 format))
2745 alu = GXcopy;
2746
2747 if (alu != GXinvalid &&
2748 sna_blt_fill_boxes(sna, alu,
2749 dst_bo, dst->bitsPerPixel,
2750 pixel, box, n))
2751 return true;
2752
2753 if (!gen5_check_dst_format(format))
2754 return false;
2755
2756 if (too_large(dst->width, dst->height))
2757 return sna_tiling_fill_boxes(sna, op, format, color,
2758 dst, dst_bo, box, n);
2759 }
2760
2761 if (op == PictOpClear) {
2762 pixel = 0;
2763 op = PictOpSrc;
2764 } else if (!sna_get_pixel_from_rgba(&pixel,
2765 color->red,
2766 color->green,
2767 color->blue,
2768 color->alpha,
2769 PICT_a8r8g8b8))
2770 return false;
2771
2772 DBG(("%s(%08x x %d)\n", __FUNCTION__, pixel, n));
2773
2774 memset(&tmp, 0, sizeof(tmp));
2775
2776 tmp.op = op;
2777
2778 tmp.dst.pixmap = (PixmapPtr)dst;
2779 tmp.dst.width = dst->width;
2780 tmp.dst.height = dst->height;
2781 tmp.dst.format = format;
2782 tmp.dst.bo = dst_bo;
2783
2784 if (too_large(dst->width, dst->height)) {
2785 BoxRec extents;
2786
2787 boxes_extents(box, n, &extents);
2788 if (!sna_render_composite_redirect(sna, &tmp,
2789 extents.x1, extents.y1,
2790 extents.x2 - extents.x1,
2791 extents.y2 - extents.y1,
2792 n > 1))
2793 return sna_tiling_fill_boxes(sna, op, format, color,
2794 dst, dst_bo, box, n);
2795 }
2796
2797 tmp.src.bo = sna_render_get_solid(sna, pixel);
2798 tmp.src.filter = SAMPLER_FILTER_NEAREST;
2799 tmp.src.repeat = SAMPLER_EXTEND_REPEAT;
2800
2801 tmp.is_affine = true;
2802 tmp.floats_per_vertex = 2;
2803 tmp.floats_per_rect = 6;
2804 tmp.u.gen5.wm_kernel = WM_KERNEL;
2805 tmp.u.gen5.ve_id = 1;
2806
2807 if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
2808 kgem_submit(&sna->kgem);
2809 if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
2810 kgem_bo_destroy(&sna->kgem, tmp.src.bo);
2811 return false;
2812 }
2813 }
2814
2815 gen5_align_vertex(sna, &tmp);
2816 gen5_fill_bind_surfaces(sna, &tmp);
2817
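/* With ve_id == 1 each vertex carries a single texture coordinate; the
 * constant .5 samples the centre of the 1x1 solid-colour surface bound
 * above.
 */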
2818 do {
2819 int n_this_time;
2820
2821 n_this_time = gen5_get_rectangles(sna, &tmp, n,
2822 gen5_fill_bind_surfaces);
2823 n -= n_this_time;
2824
2825 do {
2826 DBG((" (%d, %d), (%d, %d)\n",
2827 box->x1, box->y1, box->x2, box->y2));
2828 OUT_VERTEX(box->x2, box->y2);
2829 OUT_VERTEX_F(.5);
2830
2831 OUT_VERTEX(box->x1, box->y2);
2832 OUT_VERTEX_F(.5);
2833
2834 OUT_VERTEX(box->x1, box->y1);
2835 OUT_VERTEX_F(.5);
2836
2837 box++;
2838 } while (--n_this_time);
2839 } while (n);
2840
2841 gen4_vertex_flush(sna);
2842 kgem_bo_destroy(&sna->kgem, tmp.src.bo);
2843 sna_render_composite_redirect_done(sna, &tmp);
2844 return true;
2845 }
2846
2847 static void
2848 gen5_render_fill_op_blt(struct sna *sna,
2849 const struct sna_fill_op *op,
2850 int16_t x, int16_t y, int16_t w, int16_t h)
2851 {
2852 DBG(("%s (%d, %d)x(%d, %d)\n", __FUNCTION__, x,y,w,h));
2853
2854 gen5_get_rectangles(sna, &op->base, 1, gen5_fill_bind_surfaces);
2855
2856 OUT_VERTEX(x+w, y+h);
2857 OUT_VERTEX_F(.5);
2858
2859 OUT_VERTEX(x, y+h);
2860 OUT_VERTEX_F(.5);
2861
2862 OUT_VERTEX(x, y);
2863 OUT_VERTEX_F(.5);
2864 }
2865
2866 fastcall static void
2867 gen5_render_fill_op_box(struct sna *sna,
2868 const struct sna_fill_op *op,
2869 const BoxRec *box)
2870 {
2871 DBG(("%s: (%d, %d),(%d, %d)\n", __FUNCTION__,
2872 box->x1, box->y1, box->x2, box->y2));
2873
2874 gen5_get_rectangles(sna, &op->base, 1, gen5_fill_bind_surfaces);
2875
2876 OUT_VERTEX(box->x2, box->y2);
2877 OUT_VERTEX_F(.5);
2878
2879 OUT_VERTEX(box->x1, box->y2);
2880 OUT_VERTEX_F(.5);
2881
2882 OUT_VERTEX(box->x1, box->y1);
2883 OUT_VERTEX_F(.5);
2884 }
2885
2886 fastcall static void
2887 gen5_render_fill_op_boxes(struct sna *sna,
2888 const struct sna_fill_op *op,
2889 const BoxRec *box,
2890 int nbox)
2891 {
2892 DBG(("%s: (%d, %d),(%d, %d)... x %d\n", __FUNCTION__,
2893 box->x1, box->y1, box->x2, box->y2, nbox));
2894
2895 do {
2896 int nbox_this_time;
2897
2898 nbox_this_time = gen5_get_rectangles(sna, &op->base, nbox,
2899 gen5_fill_bind_surfaces);
2900 nbox -= nbox_this_time;
2901
2902 do {
2903 OUT_VERTEX(box->x2, box->y2);
2904 OUT_VERTEX_F(.5);
2905
2906 OUT_VERTEX(box->x1, box->y2);
2907 OUT_VERTEX_F(.5);
2908
2909 OUT_VERTEX(box->x1, box->y1);
2910 OUT_VERTEX_F(.5);
2911 box++;
2912 } while (--nbox_this_time);
2913 } while (nbox);
2914 }
2915
2916 static void
2917 gen5_render_fill_op_done(struct sna *sna,
2918 const struct sna_fill_op *op)
2919 {
2920 if (sna->render.vertex_offset)
2921 gen4_vertex_flush(sna);
2922 kgem_bo_destroy(&sna->kgem, op->base.src.bo);
2923
2924 DBG(("%s()\n", __FUNCTION__));
2925 }
2926
2927 static bool
2928 gen5_render_fill(struct sna *sna, uint8_t alu,
2929 PixmapPtr dst, struct kgem_bo *dst_bo,
2930 uint32_t color, unsigned flags,
2931 struct sna_fill_op *op)
2932 {
2933 DBG(("%s(alu=%d, color=%08x)\n", __FUNCTION__, alu, color));
2934
2935 if (prefer_blt_fill(sna) &&
2936 sna_blt_fill(sna, alu,
2937 dst_bo, dst->drawable.bitsPerPixel,
2938 color,
2939 op))
2940 return true;
2941
2942 if (!(alu == GXcopy || alu == GXclear) ||
2943 too_large(dst->drawable.width, dst->drawable.height))
2944 return sna_blt_fill(sna, alu,
2945 dst_bo, dst->drawable.bitsPerPixel,
2946 color,
2947 op);
2948
2949 if (alu == GXclear)
2950 color = 0;
2951
2952 op->base.op = color == 0 ? PictOpClear : PictOpSrc;
2953
2954 op->base.dst.pixmap = dst;
2955 op->base.dst.width = dst->drawable.width;
2956 op->base.dst.height = dst->drawable.height;
2957 op->base.dst.format = sna_format_for_depth(dst->drawable.depth);
2958 op->base.dst.bo = dst_bo;
2959 op->base.dst.x = op->base.dst.y = 0;
2960
2961 op->base.need_magic_ca_pass = 0;
2962 op->base.has_component_alpha = 0;
2963
2964 op->base.src.bo =
2965 sna_render_get_solid(sna,
2966 sna_rgba_for_color(color,
2967 dst->drawable.depth));
2968 op->base.src.filter = SAMPLER_FILTER_NEAREST;
2969 op->base.src.repeat = SAMPLER_EXTEND_REPEAT;
2970
2971 op->base.mask.bo = NULL;
2972 op->base.mask.filter = SAMPLER_FILTER_NEAREST;
2973 op->base.mask.repeat = SAMPLER_EXTEND_NONE;
2974
2975 op->base.is_affine = true;
2976 op->base.floats_per_vertex = 2;
2977 op->base.floats_per_rect = 6;
2978 op->base.u.gen5.wm_kernel = WM_KERNEL;
2979 op->base.u.gen5.ve_id = 1;
2980
2981 if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
2982 kgem_submit(&sna->kgem);
2983 if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
2984 kgem_bo_destroy(&sna->kgem, op->base.src.bo);
2985 return false;
2986 }
2987 }
2988
2989 gen5_align_vertex(sna, &op->base);
2990 gen5_fill_bind_surfaces(sna, &op->base);
2991
2992 op->blt = gen5_render_fill_op_blt;
2993 op->box = gen5_render_fill_op_box;
2994 op->boxes = gen5_render_fill_op_boxes;
2995 op->points = NULL;
2996 op->done = gen5_render_fill_op_done;
2997 return true;
2998 }
2999
3000 static bool
3001 gen5_render_fill_one_try_blt(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
3002 uint32_t color,
3003 int16_t x1, int16_t y1, int16_t x2, int16_t y2,
3004 uint8_t alu)
3005 {
3006 BoxRec box;
3007
3008 box.x1 = x1;
3009 box.y1 = y1;
3010 box.x2 = x2;
3011 box.y2 = y2;
3012
3013 return sna_blt_fill_boxes(sna, alu,
3014 bo, dst->drawable.bitsPerPixel,
3015 color, &box, 1);
3016 }
3017
3018 static bool
3019 gen5_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
3020 uint32_t color,
3021 int16_t x1, int16_t y1,
3022 int16_t x2, int16_t y2,
3023 uint8_t alu)
3024 {
3025 struct sna_composite_op tmp;
3026
3027 #if NO_FILL_ONE
3028 return gen5_render_fill_one_try_blt(sna, dst, bo, color,
3029 x1, y1, x2, y2, alu);
3030 #endif
3031
3032 /* Prefer to use the BLT if already engaged */
3033 if (prefer_blt_fill(sna) &&
3034 gen5_render_fill_one_try_blt(sna, dst, bo, color,
3035 x1, y1, x2, y2, alu))
3036 return true;
3037
3038 /* Must use the BLT if we can't RENDER... */
3039 if (!(alu == GXcopy || alu == GXclear) ||
3040 too_large(dst->drawable.width, dst->drawable.height))
3041 return gen5_render_fill_one_try_blt(sna, dst, bo, color,
3042 x1, y1, x2, y2, alu);
3043
3044 if (alu == GXclear)
3045 color = 0;
3046
3047 tmp.op = color == 0 ? PictOpClear : PictOpSrc;
3048
3049 tmp.dst.pixmap = dst;
3050 tmp.dst.width = dst->drawable.width;
3051 tmp.dst.height = dst->drawable.height;
3052 tmp.dst.format = sna_format_for_depth(dst->drawable.depth);
3053 tmp.dst.bo = bo;
3054 tmp.dst.x = tmp.dst.y = 0;
3055
3056 tmp.src.bo =
3057 sna_render_get_solid(sna,
3058 sna_rgba_for_color(color,
3059 dst->drawable.depth));
3060 tmp.src.filter = SAMPLER_FILTER_NEAREST;
3061 tmp.src.repeat = SAMPLER_EXTEND_REPEAT;
3062
3063 tmp.mask.bo = NULL;
3064 tmp.mask.filter = SAMPLER_FILTER_NEAREST;
3065 tmp.mask.repeat = SAMPLER_EXTEND_NONE;
3066
3067 tmp.is_affine = true;
3068 tmp.floats_per_vertex = 2;
3069 tmp.floats_per_rect = 6;
3070 tmp.has_component_alpha = 0;
3071 tmp.need_magic_ca_pass = false;
3072
3073 tmp.u.gen5.wm_kernel = WM_KERNEL;
3074 tmp.u.gen5.ve_id = 1;
3075
3076 if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
3077 kgem_submit(&sna->kgem);
3078 if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
3079 kgem_bo_destroy(&sna->kgem, tmp.src.bo);
3080 return false;
3081 }
3082 }
3083
3084 gen5_align_vertex(sna, &tmp);
3085 gen5_fill_bind_surfaces(sna, &tmp);
3086
3087 gen5_get_rectangles(sna, &tmp, 1, gen5_fill_bind_surfaces);
3088
3089 DBG((" (%d, %d), (%d, %d)\n", x1, y1, x2, y2));
3090 OUT_VERTEX(x2, y2);
3091 OUT_VERTEX_F(.5);
3092
3093 OUT_VERTEX(x1, y2);
3094 OUT_VERTEX_F(.5);
3095
3096 OUT_VERTEX(x1, y1);
3097 OUT_VERTEX_F(.5);
3098
3099 gen4_vertex_flush(sna);
3100 kgem_bo_destroy(&sna->kgem, tmp.src.bo);
3101
3102 return true;
3103 }
3104 static void
3105 gen5_render_context_switch(struct kgem *kgem,
3106 int new_mode)
3107 {
3108 if (!kgem->nbatch)
3109 return;
3110
3111 /* WaNonPipelinedStateCommandFlush
3112 *
3113 * Ironlake has a limitation that a 3D or Media command can't
3114 * be the first command after a BLT, unless it's
3115 * non-pipelined.
3116 *
3117 * We do this by ensuring that the non-pipelined drawrect
3118 * is always emitted first following a switch from BLT.
3119 */
3120 if (kgem->mode == KGEM_BLT) {
3121 struct sna *sna = to_sna_from_kgem(kgem);
3122 DBG(("%s: forcing drawrect on next state emission\n",
3123 __FUNCTION__));
3124 sna->render_state.gen5.drawrect_limit = -1;
3125 }
3126
3127 if (kgem_ring_is_idle(kgem, kgem->ring)) {
3128 DBG(("%s: GPU idle, flushing\n", __FUNCTION__));
3129 _kgem_submit(kgem);
3130 }
3131 }
3132
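/* Invalidate all cached hardware state so that the invariant and unit
 * state are re-emitted at the start of the next batch.
 */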
3133 static void gen5_render_reset(struct sna *sna)
3134 {
3135 sna->render_state.gen5.needs_invariant = true;
3136 sna->render_state.gen5.ve_id = -1;
3137 sna->render_state.gen5.last_primitive = -1;
3138 sna->render_state.gen5.last_pipelined_pointers = 0;
3139
3140 sna->render_state.gen5.drawrect_offset = -1;
3141 sna->render_state.gen5.drawrect_limit = -1;
3142 sna->render_state.gen5.surface_table = -1;
3143
3144 if (sna->render.vbo && !kgem_bo_can_map(&sna->kgem, sna->render.vbo)) {
3145 DBG(("%s: discarding unmappable vbo\n", __FUNCTION__));
3146 discard_vbo(sna);
3147 }
3148
3149 sna->render.vertex_offset = 0;
3150 sna->render.nvertex_reloc = 0;
3151 sna->render.vb_id = 0;
3152 }
3153
3154 static void gen5_render_fini(struct sna *sna)
3155 {
3156 kgem_bo_destroy(&sna->kgem, sna->render_state.gen5.general_bo);
3157 }
3158
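/* The fixed-function units below are mostly bypassed: vertices arrive
 * pre-transformed in screen space, so the VS is disabled and only the SF
 * and WM stages do real work.
 */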
3159 static uint32_t gen5_create_vs_unit_state(struct sna_static_stream *stream)
3160 {
3161 struct gen5_vs_unit_state *vs = sna_static_stream_map(stream, sizeof(*vs), 32);
3162
3163 /* Set up the vertex shader to be disabled (passthrough) */
3164 vs->thread4.nr_urb_entries = URB_VS_ENTRIES >> 2;
3165 vs->thread4.urb_entry_allocation_size = URB_VS_ENTRY_SIZE - 1;
3166 vs->vs6.vs_enable = 0;
3167 vs->vs6.vert_cache_disable = 1;
3168
3169 return sna_static_stream_offsetof(stream, vs);
3170 }
3171
3172 static uint32_t gen5_create_sf_state(struct sna_static_stream *stream,
3173 uint32_t kernel)
3174 {
3175 struct gen5_sf_unit_state *sf_state;
3176
3177 sf_state = sna_static_stream_map(stream, sizeof(*sf_state), 32);
3178
3179 sf_state->thread0.grf_reg_count = GEN5_GRF_BLOCKS(SF_KERNEL_NUM_GRF);
3180 sf_state->thread0.kernel_start_pointer = kernel >> 6;
3181
3182 sf_state->thread3.const_urb_entry_read_length = 0; /* no const URBs */
3183 sf_state->thread3.const_urb_entry_read_offset = 0; /* no const URBs */
3184 sf_state->thread3.urb_entry_read_length = 1; /* 1 URB per vertex */
3185 /* don't clobber the vertex header: start reading at dw8 */
3186 sf_state->thread3.urb_entry_read_offset = 1;
3187 sf_state->thread3.dispatch_grf_start_reg = 3;
3188 sf_state->thread4.max_threads = SF_MAX_THREADS - 1;
3189 sf_state->thread4.urb_entry_allocation_size = URB_SF_ENTRY_SIZE - 1;
3190 sf_state->thread4.nr_urb_entries = URB_SF_ENTRIES;
3191 sf_state->sf5.viewport_transform = false; /* skip viewport */
3192 sf_state->sf6.cull_mode = GEN5_CULLMODE_NONE;
3193 sf_state->sf6.scissor = 0;
3194 sf_state->sf7.trifan_pv = 2;
3195 sf_state->sf6.dest_org_vbias = 0x8;
3196 sf_state->sf6.dest_org_hbias = 0x8;
3197
3198 return sna_static_stream_offsetof(stream, sf_state);
3199 }
3200
3201 static uint32_t gen5_create_sampler_state(struct sna_static_stream *stream,
3202 sampler_filter_t src_filter,
3203 sampler_extend_t src_extend,
3204 sampler_filter_t mask_filter,
3205 sampler_extend_t mask_extend)
3206 {
3207 struct gen5_sampler_state *sampler_state;
3208
3209 sampler_state = sna_static_stream_map(stream,
3210 sizeof(struct gen5_sampler_state) * 2,
3211 32);
3212 sampler_state_init(&sampler_state[0], src_filter, src_extend);
3213 sampler_state_init(&sampler_state[1], mask_filter, mask_extend);
3214
3215 return sna_static_stream_offsetof(stream, sampler_state);
3216 }
3217
3218 static void gen5_init_wm_state(struct gen5_wm_unit_state *state,
3219 bool has_mask,
3220 uint32_t kernel,
3221 uint32_t sampler)
3222 {
3223 state->thread0.grf_reg_count = GEN5_GRF_BLOCKS(PS_KERNEL_NUM_GRF);
3224 state->thread0.kernel_start_pointer = kernel >> 6;
3225
3226 state->thread1.single_program_flow = 0;
3227
3228 /* scratch space is not used in our kernel */
3229 state->thread2.scratch_space_base_pointer = 0;
3230 state->thread2.per_thread_scratch_space = 0;
3231
3232 state->thread3.const_urb_entry_read_length = 0;
3233 state->thread3.const_urb_entry_read_offset = 0;
3234
3235 state->thread3.urb_entry_read_offset = 0;
3236 /* the wm kernel expects its inputs starting at GRF 3; see wm_program in the compiler module */
3237 state->thread3.dispatch_grf_start_reg = 3; /* must match kernel */
3238
3239 state->wm4.sampler_count = 0; /* hardware requirement */
3240
3241 state->wm4.sampler_state_pointer = sampler >> 5;
3242 state->wm5.max_threads = PS_MAX_THREADS - 1;
3243 state->wm5.transposed_urb_read = 0;
3244 state->wm5.thread_dispatch_enable = 1;
3245 /* just use 16-pixel dispatch (4 subspans), so there is no need to change
3246 * the kernel start point
3247 */
3248 state->wm5.enable_16_pix = 1;
3249 state->wm5.enable_8_pix = 0;
3250 state->wm5.early_depth_test = 1;
3251
3252 /* Each pair of attributes (src/mask coords) is two URB entries */
3253 if (has_mask) {
3254 state->thread1.binding_table_entry_count = 3; /* 2 tex and fb */
3255 state->thread3.urb_entry_read_length = 4;
3256 } else {
3257 state->thread1.binding_table_entry_count = 2; /* 1 tex and fb */
3258 state->thread3.urb_entry_read_length = 2;
3259 }
3260
3261 /* The binding table entry count is only used for prefetching,
3262 * and it has to be set to 0 for Ironlake.
3263 */
3264 state->thread1.binding_table_entry_count = 0;
3265 }
3266
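/* Build a grid of GEN5_BLENDFACTOR_COUNT x GEN5_BLENDFACTOR_COUNT
 * colour-calculator states, one 64-byte entry per (src, dst) blend-factor
 * pair, so a blend mode can later be selected by simple offset arithmetic
 * into this block.
 */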
3267 static uint32_t gen5_create_cc_unit_state(struct sna_static_stream *stream)
3268 {
3269 uint8_t *ptr, *base;
3270 int i, j;
3271
3272 base = ptr =
3273 sna_static_stream_map(stream,
3274 GEN5_BLENDFACTOR_COUNT*GEN5_BLENDFACTOR_COUNT*64,
3275 64);
3276
3277 for (i = 0; i < GEN5_BLENDFACTOR_COUNT; i++) {
3278 for (j = 0; j < GEN5_BLENDFACTOR_COUNT; j++) {
3279 struct gen5_cc_unit_state *state =
3280 (struct gen5_cc_unit_state *)ptr;
3281
3282 state->cc3.blend_enable =
3283 !(j == GEN5_BLENDFACTOR_ZERO && i == GEN5_BLENDFACTOR_ONE);
3284
3285 state->cc5.logicop_func = 0xc; /* COPY */
3286 state->cc5.ia_blend_function = GEN5_BLENDFUNCTION_ADD;
3287
3288 /* Fill in the alpha blend factors the same as the colour ones, for future use. */
3289 state->cc5.ia_src_blend_factor = i;
3290 state->cc5.ia_dest_blend_factor = j;
3291
3292 state->cc6.blend_function = GEN5_BLENDFUNCTION_ADD;
3293 state->cc6.clamp_post_alpha_blend = 1;
3294 state->cc6.clamp_pre_alpha_blend = 1;
3295 state->cc6.src_blend_factor = i;
3296 state->cc6.dest_blend_factor = j;
3297
3298 ptr += 64;
3299 }
3300 }
3301
3302 return sna_static_stream_offsetof(stream, base);
3303 }
3304
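/* Assemble all static state (kernels, unit descriptors, samplers and cc
 * states) into a single immutable bo that every batch can then reference
 * by offset.
 */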
3305 static bool gen5_render_setup(struct sna *sna)
3306 {
3307 struct gen5_render_state *state = &sna->render_state.gen5;
3308 struct sna_static_stream general;
3309 struct gen5_wm_unit_state_padded *wm_state;
3310 uint32_t sf[2], wm[KERNEL_COUNT];
3311 int i, j, k, l, m;
3312
3313 sna_static_stream_init(&general);
3314
3315 /* Zero pad the start. If you see an offset of 0x0 in the batchbuffer
3316 * dumps, you know it points to zero.
3317 */
3318 null_create(&general);
3319
3320 /* Set up the two SF states (one for blending with a mask, one without) */
3321 sf[0] = sna_static_stream_compile_sf(sna, &general, brw_sf_kernel__nomask);
3322 sf[1] = sna_static_stream_compile_sf(sna, &general, brw_sf_kernel__mask);
3323
3324 for (m = 0; m < KERNEL_COUNT; m++) {
3325 if (wm_kernels[m].size) {
3326 wm[m] = sna_static_stream_add(&general,
3327 wm_kernels[m].data,
3328 wm_kernels[m].size,
3329 64);
3330 } else {
3331 wm[m] = sna_static_stream_compile_wm(sna, &general,
3332 wm_kernels[m].data,
3333 16);
3334 }
3335 assert(wm[m]);
3336 }
3337
3338 state->vs = gen5_create_vs_unit_state(&general);
3339
3340 state->sf[0] = gen5_create_sf_state(&general, sf[0]);
3341 state->sf[1] = gen5_create_sf_state(&general, sf[1]);
3342 
3344 /* Set up the WM states: each filter/extend type for source and mask, per
3345 * kernel.
3346 */
3347 wm_state = sna_static_stream_map(&general,
3348 sizeof(*wm_state) * KERNEL_COUNT *
3349 FILTER_COUNT * EXTEND_COUNT *
3350 FILTER_COUNT * EXTEND_COUNT,
3351 64);
3352 state->wm = sna_static_stream_offsetof(&general, wm_state);
3353 for (i = 0; i < FILTER_COUNT; i++) {
3354 for (j = 0; j < EXTEND_COUNT; j++) {
3355 for (k = 0; k < FILTER_COUNT; k++) {
3356 for (l = 0; l < EXTEND_COUNT; l++) {
3357 uint32_t sampler_state;
3358
3359 sampler_state =
3360 gen5_create_sampler_state(&general,
3361 i, j,
3362 k, l);
3363
3364 for (m = 0; m < KERNEL_COUNT; m++) {
3365 gen5_init_wm_state(&wm_state->state,
3366 wm_kernels[m].has_mask,
3367 wm[m], sampler_state);
3368 wm_state++;
3369 }
3370 }
3371 }
3372 }
3373 }
3374
3375 state->cc = gen5_create_cc_unit_state(&general);
3376
3377 state->general_bo = sna_static_stream_fini(sna, &general);
3378 return state->general_bo != NULL;
3379 }
3380
3381 const char *gen5_render_init(struct sna *sna, const char *backend)
3382 {
3383 if (!gen5_render_setup(sna))
3384 return backend;
3385
3386 sna->kgem.context_switch = gen5_render_context_switch;
3387 sna->kgem.retire = gen4_render_retire;
3388 sna->kgem.expire = gen4_render_expire;
3389
3390 #if !NO_COMPOSITE
3391 sna->render.composite = gen5_render_composite;
3392 sna->render.prefer_gpu |= PREFER_GPU_RENDER;
3393 #endif
3394 #if !NO_COMPOSITE_SPANS
3395 sna->render.check_composite_spans = gen5_check_composite_spans;
3396 sna->render.composite_spans = gen5_render_composite_spans;
3397 if (intel_get_device_id(sna->dev) == 0x0044)
3398 sna->render.prefer_gpu |= PREFER_GPU_SPANS;
3399 #endif
3400 sna->render.video = gen5_render_video;
3401
3402 sna->render.copy_boxes = gen5_render_copy_boxes;
3403 sna->render.copy = gen5_render_copy;
3404
3405 sna->render.fill_boxes = gen5_render_fill_boxes;
3406 sna->render.fill = gen5_render_fill;
3407 sna->render.fill_one = gen5_render_fill_one;
3408
3409 sna->render.flush = gen4_render_flush;
3410 sna->render.reset = gen5_render_reset;
3411 sna->render.fini = gen5_render_fini;
3412
3413 sna->render.max_3d_size = MAX_3D_SIZE;
3414 sna->render.max_3d_pitch = 1 << 18;
3415 return "Ironlake (gen5)";
3416 }
3417