/*
 * Copyright © 2006,2008,2011 Intel Corporation
 * Copyright © 2007 Red Hat, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Wang Zhenyu <zhenyu.z.wang@sna.com>
 *    Eric Anholt <eric@anholt.net>
 *    Carl Worth <cworth@redhat.com>
 *    Keith Packard <keithp@keithp.com>
 *    Chris Wilson <chris@chris-wilson.co.uk>
 *
 */

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include "sna.h"
#include "sna_reg.h"
#include "sna_render.h"
#include "sna_render_inline.h"
#include "sna_video.h"

#include "brw/brw.h"
#include "gen4_common.h"
#include "gen4_render.h"
#include "gen4_source.h"
#include "gen4_vertex.h"

/* gen4 has a serious issue with its shaders that we need to flush
 * after every rectangle... So until that is resolved, prefer
 * the BLT engine.
 */
#define FORCE_SPANS 0
#define FORCE_NONRECTILINEAR_SPANS -1
#define FORCE_FLUSH 1 /* https://bugs.freedesktop.org/show_bug.cgi?id=55500 */

#define ALWAYS_FLUSH 1

#define NO_COMPOSITE 0
#define NO_COMPOSITE_SPANS 0
#define NO_COPY 0
#define NO_COPY_BOXES 0
#define NO_FILL 0
#define NO_FILL_ONE 0
#define NO_FILL_BOXES 0
#define NO_VIDEO 0

#define MAX_FLUSH_VERTICES 1 /* was 6, https://bugs.freedesktop.org/show_bug.cgi?id=55500 */

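/* Note: the fixed-function thread state appears to record a kernel's GRF
 * usage in 16-register blocks, encoded as (blocks - 1); GEN4_GRF_BLOCKS()
 * rounds nreg up to the next block and applies that encoding, e.g.
 * GEN4_GRF_BLOCKS(16) == 0 and GEN4_GRF_BLOCKS(17) == 1.
 */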
#define GEN4_GRF_BLOCKS(nreg) ((nreg + 15) / 16 - 1)

/* Set up a default static partitioning of the URB, which is supposed to
 * allow anything we would want to do, at potentially lower performance.
 */
#define URB_CS_ENTRY_SIZE 1
#define URB_CS_ENTRIES 0

#define URB_VS_ENTRY_SIZE 1
#define URB_VS_ENTRIES 32

#define URB_GS_ENTRY_SIZE 0
#define URB_GS_ENTRIES 0

#define URB_CL_ENTRY_SIZE 0
#define URB_CL_ENTRIES 0

#define URB_SF_ENTRY_SIZE 2
#define URB_SF_ENTRIES 64
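
/* With this partitioning, 32 VS entries of 1 row plus 64 SF entries of
 * 2 rows (and nothing for GS/CLIP/CS) consume 160 URB rows in total,
 * comfortably under the 256-row budget asserted in gen4_emit_urb().
 */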

/*
 * this program computes dA/dx and dA/dy for the texture coordinates along
 * with the base texture coordinate. It was extracted from the Mesa driver
 */

#define SF_KERNEL_NUM_GRF 16
#define PS_KERNEL_NUM_GRF 32

#define GEN4_MAX_SF_THREADS 24
#define GEN4_MAX_WM_THREADS 32
#define G4X_MAX_WM_THREADS 50

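/* Each .g4b include expands to the dwords of one precompiled gen4 shader
 * fragment (one 128-bit instruction per row), so concatenating the
 * includes composes a full pixel kernel: coordinate setup, sampling,
 * YUV->RGB conversion, and the final write.
 */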
static const uint32_t ps_kernel_packed_bt601_static[][4] = {
#include "exa_wm_xy.g4b"
#include "exa_wm_src_affine.g4b"
#include "exa_wm_src_sample_argb.g4b"
#include "exa_wm_yuv_rgb_bt601.g4b"
#include "exa_wm_write.g4b"
};

static const uint32_t ps_kernel_planar_bt601_static[][4] = {
#include "exa_wm_xy.g4b"
#include "exa_wm_src_affine.g4b"
#include "exa_wm_src_sample_planar.g4b"
#include "exa_wm_yuv_rgb_bt601.g4b"
#include "exa_wm_write.g4b"
};

static const uint32_t ps_kernel_nv12_bt601_static[][4] = {
#include "exa_wm_xy.g4b"
#include "exa_wm_src_affine.g4b"
#include "exa_wm_src_sample_nv12.g4b"
#include "exa_wm_yuv_rgb_bt601.g4b"
#include "exa_wm_write.g4b"
};

static const uint32_t ps_kernel_packed_bt709_static[][4] = {
#include "exa_wm_xy.g4b"
#include "exa_wm_src_affine.g4b"
#include "exa_wm_src_sample_argb.g4b"
#include "exa_wm_yuv_rgb_bt709.g4b"
#include "exa_wm_write.g4b"
};

static const uint32_t ps_kernel_planar_bt709_static[][4] = {
#include "exa_wm_xy.g4b"
#include "exa_wm_src_affine.g4b"
#include "exa_wm_src_sample_planar.g4b"
#include "exa_wm_yuv_rgb_bt709.g4b"
#include "exa_wm_write.g4b"
};

static const uint32_t ps_kernel_nv12_bt709_static[][4] = {
#include "exa_wm_xy.g4b"
#include "exa_wm_src_affine.g4b"
#include "exa_wm_src_sample_nv12.g4b"
#include "exa_wm_yuv_rgb_bt709.g4b"
#include "exa_wm_write.g4b"
};

#define NOKERNEL(kernel_enum, func, masked) \
	[kernel_enum] = {func, 0, masked}
#define KERNEL(kernel_enum, kernel, masked) \
	[kernel_enum] = {&kernel, sizeof(kernel), masked}
static const struct wm_kernel_info {
	const void *data;
	unsigned int size;
	bool has_mask;
} wm_kernels[] = {
	NOKERNEL(WM_KERNEL, brw_wm_kernel__affine, false),
	NOKERNEL(WM_KERNEL_P, brw_wm_kernel__projective, false),

	NOKERNEL(WM_KERNEL_MASK, brw_wm_kernel__affine_mask, true),
	NOKERNEL(WM_KERNEL_MASK_P, brw_wm_kernel__projective_mask, true),

	NOKERNEL(WM_KERNEL_MASKCA, brw_wm_kernel__affine_mask_ca, true),
	NOKERNEL(WM_KERNEL_MASKCA_P, brw_wm_kernel__projective_mask_ca, true),

	NOKERNEL(WM_KERNEL_MASKSA, brw_wm_kernel__affine_mask_sa, true),
	NOKERNEL(WM_KERNEL_MASKSA_P, brw_wm_kernel__projective_mask_sa, true),

	NOKERNEL(WM_KERNEL_OPACITY, brw_wm_kernel__affine_opacity, true),
	NOKERNEL(WM_KERNEL_OPACITY_P, brw_wm_kernel__projective_opacity, true),

	KERNEL(WM_KERNEL_VIDEO_PLANAR_BT601, ps_kernel_planar_bt601_static, false),
	KERNEL(WM_KERNEL_VIDEO_NV12_BT601, ps_kernel_nv12_bt601_static, false),
	KERNEL(WM_KERNEL_VIDEO_PACKED_BT601, ps_kernel_packed_bt601_static, false),

	KERNEL(WM_KERNEL_VIDEO_PLANAR_BT709, ps_kernel_planar_bt709_static, false),
	KERNEL(WM_KERNEL_VIDEO_NV12_BT709, ps_kernel_nv12_bt709_static, false),
	KERNEL(WM_KERNEL_VIDEO_PACKED_BT709, ps_kernel_packed_bt709_static, false),
};
#undef KERNEL

static const struct blendinfo {
	bool src_alpha;
	uint32_t src_blend;
	uint32_t dst_blend;
} gen4_blend_op[] = {
	/* Clear */	{0, GEN4_BLENDFACTOR_ZERO, GEN4_BLENDFACTOR_ZERO},
	/* Src */	{0, GEN4_BLENDFACTOR_ONE, GEN4_BLENDFACTOR_ZERO},
	/* Dst */	{0, GEN4_BLENDFACTOR_ZERO, GEN4_BLENDFACTOR_ONE},
	/* Over */	{1, GEN4_BLENDFACTOR_ONE, GEN4_BLENDFACTOR_INV_SRC_ALPHA},
	/* OverReverse */ {0, GEN4_BLENDFACTOR_INV_DST_ALPHA, GEN4_BLENDFACTOR_ONE},
	/* In */	{0, GEN4_BLENDFACTOR_DST_ALPHA, GEN4_BLENDFACTOR_ZERO},
	/* InReverse */	{1, GEN4_BLENDFACTOR_ZERO, GEN4_BLENDFACTOR_SRC_ALPHA},
	/* Out */	{0, GEN4_BLENDFACTOR_INV_DST_ALPHA, GEN4_BLENDFACTOR_ZERO},
	/* OutReverse */ {1, GEN4_BLENDFACTOR_ZERO, GEN4_BLENDFACTOR_INV_SRC_ALPHA},
	/* Atop */	{1, GEN4_BLENDFACTOR_DST_ALPHA, GEN4_BLENDFACTOR_INV_SRC_ALPHA},
	/* AtopReverse */ {1, GEN4_BLENDFACTOR_INV_DST_ALPHA, GEN4_BLENDFACTOR_SRC_ALPHA},
	/* Xor */	{1, GEN4_BLENDFACTOR_INV_DST_ALPHA, GEN4_BLENDFACTOR_INV_SRC_ALPHA},
	/* Add */	{0, GEN4_BLENDFACTOR_ONE, GEN4_BLENDFACTOR_ONE},
};

/**
 * Highest-valued BLENDFACTOR used in gen4_blend_op.
 *
 * This leaves out GEN4_BLENDFACTOR_INV_DST_COLOR,
 * GEN4_BLENDFACTOR_INV_CONST_{COLOR,ALPHA},
 * GEN4_BLENDFACTOR_INV_SRC1_{COLOR,ALPHA}
 */
#define GEN4_BLENDFACTOR_COUNT (GEN4_BLENDFACTOR_INV_DST_ALPHA + 1)

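/* The two offset macros below index tables of precomputed state:
 * BLEND_OFFSET() selects the colour-calculator (CC) state for a
 * (src, dst) blend-factor pair, and SAMPLER_OFFSET() selects the
 * WM/sampler state for the (filter, extend) pair of each channel plus
 * the kernel. Every entry is 64 bytes, so e.g. incrementing the kernel
 * index advances the offset by 64.
 */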
#define BLEND_OFFSET(s, d) \
	(((s) * GEN4_BLENDFACTOR_COUNT + (d)) * 64)

#define SAMPLER_OFFSET(sf, se, mf, me, k) \
	((((((sf) * EXTEND_COUNT + (se)) * FILTER_COUNT + (mf)) * EXTEND_COUNT + (me)) * KERNEL_COUNT + (k)) * 64)

static void
gen4_emit_pipelined_pointers(struct sna *sna,
			     const struct sna_composite_op *op,
			     int blend, int kernel);

#define OUT_BATCH(v) batch_emit(sna, v)
#define OUT_VERTEX(x,y) vertex_emit_2s(sna, x,y)
#define OUT_VERTEX_F(v) vertex_emit(sna, v)

#define GEN4_MAX_3D_SIZE 8192

static inline bool too_large(int width, int height)
{
	return width > GEN4_MAX_3D_SIZE || height > GEN4_MAX_3D_SIZE;
}

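/* The kernel enums are laid out in affine/projective pairs, with the
 * projective variant immediately following its affine counterpart in
 * wm_kernels[], so "base + !is_affine" below picks the right one.
 */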
static int
gen4_choose_composite_kernel(int op, bool has_mask, bool is_ca, bool is_affine)
{
	int base;

	if (has_mask) {
		if (is_ca) {
			if (gen4_blend_op[op].src_alpha)
				base = WM_KERNEL_MASKSA;
			else
				base = WM_KERNEL_MASKCA;
		} else
			base = WM_KERNEL_MASK;
	} else
		base = WM_KERNEL;

	return base + !is_affine;
}

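/* Component-alpha PictOpOver cannot be expressed as a single blend:
 * the main pass renders with an OutReverse-style blend, and this fixup
 * replays exactly the same vertex range with PictOpAdd to accumulate
 * the source contribution (hence need_magic_ca_pass).
 */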
static bool gen4_magic_ca_pass(struct sna *sna,
			       const struct sna_composite_op *op)
{
	struct gen4_render_state *state = &sna->render_state.gen4;

	if (!op->need_magic_ca_pass)
		return false;

	assert(sna->render.vertex_index > sna->render.vertex_start);

	DBG(("%s: CA fixup\n", __FUNCTION__));
	assert(op->mask.bo != NULL);
	assert(op->has_component_alpha);

	gen4_emit_pipelined_pointers(sna, op, PictOpAdd,
				     gen4_choose_composite_kernel(PictOpAdd,
								  true, true, op->is_affine));

	OUT_BATCH(GEN4_3DPRIMITIVE |
		  GEN4_3DPRIMITIVE_VERTEX_SEQUENTIAL |
		  (_3DPRIM_RECTLIST << GEN4_3DPRIMITIVE_TOPOLOGY_SHIFT) |
		  (0 << 9) |
		  4);
	OUT_BATCH(sna->render.vertex_index - sna->render.vertex_start);
	OUT_BATCH(sna->render.vertex_start);
	OUT_BATCH(1);	/* single instance */
	OUT_BATCH(0);	/* start instance location */
	OUT_BATCH(0);	/* index buffer offset, ignored */

	state->last_primitive = sna->kgem.nbatch;
	return true;
}

static uint32_t gen4_get_blend(int op,
			       bool has_component_alpha,
			       uint32_t dst_format)
{
	uint32_t src, dst;

	src = gen4_blend_op[op].src_blend;
	dst = gen4_blend_op[op].dst_blend;

	/* If there's no dst alpha channel, adjust the blend op so that we'll treat
	 * it as always 1.
	 */
	if (PICT_FORMAT_A(dst_format) == 0) {
		if (src == GEN4_BLENDFACTOR_DST_ALPHA)
			src = GEN4_BLENDFACTOR_ONE;
		else if (src == GEN4_BLENDFACTOR_INV_DST_ALPHA)
			src = GEN4_BLENDFACTOR_ZERO;
	}

	/* If the source alpha is being used, then we should only be in a
	 * case where the source blend factor is 0, and the source blend
	 * value is the mask channels multiplied by the source picture's alpha.
	 */
	if (has_component_alpha && gen4_blend_op[op].src_alpha) {
		if (dst == GEN4_BLENDFACTOR_SRC_ALPHA)
			dst = GEN4_BLENDFACTOR_SRC_COLOR;
		else if (dst == GEN4_BLENDFACTOR_INV_SRC_ALPHA)
			dst = GEN4_BLENDFACTOR_INV_SRC_COLOR;
	}

	DBG(("blend op=%d, dst=%x [A=%d] => src=%d, dst=%d => offset=%x\n",
	     op, dst_format, PICT_FORMAT_A(dst_format),
	     src, dst, BLEND_OFFSET(src, dst)));
	return BLEND_OFFSET(src, dst);
}

static uint32_t gen4_get_card_format(PictFormat format)
{
	switch (format) {
	default:
		return -1;
	case PICT_a8r8g8b8:
		return GEN4_SURFACEFORMAT_B8G8R8A8_UNORM;
	case PICT_x8r8g8b8:
		return GEN4_SURFACEFORMAT_B8G8R8X8_UNORM;
	case PICT_a8b8g8r8:
		return GEN4_SURFACEFORMAT_R8G8B8A8_UNORM;
	case PICT_x8b8g8r8:
		return GEN4_SURFACEFORMAT_R8G8B8X8_UNORM;
#if XORG_VERSION_CURRENT >= XORG_VERSION_NUMERIC(1,6,99,900,0)
	case PICT_a2r10g10b10:
		return GEN4_SURFACEFORMAT_B10G10R10A2_UNORM;
	case PICT_x2r10g10b10:
		return GEN4_SURFACEFORMAT_B10G10R10X2_UNORM;
#endif
	case PICT_r8g8b8:
		return GEN4_SURFACEFORMAT_R8G8B8_UNORM;
	case PICT_r5g6b5:
		return GEN4_SURFACEFORMAT_B5G6R5_UNORM;
	case PICT_a1r5g5b5:
		return GEN4_SURFACEFORMAT_B5G5R5A1_UNORM;
	case PICT_a8:
		return GEN4_SURFACEFORMAT_A8_UNORM;
	case PICT_a4r4g4b4:
		return GEN4_SURFACEFORMAT_B4G4R4A4_UNORM;
	}
}

static uint32_t gen4_get_dest_format(PictFormat format)
{
	switch (format) {
	default:
		return -1;
	case PICT_a8r8g8b8:
	case PICT_x8r8g8b8:
		return GEN4_SURFACEFORMAT_B8G8R8A8_UNORM;
	case PICT_a8b8g8r8:
	case PICT_x8b8g8r8:
		return GEN4_SURFACEFORMAT_R8G8B8A8_UNORM;
#if XORG_VERSION_CURRENT >= XORG_VERSION_NUMERIC(1,6,99,900,0)
	case PICT_a2r10g10b10:
	case PICT_x2r10g10b10:
		return GEN4_SURFACEFORMAT_B10G10R10A2_UNORM;
#endif
	case PICT_r5g6b5:
		return GEN4_SURFACEFORMAT_B5G6R5_UNORM;
	case PICT_x1r5g5b5:
	case PICT_a1r5g5b5:
		return GEN4_SURFACEFORMAT_B5G5R5A1_UNORM;
	case PICT_a8:
		return GEN4_SURFACEFORMAT_A8_UNORM;
	case PICT_a4r4g4b4:
	case PICT_x4r4g4b4:
		return GEN4_SURFACEFORMAT_B4G4R4A4_UNORM;
	}
}

static bool gen4_check_dst_format(PictFormat format)
{
	if (gen4_get_dest_format(format) != -1)
		return true;

	DBG(("%s: unhandled format: %x\n", __FUNCTION__, (int)format));
	return false;
}

static bool gen4_check_format(uint32_t format)
{
	if (gen4_get_card_format(format) != -1)
		return true;

	DBG(("%s: unhandled format: %x\n", __FUNCTION__, (int)format));
	return false;
}

typedef struct gen4_surface_state_padded {
	struct gen4_surface_state state;
	char pad[32 - sizeof(struct gen4_surface_state)];
} gen4_surface_state_padded;

static void null_create(struct sna_static_stream *stream)
{
	/* A bunch of zeros useful for legacy border color and depth-stencil */
	sna_static_stream_map(stream, 64, 64);
}

static void
sampler_state_init(struct gen4_sampler_state *sampler_state,
		   sampler_filter_t filter,
		   sampler_extend_t extend)
{
	sampler_state->ss0.lod_preclamp = 1;	/* GL mode */

	/* We use the legacy mode to get the semantics specified by
	 * the Render extension. */
	sampler_state->ss0.border_color_mode = GEN4_BORDER_COLOR_MODE_LEGACY;

	switch (filter) {
	default:
	case SAMPLER_FILTER_NEAREST:
		sampler_state->ss0.min_filter = GEN4_MAPFILTER_NEAREST;
		sampler_state->ss0.mag_filter = GEN4_MAPFILTER_NEAREST;
		break;
	case SAMPLER_FILTER_BILINEAR:
		sampler_state->ss0.min_filter = GEN4_MAPFILTER_LINEAR;
		sampler_state->ss0.mag_filter = GEN4_MAPFILTER_LINEAR;
		break;
	}

	switch (extend) {
	default:
	case SAMPLER_EXTEND_NONE:
		sampler_state->ss1.r_wrap_mode = GEN4_TEXCOORDMODE_CLAMP_BORDER;
		sampler_state->ss1.s_wrap_mode = GEN4_TEXCOORDMODE_CLAMP_BORDER;
		sampler_state->ss1.t_wrap_mode = GEN4_TEXCOORDMODE_CLAMP_BORDER;
		break;
	case SAMPLER_EXTEND_REPEAT:
		sampler_state->ss1.r_wrap_mode = GEN4_TEXCOORDMODE_WRAP;
		sampler_state->ss1.s_wrap_mode = GEN4_TEXCOORDMODE_WRAP;
		sampler_state->ss1.t_wrap_mode = GEN4_TEXCOORDMODE_WRAP;
		break;
	case SAMPLER_EXTEND_PAD:
		sampler_state->ss1.r_wrap_mode = GEN4_TEXCOORDMODE_CLAMP;
		sampler_state->ss1.s_wrap_mode = GEN4_TEXCOORDMODE_CLAMP;
		sampler_state->ss1.t_wrap_mode = GEN4_TEXCOORDMODE_CLAMP;
		break;
	case SAMPLER_EXTEND_REFLECT:
		sampler_state->ss1.r_wrap_mode = GEN4_TEXCOORDMODE_MIRROR;
		sampler_state->ss1.s_wrap_mode = GEN4_TEXCOORDMODE_MIRROR;
		sampler_state->ss1.t_wrap_mode = GEN4_TEXCOORDMODE_MIRROR;
		break;
	}
}

static uint32_t gen4_filter(uint32_t filter)
{
	switch (filter) {
	default:
		assert(0);
	case PictFilterNearest:
		return SAMPLER_FILTER_NEAREST;
	case PictFilterBilinear:
		return SAMPLER_FILTER_BILINEAR;
	}
}

static uint32_t gen4_check_filter(PicturePtr picture)
{
	switch (picture->filter) {
	case PictFilterNearest:
	case PictFilterBilinear:
		return true;
	default:
		DBG(("%s: unknown filter: %s [%d]\n",
		     __FUNCTION__,
		     PictureGetFilterName(picture->filter),
		     picture->filter));
		return false;
	}
}

static uint32_t gen4_repeat(uint32_t repeat)
{
	switch (repeat) {
	default:
		assert(0);
	case RepeatNone:
		return SAMPLER_EXTEND_NONE;
	case RepeatNormal:
		return SAMPLER_EXTEND_REPEAT;
	case RepeatPad:
		return SAMPLER_EXTEND_PAD;
	case RepeatReflect:
		return SAMPLER_EXTEND_REFLECT;
	}
}

static bool gen4_check_repeat(PicturePtr picture)
{
	if (!picture->repeat)
		return true;

	switch (picture->repeatType) {
	case RepeatNone:
	case RepeatNormal:
	case RepeatPad:
	case RepeatReflect:
		return true;
	default:
		DBG(("%s: unknown repeat: %d\n",
		     __FUNCTION__, picture->repeatType));
		return false;
	}
}

static uint32_t
gen4_tiling_bits(uint32_t tiling)
{
	switch (tiling) {
	default: assert(0);
	case I915_TILING_NONE: return 0;
	case I915_TILING_X: return GEN4_SURFACE_TILED;
	case I915_TILING_Y: return GEN4_SURFACE_TILED | GEN4_SURFACE_TILED_Y;
	}
}

/**
 * Sets up the common fields of a surface state buffer for the given
 * picture.
 */
static uint32_t
gen4_bind_bo(struct sna *sna,
	     struct kgem_bo *bo,
	     uint32_t width,
	     uint32_t height,
	     uint32_t format,
	     bool is_dst)
{
	uint32_t domains;
	uint16_t offset;
	uint32_t *ss;

	assert(sna->kgem.gen != 040 || !kgem_bo_is_snoop(bo));

	/* After the first bind, we manage the cache domains within the batch */
	offset = kgem_bo_get_binding(bo, format | is_dst << 31);
	if (offset) {
		assert(offset >= sna->kgem.surface);
		if (is_dst)
			kgem_bo_mark_dirty(bo);
		return offset * sizeof(uint32_t);
	}

	offset = sna->kgem.surface -=
		sizeof(struct gen4_surface_state_padded) / sizeof(uint32_t);
	ss = sna->kgem.batch + offset;

	ss[0] = (GEN4_SURFACE_2D << GEN4_SURFACE_TYPE_SHIFT |
		 GEN4_SURFACE_BLEND_ENABLED |
		 format << GEN4_SURFACE_FORMAT_SHIFT);

	if (is_dst) {
		ss[0] |= GEN4_SURFACE_RC_READ_WRITE;
		domains = I915_GEM_DOMAIN_RENDER << 16 | I915_GEM_DOMAIN_RENDER;
	} else
		domains = I915_GEM_DOMAIN_SAMPLER << 16;
	ss[1] = kgem_add_reloc(&sna->kgem, offset + 1, bo, domains, 0);

	ss[2] = ((width - 1) << GEN4_SURFACE_WIDTH_SHIFT |
		 (height - 1) << GEN4_SURFACE_HEIGHT_SHIFT);
	ss[3] = (gen4_tiling_bits(bo->tiling) |
		 (bo->pitch - 1) << GEN4_SURFACE_PITCH_SHIFT);
	ss[4] = 0;
	ss[5] = 0;

	kgem_bo_set_binding(bo, format | is_dst << 31, offset);

	DBG(("[%x] bind bo(handle=%d, addr=%d), format=%d, width=%d, height=%d, pitch=%d, tiling=%d -> %s\n",
	     offset, bo->handle, ss[1],
	     format, width, height, bo->pitch, bo->tiling,
	     domains & 0xffff ? "render" : "sampler"));

	return offset * sizeof(uint32_t);
}

static void gen4_emit_vertex_buffer(struct sna *sna,
				    const struct sna_composite_op *op)
{
	int id = op->u.gen4.ve_id;

	assert((sna->render.vb_id & (1 << id)) == 0);

	OUT_BATCH(GEN4_3DSTATE_VERTEX_BUFFERS | 3);
	OUT_BATCH((id << VB0_BUFFER_INDEX_SHIFT) | VB0_VERTEXDATA |
		  (4*op->floats_per_vertex << VB0_BUFFER_PITCH_SHIFT));
	assert(sna->render.nvertex_reloc < ARRAY_SIZE(sna->render.vertex_reloc));
	sna->render.vertex_reloc[sna->render.nvertex_reloc++] = sna->kgem.nbatch;
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);

	sna->render.vb_id |= 1 << id;
}

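/* Pipeline flush helpers. Gen4 suffers shader corruption unless the
 * pipeline is broken up between rectangles (see FORCE_FLUSH and
 * https://bugs.freedesktop.org/show_bug.cgi?id=55500), so these are
 * emitted far more aggressively here than on later generations.
 */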
inline static void
gen4_emit_pipe_flush(struct sna *sna)
{
#if 1
	OUT_BATCH(GEN4_PIPE_CONTROL |
		  GEN4_PIPE_CONTROL_WC_FLUSH |
		  (4 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
#else
	OUT_BATCH(MI_FLUSH | MI_INHIBIT_RENDER_CACHE_FLUSH);
#endif
}

inline static void
gen4_emit_pipe_break(struct sna *sna)
{
#if !ALWAYS_FLUSH
	OUT_BATCH(GEN4_PIPE_CONTROL | (4 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
#else
	OUT_BATCH(MI_FLUSH | MI_INHIBIT_RENDER_CACHE_FLUSH);
#endif
}

inline static void
gen4_emit_pipe_invalidate(struct sna *sna)
{
#if 0
	OUT_BATCH(GEN4_PIPE_CONTROL |
		  GEN4_PIPE_CONTROL_WC_FLUSH |
		  (sna->kgem.gen >= 045 ? GEN4_PIPE_CONTROL_TC_FLUSH : 0) |
		  (4 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
#else
	OUT_BATCH(MI_FLUSH);
#endif
}

static void gen4_emit_primitive(struct sna *sna)
{
	if (sna->kgem.nbatch == sna->render_state.gen4.last_primitive) {
		sna->render.vertex_offset = sna->kgem.nbatch - 5;
		return;
	}

	OUT_BATCH(GEN4_3DPRIMITIVE |
		  GEN4_3DPRIMITIVE_VERTEX_SEQUENTIAL |
		  (_3DPRIM_RECTLIST << GEN4_3DPRIMITIVE_TOPOLOGY_SHIFT) |
		  (0 << 9) |
		  4);
	sna->render.vertex_offset = sna->kgem.nbatch;
	OUT_BATCH(0);	/* vertex count, to be filled in later */
	OUT_BATCH(sna->render.vertex_index);
	OUT_BATCH(1);	/* single instance */
	OUT_BATCH(0);	/* start instance location */
	OUT_BATCH(0);	/* index buffer offset, ignored */
	sna->render.vertex_start = sna->render.vertex_index;

	sna->render_state.gen4.last_primitive = sna->kgem.nbatch;
}

static bool gen4_rectangle_begin(struct sna *sna,
				 const struct sna_composite_op *op)
{
	unsigned int id = 1 << op->u.gen4.ve_id;
	int ndwords;

	if (sna_vertex_wait__locked(&sna->render) && sna->render.vertex_offset)
		return true;

	/* 7x pipelined pointers + 6x primitive + 1x flush */
	ndwords = op->need_magic_ca_pass ? 19 : 6;
	if ((sna->render.vb_id & id) == 0)
		ndwords += 5;
	ndwords += 8*FORCE_FLUSH;

	if (!kgem_check_batch(&sna->kgem, ndwords))
		return false;

	if ((sna->render.vb_id & id) == 0)
		gen4_emit_vertex_buffer(sna, op);
	if (sna->render.vertex_offset == 0)
		gen4_emit_primitive(sna);

	return true;
}

static int gen4_get_rectangles__flush(struct sna *sna,
				      const struct sna_composite_op *op)
{
	/* Prevent discarding the new vbo after lock contention */
	if (sna_vertex_wait__locked(&sna->render)) {
		int rem = vertex_space(sna);
		if (rem > op->floats_per_rect)
			return rem;
	}

	if (!kgem_check_batch(&sna->kgem,
			      8*FORCE_FLUSH + (op->need_magic_ca_pass ? 2*19+6 : 6)))
		return 0;
	if (!kgem_check_reloc_and_exec(&sna->kgem, 2))
		return 0;

	if (sna->render.vertex_offset) {
		gen4_vertex_flush(sna);
		if (gen4_magic_ca_pass(sna, op))
			gen4_emit_pipelined_pointers(sna, op, op->op,
						     op->u.gen4.wm_kernel);
	}

	return gen4_vertex_finish(sna);
}

inline static int gen4_get_rectangles(struct sna *sna,
				      const struct sna_composite_op *op,
				      int want,
				      void (*emit_state)(struct sna *sna, const struct sna_composite_op *op))
{
	int rem;

	assert(want);
#if FORCE_FLUSH
	rem = sna->render.vertex_offset;
	if (sna->kgem.nbatch == sna->render_state.gen4.last_primitive)
		rem = sna->kgem.nbatch - 5;
	if (rem) {
		rem = MAX_FLUSH_VERTICES - (sna->render.vertex_index - sna->render.vertex_start) / 3;
		if (rem <= 0) {
			if (sna->render.vertex_offset) {
				gen4_vertex_flush(sna);
				if (gen4_magic_ca_pass(sna, op)) {
					if (kgem_check_batch(&sna->kgem, 19+6))
						gen4_emit_pipelined_pointers(sna, op, op->op,
									     op->u.gen4.wm_kernel);
				}
			}
			gen4_emit_pipe_break(sna);
			rem = MAX_FLUSH_VERTICES;
		}
	} else
		rem = MAX_FLUSH_VERTICES;
	if (want > rem)
		want = rem;
#endif

start:
	rem = vertex_space(sna);
	if (unlikely(rem < op->floats_per_rect)) {
		DBG(("flushing vbo for %s: %d < %d\n",
		     __FUNCTION__, rem, op->floats_per_rect));
		rem = gen4_get_rectangles__flush(sna, op);
		if (unlikely(rem == 0))
			goto flush;
	}

	if (unlikely(sna->render.vertex_offset == 0)) {
		if (!gen4_rectangle_begin(sna, op))
			goto flush;
		else
			goto start;
	}

	assert(rem <= vertex_space(sna));
	assert(op->floats_per_rect <= rem);
	if (want > 1 && want * op->floats_per_rect > rem)
		want = rem / op->floats_per_rect;

	sna->render.vertex_index += 3*want;
	return want;

flush:
	if (sna->render.vertex_offset) {
		gen4_vertex_flush(sna);
		gen4_magic_ca_pass(sna, op);
	}
	sna_vertex_wait__locked(&sna->render);
	_kgem_submit(&sna->kgem);
	emit_state(sna, op);
	goto start;
}

static uint32_t *
gen4_composite_get_binding_table(struct sna *sna, uint16_t *offset)
{
	sna->kgem.surface -=
		sizeof(struct gen4_surface_state_padded) / sizeof(uint32_t);

	DBG(("%s(%x)\n", __FUNCTION__, 4*sna->kgem.surface));

	/* Clear all surplus entries to zero in case of prefetch */
	*offset = sna->kgem.surface;
	return memset(sna->kgem.batch + sna->kgem.surface,
		      0, sizeof(struct gen4_surface_state_padded));
}

static void
gen4_emit_urb(struct sna *sna)
{
	int urb_vs_end;
	int urb_gs_end;
	int urb_cl_end;
	int urb_sf_end;
	int urb_cs_end;

	if (!sna->render_state.gen4.needs_urb)
		return;

	urb_vs_end = URB_VS_ENTRIES * URB_VS_ENTRY_SIZE;
	urb_gs_end = urb_vs_end + URB_GS_ENTRIES * URB_GS_ENTRY_SIZE;
	urb_cl_end = urb_gs_end + URB_CL_ENTRIES * URB_CL_ENTRY_SIZE;
	urb_sf_end = urb_cl_end + URB_SF_ENTRIES * URB_SF_ENTRY_SIZE;
	urb_cs_end = urb_sf_end + URB_CS_ENTRIES * URB_CS_ENTRY_SIZE;
	assert(urb_cs_end <= 256);

	while ((sna->kgem.nbatch & 15) > 12)
		OUT_BATCH(MI_NOOP);

	OUT_BATCH(GEN4_URB_FENCE |
		  UF0_CS_REALLOC |
		  UF0_SF_REALLOC |
		  UF0_CLIP_REALLOC |
		  UF0_GS_REALLOC |
		  UF0_VS_REALLOC |
		  1);
	OUT_BATCH(urb_cl_end << UF1_CLIP_FENCE_SHIFT |
		  urb_gs_end << UF1_GS_FENCE_SHIFT |
		  urb_vs_end << UF1_VS_FENCE_SHIFT);
	OUT_BATCH(urb_cs_end << UF2_CS_FENCE_SHIFT |
		  urb_sf_end << UF2_SF_FENCE_SHIFT);

	/* Constant buffer state */
	OUT_BATCH(GEN4_CS_URB_STATE | 0);
	OUT_BATCH((URB_CS_ENTRY_SIZE - 1) << 4 | URB_CS_ENTRIES << 0);

	sna->render_state.gen4.needs_urb = false;
}

static void
gen4_emit_state_base_address(struct sna *sna)
{
	assert(sna->render_state.gen4.general_bo->proxy == NULL);
	OUT_BATCH(GEN4_STATE_BASE_ADDRESS | 4);
	OUT_BATCH(kgem_add_reloc(&sna->kgem, /* general */
				 sna->kgem.nbatch,
				 sna->render_state.gen4.general_bo,
				 I915_GEM_DOMAIN_INSTRUCTION << 16,
				 BASE_ADDRESS_MODIFY));
	OUT_BATCH(kgem_add_reloc(&sna->kgem, /* surface */
				 sna->kgem.nbatch,
				 NULL,
				 I915_GEM_DOMAIN_INSTRUCTION << 16,
				 BASE_ADDRESS_MODIFY));
	OUT_BATCH(0); /* media */

	/* upper bounds, all disabled */
	OUT_BATCH(BASE_ADDRESS_MODIFY);
	OUT_BATCH(0);
}

static void
gen4_emit_invariant(struct sna *sna)
{
	assert(sna->kgem.surface == sna->kgem.batch_size);

	if (sna->kgem.gen >= 045)
		OUT_BATCH(NEW_PIPELINE_SELECT | PIPELINE_SELECT_3D);
	else
		OUT_BATCH(GEN4_PIPELINE_SELECT | PIPELINE_SELECT_3D);

	OUT_BATCH(GEN4_CONSTANT_BUFFER);
	OUT_BATCH(0);

	gen4_emit_state_base_address(sna);

	sna->render_state.gen4.needs_invariant = false;
}

static void
gen4_get_batch(struct sna *sna, const struct sna_composite_op *op)
{
	kgem_set_mode(&sna->kgem, KGEM_RENDER, op->dst.bo);

	if (!kgem_check_batch_with_surfaces(&sna->kgem, 150 + 50*FORCE_FLUSH, 4)) {
		DBG(("%s: flushing batch: %d < %d+%d\n",
		     __FUNCTION__, sna->kgem.surface - sna->kgem.nbatch,
		     150, 4*8));
		kgem_submit(&sna->kgem);
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	if (sna->render_state.gen4.needs_invariant)
		gen4_emit_invariant(sna);
}

static void
gen4_align_vertex(struct sna *sna, const struct sna_composite_op *op)
{
	assert(op->floats_per_rect == 3*op->floats_per_vertex);
	if (op->floats_per_vertex != sna->render_state.gen4.floats_per_vertex) {
		DBG(("aligning vertex: was %d, now %d floats per vertex\n",
		     sna->render_state.gen4.floats_per_vertex,
		     op->floats_per_vertex));
		gen4_vertex_align(sna, op);
		sna->render_state.gen4.floats_per_vertex = op->floats_per_vertex;
	}
}

static void
gen4_emit_binding_table(struct sna *sna, uint16_t offset)
{
	if (sna->render_state.gen4.surface_table == offset)
		return;

	sna->render_state.gen4.surface_table = offset;

	/* Binding table pointers */
	OUT_BATCH(GEN4_3DSTATE_BINDING_TABLE_POINTERS | 4);
	OUT_BATCH(0);	/* vs */
	OUT_BATCH(0);	/* gs */
	OUT_BATCH(0);	/* clip */
	OUT_BATCH(0);	/* sf */
	/* Only the PS uses the binding table */
	OUT_BATCH(offset*4);
}

static void
gen4_emit_pipelined_pointers(struct sna *sna,
			     const struct sna_composite_op *op,
			     int blend, int kernel)
{
	uint16_t sp, bp;
	uint32_t key;

	DBG(("%s: has_mask=%d, src=(%d, %d), mask=(%d, %d), kernel=%d, blend=%d, ca=%d, format=%x\n",
	     __FUNCTION__, op->u.gen4.ve_id & 2,
	     op->src.filter, op->src.repeat,
	     op->mask.filter, op->mask.repeat,
	     kernel, blend, op->has_component_alpha, (int)op->dst.format));

	sp = SAMPLER_OFFSET(op->src.filter, op->src.repeat,
			    op->mask.filter, op->mask.repeat,
			    kernel);
	bp = gen4_get_blend(blend, op->has_component_alpha, op->dst.format);

	DBG(("%s: sp=%d, bp=%d\n", __FUNCTION__, sp, bp));
	key = sp | (uint32_t)bp << 16;
	if (key == sna->render_state.gen4.last_pipelined_pointers)
		return;

	OUT_BATCH(GEN4_3DSTATE_PIPELINED_POINTERS | 5);
	OUT_BATCH(sna->render_state.gen4.vs);
	OUT_BATCH(GEN4_GS_DISABLE);	/* passthrough */
	OUT_BATCH(GEN4_CLIP_DISABLE);	/* passthrough */
	OUT_BATCH(sna->render_state.gen4.sf);
	OUT_BATCH(sna->render_state.gen4.wm + sp);
	OUT_BATCH(sna->render_state.gen4.cc + bp);

	sna->render_state.gen4.last_pipelined_pointers = key;
	gen4_emit_urb(sna);
}

static bool
gen4_emit_drawing_rectangle(struct sna *sna, const struct sna_composite_op *op)
{
	uint32_t limit = (op->dst.height - 1) << 16 | (op->dst.width - 1);
	uint32_t offset = (uint16_t)op->dst.y << 16 | (uint16_t)op->dst.x;

	assert(!too_large(abs(op->dst.x), abs(op->dst.y)));
	assert(!too_large(op->dst.width, op->dst.height));

	if (sna->render_state.gen4.drawrect_limit == limit &&
	    sna->render_state.gen4.drawrect_offset == offset)
		return true;

	sna->render_state.gen4.drawrect_offset = offset;
	sna->render_state.gen4.drawrect_limit = limit;

	OUT_BATCH(GEN4_3DSTATE_DRAWING_RECTANGLE | (4 - 2));
	OUT_BATCH(0);
	OUT_BATCH(limit);
	OUT_BATCH(offset);
	return false;
}

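/* ve_id encodes the vertex layout: bits 0-1 hold the number of floats
 * in the first (source) texture coordinate channel, with 0 meaning
 * packed s16 coordinates, and bits 2+ hold the float count of the
 * optional second (mask) channel (0 when there is no mask).
 */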
static void
gen4_emit_vertex_elements(struct sna *sna,
			  const struct sna_composite_op *op)
{
	/*
	 * vertex data in vertex buffer
	 *    position: (x, y)
	 *    texture coordinate 0: (u0, v0) if (is_affine is true) else (u0, v0, w0)
	 *    texture coordinate 1 if (has_mask is true): same as above
	 */
	struct gen4_render_state *render = &sna->render_state.gen4;
	uint32_t src_format, dw;
	int id = op->u.gen4.ve_id;

	if (render->ve_id == id)
		return;
	render->ve_id = id;

	/* The VUE layout
	 *    dword 0-3: position (x, y, 1.0, 1.0),
	 *    dword 4-7: texture coordinate 0 (u0, v0, w0, 1.0)
	 *    [optional] dword 8-11: texture coordinate 1 (u1, v1, w1, 1.0)
	 */
	OUT_BATCH(GEN4_3DSTATE_VERTEX_ELEMENTS | (2 * (1 + 2) - 1));

	/* x,y */
	OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
		  GEN4_SURFACEFORMAT_R16G16_SSCALED << VE0_FORMAT_SHIFT |
		  0 << VE0_OFFSET_SHIFT);
	OUT_BATCH(VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT |
		  VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT |
		  VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT |
		  VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT |
		  (1*4) << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT);

	/* u0, v0, w0 */
	DBG(("%s: first channel %d floats, offset=4b\n", __FUNCTION__, id & 3));
	dw = VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT;
	switch (id & 3) {
	default:
		assert(0);
	case 0:
		src_format = GEN4_SURFACEFORMAT_R16G16_SSCALED;
		dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
		dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
		dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT;
		break;
	case 1:
		src_format = GEN4_SURFACEFORMAT_R32_FLOAT;
		dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
		dw |= VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT;
		dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT;
		break;
	case 2:
		src_format = GEN4_SURFACEFORMAT_R32G32_FLOAT;
		dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
		dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
		dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT;
		break;
	case 3:
		src_format = GEN4_SURFACEFORMAT_R32G32B32_FLOAT;
		dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
		dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
		dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_2_SHIFT;
		break;
	}
	OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
		  src_format << VE0_FORMAT_SHIFT |
		  4 << VE0_OFFSET_SHIFT);
	OUT_BATCH(dw | 8 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT);

	/* u1, v1, w1 */
	if (id >> 2) {
		unsigned src_offset = 4 + ((id & 3) ?: 1) * sizeof(float);
		DBG(("%s: second channel %d floats, offset=%db\n", __FUNCTION__,
		     id >> 2, src_offset));
		dw = VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT;
		switch (id >> 2) {
		case 1:
			src_format = GEN4_SURFACEFORMAT_R32_FLOAT;
			dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
			dw |= VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT;
			dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT;
			break;
		default:
			assert(0);
		case 2:
			src_format = GEN4_SURFACEFORMAT_R32G32_FLOAT;
			dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
			dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
			dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT;
			break;
		case 3:
			src_format = GEN4_SURFACEFORMAT_R32G32B32_FLOAT;
			dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
			dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
			dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_2_SHIFT;
			break;
		}
		OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
			  src_format << VE0_FORMAT_SHIFT |
			  src_offset << VE0_OFFSET_SHIFT);
		OUT_BATCH(dw | 12 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT);
	} else {
		OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
			  GEN4_SURFACEFORMAT_R16G16_SSCALED << VE0_FORMAT_SHIFT |
			  0 << VE0_OFFSET_SHIFT);
		OUT_BATCH(VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_0_SHIFT |
			  VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT |
			  VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT |
			  VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT |
			  12 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT);
	}
}

static void
gen4_emit_state(struct sna *sna,
		const struct sna_composite_op *op,
		uint16_t wm_binding_table)
{
	bool flush;

	assert(op->dst.bo->exec);

	flush = wm_binding_table & 1;
	wm_binding_table &= ~1;

	if (ALWAYS_FLUSH || kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) {
		DBG(("%s: flushing dirty (%d, %d), forced? %d\n", __FUNCTION__,
		     kgem_bo_is_dirty(op->src.bo),
		     kgem_bo_is_dirty(op->mask.bo),
		     flush));
		gen4_emit_pipe_invalidate(sna);
		kgem_clear_dirty(&sna->kgem);
		kgem_bo_mark_dirty(op->dst.bo);
		flush = false;
	}
	flush &= gen4_emit_drawing_rectangle(sna, op);
	if (flush && op->op > PictOpSrc)
		gen4_emit_pipe_flush(sna);

	gen4_emit_binding_table(sna, wm_binding_table);
	gen4_emit_pipelined_pointers(sna, op, op->op, op->u.gen4.wm_kernel);
	gen4_emit_vertex_elements(sna, op);
}

static void
gen4_bind_surfaces(struct sna *sna,
		   const struct sna_composite_op *op)
{
	uint32_t *binding_table;
	uint16_t offset, dirty;

	gen4_get_batch(sna, op);
	dirty = kgem_bo_is_dirty(op->dst.bo);

	binding_table = gen4_composite_get_binding_table(sna, &offset);

	binding_table[0] =
		gen4_bind_bo(sna,
			     op->dst.bo, op->dst.width, op->dst.height,
			     gen4_get_dest_format(op->dst.format),
			     true);
	binding_table[1] =
		gen4_bind_bo(sna,
			     op->src.bo, op->src.width, op->src.height,
			     op->src.card_format,
			     false);
	if (op->mask.bo) {
		assert(op->u.gen4.ve_id >> 2);
		binding_table[2] =
			gen4_bind_bo(sna,
				     op->mask.bo,
				     op->mask.width,
				     op->mask.height,
				     op->mask.card_format,
				     false);
	}

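	/* If the new binding table matches the one already in use,
	 * discard the freshly written copy and reuse the previous
	 * surface table, saving surface-state space in the batch.
	 */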
	if (sna->kgem.surface == offset &&
	    *(uint64_t *)(sna->kgem.batch + sna->render_state.gen4.surface_table) == *(uint64_t*)binding_table &&
	    (op->mask.bo == NULL ||
	     sna->kgem.batch[sna->render_state.gen4.surface_table+2] == binding_table[2])) {
		sna->kgem.surface += sizeof(struct gen4_surface_state_padded) / sizeof(uint32_t);
		offset = sna->render_state.gen4.surface_table;
	}

	if (!ALWAYS_FLUSH && sna->kgem.batch[sna->render_state.gen4.surface_table] == binding_table[0])
		dirty = 0;

	gen4_emit_state(sna, op, offset | dirty);
}

fastcall static void
gen4_render_composite_blt(struct sna *sna,
			  const struct sna_composite_op *op,
			  const struct sna_composite_rectangles *r)
{
	DBG(("%s: src=(%d, %d)+(%d, %d), mask=(%d, %d)+(%d, %d), dst=(%d, %d)+(%d, %d), size=(%d, %d)\n",
	     __FUNCTION__,
	     r->src.x, r->src.y, op->src.offset[0], op->src.offset[1],
	     r->mask.x, r->mask.y, op->mask.offset[0], op->mask.offset[1],
	     r->dst.x, r->dst.y, op->dst.x, op->dst.y,
	     r->width, r->height));

	gen4_get_rectangles(sna, op, 1, gen4_bind_surfaces);
	op->prim_emit(sna, op, r);
}

fastcall static void
gen4_render_composite_box(struct sna *sna,
			  const struct sna_composite_op *op,
			  const BoxRec *box)
{
	struct sna_composite_rectangles r;

	DBG(("  %s: (%d, %d), (%d, %d)\n",
	     __FUNCTION__,
	     box->x1, box->y1, box->x2, box->y2));

	gen4_get_rectangles(sna, op, 1, gen4_bind_surfaces);

	r.dst.x = box->x1;
	r.dst.y = box->y1;
	r.width = box->x2 - box->x1;
	r.height = box->y2 - box->y1;
	r.mask = r.src = r.dst;

	op->prim_emit(sna, op, &r);
}

static void
gen4_render_composite_boxes__blt(struct sna *sna,
				 const struct sna_composite_op *op,
				 const BoxRec *box, int nbox)
{
	DBG(("%s(%d) delta=(%d, %d), src=(%d, %d)/(%d, %d), mask=(%d, %d)/(%d, %d)\n",
	     __FUNCTION__, nbox, op->dst.x, op->dst.y,
	     op->src.offset[0], op->src.offset[1],
	     op->src.width, op->src.height,
	     op->mask.offset[0], op->mask.offset[1],
	     op->mask.width, op->mask.height));

	do {
		int nbox_this_time;

		nbox_this_time = gen4_get_rectangles(sna, op, nbox,
						     gen4_bind_surfaces);
		nbox -= nbox_this_time;

		do {
			struct sna_composite_rectangles r;

			DBG(("  %s: (%d, %d), (%d, %d)\n",
			     __FUNCTION__,
			     box->x1, box->y1, box->x2, box->y2));

			r.dst.x = box->x1;
			r.dst.y = box->y1;
			r.width = box->x2 - box->x1;
			r.height = box->y2 - box->y1;
			r.mask = r.src = r.dst;
			op->prim_emit(sna, op, &r);
			box++;
		} while (--nbox_this_time);
	} while (nbox);
}

static void
gen4_render_composite_boxes(struct sna *sna,
			    const struct sna_composite_op *op,
			    const BoxRec *box, int nbox)
{
	DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));

	do {
		int nbox_this_time;
		float *v;

		nbox_this_time = gen4_get_rectangles(sna, op, nbox,
						     gen4_bind_surfaces);
		assert(nbox_this_time);
		nbox -= nbox_this_time;

		v = sna->render.vertices + sna->render.vertex_used;
		sna->render.vertex_used += nbox_this_time * op->floats_per_rect;

		op->emit_boxes(op, box, nbox_this_time, v);
		box += nbox_this_time;
	} while (nbox);
}

#if !FORCE_FLUSH
static void
gen4_render_composite_boxes__thread(struct sna *sna,
				    const struct sna_composite_op *op,
				    const BoxRec *box, int nbox)
{
	DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));

	sna_vertex_lock(&sna->render);
	do {
		int nbox_this_time;
		float *v;

		nbox_this_time = gen4_get_rectangles(sna, op, nbox,
						     gen4_bind_surfaces);
		assert(nbox_this_time);
		nbox -= nbox_this_time;

		v = sna->render.vertices + sna->render.vertex_used;
		sna->render.vertex_used += nbox_this_time * op->floats_per_rect;

		sna_vertex_acquire__locked(&sna->render);
		sna_vertex_unlock(&sna->render);

		op->emit_boxes(op, box, nbox_this_time, v);
		box += nbox_this_time;

		sna_vertex_lock(&sna->render);
		sna_vertex_release__locked(&sna->render);
	} while (nbox);
	sna_vertex_unlock(&sna->render);
}
#endif

#ifndef MAX
#define MAX(a,b) ((a) > (b) ? (a) : (b))
#endif

static uint32_t gen4_bind_video_source(struct sna *sna,
				       struct kgem_bo *src_bo,
				       uint32_t src_offset,
				       int src_width,
				       int src_height,
				       int src_pitch,
				       uint32_t src_surf_format)
{
	struct gen4_surface_state *ss;

	sna->kgem.surface -= sizeof(struct gen4_surface_state_padded) / sizeof(uint32_t);

	ss = memset(sna->kgem.batch + sna->kgem.surface, 0, sizeof(*ss));
	ss->ss0.surface_type = GEN4_SURFACE_2D;
	ss->ss0.surface_format = src_surf_format;
	ss->ss0.color_blend = 1;

	ss->ss1.base_addr =
		kgem_add_reloc(&sna->kgem,
			       sna->kgem.surface + 1,
			       src_bo,
			       I915_GEM_DOMAIN_SAMPLER << 16,
			       src_offset);

	ss->ss2.width = src_width - 1;
	ss->ss2.height = src_height - 1;
	ss->ss3.pitch = src_pitch - 1;

	return sna->kgem.surface * sizeof(uint32_t);
}

static void gen4_video_bind_surfaces(struct sna *sna,
				     const struct sna_composite_op *op)
{
	struct sna_video_frame *frame = op->priv;
	uint32_t src_surf_format[6];
	uint32_t src_surf_base[6];
	int src_width[6];
	int src_height[6];
	int src_pitch[6];
	uint32_t *binding_table;
	uint16_t offset, dirty;
	int n_src, n;

	src_surf_base[0] = 0;
	src_surf_base[1] = 0;
	src_surf_base[2] = frame->VBufOffset;
	src_surf_base[3] = frame->VBufOffset;
	src_surf_base[4] = frame->UBufOffset;
	src_surf_base[5] = frame->UBufOffset;

	if (is_planar_fourcc(frame->id)) {
		for (n = 0; n < 2; n++) {
			src_surf_format[n] = GEN4_SURFACEFORMAT_R8_UNORM;
			src_width[n] = frame->width;
			src_height[n] = frame->height;
			src_pitch[n] = frame->pitch[1];
		}
		for (; n < 6; n++) {
			if (is_nv12_fourcc(frame->id))
				src_surf_format[n] = GEN4_SURFACEFORMAT_R8G8_UNORM;
			else
				src_surf_format[n] = GEN4_SURFACEFORMAT_R8_UNORM;
			src_width[n] = frame->width / 2;
			src_height[n] = frame->height / 2;
			src_pitch[n] = frame->pitch[0];
		}
		n_src = 6;
	} else {
		if (frame->id == FOURCC_UYVY)
			src_surf_format[0] = GEN4_SURFACEFORMAT_YCRCB_SWAPY;
		else
			src_surf_format[0] = GEN4_SURFACEFORMAT_YCRCB_NORMAL;

		src_width[0] = frame->width;
		src_height[0] = frame->height;
		src_pitch[0] = frame->pitch[0];
		n_src = 1;
	}

	gen4_get_batch(sna, op);
	dirty = kgem_bo_is_dirty(op->dst.bo);

	binding_table = gen4_composite_get_binding_table(sna, &offset);
	binding_table[0] =
		gen4_bind_bo(sna,
			     op->dst.bo, op->dst.width, op->dst.height,
			     gen4_get_dest_format(op->dst.format),
			     true);
	for (n = 0; n < n_src; n++) {
		binding_table[1+n] =
			gen4_bind_video_source(sna,
					       frame->bo,
					       src_surf_base[n],
					       src_width[n],
					       src_height[n],
					       src_pitch[n],
					       src_surf_format[n]);
	}

	if (!ALWAYS_FLUSH && sna->kgem.batch[sna->render_state.gen4.surface_table] == binding_table[0])
		dirty = 0;

	gen4_emit_state(sna, op, offset | dirty);
}

static unsigned select_video_kernel(const struct sna_video *video,
				    const struct sna_video_frame *frame)
{
	switch (frame->id) {
	case FOURCC_YV12:
	case FOURCC_I420:
	case FOURCC_XVMC:
		return video->colorspace ?
			WM_KERNEL_VIDEO_PLANAR_BT709 :
			WM_KERNEL_VIDEO_PLANAR_BT601;

	case FOURCC_NV12:
		return video->colorspace ?
			WM_KERNEL_VIDEO_NV12_BT709 :
			WM_KERNEL_VIDEO_NV12_BT601;

	default:
		return video->colorspace ?
			WM_KERNEL_VIDEO_PACKED_BT709 :
			WM_KERNEL_VIDEO_PACKED_BT601;
	}
}

static bool
gen4_render_video(struct sna *sna,
		  struct sna_video *video,
		  struct sna_video_frame *frame,
		  RegionPtr dstRegion,
		  PixmapPtr pixmap)
{
	struct sna_composite_op tmp;
	struct sna_pixmap *priv = sna_pixmap(pixmap);
	int dst_width = dstRegion->extents.x2 - dstRegion->extents.x1;
	int dst_height = dstRegion->extents.y2 - dstRegion->extents.y1;
	int src_width = frame->src.x2 - frame->src.x1;
	int src_height = frame->src.y2 - frame->src.y1;
	float src_offset_x, src_offset_y;
	float src_scale_x, src_scale_y;
	const BoxRec *box;
	int nbox;

	DBG(("%s: %dx%d -> %dx%d\n", __FUNCTION__,
	     src_width, src_height, dst_width, dst_height));

	assert(priv->gpu_bo);
	memset(&tmp, 0, sizeof(tmp));

	tmp.op = PictOpSrc;
	tmp.dst.pixmap = pixmap;
	tmp.dst.width = pixmap->drawable.width;
	tmp.dst.height = pixmap->drawable.height;
	tmp.dst.format = sna_format_for_depth(pixmap->drawable.depth);
	tmp.dst.bo = priv->gpu_bo;

	if (src_width == dst_width && src_height == dst_height)
		tmp.src.filter = SAMPLER_FILTER_NEAREST;
	else
		tmp.src.filter = SAMPLER_FILTER_BILINEAR;
	tmp.src.repeat = SAMPLER_EXTEND_PAD;
	tmp.src.bo = frame->bo;
	tmp.mask.bo = NULL;
	tmp.u.gen4.wm_kernel = select_video_kernel(video, frame);
	tmp.u.gen4.ve_id = 2;
	tmp.is_affine = true;
	tmp.floats_per_vertex = 3;
	tmp.floats_per_rect = 9;
	tmp.priv = frame;

	if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL))
			return false;
	}

	gen4_align_vertex(sna, &tmp);
	gen4_video_bind_surfaces(sna, &tmp);

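	/* Map destination pixels to normalised source texels: scale by
	 * the src:dst ratio divided by the frame size (the sampler
	 * expects coordinates in [0, 1]) and bias so that the region
	 * origin samples from frame->src.x1/y1.
	 */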
	src_scale_x = (float)src_width / dst_width / frame->width;
	src_offset_x = (float)frame->src.x1 / frame->width - dstRegion->extents.x1 * src_scale_x;

	src_scale_y = (float)src_height / dst_height / frame->height;
	src_offset_y = (float)frame->src.y1 / frame->height - dstRegion->extents.y1 * src_scale_y;

	box = region_rects(dstRegion);
	nbox = region_num_rects(dstRegion);
	do {
		int n;

		n = gen4_get_rectangles(sna, &tmp, nbox,
					gen4_video_bind_surfaces);
		assert(n);
		nbox -= n;

		do {
			OUT_VERTEX(box->x2, box->y2);
			OUT_VERTEX_F(box->x2 * src_scale_x + src_offset_x);
			OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y);

			OUT_VERTEX(box->x1, box->y2);
			OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x);
			OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y);

			OUT_VERTEX(box->x1, box->y1);
			OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x);
			OUT_VERTEX_F(box->y1 * src_scale_y + src_offset_y);

			box++;
		} while (--n);
	} while (nbox);
	gen4_vertex_flush(sna);

	if (!DAMAGE_IS_ALL(priv->gpu_damage))
		sna_damage_add(&priv->gpu_damage, dstRegion);

	return true;
}

static int
gen4_composite_picture(struct sna *sna,
		       PicturePtr picture,
		       struct sna_composite_channel *channel,
		       int x, int y,
		       int w, int h,
		       int dst_x, int dst_y,
		       bool precise)
{
	PixmapPtr pixmap;
	uint32_t color;
	int16_t dx, dy;

	DBG(("%s: (%d, %d)x(%d, %d), dst=(%d, %d)\n",
	     __FUNCTION__, x, y, w, h, dst_x, dst_y));

	channel->is_solid = false;
	channel->card_format = -1;

	if (sna_picture_is_solid(picture, &color))
		return gen4_channel_init_solid(sna, channel, color);

	if (picture->pDrawable == NULL) {
		int ret;

		if (picture->pSourcePict->type == SourcePictTypeLinear)
			return gen4_channel_init_linear(sna, picture, channel,
							x, y,
							w, h,
							dst_x, dst_y);

		DBG(("%s -- fixup, gradient\n", __FUNCTION__));
		ret = -1;
		if (!precise)
			ret = sna_render_picture_approximate_gradient(sna, picture, channel,
								      x, y, w, h, dst_x, dst_y);
		if (ret == -1)
			ret = sna_render_picture_fixup(sna, picture, channel,
						       x, y, w, h, dst_x, dst_y);
		return ret;
	}

	if (picture->alphaMap) {
		DBG(("%s -- fallback, alphamap\n", __FUNCTION__));
		return sna_render_picture_fixup(sna, picture, channel,
						x, y, w, h, dst_x, dst_y);
	}

	if (!gen4_check_repeat(picture)) {
		DBG(("%s: unknown repeat mode fixup\n", __FUNCTION__));
		return sna_render_picture_fixup(sna, picture, channel,
						x, y, w, h, dst_x, dst_y);
	}

	if (!gen4_check_filter(picture)) {
		DBG(("%s: unhandled filter fixup\n", __FUNCTION__));
		return sna_render_picture_fixup(sna, picture, channel,
						x, y, w, h, dst_x, dst_y);
	}

	channel->repeat = picture->repeat ? picture->repeatType : RepeatNone;
	channel->filter = picture->filter;

	pixmap = get_drawable_pixmap(picture->pDrawable);
	get_drawable_deltas(picture->pDrawable, pixmap, &dx, &dy);

	x += dx + picture->pDrawable->x;
	y += dy + picture->pDrawable->y;

	channel->is_affine = sna_transform_is_affine(picture->transform);
	if (sna_transform_is_imprecise_integer_translation(picture->transform, picture->filter, precise, &dx, &dy)) {
		DBG(("%s: integer translation (%d, %d), removing\n",
		     __FUNCTION__, dx, dy));
		x += dx;
		y += dy;
		channel->transform = NULL;
		channel->filter = PictFilterNearest;

		if (channel->repeat &&
		    (x >= 0 &&
		     y >= 0 &&
		     x + w <= pixmap->drawable.width &&
		     y + h <= pixmap->drawable.height)) {
			struct sna_pixmap *priv = sna_pixmap(pixmap);
			if (priv && priv->clear) {
				DBG(("%s: converting large pixmap source into solid [%08x]\n", __FUNCTION__, priv->clear_color));
				return gen4_channel_init_solid(sna, channel,
							       solid_color(picture->format,
									   priv->clear_color));
			}
		}
	} else
		channel->transform = picture->transform;

	channel->pict_format = picture->format;
	channel->card_format = gen4_get_card_format(picture->format);
	if (channel->card_format == -1)
		return sna_render_picture_convert(sna, picture, channel, pixmap,
						  x, y, w, h, dst_x, dst_y,
						  false);

	if (too_large(pixmap->drawable.width, pixmap->drawable.height))
		return sna_render_picture_extract(sna, picture, channel,
						  x, y, w, h, dst_x, dst_y);

	return sna_render_pixmap_bo(sna, channel, pixmap,
				    x, y, w, h, dst_x, dst_y);
}

static void gen4_composite_channel_convert(struct sna_composite_channel *channel)
{
	DBG(("%s: repeat %d -> %d, filter %d -> %d\n",
	     __FUNCTION__,
	     channel->repeat, gen4_repeat(channel->repeat),
	     channel->filter, gen4_filter(channel->filter)));
	channel->repeat = gen4_repeat(channel->repeat);
	channel->filter = gen4_filter(channel->filter);
	if (channel->card_format == (unsigned)-1)
		channel->card_format = gen4_get_card_format(channel->pict_format);
}

static void
gen4_render_composite_done(struct sna *sna,
			   const struct sna_composite_op *op)
{
	DBG(("%s()\n", __FUNCTION__));

	if (sna->render.vertex_offset) {
		gen4_vertex_flush(sna);
		gen4_magic_ca_pass(sna, op);
	}

	if (op->mask.bo)
		kgem_bo_destroy(&sna->kgem, op->mask.bo);
	if (op->src.bo)
		kgem_bo_destroy(&sna->kgem, op->src.bo);

	sna_render_composite_redirect_done(sna, op);
}
1694
1695 static bool
gen4_composite_set_target(struct sna * sna,struct sna_composite_op * op,PicturePtr dst,int x,int y,int w,int h,bool partial)1696 gen4_composite_set_target(struct sna *sna,
1697 struct sna_composite_op *op,
1698 PicturePtr dst,
1699 int x, int y, int w, int h,
1700 bool partial)
1701 {
1702 BoxRec box;
1703 unsigned hint;
1704
1705 op->dst.pixmap = get_drawable_pixmap(dst->pDrawable);
1706 op->dst.width = op->dst.pixmap->drawable.width;
1707 op->dst.height = op->dst.pixmap->drawable.height;
1708 op->dst.format = dst->format;
1709 if (w && h) {
1710 box.x1 = x;
1711 box.y1 = y;
1712 box.x2 = x + w;
1713 box.y2 = y + h;
1714 } else
1715 sna_render_picture_extents(dst, &box);
1716
1717 hint = PREFER_GPU | RENDER_GPU;
1718 if (!need_tiling(sna, op->dst.width, op->dst.height))
1719 hint |= FORCE_GPU;
1720 if (!partial) {
1721 hint |= IGNORE_DAMAGE;
1722 if (w == op->dst.width && h == op->dst.height)
1723 hint |= REPLACES;
1724 }
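/* When the composite covers the whole drawable and prior damage is
 * ignored, REPLACES lets us discard any pending writes to the old
 * contents; see the kgem_bo_pair_undo() call below.
 */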
1725
1726 op->dst.bo = sna_drawable_use_bo(dst->pDrawable, hint, &box, &op->damage);
1727 if (op->dst.bo == NULL)
1728 return false;
1729
1730 if (hint & REPLACES) {
1731 struct sna_pixmap *priv = sna_pixmap(op->dst.pixmap);
1732 kgem_bo_pair_undo(&sna->kgem, priv->gpu_bo, priv->cpu_bo);
1733 }
1734
1735 get_drawable_deltas(dst->pDrawable, op->dst.pixmap,
1736 &op->dst.x, &op->dst.y);
1737
1738 DBG(("%s: pixmap=%ld, format=%08x, size=%dx%d, pitch=%d, delta=(%d,%d),damage=%p\n",
1739 __FUNCTION__,
1740 op->dst.pixmap->drawable.serialNumber, (int)op->dst.format,
1741 op->dst.width, op->dst.height,
1742 op->dst.bo->pitch,
1743 op->dst.x, op->dst.y,
1744 op->damage ? *op->damage : (void *)-1));
1745
1746 assert(op->dst.bo->proxy == NULL);
1747
1748 if (too_large(op->dst.width, op->dst.height) &&
1749 !sna_render_composite_redirect(sna, op, x, y, w, h, partial))
1750 return false;
1751
1752 return true;
1753 }
1754
1755 static bool
1756 check_gradient(PicturePtr picture, bool precise)
1757 {
1758 switch (picture->pSourcePict->type) {
1759 case SourcePictTypeSolidFill:
1760 case SourcePictTypeLinear:
1761 return false;
1762 default:
1763 return precise;
1764 }
1765 }
1766
1767 static bool
1768 has_alphamap(PicturePtr p)
1769 {
1770 return p->alphaMap != NULL;
1771 }
1772
1773 static bool
1774 need_upload(struct sna *sna, PicturePtr p)
1775 {
1776 return p->pDrawable && untransformed(p) &&
1777 !is_gpu(sna, p->pDrawable, PREFER_GPU_RENDER);
1778 }
1779
1780 static bool
1781 source_is_busy(PixmapPtr pixmap)
1782 {
1783 struct sna_pixmap *priv = sna_pixmap(pixmap);
1784 if (priv == NULL)
1785 return false;
1786
1787 if (priv->clear)
1788 return false;
1789
1790 if (priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo))
1791 return true;
1792
1793 if (priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo))
1794 return true;
1795
1796 return priv->gpu_damage && !priv->cpu_damage;
1797 }
1798
1799 static bool
1800 source_fallback(struct sna *sna, PicturePtr p, PixmapPtr pixmap, bool precise)
1801 {
1802 if (sna_picture_is_solid(p, NULL))
1803 return false;
1804
1805 if (p->pSourcePict)
1806 return check_gradient(p, precise);
1807
1808 if (!gen4_check_repeat(p) || !gen4_check_format(p->format))
1809 return true;
1810
1811 /* soft errors: prefer to upload/compute rather than read back */
1812 if (pixmap && source_is_busy(pixmap))
1813 return false;
1814
1815 return has_alphamap(p) || !gen4_check_filter(p) || need_upload(sna, p);
1816 }
1817
1818 static bool
1819 gen4_composite_fallback(struct sna *sna,
1820 PicturePtr src,
1821 PicturePtr mask,
1822 PicturePtr dst)
1823 {
1824 PixmapPtr src_pixmap;
1825 PixmapPtr mask_pixmap;
1826 PixmapPtr dst_pixmap;
1827 bool src_fallback, mask_fallback;
1828
1829 if (!gen4_check_dst_format(dst->format)) {
1830 DBG(("%s: unknown destination format: %d\n",
1831 __FUNCTION__, dst->format));
1832 return true;
1833 }
1834
1835 dst_pixmap = get_drawable_pixmap(dst->pDrawable);
1836
1837 src_pixmap = src->pDrawable ? get_drawable_pixmap(src->pDrawable) : NULL;
1838 src_fallback = source_fallback(sna, src, src_pixmap,
1839 dst->polyMode == PolyModePrecise);
1840
1841 if (mask) {
1842 mask_pixmap = mask->pDrawable ? get_drawable_pixmap(mask->pDrawable) : NULL;
1843 mask_fallback = source_fallback(sna, mask, mask_pixmap,
1844 dst->polyMode == PolyModePrecise);
1845 } else {
1846 mask_pixmap = NULL;
1847 mask_fallback = false;
1848 }
1849
1850 /* If we are using the destination as a source and would need to
1851 * read it back in order to upload the source, do it all
1852 * on the CPU.
1853 */
1854 if (src_pixmap == dst_pixmap && src_fallback) {
1855 DBG(("%s: src is dst and will fallback\n",__FUNCTION__));
1856 return true;
1857 }
1858 if (mask_pixmap == dst_pixmap && mask_fallback) {
1859 DBG(("%s: mask is dst and will fallback\n",__FUNCTION__));
1860 return true;
1861 }
1862
1863 /* If anything is on the GPU, push everything out to the GPU */
1864 if (dst_use_gpu(dst_pixmap)) {
1865 DBG(("%s: dst is already on the GPU, try to use GPU\n",
1866 __FUNCTION__));
1867 return false;
1868 }
1869
1870 if (src_pixmap && !src_fallback) {
1871 DBG(("%s: src is already on the GPU, try to use GPU\n",
1872 __FUNCTION__));
1873 return false;
1874 }
1875 if (mask_pixmap && !mask_fallback) {
1876 DBG(("%s: mask is already on the GPU, try to use GPU\n",
1877 __FUNCTION__));
1878 return false;
1879 }
1880
1881 /* However if the dst is not on the GPU and we need to
1882 * render one of the sources using the CPU, we may
1883 * as well do the entire operation in place on the CPU.
1884 */
1885 if (src_fallback) {
1886 DBG(("%s: dst is on the CPU and src will fallback\n",
1887 __FUNCTION__));
1888 return true;
1889 }
1890
1891 if (mask_fallback) {
1892 DBG(("%s: dst is on the CPU and mask will fallback\n",
1893 __FUNCTION__));
1894 return true;
1895 }
1896
1897 if (too_large(dst_pixmap->drawable.width,
1898 dst_pixmap->drawable.height) &&
1899 dst_is_cpu(dst_pixmap)) {
1900 DBG(("%s: dst is on the CPU and too large\n", __FUNCTION__));
1901 return true;
1902 }
1903
1904 DBG(("%s: dst is not on the GPU and the operation should not fallback\n",
1905 __FUNCTION__));
1906 return dst_use_cpu(dst_pixmap);
1907 }
1908
1909 static int
1910 reuse_source(struct sna *sna,
1911 PicturePtr src, struct sna_composite_channel *sc, int src_x, int src_y,
1912 PicturePtr mask, struct sna_composite_channel *mc, int msk_x, int msk_y)
1913 {
1914 uint32_t color;
1915
1916 if (src_x != msk_x || src_y != msk_y)
1917 return false;
1918
1919 if (src == mask) {
1920 DBG(("%s: mask is source\n", __FUNCTION__));
1921 *mc = *sc;
1922 mc->bo = kgem_bo_reference(mc->bo);
1923 return true;
1924 }
1925
1926 if (sna_picture_is_solid(mask, &color))
1927 return gen4_channel_init_solid(sna, mc, color);
1928
1929 if (sc->is_solid)
1930 return false;
1931
1932 if (src->pDrawable == NULL || mask->pDrawable != src->pDrawable)
1933 return false;
1934
1935 DBG(("%s: mask reuses source drawable\n", __FUNCTION__));
1936
1937 if (!sna_transform_equal(src->transform, mask->transform))
1938 return false;
1939
1940 if (!sna_picture_alphamap_equal(src, mask))
1941 return false;
1942
1943 if (!gen4_check_repeat(mask))
1944 return false;
1945
1946 if (!gen4_check_filter(mask))
1947 return false;
1948
1949 if (!gen4_check_format(mask->format))
1950 return false;
1951
1952 DBG(("%s: reusing source channel for mask with a twist\n",
1953 __FUNCTION__));
1954
1955 *mc = *sc;
1956 mc->repeat = gen4_repeat(mask->repeat ? mask->repeatType : RepeatNone);
1957 mc->filter = gen4_filter(mask->filter);
1958 mc->pict_format = mask->format;
1959 mc->card_format = gen4_get_card_format(mask->format);
1960 mc->bo = kgem_bo_reference(mc->bo);
1961 return true;
1962 }
1963
1964 static bool
1965 gen4_render_composite(struct sna *sna,
1966 uint8_t op,
1967 PicturePtr src,
1968 PicturePtr mask,
1969 PicturePtr dst,
1970 int16_t src_x, int16_t src_y,
1971 int16_t msk_x, int16_t msk_y,
1972 int16_t dst_x, int16_t dst_y,
1973 int16_t width, int16_t height,
1974 unsigned flags,
1975 struct sna_composite_op *tmp)
1976 {
1977 DBG(("%s: %dx%d, current mode=%d\n", __FUNCTION__,
1978 width, height, sna->kgem.mode));
1979
1980 if (op >= ARRAY_SIZE(gen4_blend_op))
1981 return false;
1982
1983 if (mask == NULL &&
1984 sna_blt_composite(sna, op,
1985 src, dst,
1986 src_x, src_y,
1987 dst_x, dst_y,
1988 width, height,
1989 flags, tmp))
1990 return true;
1991
1992 if (gen4_composite_fallback(sna, src, mask, dst))
1993 goto fallback;
1994
1995 if (need_tiling(sna, width, height))
1996 return sna_tiling_composite(op, src, mask, dst,
1997 src_x, src_y,
1998 msk_x, msk_y,
1999 dst_x, dst_y,
2000 width, height,
2001 tmp);
2002
2003 if (!gen4_composite_set_target(sna, tmp, dst,
2004 dst_x, dst_y, width, height,
2005 flags & COMPOSITE_PARTIAL || op > PictOpSrc)) {
2006 DBG(("%s: failed to set composite target\n", __FUNCTION__));
2007 goto fallback;
2008 }
2009
2010 tmp->op = op;
2011 switch (gen4_composite_picture(sna, src, &tmp->src,
2012 src_x, src_y,
2013 width, height,
2014 dst_x, dst_y,
2015 dst->polyMode == PolyModePrecise)) {
2016 case -1:
2017 DBG(("%s: failed to prepare source\n", __FUNCTION__));
2018 goto cleanup_dst;
2019 case 0:
2020 if (!gen4_channel_init_solid(sna, &tmp->src, 0))
2021 goto cleanup_dst;
2022 /* fall through */
2023 case 1:
2024 if (mask == NULL &&
2025 sna_blt_composite__convert(sna,
2026 dst_x, dst_y, width, height,
2027 tmp))
2028 return true;
2029
2030 gen4_composite_channel_convert(&tmp->src);
2031 break;
2032 }
2033
2034 tmp->is_affine = tmp->src.is_affine;
2035 tmp->has_component_alpha = false;
2036 tmp->need_magic_ca_pass = false;
2037
2038 if (mask) {
2039 if (mask->componentAlpha && PICT_FORMAT_RGB(mask->format)) {
2040 tmp->has_component_alpha = true;
2041
2042 /* Check if it's component alpha that relies on a source alpha and on
2043 * the source value. We can only get one of those into the single
2044 * source value that we get to blend with.
2045 */
2046 if (gen4_blend_op[op].src_alpha &&
2047 (gen4_blend_op[op].src_blend != GEN4_BLENDFACTOR_ZERO)) {
2048 if (op != PictOpOver) {
2049 DBG(("%s -- fallback: unhandled component alpha blend\n",
2050 __FUNCTION__));
2051
2052 goto cleanup_src;
2053 }
2054
2055 tmp->need_magic_ca_pass = true;
2056 tmp->op = PictOpOutReverse;
2057 }
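/* A sketch of the resulting two-pass blend, assuming
 * gen4_magic_ca_pass() performs the second (additive) pass:
 *
 *   pass 1, PictOpOutReverse: dst = (1 - srcA * mask) * dst
 *   pass 2, PictOpAdd:        dst += src * mask
 *
 * which together compute the component-alpha OVER,
 *   dst = src * mask + (1 - srcA * mask) * dst,
 * using only one blend factor per pass.
 */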
2058 }
2059
2060 if (!reuse_source(sna,
2061 src, &tmp->src, src_x, src_y,
2062 mask, &tmp->mask, msk_x, msk_y)) {
2063 switch (gen4_composite_picture(sna, mask, &tmp->mask,
2064 msk_x, msk_y,
2065 width, height,
2066 dst_x, dst_y,
2067 dst->polyMode == PolyModePrecise)) {
2068 case -1:
2069 DBG(("%s: failed to prepare mask\n", __FUNCTION__));
2070 goto cleanup_src;
2071 case 0:
2072 if (!gen4_channel_init_solid(sna, &tmp->mask, 0))
2073 goto cleanup_src;
2074 /* fall through */
2075 case 1:
2076 gen4_composite_channel_convert(&tmp->mask);
2077 break;
2078 }
2079 }
2080
2081 tmp->is_affine &= tmp->mask.is_affine;
2082 }
2083
2084 tmp->u.gen4.wm_kernel =
2085 gen4_choose_composite_kernel(tmp->op,
2086 tmp->mask.bo != NULL,
2087 tmp->has_component_alpha,
2088 tmp->is_affine);
2089 tmp->u.gen4.ve_id = gen4_choose_composite_emitter(sna, tmp);
2090
2091 tmp->blt = gen4_render_composite_blt;
2092 tmp->box = gen4_render_composite_box;
2093 tmp->boxes = gen4_render_composite_boxes__blt;
2094 if (tmp->emit_boxes) {
2095 tmp->boxes = gen4_render_composite_boxes;
2096 #if !FORCE_FLUSH
2097 tmp->thread_boxes = gen4_render_composite_boxes__thread;
2098 #endif
2099 }
2100 tmp->done = gen4_render_composite_done;
2101
2102 if (!kgem_check_bo(&sna->kgem,
2103 tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
2104 NULL)) {
2105 kgem_submit(&sna->kgem);
2106 if (!kgem_check_bo(&sna->kgem,
2107 tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
2108 NULL))
2109 goto cleanup_mask;
2110 }
2111
2112 gen4_align_vertex(sna, tmp);
2113 gen4_bind_surfaces(sna, tmp);
2114 return true;
2115
2116 cleanup_mask:
2117 if (tmp->mask.bo) {
2118 kgem_bo_destroy(&sna->kgem, tmp->mask.bo);
2119 tmp->mask.bo = NULL;
2120 }
2121 cleanup_src:
2122 if (tmp->src.bo) {
2123 kgem_bo_destroy(&sna->kgem, tmp->src.bo);
2124 tmp->src.bo = NULL;
2125 }
2126 cleanup_dst:
2127 if (tmp->redirect.real_bo) {
2128 kgem_bo_destroy(&sna->kgem, tmp->dst.bo);
2129 tmp->redirect.real_bo = NULL;
2130 }
2131 fallback:
2132 return (mask == NULL &&
2133 sna_blt_composite(sna, op,
2134 src, dst,
2135 src_x, src_y,
2136 dst_x, dst_y,
2137 width, height,
2138 flags | COMPOSITE_FALLBACK, tmp));
2139 }
2140
2141 #if !NO_COMPOSITE_SPANS
2142 fastcall static void
2143 gen4_render_composite_spans_box(struct sna *sna,
2144 const struct sna_composite_spans_op *op,
2145 const BoxRec *box, float opacity)
2146 {
2147 DBG(("%s: src=+(%d, %d), opacity=%f, dst=+(%d, %d), box=(%d, %d) x (%d, %d)\n",
2148 __FUNCTION__,
2149 op->base.src.offset[0], op->base.src.offset[1],
2150 opacity,
2151 op->base.dst.x, op->base.dst.y,
2152 box->x1, box->y1,
2153 box->x2 - box->x1,
2154 box->y2 - box->y1));
2155
2156 gen4_get_rectangles(sna, &op->base, 1, gen4_bind_surfaces);
2157 op->prim_emit(sna, op, box, opacity);
2158 }
2159
2160 static void
2161 gen4_render_composite_spans_boxes(struct sna *sna,
2162 const struct sna_composite_spans_op *op,
2163 const BoxRec *box, int nbox,
2164 float opacity)
2165 {
2166 DBG(("%s: nbox=%d, src=+(%d, %d), opacity=%f, dst=+(%d, %d)\n",
2167 __FUNCTION__, nbox,
2168 op->base.src.offset[0], op->base.src.offset[1],
2169 opacity,
2170 op->base.dst.x, op->base.dst.y));
2171
2172 do {
2173 int nbox_this_time;
2174
2175 nbox_this_time = gen4_get_rectangles(sna, &op->base, nbox,
2176 gen4_bind_surfaces);
2177 nbox -= nbox_this_time;
2178
2179 do {
2180 DBG((" %s: (%d, %d) x (%d, %d)\n", __FUNCTION__,
2181 box->x1, box->y1,
2182 box->x2 - box->x1,
2183 box->y2 - box->y1));
2184
2185 op->prim_emit(sna, op, box++, opacity);
2186 } while (--nbox_this_time);
2187 } while (nbox);
2188 }
2189
2190 fastcall static void
2191 gen4_render_composite_spans_boxes__thread(struct sna *sna,
2192 const struct sna_composite_spans_op *op,
2193 const struct sna_opacity_box *box,
2194 int nbox)
2195 {
2196 DBG(("%s: nbox=%d, src=+(%d, %d), dst=+(%d, %d)\n",
2197 __FUNCTION__, nbox,
2198 op->base.src.offset[0], op->base.src.offset[1],
2199 op->base.dst.x, op->base.dst.y));
2200 assert(nbox);
2201
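/* Vertex space is reserved while holding the lock, but the actual
 * emission happens with the lock dropped so that other threads can
 * reserve their own ranges concurrently; the acquire/release pair
 * keeps the vbo from being flushed while emitters are outstanding.
 */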
2202 sna_vertex_lock(&sna->render);
2203 do {
2204 int nbox_this_time;
2205 float *v;
2206
2207 nbox_this_time = gen4_get_rectangles(sna, &op->base, nbox,
2208 gen4_bind_surfaces);
2209 assert(nbox_this_time);
2210 nbox -= nbox_this_time;
2211
2212 v = sna->render.vertices + sna->render.vertex_used;
2213 sna->render.vertex_used += nbox_this_time * op->base.floats_per_rect;
2214
2215 sna_vertex_acquire__locked(&sna->render);
2216 sna_vertex_unlock(&sna->render);
2217
2218 op->emit_boxes(op, box, nbox_this_time, v);
2219 box += nbox_this_time;
2220
2221 sna_vertex_lock(&sna->render);
2222 sna_vertex_release__locked(&sna->render);
2223 } while (nbox);
2224 sna_vertex_unlock(&sna->render);
2225 }
2226
2227 fastcall static void
2228 gen4_render_composite_spans_done(struct sna *sna,
2229 const struct sna_composite_spans_op *op)
2230 {
2231 if (sna->render.vertex_offset)
2232 gen4_vertex_flush(sna);
2233
2234 DBG(("%s()\n", __FUNCTION__));
2235
2236 kgem_bo_destroy(&sna->kgem, op->base.src.bo);
2237 sna_render_composite_redirect_done(sna, &op->base);
2238 }
2239
2240 static bool
2241 gen4_check_composite_spans(struct sna *sna,
2242 uint8_t op, PicturePtr src, PicturePtr dst,
2243 int16_t width, int16_t height,
2244 unsigned flags)
2245 {
2246 DBG(("%s: op=%d, width=%d, height=%d, flags=%x\n",
2247 __FUNCTION__, op, width, height, flags));
2248
2249 if (op >= ARRAY_SIZE(gen4_blend_op))
2250 return false;
2251
2252 if (gen4_composite_fallback(sna, src, NULL, dst)) {
2253 DBG(("%s: operation would fallback\n", __FUNCTION__));
2254 return false;
2255 }
2256
2257 if (need_tiling(sna, width, height) &&
2258 !is_gpu(sna, dst->pDrawable, PREFER_GPU_SPANS)) {
2259 DBG(("%s: fallback, tiled operation not on GPU\n",
2260 __FUNCTION__));
2261 return false;
2262 }
2263
2264 if (FORCE_SPANS)
2265 return FORCE_SPANS > 0;
2266
2267 if ((flags & COMPOSITE_SPANS_RECTILINEAR) == 0) {
2268 struct sna_pixmap *priv;
2269
2270 if (FORCE_NONRECTILINEAR_SPANS)
2271 return FORCE_NONRECTILINEAR_SPANS > 0;
2272
2273 if ((sna->render.prefer_gpu & PREFER_GPU_SPANS) == 0)
2274 return false;
2275
2276 priv = sna_pixmap_from_drawable(dst->pDrawable);
2277 assert(priv);
2278
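/* For non-rectilinear spans, prefer the GPU only when the target is
 * already busy and an in-place CPU fallback would have to stall;
 * an idle destination is usually cheaper to rasterise directly.
 */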
2279 if (priv->cpu_bo &&
2280 __kgem_bo_is_busy(&sna->kgem, priv->cpu_bo))
2281 return true;
2282
2283 if (flags & COMPOSITE_SPANS_INPLACE_HINT)
2284 return false;
2285
2286 return priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo);
2287 }
2288
2289 return true;
2290 }
2291
2292 static bool
2293 gen4_render_composite_spans(struct sna *sna,
2294 uint8_t op,
2295 PicturePtr src,
2296 PicturePtr dst,
2297 int16_t src_x, int16_t src_y,
2298 int16_t dst_x, int16_t dst_y,
2299 int16_t width, int16_t height,
2300 unsigned flags,
2301 struct sna_composite_spans_op *tmp)
2302 {
2303 DBG(("%s: %dx%d with flags=%x, current mode=%d\n", __FUNCTION__,
2304 width, height, flags, sna->kgem.ring));
2305
2306 assert(gen4_check_composite_spans(sna, op, src, dst, width, height, flags));
2307
2308 if (need_tiling(sna, width, height)) {
2309 DBG(("%s: tiling, operation (%dx%d) too wide for pipeline\n",
2310 __FUNCTION__, width, height));
2311 return sna_tiling_composite_spans(op, src, dst,
2312 src_x, src_y, dst_x, dst_y,
2313 width, height, flags, tmp);
2314 }
2315
2316 tmp->base.op = op;
2317 if (!gen4_composite_set_target(sna, &tmp->base, dst,
2318 dst_x, dst_y, width, height, true))
2319 return false;
2320
2321 switch (gen4_composite_picture(sna, src, &tmp->base.src,
2322 src_x, src_y,
2323 width, height,
2324 dst_x, dst_y,
2325 dst->polyMode == PolyModePrecise)) {
2326 case -1:
2327 goto cleanup_dst;
2328 case 0:
2329 if (!gen4_channel_init_solid(sna, &tmp->base.src, 0))
2330 goto cleanup_dst;
2331 /* fall through */
2332 case 1:
2333 gen4_composite_channel_convert(&tmp->base.src);
2334 break;
2335 }
2336
2337 tmp->base.mask.bo = NULL;
2338 tmp->base.mask.filter = SAMPLER_FILTER_NEAREST;
2339 tmp->base.mask.repeat = SAMPLER_EXTEND_NONE;
2340
2341 tmp->base.is_affine = tmp->base.src.is_affine;
2342 tmp->base.has_component_alpha = false;
2343 tmp->base.need_magic_ca_pass = false;
2344
2345 tmp->base.u.gen4.ve_id = gen4_choose_spans_emitter(sna, tmp);
2346 tmp->base.u.gen4.wm_kernel = WM_KERNEL_OPACITY | !tmp->base.is_affine;
2347
2348 tmp->box = gen4_render_composite_spans_box;
2349 tmp->boxes = gen4_render_composite_spans_boxes;
2350 if (tmp->emit_boxes)
2351 tmp->thread_boxes = gen4_render_composite_spans_boxes__thread;
2352 tmp->done = gen4_render_composite_spans_done;
2353
2354 if (!kgem_check_bo(&sna->kgem,
2355 tmp->base.dst.bo, tmp->base.src.bo,
2356 NULL)) {
2357 kgem_submit(&sna->kgem);
2358 if (!kgem_check_bo(&sna->kgem,
2359 tmp->base.dst.bo, tmp->base.src.bo,
2360 NULL))
2361 goto cleanup_src;
2362 }
2363
2364 gen4_align_vertex(sna, &tmp->base);
2365 gen4_bind_surfaces(sna, &tmp->base);
2366 return true;
2367
2368 cleanup_src:
2369 if (tmp->base.src.bo)
2370 kgem_bo_destroy(&sna->kgem, tmp->base.src.bo);
2371 cleanup_dst:
2372 if (tmp->base.redirect.real_bo)
2373 kgem_bo_destroy(&sna->kgem, tmp->base.dst.bo);
2374 return false;
2375 }
2376 #endif
2377
2378 static void
2379 gen4_copy_bind_surfaces(struct sna *sna, const struct sna_composite_op *op)
2380 {
2381 uint32_t *binding_table;
2382 uint16_t offset, dirty;
2383
2384 gen4_get_batch(sna, op);
2385 dirty = kgem_bo_is_dirty(op->dst.bo);
2386
2387 binding_table = gen4_composite_get_binding_table(sna, &offset);
2388
2389 binding_table[0] =
2390 gen4_bind_bo(sna,
2391 op->dst.bo, op->dst.width, op->dst.height,
2392 gen4_get_dest_format(op->dst.format),
2393 true);
2394 binding_table[1] =
2395 gen4_bind_bo(sna,
2396 op->src.bo, op->src.width, op->src.height,
2397 op->src.card_format,
2398 false);
2399
2400 if (sna->kgem.surface == offset &&
2401 *(uint64_t *)(sna->kgem.batch + sna->render_state.gen4.surface_table) == *(uint64_t*)binding_table) {
2402 sna->kgem.surface += sizeof(struct gen4_surface_state_padded) / sizeof(uint32_t);
2403 offset = sna->render_state.gen4.surface_table;
2404 }
2405
2406 if (!ALWAYS_FLUSH && sna->kgem.batch[sna->render_state.gen4.surface_table] == binding_table[0])
2407 dirty = 0;
2408
2409 gen4_emit_state(sna, op, offset | dirty);
2410 }
2411
2412 static void
2413 gen4_render_copy_one(struct sna *sna,
2414 const struct sna_composite_op *op,
2415 int sx, int sy,
2416 int w, int h,
2417 int dx, int dy)
2418 {
2419 gen4_get_rectangles(sna, op, 1, gen4_copy_bind_surfaces);
2420
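/* Each box becomes a single RECTLIST primitive: only three corners
 * (lower-right, lower-left, upper-left) are emitted and the hardware
 * infers the fourth. Texture coordinates are normalised via
 * src.scale[] = 1 / source dimension.
 */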
2421 OUT_VERTEX(dx+w, dy+h);
2422 OUT_VERTEX_F((sx+w)*op->src.scale[0]);
2423 OUT_VERTEX_F((sy+h)*op->src.scale[1]);
2424
2425 OUT_VERTEX(dx, dy+h);
2426 OUT_VERTEX_F(sx*op->src.scale[0]);
2427 OUT_VERTEX_F((sy+h)*op->src.scale[1]);
2428
2429 OUT_VERTEX(dx, dy);
2430 OUT_VERTEX_F(sx*op->src.scale[0]);
2431 OUT_VERTEX_F(sy*op->src.scale[1]);
2432 }
2433
2434 static bool
2435 gen4_render_copy_boxes(struct sna *sna, uint8_t alu,
2436 const DrawableRec *src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
2437 const DrawableRec *dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
2438 const BoxRec *box, int n, unsigned flags)
2439 {
2440 struct sna_composite_op tmp;
2441
2442 DBG(("%s x %d\n", __FUNCTION__, n));
2443
2444 if (sna_blt_compare_depth(src, dst) &&
2445 sna_blt_copy_boxes(sna, alu,
2446 src_bo, src_dx, src_dy,
2447 dst_bo, dst_dx, dst_dy,
2448 dst->bitsPerPixel,
2449 box, n))
2450 return true;
2451
2452 if (!(alu == GXcopy || alu == GXclear) || src_bo == dst_bo) {
2453 fallback_blt:
2454 if (!sna_blt_compare_depth(src, dst))
2455 return false;
2456
2457 return sna_blt_copy_boxes_fallback(sna, alu,
2458 src, src_bo, src_dx, src_dy,
2459 dst, dst_bo, dst_dx, dst_dy,
2460 box, n);
2461 }
2462
2463 memset(&tmp, 0, sizeof(tmp));
2464
2465 DBG(("%s (%d, %d)->(%d, %d) x %d\n",
2466 __FUNCTION__, src_dx, src_dy, dst_dx, dst_dy, n));
2467
2468 if (dst->depth == src->depth) {
2469 tmp.dst.format = sna_render_format_for_depth(dst->depth);
2470 tmp.src.pict_format = tmp.dst.format;
2471 } else {
2472 tmp.dst.format = sna_format_for_depth(dst->depth);
2473 tmp.src.pict_format = sna_format_for_depth(src->depth);
2474 }
2475 if (!gen4_check_format(tmp.src.pict_format))
2476 goto fallback_blt;
2477
2478 tmp.op = alu == GXcopy ? PictOpSrc : PictOpClear;
2479
2480 tmp.dst.pixmap = (PixmapPtr)dst;
2481 tmp.dst.width = dst->width;
2482 tmp.dst.height = dst->height;
2483 tmp.dst.x = tmp.dst.y = 0;
2484 tmp.dst.bo = dst_bo;
2485 tmp.damage = NULL;
2486
2487 sna_render_composite_redirect_init(&tmp);
2488 if (too_large(tmp.dst.width, tmp.dst.height)) {
2489 BoxRec extents = box[0];
2490 int i;
2491
2492 for (i = 1; i < n; i++) {
2493 if (box[i].x1 < extents.x1)
2494 extents.x1 = box[i].x1;
2495 if (box[i].y1 < extents.y1)
2496 extents.y1 = box[i].y1;
2497
2498 if (box[i].x2 > extents.x2)
2499 extents.x2 = box[i].x2;
2500 if (box[i].y2 > extents.y2)
2501 extents.y2 = box[i].y2;
2502 }
2503 if (!sna_render_composite_redirect(sna, &tmp,
2504 extents.x1 + dst_dx,
2505 extents.y1 + dst_dy,
2506 extents.x2 - extents.x1,
2507 extents.y2 - extents.y1,
2508 n > 1))
2509 goto fallback_tiled;
2510 }
2511
2512 tmp.src.filter = SAMPLER_FILTER_NEAREST;
2513 tmp.src.repeat = SAMPLER_EXTEND_NONE;
2514 tmp.src.card_format = gen4_get_card_format(tmp.src.pict_format);
2515 if (too_large(src->width, src->height)) {
2516 BoxRec extents = box[0];
2517 int i;
2518
2519 for (i = 1; i < n; i++) {
2520 if (box[i].x1 < extents.x1)
2521 extents.x1 = box[i].x1;
2522 if (box[i].y1 < extents.y1)
2523 extents.y1 = box[i].y1;
2524
2525 if (box[i].x2 > extents.x2)
2526 extents.x2 = box[i].x2;
2527 if (box[i].y2 > extents.y2)
2528 extents.y2 = box[i].y2;
2529 }
2530
2531 if (!sna_render_pixmap_partial(sna, src, src_bo, &tmp.src,
2532 extents.x1 + src_dx,
2533 extents.y1 + src_dy,
2534 extents.x2 - extents.x1,
2535 extents.y2 - extents.y1))
2536 goto fallback_tiled_dst;
2537 } else {
2538 tmp.src.bo = kgem_bo_reference(src_bo);
2539 tmp.src.width = src->width;
2540 tmp.src.height = src->height;
2541 tmp.src.offset[0] = tmp.src.offset[1] = 0;
2542 tmp.src.scale[0] = 1.f/src->width;
2543 tmp.src.scale[1] = 1.f/src->height;
2544 }
2545
2546 tmp.is_affine = true;
2547 tmp.floats_per_vertex = 3;
2548 tmp.floats_per_rect = 9;
2549 tmp.u.gen4.wm_kernel = WM_KERNEL;
2550 tmp.u.gen4.ve_id = 2;
2551
2552 if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) {
2553 kgem_submit(&sna->kgem);
2554 if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) {
2555 kgem_bo_destroy(&sna->kgem, tmp.src.bo);
2556 if (tmp.redirect.real_bo)
2557 kgem_bo_destroy(&sna->kgem, tmp.dst.bo);
2558
2559 goto fallback_blt;
2560 }
2561 }
2562
2563 dst_dx += tmp.dst.x;
2564 dst_dy += tmp.dst.y;
2565 tmp.dst.x = tmp.dst.y = 0;
2566
2567 src_dx += tmp.src.offset[0];
2568 src_dy += tmp.src.offset[1];
2569
2570 gen4_align_vertex(sna, &tmp);
2571 gen4_copy_bind_surfaces(sna, &tmp);
2572
2573 do {
2574 gen4_render_copy_one(sna, &tmp,
2575 box->x1 + src_dx, box->y1 + src_dy,
2576 box->x2 - box->x1, box->y2 - box->y1,
2577 box->x1 + dst_dx, box->y1 + dst_dy);
2578 box++;
2579 } while (--n);
2580
2581 gen4_vertex_flush(sna);
2582 sna_render_composite_redirect_done(sna, &tmp);
2583 kgem_bo_destroy(&sna->kgem, tmp.src.bo);
2584 return true;
2585
2586 fallback_tiled_dst:
2587 if (tmp.redirect.real_bo)
2588 kgem_bo_destroy(&sna->kgem, tmp.dst.bo);
2589 fallback_tiled:
2590 if (sna_blt_compare_depth(src, dst) &&
2591 sna_blt_copy_boxes(sna, alu,
2592 src_bo, src_dx, src_dy,
2593 dst_bo, dst_dx, dst_dy,
2594 dst->bitsPerPixel,
2595 box, n))
2596 return true;
2597
2598 return sna_tiling_copy_boxes(sna, alu,
2599 src, src_bo, src_dx, src_dy,
2600 dst, dst_bo, dst_dx, dst_dy,
2601 box, n);
2602 }
2603
2604 static void
2605 gen4_render_copy_blt(struct sna *sna,
2606 const struct sna_copy_op *op,
2607 int16_t sx, int16_t sy,
2608 int16_t w, int16_t h,
2609 int16_t dx, int16_t dy)
2610 {
2611 gen4_render_copy_one(sna, &op->base, sx, sy, w, h, dx, dy);
2612 }
2613
2614 static void
2615 gen4_render_copy_done(struct sna *sna, const struct sna_copy_op *op)
2616 {
2617 if (sna->render.vertex_offset)
2618 gen4_vertex_flush(sna);
2619 }
2620
2621 static bool
2622 gen4_render_copy(struct sna *sna, uint8_t alu,
2623 PixmapPtr src, struct kgem_bo *src_bo,
2624 PixmapPtr dst, struct kgem_bo *dst_bo,
2625 struct sna_copy_op *op)
2626 {
2627 DBG(("%s: src=%ld, dst=%ld, alu=%d\n",
2628 __FUNCTION__,
2629 src->drawable.serialNumber,
2630 dst->drawable.serialNumber,
2631 alu));
2632
2633 if (sna_blt_compare_depth(&src->drawable, &dst->drawable) &&
2634 sna_blt_copy(sna, alu,
2635 src_bo, dst_bo,
2636 dst->drawable.bitsPerPixel,
2637 op))
2638 return true;
2639
2640 if (!(alu == GXcopy || alu == GXclear) || src_bo == dst_bo ||
2641 too_large(src->drawable.width, src->drawable.height) ||
2642 too_large(dst->drawable.width, dst->drawable.height)) {
2643 fallback:
2644 if (!sna_blt_compare_depth(&src->drawable, &dst->drawable))
2645 return false;
2646
2647 return sna_blt_copy(sna, alu, src_bo, dst_bo,
2648 dst->drawable.bitsPerPixel,
2649 op);
2650 }
2651
2652 if (dst->drawable.depth == src->drawable.depth) {
2653 op->base.dst.format = sna_render_format_for_depth(dst->drawable.depth);
2654 op->base.src.pict_format = op->base.dst.format;
2655 } else {
2656 op->base.dst.format = sna_format_for_depth(dst->drawable.depth);
2657 op->base.src.pict_format = sna_format_for_depth(src->drawable.depth);
2658 }
2659 if (!gen4_check_format(op->base.src.pict_format))
2660 goto fallback;
2661
2662 op->base.op = alu == GXcopy ? PictOpSrc : PictOpClear;
2663
2664 op->base.dst.pixmap = dst;
2665 op->base.dst.width = dst->drawable.width;
2666 op->base.dst.height = dst->drawable.height;
2667 op->base.dst.bo = dst_bo;
2668
2669 op->base.src.bo = src_bo;
2670 op->base.src.card_format =
2671 gen4_get_card_format(op->base.src.pict_format);
2672 op->base.src.width = src->drawable.width;
2673 op->base.src.height = src->drawable.height;
2674 op->base.src.scale[0] = 1.f/src->drawable.width;
2675 op->base.src.scale[1] = 1.f/src->drawable.height;
2676 op->base.src.filter = SAMPLER_FILTER_NEAREST;
2677 op->base.src.repeat = SAMPLER_EXTEND_NONE;
2678
2679 op->base.is_affine = true;
2680 op->base.floats_per_vertex = 3;
2681 op->base.floats_per_rect = 9;
2682 op->base.u.gen4.wm_kernel = WM_KERNEL;
2683 op->base.u.gen4.ve_id = 2;
2684
2685 if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) {
2686 kgem_submit(&sna->kgem);
2687 if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL))
2688 goto fallback;
2689 }
2690
2691 if (kgem_bo_is_dirty(src_bo)) {
2692 if (sna_blt_compare_depth(&src->drawable, &dst->drawable) &&
2693 sna_blt_copy(sna, alu,
2694 src_bo, dst_bo,
2695 dst->drawable.bitsPerPixel,
2696 op))
2697 return true;
2698 }
2699
2700 gen4_align_vertex(sna, &op->base);
2701 gen4_copy_bind_surfaces(sna, &op->base);
2702
2703 op->blt = gen4_render_copy_blt;
2704 op->done = gen4_render_copy_done;
2705 return true;
2706 }
2707
2708 static void
2709 gen4_render_fill_rectangle(struct sna *sna,
2710 const struct sna_composite_op *op,
2711 int x, int y, int w, int h)
2712 {
2713 gen4_get_rectangles(sna, op, 1, gen4_bind_surfaces);
2714
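/* Solid fills sample a 1x1 solid-colour bo, so a constant texture
 * coordinate of 0.5 (the texel centre) per vertex suffices; this
 * matches ve_id = 1 and floats_per_vertex = 2 set by the callers.
 */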
2715 OUT_VERTEX(x+w, y+h);
2716 OUT_VERTEX_F(.5);
2717
2718 OUT_VERTEX(x, y+h);
2719 OUT_VERTEX_F(.5);
2720
2721 OUT_VERTEX(x, y);
2722 OUT_VERTEX_F(.5);
2723 }
2724
2725 static bool
2726 gen4_render_fill_boxes(struct sna *sna,
2727 CARD8 op,
2728 PictFormat format,
2729 const xRenderColor *color,
2730 const DrawableRec *dst, struct kgem_bo *dst_bo,
2731 const BoxRec *box, int n)
2732 {
2733 struct sna_composite_op tmp;
2734 uint32_t pixel;
2735
2736 if (op >= ARRAY_SIZE(gen4_blend_op)) {
2737 DBG(("%s: fallback due to unhandled blend op: %d\n",
2738 __FUNCTION__, op));
2739 return false;
2740 }
2741
2742 if (op <= PictOpSrc) {
2743 uint8_t alu = GXinvalid;
2744
2745 pixel = 0;
2746 if (op == PictOpClear)
2747 alu = GXclear;
2748 else if (sna_get_pixel_from_rgba(&pixel,
2749 color->red,
2750 color->green,
2751 color->blue,
2752 color->alpha,
2753 format))
2754 alu = GXcopy;
2755
2756 if (alu != GXinvalid &&
2757 sna_blt_fill_boxes(sna, alu,
2758 dst_bo, dst->bitsPerPixel,
2759 pixel, box, n))
2760 return true;
2761
2762 if (!gen4_check_dst_format(format))
2763 return false;
2764
2765 if (too_large(dst->width, dst->height))
2766 return sna_tiling_fill_boxes(sna, op, format, color,
2767 dst, dst_bo, box, n);
2768 }
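/* Anything that falls through needs real blending, so convert the
 * colour to PICT_a8r8g8b8 and run it through the 3D pipeline below.
 */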
2769
2770 if (op == PictOpClear) {
2771 pixel = 0;
2772 op = PictOpSrc;
2773 } else if (!sna_get_pixel_from_rgba(&pixel,
2774 color->red,
2775 color->green,
2776 color->blue,
2777 color->alpha,
2778 PICT_a8r8g8b8))
2779 return false;
2780
2781 DBG(("%s(%08x x %d)\n", __FUNCTION__, pixel, n));
2782
2783 memset(&tmp, 0, sizeof(tmp));
2784
2785 tmp.op = op;
2786
2787 tmp.dst.pixmap = (PixmapPtr)dst;
2788 tmp.dst.width = dst->width;
2789 tmp.dst.height = dst->height;
2790 tmp.dst.format = format;
2791 tmp.dst.bo = dst_bo;
2792
2793 sna_render_composite_redirect_init(&tmp);
2794 if (too_large(dst->width, dst->height)) {
2795 BoxRec extents;
2796
2797 boxes_extents(box, n, &extents);
2798 if (!sna_render_composite_redirect(sna, &tmp,
2799 extents.x1, extents.y1,
2800 extents.x2 - extents.x1,
2801 extents.y2 - extents.y1,
2802 n > 1))
2803 return sna_tiling_fill_boxes(sna, op, format, color,
2804 dst, dst_bo, box, n);
2805 }
2806
2807 gen4_channel_init_solid(sna, &tmp.src, pixel);
2808
2809 tmp.is_affine = true;
2810 tmp.floats_per_vertex = 2;
2811 tmp.floats_per_rect = 6;
2812 tmp.u.gen4.wm_kernel = WM_KERNEL;
2813 tmp.u.gen4.ve_id = 1;
2814
2815 if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
2816 kgem_submit(&sna->kgem);
2817 if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
2818 kgem_bo_destroy(&sna->kgem, tmp.src.bo);
2819 return false;
2820 }
2821 }
2822
2823 gen4_align_vertex(sna, &tmp);
2824 gen4_bind_surfaces(sna, &tmp);
2825
2826 do {
2827 gen4_render_fill_rectangle(sna, &tmp,
2828 box->x1, box->y1,
2829 box->x2 - box->x1,
2830 box->y2 - box->y1);
2831 box++;
2832 } while (--n);
2833
2834 gen4_vertex_flush(sna);
2835 kgem_bo_destroy(&sna->kgem, tmp.src.bo);
2836 sna_render_composite_redirect_done(sna, &tmp);
2837 return true;
2838 }
2839
2840 static void
2841 gen4_render_fill_op_blt(struct sna *sna, const struct sna_fill_op *op,
2842 int16_t x, int16_t y, int16_t w, int16_t h)
2843 {
2844 gen4_render_fill_rectangle(sna, &op->base, x, y, w, h);
2845 }
2846
2847 fastcall static void
2848 gen4_render_fill_op_box(struct sna *sna,
2849 const struct sna_fill_op *op,
2850 const BoxRec *box)
2851 {
2852 gen4_render_fill_rectangle(sna, &op->base,
2853 box->x1, box->y1,
2854 box->x2-box->x1, box->y2-box->y1);
2855 }
2856
2857 fastcall static void
2858 gen4_render_fill_op_boxes(struct sna *sna,
2859 const struct sna_fill_op *op,
2860 const BoxRec *box,
2861 int nbox)
2862 {
2863 do {
2864 gen4_render_fill_rectangle(sna, &op->base,
2865 box->x1, box->y1,
2866 box->x2-box->x1, box->y2-box->y1);
2867 box++;
2868 } while (--nbox);
2869 }
2870
2871 static void
2872 gen4_render_fill_op_done(struct sna *sna, const struct sna_fill_op *op)
2873 {
2874 if (sna->render.vertex_offset)
2875 gen4_vertex_flush(sna);
2876 kgem_bo_destroy(&sna->kgem, op->base.src.bo);
2877 }
2878
2879 static bool
2880 gen4_render_fill(struct sna *sna, uint8_t alu,
2881 PixmapPtr dst, struct kgem_bo *dst_bo,
2882 uint32_t color, unsigned flags,
2883 struct sna_fill_op *op)
2884 {
2885 if (sna_blt_fill(sna, alu,
2886 dst_bo, dst->drawable.bitsPerPixel,
2887 color,
2888 op))
2889 return true;
2890
2891 if (!(alu == GXcopy || alu == GXclear) ||
2892 too_large(dst->drawable.width, dst->drawable.height))
2893 return sna_blt_fill(sna, alu,
2894 dst_bo, dst->drawable.bitsPerPixel,
2895 color,
2896 op);
2897
2898 if (alu == GXclear)
2899 color = 0;
2900
2901 op->base.op = color == 0 ? PictOpClear : PictOpSrc;
2902
2903 op->base.dst.pixmap = dst;
2904 op->base.dst.width = dst->drawable.width;
2905 op->base.dst.height = dst->drawable.height;
2906 op->base.dst.format = sna_format_for_depth(dst->drawable.depth);
2907 op->base.dst.bo = dst_bo;
2908 op->base.dst.x = op->base.dst.y = 0;
2909
2910 op->base.need_magic_ca_pass = 0;
2911 op->base.has_component_alpha = 0;
2912
2913 gen4_channel_init_solid(sna, &op->base.src,
2914 sna_rgba_for_color(color,
2915 dst->drawable.depth));
2916 op->base.mask.bo = NULL;
2917
2918 op->base.is_affine = true;
2919 op->base.floats_per_vertex = 2;
2920 op->base.floats_per_rect = 6;
2921 op->base.u.gen4.wm_kernel = WM_KERNEL;
2922 op->base.u.gen4.ve_id = 1;
2923
2924 if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
2925 kgem_submit(&sna->kgem);
2926 if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
2927 kgem_bo_destroy(&sna->kgem, op->base.src.bo);
2928 return false;
2929 }
2930 }
2931
2932 gen4_align_vertex(sna, &op->base);
2933 gen4_bind_surfaces(sna, &op->base);
2934
2935 op->blt = gen4_render_fill_op_blt;
2936 op->box = gen4_render_fill_op_box;
2937 op->boxes = gen4_render_fill_op_boxes;
2938 op->points = NULL;
2939 op->done = gen4_render_fill_op_done;
2940 return true;
2941 }
2942
2943 static bool
2944 gen4_render_fill_one_try_blt(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
2945 uint32_t color,
2946 int16_t x1, int16_t y1, int16_t x2, int16_t y2,
2947 uint8_t alu)
2948 {
2949 BoxRec box;
2950
2951 box.x1 = x1;
2952 box.y1 = y1;
2953 box.x2 = x2;
2954 box.y2 = y2;
2955
2956 return sna_blt_fill_boxes(sna, alu,
2957 bo, dst->drawable.bitsPerPixel,
2958 color, &box, 1);
2959 }
2960
2961 static bool
2962 gen4_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
2963 uint32_t color,
2964 int16_t x1, int16_t y1,
2965 int16_t x2, int16_t y2,
2966 uint8_t alu)
2967 {
2968 struct sna_composite_op tmp;
2969
2970 DBG(("%s: color=%08x\n", __FUNCTION__, color));
2971
2972 if (gen4_render_fill_one_try_blt(sna, dst, bo, color,
2973 x1, y1, x2, y2, alu))
2974 return true;
2975
2976 /* Must use the BLT if we can't RENDER... */
2977 if (!(alu == GXcopy || alu == GXclear) ||
2978 too_large(dst->drawable.width, dst->drawable.height))
2979 return false;
2980
2981 if (alu == GXclear)
2982 color = 0;
2983
2984 tmp.op = color == 0 ? PictOpClear : PictOpSrc;
2985
2986 tmp.dst.pixmap = dst;
2987 tmp.dst.width = dst->drawable.width;
2988 tmp.dst.height = dst->drawable.height;
2989 tmp.dst.format = sna_format_for_depth(dst->drawable.depth);
2990 tmp.dst.bo = bo;
2991 tmp.dst.x = tmp.dst.y = 0;
2992
2993 gen4_channel_init_solid(sna, &tmp.src,
2994 sna_rgba_for_color(color,
2995 dst->drawable.depth));
2996 tmp.mask.bo = NULL;
2997 tmp.mask.filter = SAMPLER_FILTER_NEAREST;
2998 tmp.mask.repeat = SAMPLER_EXTEND_NONE;
2999
3000 tmp.is_affine = true;
3001 tmp.floats_per_vertex = 2;
3002 tmp.floats_per_rect = 6;
3003 tmp.has_component_alpha = false;
3004 tmp.need_magic_ca_pass = false;
3005
3006 tmp.u.gen4.wm_kernel = WM_KERNEL;
3007 tmp.u.gen4.ve_id = 1;
3008
3009 if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
3010 kgem_submit(&sna->kgem);
3011 if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
3012 kgem_bo_destroy(&sna->kgem, tmp.src.bo);
3013 return false;
3014 }
3015 }
3016
3017 gen4_align_vertex(sna, &tmp);
3018 gen4_bind_surfaces(sna, &tmp);
3019
3020 gen4_render_fill_rectangle(sna, &tmp, x1, y1, x2 - x1, y2 - y1);
3021
3022 gen4_vertex_flush(sna);
3023 kgem_bo_destroy(&sna->kgem, tmp.src.bo);
3024
3025 return true;
3026 }
3027
3028 static void gen4_render_reset(struct sna *sna)
3029 {
3030 sna->render_state.gen4.needs_invariant = true;
3031 sna->render_state.gen4.needs_urb = true;
3032 sna->render_state.gen4.ve_id = -1;
3033 sna->render_state.gen4.last_primitive = -1;
3034 sna->render_state.gen4.last_pipelined_pointers = -1;
3035
3036 sna->render_state.gen4.drawrect_offset = -1;
3037 sna->render_state.gen4.drawrect_limit = -1;
3038 sna->render_state.gen4.surface_table = 0;
3039
3040 if (sna->render.vbo && !kgem_bo_can_map(&sna->kgem, sna->render.vbo)) {
3041 DBG(("%s: discarding unmappable vbo\n", __FUNCTION__));
3042 discard_vbo(sna);
3043 }
3044
3045 sna->render.vertex_offset = 0;
3046 sna->render.nvertex_reloc = 0;
3047 sna->render.vb_id = 0;
3048 }
3049
3050 static void gen4_render_fini(struct sna *sna)
3051 {
3052 kgem_bo_destroy(&sna->kgem, sna->render_state.gen4.general_bo);
3053 }
3054
3055 static uint32_t gen4_create_vs_unit_state(struct sna_static_stream *stream)
3056 {
3057 struct gen4_vs_unit_state *vs = sna_static_stream_map(stream, sizeof(*vs), 32);
3058
3059 /* Set up the vertex shader to be disabled (passthrough) */
3060 vs->thread4.nr_urb_entries = URB_VS_ENTRIES;
3061 vs->thread4.urb_entry_allocation_size = URB_VS_ENTRY_SIZE - 1;
3062 vs->vs6.vs_enable = 0;
3063 vs->vs6.vert_cache_disable = 1;
3064
3065 return sna_static_stream_offsetof(stream, vs);
3066 }
3067
3068 static uint32_t gen4_create_sf_state(struct sna_static_stream *stream,
3069 uint32_t kernel)
3070 {
3071 struct gen4_sf_unit_state *sf;
3072
3073 sf = sna_static_stream_map(stream, sizeof(*sf), 32);
3074
3075 sf->thread0.grf_reg_count = GEN4_GRF_BLOCKS(SF_KERNEL_NUM_GRF);
3076 sf->thread0.kernel_start_pointer = kernel >> 6;
3077 sf->thread3.const_urb_entry_read_length = 0; /* no const URBs */
3078 sf->thread3.const_urb_entry_read_offset = 0; /* no const URBs */
3079 sf->thread3.urb_entry_read_length = 1; /* 1 URB per vertex */
3080 /* don't smash vertex header, read start from dw8 */
3081 sf->thread3.urb_entry_read_offset = 1;
3082 sf->thread3.dispatch_grf_start_reg = 3;
3083 sf->thread4.max_threads = GEN4_MAX_SF_THREADS - 1;
3084 sf->thread4.urb_entry_allocation_size = URB_SF_ENTRY_SIZE - 1;
3085 sf->thread4.nr_urb_entries = URB_SF_ENTRIES;
3086 sf->sf5.viewport_transform = false; /* skip viewport */
3087 sf->sf6.cull_mode = GEN4_CULLMODE_NONE;
3088 sf->sf6.scissor = 0;
3089 sf->sf7.trifan_pv = 2;
3090 sf->sf6.dest_org_vbias = 0x8;
3091 sf->sf6.dest_org_hbias = 0x8;
3092
3093 return sna_static_stream_offsetof(stream, sf);
3094 }
3095
3096 static uint32_t gen4_create_sampler_state(struct sna_static_stream *stream,
3097 sampler_filter_t src_filter,
3098 sampler_extend_t src_extend,
3099 sampler_filter_t mask_filter,
3100 sampler_extend_t mask_extend)
3101 {
3102 struct gen4_sampler_state *sampler_state;
3103
3104 sampler_state = sna_static_stream_map(stream,
3105 sizeof(struct gen4_sampler_state) * 2,
3106 32);
3107 sampler_state_init(&sampler_state[0], src_filter, src_extend);
3108 sampler_state_init(&sampler_state[1], mask_filter, mask_extend);
3109
3110 return sna_static_stream_offsetof(stream, sampler_state);
3111 }
3112
3113 static void gen4_init_wm_state(struct gen4_wm_unit_state *wm,
3114 int gen,
3115 bool has_mask,
3116 uint32_t kernel,
3117 uint32_t sampler)
3118 {
3119 assert((kernel & 63) == 0);
3120 wm->thread0.kernel_start_pointer = kernel >> 6;
3121 wm->thread0.grf_reg_count = GEN4_GRF_BLOCKS(PS_KERNEL_NUM_GRF);
3122
3123 wm->thread1.single_program_flow = 0;
3124
3125 wm->thread3.const_urb_entry_read_length = 0;
3126 wm->thread3.const_urb_entry_read_offset = 0;
3127
3128 wm->thread3.urb_entry_read_offset = 0;
3129 wm->thread3.dispatch_grf_start_reg = 3;
3130
3131 assert((sampler & 31) == 0);
3132 wm->wm4.sampler_state_pointer = sampler >> 5;
3133 wm->wm4.sampler_count = 1;
3134
3135 wm->wm5.max_threads = gen >= 045 ? G4X_MAX_WM_THREADS - 1 : GEN4_MAX_WM_THREADS - 1;
3136 wm->wm5.transposed_urb_read = 0;
3137 wm->wm5.thread_dispatch_enable = 1;
3138 /* just use 16-pixel dispatch (4 subspans), don't need to change kernel
3139 * start point
3140 */
3141 wm->wm5.enable_16_pix = 1;
3142 wm->wm5.enable_8_pix = 0;
3143 wm->wm5.early_depth_test = 1;
3144
3145 /* Each pair of attributes (src/mask coords) is two URB entries */
3146 if (has_mask) {
3147 wm->thread1.binding_table_entry_count = 3;
3148 wm->thread3.urb_entry_read_length = 4;
3149 } else {
3150 wm->thread1.binding_table_entry_count = 2;
3151 wm->thread3.urb_entry_read_length = 2;
3152 }
3153 }
3154
3155 static uint32_t gen4_create_cc_unit_state(struct sna_static_stream *stream)
3156 {
3157 uint8_t *ptr, *base;
3158 int i, j;
3159
3160 base = ptr =
3161 sna_static_stream_map(stream,
3162 GEN4_BLENDFACTOR_COUNT*GEN4_BLENDFACTOR_COUNT*64,
3163 64);
3164
3165 for (i = 0; i < GEN4_BLENDFACTOR_COUNT; i++) {
3166 for (j = 0; j < GEN4_BLENDFACTOR_COUNT; j++) {
3167 struct gen4_cc_unit_state *state =
3168 (struct gen4_cc_unit_state *)ptr;
3169
3170 state->cc3.blend_enable =
3171 !(j == GEN4_BLENDFACTOR_ZERO && i == GEN4_BLENDFACTOR_ONE);
3172
3173 state->cc5.logicop_func = 0xc; /* COPY */
3174 state->cc5.ia_blend_function = GEN4_BLENDFUNCTION_ADD;
3175
3176 /* Fill in alpha blend factors same as color, for the future. */
3177 state->cc5.ia_src_blend_factor = i;
3178 state->cc5.ia_dest_blend_factor = j;
3179
3180 state->cc6.blend_function = GEN4_BLENDFUNCTION_ADD;
3181 state->cc6.clamp_post_alpha_blend = 1;
3182 state->cc6.clamp_pre_alpha_blend = 1;
3183 state->cc6.src_blend_factor = i;
3184 state->cc6.dest_blend_factor = j;
3185
3186 ptr += 64;
3187 }
3188 }
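/* The states form a dense [src_factor][dst_factor] table with a
 * 64-byte stride; assuming the runtime lookup mirrors this layout,
 * a unit would be located at:
 *
 *   offset = base + (src * GEN4_BLENDFACTOR_COUNT + dst) * 64;
 */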
3189
3190 return sna_static_stream_offsetof(stream, base);
3191 }
3192
3193 static bool gen4_render_setup(struct sna *sna)
3194 {
3195 struct gen4_render_state *state = &sna->render_state.gen4;
3196 struct sna_static_stream general;
3197 struct gen4_wm_unit_state_padded *wm_state;
3198 uint32_t sf, wm[KERNEL_COUNT];
3199 int i, j, k, l, m;
3200
3201 sna_static_stream_init(&general);
3202
3203 /* Zero pad the start. If you see an offset of 0x0 in the batchbuffer
3204 * dumps, you know it points to zero.
3205 */
3206 null_create(&general);
3207
3208 sf = sna_static_stream_compile_sf(sna, &general, brw_sf_kernel__mask);
3209 for (m = 0; m < KERNEL_COUNT; m++) {
3210 if (wm_kernels[m].size) {
3211 wm[m] = sna_static_stream_add(&general,
3212 wm_kernels[m].data,
3213 wm_kernels[m].size,
3214 64);
3215 } else {
3216 wm[m] = sna_static_stream_compile_wm(sna, &general,
3217 wm_kernels[m].data,
3218 16);
3219 }
3220 }
3221
3222 state->vs = gen4_create_vs_unit_state(&general);
3223 state->sf = gen4_create_sf_state(&general, sf);
3224
3225 wm_state = sna_static_stream_map(&general,
3226 sizeof(*wm_state) * KERNEL_COUNT *
3227 FILTER_COUNT * EXTEND_COUNT *
3228 FILTER_COUNT * EXTEND_COUNT,
3229 64);
3230 state->wm = sna_static_stream_offsetof(&general, wm_state);
3231 for (i = 0; i < FILTER_COUNT; i++) {
3232 for (j = 0; j < EXTEND_COUNT; j++) {
3233 for (k = 0; k < FILTER_COUNT; k++) {
3234 for (l = 0; l < EXTEND_COUNT; l++) {
3235 uint32_t sampler_state;
3236
3237 sampler_state =
3238 gen4_create_sampler_state(&general,
3239 i, j,
3240 k, l);
3241
3242 for (m = 0; m < KERNEL_COUNT; m++) {
3243 gen4_init_wm_state(&wm_state->state,
3244 sna->kgem.gen,
3245 wm_kernels[m].has_mask,
3246 wm[m], sampler_state);
3247 wm_state++;
3248 }
3249 }
3250 }
3251 }
3252 }
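/* wm_state is filled as a dense 5-D table; with the loop nesting
 * above, and assuming the runtime lookup mirrors this ordering, an
 * entry lives at index
 *
 *   ((((i*EXTEND_COUNT + j)*FILTER_COUNT + k)*EXTEND_COUNT + l)
 *       * KERNEL_COUNT + m)
 */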
3253
3254 state->cc = gen4_create_cc_unit_state(&general);
3255
3256 state->general_bo = sna_static_stream_fini(sna, &general);
3257 return state->general_bo != NULL;
3258 }
3259
3260 const char *gen4_render_init(struct sna *sna, const char *backend)
3261 {
3262 if (!gen4_render_setup(sna))
3263 return backend;
3264
3265 sna->kgem.retire = gen4_render_retire;
3266 sna->kgem.expire = gen4_render_expire;
3267
3268 #if !NO_COMPOSITE
3269 sna->render.composite = gen4_render_composite;
3270 sna->render.prefer_gpu |= PREFER_GPU_RENDER;
3271 #endif
3272 #if !NO_COMPOSITE_SPANS
3273 sna->render.check_composite_spans = gen4_check_composite_spans;
3274 sna->render.composite_spans = gen4_render_composite_spans;
3275 if (0)
3276 sna->render.prefer_gpu |= PREFER_GPU_SPANS;
3277 #endif
3278
3279 #if !NO_VIDEO
3280 sna->render.video = gen4_render_video;
3281 #endif
3282
3283 #if !NO_COPY_BOXES
3284 sna->render.copy_boxes = gen4_render_copy_boxes;
3285 #endif
3286 #if !NO_COPY
3287 sna->render.copy = gen4_render_copy;
3288 #endif
3289
3290 #if !NO_FILL_BOXES
3291 sna->render.fill_boxes = gen4_render_fill_boxes;
3292 #endif
3293 #if !NO_FILL
3294 sna->render.fill = gen4_render_fill;
3295 #endif
3296 #if !NO_FILL_ONE
3297 sna->render.fill_one = gen4_render_fill_one;
3298 #endif
3299
3300 sna->render.flush = gen4_render_flush;
3301 sna->render.reset = gen4_render_reset;
3302 sna->render.fini = gen4_render_fini;
3303
3304 sna->render.max_3d_size = GEN4_MAX_3D_SIZE;
3305 sna->render.max_3d_pitch = 1 << 18;
3306 return sna->kgem.gen >= 045 ? "Eaglelake (gen4.5)" : "Broadwater (gen4)";
3307 }
3308