/*
 * Copyright © 2006,2008,2011 Intel Corporation
 * Copyright © 2007 Red Hat, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Wang Zhenyu <zhenyu.z.wang@sna.com>
 *    Eric Anholt <eric@anholt.net>
 *    Carl Worth <cworth@redhat.com>
 *    Keith Packard <keithp@keithp.com>
 *    Chris Wilson <chris@chris-wilson.co.uk>
 *
 */

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include "sna.h"
#include "sna_reg.h"
#include "sna_render.h"
#include "sna_render_inline.h"
#include "sna_video.h"

#include "brw/brw.h"
#include "gen5_render.h"
#include "gen4_common.h"
#include "gen4_source.h"
#include "gen4_vertex.h"

#define NO_COMPOSITE 0
#define NO_COMPOSITE_SPANS 0

#define PREFER_BLT_FILL 1

#define DBG_NO_STATE_CACHE 0
#define DBG_NO_SURFACE_CACHE 0

#define ALWAYS_FLUSH 0

#define MAX_3D_SIZE 8192

#define GEN5_GRF_BLOCKS(nreg)    ((nreg + 15) / 16 - 1)

/* Set up a default static partitioning of the URB, which is supposed to
 * allow anything we would want to do, at potentially lower performance.
 */
#define URB_CS_ENTRY_SIZE     1
#define URB_CS_ENTRIES	      0

#define URB_VS_ENTRY_SIZE     1
#define URB_VS_ENTRIES	      256 /* minimum of 8 */

#define URB_GS_ENTRY_SIZE     0
#define URB_GS_ENTRIES	      0

#define URB_CLIP_ENTRY_SIZE   0
#define URB_CLIP_ENTRIES      0

#define URB_SF_ENTRY_SIZE     2
#define URB_SF_ENTRIES	      64
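
/*
 * A quick sanity check of the partition above (a sketch, not authoritative):
 * gen5_emit_urb() lays the sections out back to back, so the fences land at
 *
 *    VS:   [0, 256)   256 entries of size 1
 *    GS:   [256, 256) disabled
 *    CLIP: [256, 256) disabled
 *    SF:   [256, 384) 64 entries of size 2
 *    CS:   [384, 384) disabled
 *
 * i.e. the VS fence is written as 256, the SF fence as 384, and the unused
 * stages collapse onto their neighbours.
 */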

/*
 * This program computes dA/dx and dA/dy for the texture coordinates along
 * with the base texture coordinate. It was extracted from the Mesa driver.
 */

#define SF_KERNEL_NUM_GRF  16
#define SF_MAX_THREADS	   48

#define PS_KERNEL_NUM_GRF   32
#define PS_MAX_THREADS	    72

static const uint32_t ps_kernel_packed_bt601_static[][4] = {
#include "exa_wm_xy.g5b"
#include "exa_wm_src_affine.g5b"
#include "exa_wm_src_sample_argb.g5b"
#include "exa_wm_yuv_rgb_bt601.g5b"
#include "exa_wm_write.g5b"
};

static const uint32_t ps_kernel_planar_bt601_static[][4] = {
#include "exa_wm_xy.g5b"
#include "exa_wm_src_affine.g5b"
#include "exa_wm_src_sample_planar.g5b"
#include "exa_wm_yuv_rgb_bt601.g5b"
#include "exa_wm_write.g5b"
};

static const uint32_t ps_kernel_nv12_bt601_static[][4] = {
#include "exa_wm_xy.g5b"
#include "exa_wm_src_affine.g5b"
#include "exa_wm_src_sample_nv12.g5b"
#include "exa_wm_yuv_rgb_bt601.g5b"
#include "exa_wm_write.g5b"
};

static const uint32_t ps_kernel_packed_bt709_static[][4] = {
#include "exa_wm_xy.g5b"
#include "exa_wm_src_affine.g5b"
#include "exa_wm_src_sample_argb.g5b"
#include "exa_wm_yuv_rgb_bt709.g5b"
#include "exa_wm_write.g5b"
};

static const uint32_t ps_kernel_planar_bt709_static[][4] = {
#include "exa_wm_xy.g5b"
#include "exa_wm_src_affine.g5b"
#include "exa_wm_src_sample_planar.g5b"
#include "exa_wm_yuv_rgb_bt709.g5b"
#include "exa_wm_write.g5b"
};

static const uint32_t ps_kernel_nv12_bt709_static[][4] = {
#include "exa_wm_xy.g5b"
#include "exa_wm_src_affine.g5b"
#include "exa_wm_src_sample_nv12.g5b"
#include "exa_wm_yuv_rgb_bt709.g5b"
#include "exa_wm_write.g5b"
};

#define NOKERNEL(kernel_enum, func, masked) \
    [kernel_enum] = {func, 0, masked}
#define KERNEL(kernel_enum, kernel, masked) \
    [kernel_enum] = {&kernel, sizeof(kernel), masked}
static const struct wm_kernel_info {
	const void *data;
	unsigned int size;
	bool has_mask;
} wm_kernels[] = {
	NOKERNEL(WM_KERNEL, brw_wm_kernel__affine, false),
	NOKERNEL(WM_KERNEL_P, brw_wm_kernel__projective, false),

	NOKERNEL(WM_KERNEL_MASK, brw_wm_kernel__affine_mask, true),
	NOKERNEL(WM_KERNEL_MASK_P, brw_wm_kernel__projective_mask, true),

	NOKERNEL(WM_KERNEL_MASKCA, brw_wm_kernel__affine_mask_ca, true),
	NOKERNEL(WM_KERNEL_MASKCA_P, brw_wm_kernel__projective_mask_ca, true),

	NOKERNEL(WM_KERNEL_MASKSA, brw_wm_kernel__affine_mask_sa, true),
	NOKERNEL(WM_KERNEL_MASKSA_P, brw_wm_kernel__projective_mask_sa, true),

	NOKERNEL(WM_KERNEL_OPACITY, brw_wm_kernel__affine_opacity, true),
	NOKERNEL(WM_KERNEL_OPACITY_P, brw_wm_kernel__projective_opacity, true),

	KERNEL(WM_KERNEL_VIDEO_PLANAR_BT601, ps_kernel_planar_bt601_static, false),
	KERNEL(WM_KERNEL_VIDEO_NV12_BT601, ps_kernel_nv12_bt601_static, false),
	KERNEL(WM_KERNEL_VIDEO_PACKED_BT601, ps_kernel_packed_bt601_static, false),

	KERNEL(WM_KERNEL_VIDEO_PLANAR_BT709, ps_kernel_planar_bt709_static, false),
	KERNEL(WM_KERNEL_VIDEO_NV12_BT709, ps_kernel_nv12_bt709_static, false),
	KERNEL(WM_KERNEL_VIDEO_PACKED_BT709, ps_kernel_packed_bt709_static, false),
};
#undef KERNEL

static const struct blendinfo {
	bool src_alpha;
	uint32_t src_blend;
	uint32_t dst_blend;
} gen5_blend_op[] = {
	/* Clear */	{0, GEN5_BLENDFACTOR_ZERO, GEN5_BLENDFACTOR_ZERO},
	/* Src */	{0, GEN5_BLENDFACTOR_ONE, GEN5_BLENDFACTOR_ZERO},
	/* Dst */	{0, GEN5_BLENDFACTOR_ZERO, GEN5_BLENDFACTOR_ONE},
	/* Over */	{1, GEN5_BLENDFACTOR_ONE, GEN5_BLENDFACTOR_INV_SRC_ALPHA},
	/* OverReverse */ {0, GEN5_BLENDFACTOR_INV_DST_ALPHA, GEN5_BLENDFACTOR_ONE},
	/* In */	{0, GEN5_BLENDFACTOR_DST_ALPHA, GEN5_BLENDFACTOR_ZERO},
	/* InReverse */	{1, GEN5_BLENDFACTOR_ZERO, GEN5_BLENDFACTOR_SRC_ALPHA},
	/* Out */	{0, GEN5_BLENDFACTOR_INV_DST_ALPHA, GEN5_BLENDFACTOR_ZERO},
	/* OutReverse */ {1, GEN5_BLENDFACTOR_ZERO, GEN5_BLENDFACTOR_INV_SRC_ALPHA},
	/* Atop */	{1, GEN5_BLENDFACTOR_DST_ALPHA, GEN5_BLENDFACTOR_INV_SRC_ALPHA},
	/* AtopReverse */ {1, GEN5_BLENDFACTOR_INV_DST_ALPHA, GEN5_BLENDFACTOR_SRC_ALPHA},
	/* Xor */	{1, GEN5_BLENDFACTOR_INV_DST_ALPHA, GEN5_BLENDFACTOR_INV_SRC_ALPHA},
	/* Add */	{0, GEN5_BLENDFACTOR_ONE, GEN5_BLENDFACTOR_ONE},
};

/**
 * Highest-valued BLENDFACTOR used in gen5_blend_op.
 *
 * This leaves out GEN5_BLENDFACTOR_INV_DST_COLOR,
 * GEN5_BLENDFACTOR_INV_CONST_{COLOR,ALPHA},
 * GEN5_BLENDFACTOR_INV_SRC1_{COLOR,ALPHA}
 */
#define GEN5_BLENDFACTOR_COUNT (GEN5_BLENDFACTOR_INV_DST_ALPHA + 1)

#define BLEND_OFFSET(s, d) \
	(((s) * GEN5_BLENDFACTOR_COUNT + (d)) * 64)
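
/*
 * Worked example (illustrative only): each pre-computed color-calculator
 * state block is 64 bytes, so PictOpOver, with src=ONE and
 * dst=INV_SRC_ALPHA from gen5_blend_op[] above, is found at
 * BLEND_OFFSET(GEN5_BLENDFACTOR_ONE, GEN5_BLENDFACTOR_INV_SRC_ALPHA)
 * bytes into the cc state array (see gen5_emit_pipelined_pointers()).
 */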

#define SAMPLER_OFFSET(sf, se, mf, me, k) \
	((((((sf) * EXTEND_COUNT + (se)) * FILTER_COUNT + (mf)) * EXTEND_COUNT + (me)) * KERNEL_COUNT + (k)) * 64)
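
/*
 * SAMPLER_OFFSET flattens the tuple (src filter, src extend, mask filter,
 * mask extend, kernel) into a byte offset, again 64 bytes per entry; e.g.
 * a bilinear/pad source sampled with a nearest/none mask and kernel k is
 * at SAMPLER_OFFSET(SAMPLER_FILTER_BILINEAR, SAMPLER_EXTEND_PAD,
 * SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE, k) within the wm state.
 * (A sketch of the indexing scheme, not authoritative.)
 */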

static bool
gen5_emit_pipelined_pointers(struct sna *sna,
			     const struct sna_composite_op *op,
			     int blend, int kernel);

#define OUT_BATCH(v) batch_emit(sna, v)
#define OUT_VERTEX(x,y) vertex_emit_2s(sna, x,y)
#define OUT_VERTEX_F(v) vertex_emit(sna, v)

static inline bool too_large(int width, int height)
{
	return width > MAX_3D_SIZE || height > MAX_3D_SIZE;
}

static int
gen5_choose_composite_kernel(int op, bool has_mask, bool is_ca, bool is_affine)
{
	int base;

	if (has_mask) {
		if (is_ca) {
			if (gen5_blend_op[op].src_alpha)
				base = WM_KERNEL_MASKSA;
			else
				base = WM_KERNEL_MASKCA;
		} else
			base = WM_KERNEL_MASK;
	} else
		base = WM_KERNEL;

	return base + !is_affine;
}
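
/*
 * The "+ !is_affine" above relies on the kernel enum pairing each affine
 * kernel with its projective twin (see the wm_kernels[] ordering): e.g. a
 * component-alpha mask whose blend reads source alpha selects
 * WM_KERNEL_MASKSA, and a projective transform bumps it to
 * WM_KERNEL_MASKSA_P.
 */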

static bool gen5_magic_ca_pass(struct sna *sna,
			       const struct sna_composite_op *op)
{
	struct gen5_render_state *state = &sna->render_state.gen5;

	if (!op->need_magic_ca_pass)
		return false;

	assert(sna->render.vertex_index > sna->render.vertex_start);

	DBG(("%s: CA fixup\n", __FUNCTION__));
	assert(op->mask.bo != NULL);
	assert(op->has_component_alpha);

	gen5_emit_pipelined_pointers
		(sna, op, PictOpAdd,
		 gen5_choose_composite_kernel(PictOpAdd,
					      true, true, op->is_affine));

	OUT_BATCH(GEN5_3DPRIMITIVE |
		  GEN5_3DPRIMITIVE_VERTEX_SEQUENTIAL |
		  (_3DPRIM_RECTLIST << GEN5_3DPRIMITIVE_TOPOLOGY_SHIFT) |
		  (0 << 9) |
		  4);
	OUT_BATCH(sna->render.vertex_index - sna->render.vertex_start);
	OUT_BATCH(sna->render.vertex_start);
	OUT_BATCH(1);	/* single instance */
	OUT_BATCH(0);	/* start instance location */
	OUT_BATCH(0);	/* index buffer offset, ignored */

	state->last_primitive = sna->kgem.nbatch;
	return true;
}

static uint32_t gen5_get_blend(int op,
			       bool has_component_alpha,
			       uint32_t dst_format)
{
	uint32_t src, dst;

	src = gen5_blend_op[op].src_blend;
	dst = gen5_blend_op[op].dst_blend;

	/* If there's no dst alpha channel, adjust the blend op so that we'll treat
	 * it as always 1.
	 */
	if (PICT_FORMAT_A(dst_format) == 0) {
		if (src == GEN5_BLENDFACTOR_DST_ALPHA)
			src = GEN5_BLENDFACTOR_ONE;
		else if (src == GEN5_BLENDFACTOR_INV_DST_ALPHA)
			src = GEN5_BLENDFACTOR_ZERO;
	}

	/* If the source alpha is being used, then we should only be in a
	 * case where the source blend factor is 0, and the source blend
	 * value is the mask channels multiplied by the source picture's alpha.
	 */
	if (has_component_alpha && gen5_blend_op[op].src_alpha) {
		if (dst == GEN5_BLENDFACTOR_SRC_ALPHA)
			dst = GEN5_BLENDFACTOR_SRC_COLOR;
		else if (dst == GEN5_BLENDFACTOR_INV_SRC_ALPHA)
			dst = GEN5_BLENDFACTOR_INV_SRC_COLOR;
	}

	DBG(("blend op=%d, dst=%x [A=%d] => src=%d, dst=%d => offset=%x\n",
	     op, dst_format, PICT_FORMAT_A(dst_format),
	     src, dst, BLEND_OFFSET(src, dst)));
	return BLEND_OFFSET(src, dst);
}
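
/*
 * For example (illustrative only): PictOpIn is {0, DST_ALPHA, ZERO} in
 * gen5_blend_op[]; composited onto r5g6b5, which has no alpha channel,
 * the first fixup above rewrites src to ONE, matching an implicit
 * destination alpha of 1.0.
 */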

static uint32_t gen5_get_card_format(PictFormat format)
{
	switch (format) {
	default:
		return -1;
	case PICT_a8r8g8b8:
		return GEN5_SURFACEFORMAT_B8G8R8A8_UNORM;
	case PICT_x8r8g8b8:
		return GEN5_SURFACEFORMAT_B8G8R8X8_UNORM;
	case PICT_a8b8g8r8:
		return GEN5_SURFACEFORMAT_R8G8B8A8_UNORM;
	case PICT_x8b8g8r8:
		return GEN5_SURFACEFORMAT_R8G8B8X8_UNORM;
#if XORG_VERSION_CURRENT >= XORG_VERSION_NUMERIC(1,6,99,900,0)
	case PICT_a2r10g10b10:
		return GEN5_SURFACEFORMAT_B10G10R10A2_UNORM;
	case PICT_x2r10g10b10:
		return GEN5_SURFACEFORMAT_B10G10R10X2_UNORM;
#endif
	case PICT_r8g8b8:
		return GEN5_SURFACEFORMAT_R8G8B8_UNORM;
	case PICT_r5g6b5:
		return GEN5_SURFACEFORMAT_B5G6R5_UNORM;
	case PICT_a1r5g5b5:
		return GEN5_SURFACEFORMAT_B5G5R5A1_UNORM;
	case PICT_a8:
		return GEN5_SURFACEFORMAT_A8_UNORM;
	case PICT_a4r4g4b4:
		return GEN5_SURFACEFORMAT_B4G4R4A4_UNORM;
	}
}

static uint32_t gen5_get_dest_format(PictFormat format)
{
	switch (format) {
	default:
		return -1;
	case PICT_a8r8g8b8:
	case PICT_x8r8g8b8:
		return GEN5_SURFACEFORMAT_B8G8R8A8_UNORM;
	case PICT_a8b8g8r8:
	case PICT_x8b8g8r8:
		return GEN5_SURFACEFORMAT_R8G8B8A8_UNORM;
#if XORG_VERSION_CURRENT >= XORG_VERSION_NUMERIC(1,6,99,900,0)
	case PICT_a2r10g10b10:
	case PICT_x2r10g10b10:
		return GEN5_SURFACEFORMAT_B10G10R10A2_UNORM;
#endif
	case PICT_r5g6b5:
		return GEN5_SURFACEFORMAT_B5G6R5_UNORM;
	case PICT_x1r5g5b5:
	case PICT_a1r5g5b5:
		return GEN5_SURFACEFORMAT_B5G5R5A1_UNORM;
	case PICT_a8:
		return GEN5_SURFACEFORMAT_A8_UNORM;
	case PICT_a4r4g4b4:
	case PICT_x4r4g4b4:
		return GEN5_SURFACEFORMAT_B4G4R4A4_UNORM;
	}
}

static bool gen5_check_dst_format(PictFormat format)
{
	if (gen5_get_dest_format(format) != -1)
		return true;

	DBG(("%s: unhandled format: %x\n", __FUNCTION__, (int)format));
	return false;
}

static bool gen5_check_format(uint32_t format)
{
	if (gen5_get_card_format(format) != -1)
		return true;

	DBG(("%s: unhandled format: %x\n", __FUNCTION__, (int)format));
	return false;
}

typedef struct gen5_surface_state_padded {
	struct gen5_surface_state state;
	char pad[32 - sizeof(struct gen5_surface_state)];
} gen5_surface_state_padded;

static void null_create(struct sna_static_stream *stream)
{
	/* A bunch of zeros useful for legacy border color and depth-stencil */
	sna_static_stream_map(stream, 64, 64);
}

static void
sampler_state_init(struct gen5_sampler_state *sampler_state,
		   sampler_filter_t filter,
		   sampler_extend_t extend)
{
	sampler_state->ss0.lod_preclamp = 1;	/* GL mode */

	/* We use the legacy mode to get the semantics specified by
	 * the Render extension. */
	sampler_state->ss0.border_color_mode = GEN5_BORDER_COLOR_MODE_LEGACY;

	switch (filter) {
	default:
	case SAMPLER_FILTER_NEAREST:
		sampler_state->ss0.min_filter = GEN5_MAPFILTER_NEAREST;
		sampler_state->ss0.mag_filter = GEN5_MAPFILTER_NEAREST;
		break;
	case SAMPLER_FILTER_BILINEAR:
		sampler_state->ss0.min_filter = GEN5_MAPFILTER_LINEAR;
		sampler_state->ss0.mag_filter = GEN5_MAPFILTER_LINEAR;
		break;
	}

	switch (extend) {
	default:
	case SAMPLER_EXTEND_NONE:
		sampler_state->ss1.r_wrap_mode = GEN5_TEXCOORDMODE_CLAMP_BORDER;
		sampler_state->ss1.s_wrap_mode = GEN5_TEXCOORDMODE_CLAMP_BORDER;
		sampler_state->ss1.t_wrap_mode = GEN5_TEXCOORDMODE_CLAMP_BORDER;
		break;
	case SAMPLER_EXTEND_REPEAT:
		sampler_state->ss1.r_wrap_mode = GEN5_TEXCOORDMODE_WRAP;
		sampler_state->ss1.s_wrap_mode = GEN5_TEXCOORDMODE_WRAP;
		sampler_state->ss1.t_wrap_mode = GEN5_TEXCOORDMODE_WRAP;
		break;
	case SAMPLER_EXTEND_PAD:
		sampler_state->ss1.r_wrap_mode = GEN5_TEXCOORDMODE_CLAMP;
		sampler_state->ss1.s_wrap_mode = GEN5_TEXCOORDMODE_CLAMP;
		sampler_state->ss1.t_wrap_mode = GEN5_TEXCOORDMODE_CLAMP;
		break;
	case SAMPLER_EXTEND_REFLECT:
		sampler_state->ss1.r_wrap_mode = GEN5_TEXCOORDMODE_MIRROR;
		sampler_state->ss1.s_wrap_mode = GEN5_TEXCOORDMODE_MIRROR;
		sampler_state->ss1.t_wrap_mode = GEN5_TEXCOORDMODE_MIRROR;
		break;
	}
}

static uint32_t gen5_filter(uint32_t filter)
{
	switch (filter) {
	default:
		assert(0);
	case PictFilterNearest:
		return SAMPLER_FILTER_NEAREST;
	case PictFilterBilinear:
		return SAMPLER_FILTER_BILINEAR;
	}
}

static uint32_t gen5_check_filter(PicturePtr picture)
{
	switch (picture->filter) {
	case PictFilterNearest:
	case PictFilterBilinear:
		return true;
	default:
		DBG(("%s: unknown filter: %x\n", __FUNCTION__, picture->filter));
		return false;
	}
}

static uint32_t gen5_repeat(uint32_t repeat)
{
	switch (repeat) {
	default:
		assert(0);
	case RepeatNone:
		return SAMPLER_EXTEND_NONE;
	case RepeatNormal:
		return SAMPLER_EXTEND_REPEAT;
	case RepeatPad:
		return SAMPLER_EXTEND_PAD;
	case RepeatReflect:
		return SAMPLER_EXTEND_REFLECT;
	}
}

static bool gen5_check_repeat(PicturePtr picture)
{
	if (!picture->repeat)
		return true;

	switch (picture->repeatType) {
	case RepeatNone:
	case RepeatNormal:
	case RepeatPad:
	case RepeatReflect:
		return true;
	default:
		DBG(("%s: unknown repeat: %x\n",
		     __FUNCTION__, picture->repeatType));
		return false;
	}
}

static uint32_t
gen5_tiling_bits(uint32_t tiling)
{
	switch (tiling) {
	default: assert(0);
	case I915_TILING_NONE: return 0;
	case I915_TILING_X: return GEN5_SURFACE_TILED;
	case I915_TILING_Y: return GEN5_SURFACE_TILED | GEN5_SURFACE_TILED_Y;
	}
}
/**
 * Sets up the common fields of a surface state buffer for the given
 * picture.
 */
static uint32_t
gen5_bind_bo(struct sna *sna,
	     struct kgem_bo *bo,
	     uint32_t width,
	     uint32_t height,
	     uint32_t format,
	     bool is_dst)
{
	uint32_t domains;
	uint16_t offset;
	uint32_t *ss;

	/* After the first bind, we manage the cache domains within the batch */
	if (!DBG_NO_SURFACE_CACHE) {
		offset = kgem_bo_get_binding(bo, format | is_dst << 31);
		if (offset) {
			if (is_dst)
				kgem_bo_mark_dirty(bo);
			assert(offset >= sna->kgem.surface);
			return offset * sizeof(uint32_t);
		}
	}

	offset = sna->kgem.surface -=
		sizeof(struct gen5_surface_state_padded) / sizeof(uint32_t);
	ss = sna->kgem.batch + offset;

	ss[0] = (GEN5_SURFACE_2D << GEN5_SURFACE_TYPE_SHIFT |
		 GEN5_SURFACE_BLEND_ENABLED |
		 format << GEN5_SURFACE_FORMAT_SHIFT);

	if (is_dst) {
		ss[0] |= GEN5_SURFACE_RC_READ_WRITE;
		domains = I915_GEM_DOMAIN_RENDER << 16 | I915_GEM_DOMAIN_RENDER;
	} else
		domains = I915_GEM_DOMAIN_SAMPLER << 16;
	ss[1] = kgem_add_reloc(&sna->kgem, offset + 1, bo, domains, 0);

	ss[2] = ((width - 1)  << GEN5_SURFACE_WIDTH_SHIFT |
		 (height - 1) << GEN5_SURFACE_HEIGHT_SHIFT);
	ss[3] = (gen5_tiling_bits(bo->tiling) |
		 (bo->pitch - 1) << GEN5_SURFACE_PITCH_SHIFT);
	ss[4] = 0;
	ss[5] = 0;

	kgem_bo_set_binding(bo, format | is_dst << 31, offset);

	DBG(("[%x] bind bo(handle=%d, addr=%d), format=%d, width=%d, height=%d, pitch=%d, tiling=%d -> %s\n",
	     offset, bo->handle, ss[1],
	     format, width, height, bo->pitch, bo->tiling,
	     domains & 0xffff ? "render" : "sampler"));

	return offset * sizeof(uint32_t);
}

static void gen5_emit_vertex_buffer(struct sna *sna,
				    const struct sna_composite_op *op)
{
	int id = op->u.gen5.ve_id;

	assert((sna->render.vb_id & (1 << id)) == 0);

	OUT_BATCH(GEN5_3DSTATE_VERTEX_BUFFERS | 3);
	OUT_BATCH(id << VB0_BUFFER_INDEX_SHIFT | VB0_VERTEXDATA |
		  (4*op->floats_per_vertex << VB0_BUFFER_PITCH_SHIFT));
	assert(sna->render.nvertex_reloc < ARRAY_SIZE(sna->render.vertex_reloc));
	sna->render.vertex_reloc[sna->render.nvertex_reloc++] = sna->kgem.nbatch;
	OUT_BATCH(0);
	OUT_BATCH(~0); /* max address: disabled */
	OUT_BATCH(0);

	sna->render.vb_id |= 1 << id;
}

static void gen5_emit_primitive(struct sna *sna)
{
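	/* If this 3DPRIMITIVE would immediately follow the previous one in
	 * the batch, extend that packet instead of emitting a new one: its
	 * vertex-count dword sits 5 dwords back from the recorded end (a
	 * sketch of the 6-dword packet: header, count, start vertex,
	 * instance count, start instance, index offset).
	 */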
	if (sna->kgem.nbatch == sna->render_state.gen5.last_primitive) {
		sna->render.vertex_offset = sna->kgem.nbatch - 5;
		return;
	}

	OUT_BATCH(GEN5_3DPRIMITIVE |
		  GEN5_3DPRIMITIVE_VERTEX_SEQUENTIAL |
		  (_3DPRIM_RECTLIST << GEN5_3DPRIMITIVE_TOPOLOGY_SHIFT) |
		  (0 << 9) |
		  4);
	sna->render.vertex_offset = sna->kgem.nbatch;
	OUT_BATCH(0);	/* vertex count, to be filled in later */
	OUT_BATCH(sna->render.vertex_index);
	OUT_BATCH(1);	/* single instance */
	OUT_BATCH(0);	/* start instance location */
	OUT_BATCH(0);	/* index buffer offset, ignored */
	sna->render.vertex_start = sna->render.vertex_index;

	sna->render_state.gen5.last_primitive = sna->kgem.nbatch;
}

static bool gen5_rectangle_begin(struct sna *sna,
				 const struct sna_composite_op *op)
{
	int id = op->u.gen5.ve_id;
	int ndwords;

	if (sna_vertex_wait__locked(&sna->render) && sna->render.vertex_offset)
		return true;

	ndwords = op->need_magic_ca_pass ? 20 : 6;
	if ((sna->render.vb_id & (1 << id)) == 0)
		ndwords += 5;

	if (!kgem_check_batch(&sna->kgem, ndwords))
		return false;

	if ((sna->render.vb_id & (1 << id)) == 0)
		gen5_emit_vertex_buffer(sna, op);
	if (sna->render.vertex_offset == 0)
		gen5_emit_primitive(sna);

	return true;
}

static int gen5_get_rectangles__flush(struct sna *sna,
				      const struct sna_composite_op *op)
{
	/* After lock contention, recheck for space before discarding the new vbo */
	if (sna_vertex_wait__locked(&sna->render)) {
		int rem = vertex_space(sna);
		if (rem > op->floats_per_rect)
			return rem;
	}

	if (!kgem_check_batch(&sna->kgem, op->need_magic_ca_pass ? 40 : 6))
		return 0;
	if (!kgem_check_reloc_and_exec(&sna->kgem, 2))
		return 0;

	if (sna->render.vertex_offset) {
		gen4_vertex_flush(sna);
		if (gen5_magic_ca_pass(sna, op))
			gen5_emit_pipelined_pointers(sna, op, op->op,
						     op->u.gen5.wm_kernel);
	}

	return gen4_vertex_finish(sna);
}

inline static int gen5_get_rectangles(struct sna *sna,
				      const struct sna_composite_op *op,
				      int want,
				      void (*emit_state)(struct sna *sna,
							 const struct sna_composite_op *op))
{
	int rem;

	assert(want);

start:
	rem = vertex_space(sna);
	if (unlikely(rem < op->floats_per_rect)) {
		DBG(("flushing vbo for %s: %d < %d\n",
		     __FUNCTION__, rem, op->floats_per_rect));
		rem = gen5_get_rectangles__flush(sna, op);
		if (unlikely(rem == 0))
			goto flush;
	}

	if (unlikely(sna->render.vertex_offset == 0)) {
		if (!gen5_rectangle_begin(sna, op))
			goto flush;
		else
			goto start;
	}

	assert(rem <= vertex_space(sna));
	assert(op->floats_per_rect <= rem);
	if (want > 1 && want * op->floats_per_rect > rem)
		want = rem / op->floats_per_rect;

	sna->render.vertex_index += 3*want;
	return want;

flush:
	if (sna->render.vertex_offset) {
		gen4_vertex_flush(sna);
		gen5_magic_ca_pass(sna, op);
	}
	sna_vertex_wait__locked(&sna->render);
	_kgem_submit(&sna->kgem);
	emit_state(sna, op);
	goto start;
}

static uint32_t *
gen5_composite_get_binding_table(struct sna *sna,
				 uint16_t *offset)
{
	sna->kgem.surface -=
		sizeof(struct gen5_surface_state_padded) / sizeof(uint32_t);

	DBG(("%s(%x)\n", __FUNCTION__, 4*sna->kgem.surface));

	/* Clear all surplus entries to zero in case of prefetch */
	*offset = sna->kgem.surface;
	return memset(sna->kgem.batch + sna->kgem.surface,
		      0, sizeof(struct gen5_surface_state_padded));
}

static void
gen5_emit_urb(struct sna *sna)
{
	int urb_vs_start, urb_vs_size;
	int urb_gs_start, urb_gs_size;
	int urb_clip_start, urb_clip_size;
	int urb_sf_start, urb_sf_size;
	int urb_cs_start, urb_cs_size;

	urb_vs_start = 0;
	urb_vs_size = URB_VS_ENTRIES * URB_VS_ENTRY_SIZE;
	urb_gs_start = urb_vs_start + urb_vs_size;
	urb_gs_size = URB_GS_ENTRIES * URB_GS_ENTRY_SIZE;
	urb_clip_start = urb_gs_start + urb_gs_size;
	urb_clip_size = URB_CLIP_ENTRIES * URB_CLIP_ENTRY_SIZE;
	urb_sf_start = urb_clip_start + urb_clip_size;
	urb_sf_size = URB_SF_ENTRIES * URB_SF_ENTRY_SIZE;
	urb_cs_start = urb_sf_start + urb_sf_size;
	urb_cs_size = URB_CS_ENTRIES * URB_CS_ENTRY_SIZE;

	OUT_BATCH(GEN5_URB_FENCE |
		  UF0_CS_REALLOC |
		  UF0_SF_REALLOC |
		  UF0_CLIP_REALLOC |
		  UF0_GS_REALLOC |
		  UF0_VS_REALLOC |
		  1);
	OUT_BATCH(((urb_clip_start + urb_clip_size) << UF1_CLIP_FENCE_SHIFT) |
		  ((urb_gs_start + urb_gs_size) << UF1_GS_FENCE_SHIFT) |
		  ((urb_vs_start + urb_vs_size) << UF1_VS_FENCE_SHIFT));
	OUT_BATCH(((urb_cs_start + urb_cs_size) << UF2_CS_FENCE_SHIFT) |
		  ((urb_sf_start + urb_sf_size) << UF2_SF_FENCE_SHIFT));

	/* Constant buffer state */
	OUT_BATCH(GEN5_CS_URB_STATE | 0);
	OUT_BATCH((URB_CS_ENTRY_SIZE - 1) << 4 | URB_CS_ENTRIES << 0);
}

static void
gen5_emit_state_base_address(struct sna *sna)
{
	assert(sna->render_state.gen5.general_bo->proxy == NULL);
	OUT_BATCH(GEN5_STATE_BASE_ADDRESS | 6);
	OUT_BATCH(kgem_add_reloc(&sna->kgem, /* general */
				 sna->kgem.nbatch,
				 sna->render_state.gen5.general_bo,
				 I915_GEM_DOMAIN_INSTRUCTION << 16,
				 BASE_ADDRESS_MODIFY));
	OUT_BATCH(kgem_add_reloc(&sna->kgem, /* surface */
				 sna->kgem.nbatch,
				 NULL,
				 I915_GEM_DOMAIN_INSTRUCTION << 16,
				 BASE_ADDRESS_MODIFY));
	OUT_BATCH(0); /* media */
	OUT_BATCH(kgem_add_reloc(&sna->kgem, /* instruction */
				 sna->kgem.nbatch,
				 sna->render_state.gen5.general_bo,
				 I915_GEM_DOMAIN_INSTRUCTION << 16,
				 BASE_ADDRESS_MODIFY));

	/* upper bounds, all disabled */
	OUT_BATCH(BASE_ADDRESS_MODIFY);
	OUT_BATCH(0);
	OUT_BATCH(BASE_ADDRESS_MODIFY);
}

static void
gen5_emit_invariant(struct sna *sna)
{
	/* Ironlake errata workaround: Before disabling the clipper,
	 * you have to MI_FLUSH to get the pipeline idle.
	 *
	 * However, the kernel flushes the pipeline between batches,
	 * so we should be safe....
	 *
	 * On the other hand, after using BLT we must use a non-pipelined
	 * operation...
	 */
	if (sna->kgem.nreloc)
		OUT_BATCH(MI_FLUSH | MI_INHIBIT_RENDER_CACHE_FLUSH);

	OUT_BATCH(GEN5_PIPELINE_SELECT | PIPELINE_SELECT_3D);

	gen5_emit_state_base_address(sna);

	sna->render_state.gen5.needs_invariant = false;
}

static void
gen5_get_batch(struct sna *sna, const struct sna_composite_op *op)
{
	kgem_set_mode(&sna->kgem, KGEM_RENDER, op->dst.bo);

	if (!kgem_check_batch_with_surfaces(&sna->kgem, 150, 4)) {
		DBG(("%s: flushing batch: %d < %d+%d\n",
		     __FUNCTION__, sna->kgem.surface - sna->kgem.nbatch,
		     150, 4*8));
		kgem_submit(&sna->kgem);
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	if (sna->render_state.gen5.needs_invariant)
		gen5_emit_invariant(sna);
}

static void
gen5_align_vertex(struct sna *sna, const struct sna_composite_op *op)
{
	assert(op->floats_per_rect == 3*op->floats_per_vertex);
	if (op->floats_per_vertex != sna->render_state.gen5.floats_per_vertex) {
		DBG(("aligning vertex: was %d, now %d floats per vertex\n",
		     sna->render_state.gen5.floats_per_vertex,
		     op->floats_per_vertex));
		gen4_vertex_align(sna, op);
		sna->render_state.gen5.floats_per_vertex = op->floats_per_vertex;
	}
}

static void
gen5_emit_binding_table(struct sna *sna, uint16_t offset)
{
	if (!DBG_NO_STATE_CACHE &&
	    sna->render_state.gen5.surface_table == offset)
		return;

	sna->render_state.gen5.surface_table = offset;

	/* Binding table pointers */
	OUT_BATCH(GEN5_3DSTATE_BINDING_TABLE_POINTERS | 4);
	OUT_BATCH(0);		/* vs */
	OUT_BATCH(0);		/* gs */
	OUT_BATCH(0);		/* clip */
	OUT_BATCH(0);		/* sf */
	/* Only the PS uses the binding table */
	OUT_BATCH(offset*4);
}

static bool
gen5_emit_pipelined_pointers(struct sna *sna,
			     const struct sna_composite_op *op,
			     int blend, int kernel)
{
	uint16_t sp, bp;
	uint32_t key;

	DBG(("%s: has_mask=%d, src=(%d, %d), mask=(%d, %d), kernel=%d, blend=%d, ca=%d, format=%x\n",
	     __FUNCTION__, op->u.gen5.ve_id & 2,
	     op->src.filter, op->src.repeat,
	     op->mask.filter, op->mask.repeat,
	     kernel, blend, op->has_component_alpha, (int)op->dst.format));

	sp = SAMPLER_OFFSET(op->src.filter, op->src.repeat,
			    op->mask.filter, op->mask.repeat,
			    kernel);
	bp = gen5_get_blend(blend, op->has_component_alpha, op->dst.format);

	key = sp | (uint32_t)bp << 16 | (op->mask.bo != NULL) << 31;
	DBG(("%s: sp=%d, bp=%d, key=%08x (current sp=%d, bp=%d, key=%08x)\n",
	     __FUNCTION__, sp, bp, key,
	     sna->render_state.gen5.last_pipelined_pointers & 0xffff,
	     (sna->render_state.gen5.last_pipelined_pointers >> 16) & 0x7fff,
	     sna->render_state.gen5.last_pipelined_pointers));
	if (key == sna->render_state.gen5.last_pipelined_pointers)
		return false;

	OUT_BATCH(GEN5_3DSTATE_PIPELINED_POINTERS | 5);
	OUT_BATCH(sna->render_state.gen5.vs);
	OUT_BATCH(GEN5_GS_DISABLE); /* passthrough */
	OUT_BATCH(GEN5_CLIP_DISABLE); /* passthrough */
	OUT_BATCH(sna->render_state.gen5.sf[op->mask.bo != NULL]);
	OUT_BATCH(sna->render_state.gen5.wm + sp);
	OUT_BATCH(sna->render_state.gen5.cc + bp);

	bp = (sna->render_state.gen5.last_pipelined_pointers & 0x7fff0000) != ((uint32_t)bp << 16);
	sna->render_state.gen5.last_pipelined_pointers = key;

	gen5_emit_urb(sna);

	return bp;
}

static bool
gen5_emit_drawing_rectangle(struct sna *sna, const struct sna_composite_op *op)
{
	uint32_t limit = (op->dst.height - 1) << 16 | (op->dst.width - 1);
	uint32_t offset = (uint16_t)op->dst.y << 16 | (uint16_t)op->dst.x;

	assert(!too_large(abs(op->dst.x), abs(op->dst.y)));
	assert(!too_large(op->dst.width, op->dst.height));

	if (!DBG_NO_STATE_CACHE &&
	    sna->render_state.gen5.drawrect_limit == limit &&
	    sna->render_state.gen5.drawrect_offset == offset)
		return false;

	sna->render_state.gen5.drawrect_offset = offset;
	sna->render_state.gen5.drawrect_limit = limit;

	OUT_BATCH(GEN5_3DSTATE_DRAWING_RECTANGLE | (4 - 2));
	OUT_BATCH(0x00000000);
	OUT_BATCH(limit);
	OUT_BATCH(offset);
	return true;
}

static void
gen5_emit_vertex_elements(struct sna *sna,
			  const struct sna_composite_op *op)
{
	/*
	 * vertex data in vertex buffer
	 *    position: (x, y)
	 *    texture coordinate 0: (u0, v0) if (is_affine is true) else (u0, v0, w0)
	 *    texture coordinate 1 if (has_mask is true): same as above
	 */
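	/* Sketch of the ve_id encoding, as consumed by the switches below:
	 * bits 0-1 give the source texcoord layout (0 = packed 16-bit
	 * (u, v), 1 = u, 2 = (u, v), 3 = (u, v, w) as floats) and bits 2+
	 * give the mask's; e.g. ve_id == 2 is an affine source, no mask.
	 */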
	struct gen5_render_state *render = &sna->render_state.gen5;
	int id = op->u.gen5.ve_id;
	bool has_mask = id >> 2;
	uint32_t format, dw;

	if (!DBG_NO_STATE_CACHE && render->ve_id == id)
		return;

	DBG(("%s: changing %d -> %d\n", __FUNCTION__, render->ve_id, id));
	render->ve_id = id;

	/* The VUE layout
	 *    dword 0-3: pad (0.0, 0.0, 0.0, 0.0)
	 *    dword 4-7: position (x, y, 1.0, 1.0),
	 *    dword 8-11: texture coordinate 0 (u0, v0, w0, 1.0)
	 *    dword 12-15: texture coordinate 1 (u1, v1, w1, 1.0)
	 *
	 * dword 4-15 are fetched from vertex buffer
	 */
	OUT_BATCH(GEN5_3DSTATE_VERTEX_ELEMENTS |
		((2 * (has_mask ? 4 : 3)) + 1 - 2));

	OUT_BATCH((id << VE0_VERTEX_BUFFER_INDEX_SHIFT) | VE0_VALID |
		  (GEN5_SURFACEFORMAT_R32G32B32A32_FLOAT << VE0_FORMAT_SHIFT) |
		  (0 << VE0_OFFSET_SHIFT));
	OUT_BATCH((VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_0_SHIFT) |
		  (VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT) |
		  (VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT) |
		  (VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_3_SHIFT));

	/* x,y */
	OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
		  GEN5_SURFACEFORMAT_R16G16_SSCALED << VE0_FORMAT_SHIFT |
		  0 << VE0_OFFSET_SHIFT);
	OUT_BATCH(VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT |
		  VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT |
		  VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT |
		  VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT);

	/* u0, v0, w0 */
	DBG(("%s: id=%d, first channel %d floats, offset=4b\n", __FUNCTION__,
	     id, id & 3));
	dw = VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT;
	switch (id & 3) {
	default:
		assert(0);
	case 0:
		format = GEN5_SURFACEFORMAT_R16G16_SSCALED << VE0_FORMAT_SHIFT;
		dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
		dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
		dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT;
		break;
	case 1:
		format = GEN5_SURFACEFORMAT_R32_FLOAT << VE0_FORMAT_SHIFT;
		dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
		dw |= VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT;
		dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT;
		break;
	case 2:
		format = GEN5_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT;
		dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
		dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
		dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT;
		break;
	case 3:
		format = GEN5_SURFACEFORMAT_R32G32B32_FLOAT << VE0_FORMAT_SHIFT;
		dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
		dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
		dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_2_SHIFT;
		break;
	}
	OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
		  format | 4 << VE0_OFFSET_SHIFT);
	OUT_BATCH(dw);

	/* u1, v1, w1 */
	if (has_mask) {
		unsigned offset = 4 + ((id & 3) ?: 1) * sizeof(float);
		DBG(("%s: id=%x, second channel %d floats, offset=%db\n", __FUNCTION__,
		     id, id >> 2, offset));
		dw = VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT;
		switch (id >> 2) {
		case 1:
			format = GEN5_SURFACEFORMAT_R32_FLOAT << VE0_FORMAT_SHIFT;
			dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
			dw |= VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT;
			dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT;
			break;
		default:
			assert(0);
		case 2:
			format = GEN5_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT;
			dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
			dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
			dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT;
			break;
		case 3:
			format = GEN5_SURFACEFORMAT_R32G32B32_FLOAT << VE0_FORMAT_SHIFT;
			dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
			dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
			dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_2_SHIFT;
			break;
		}
		OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
			  format | offset << VE0_OFFSET_SHIFT);
		OUT_BATCH(dw);
	}
}

inline static void
gen5_emit_pipe_flush(struct sna *sna)
{
#if 1
	OUT_BATCH(GEN5_PIPE_CONTROL |
		  GEN5_PIPE_CONTROL_WC_FLUSH |
		  (4 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
#else
	OUT_BATCH(MI_FLUSH | MI_INHIBIT_RENDER_CACHE_FLUSH);
#endif
}

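/* Note on the (offset | dirty) convention used by callers of
 * gen5_emit_state(): surface-state offsets are always even, so bit 0 is
 * borrowed to carry the destination-dirty flag; emitting a fresh drawing
 * rectangle clears it, and a set bit after a blend-state change forces
 * the pipe flush below.
 */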
static void
gen5_emit_state(struct sna *sna,
		const struct sna_composite_op *op,
		uint16_t offset)
{
	bool flush = false;

	assert(op->dst.bo->exec);

	/* drawrect must be first for Ironlake BLT workaround */
	if (gen5_emit_drawing_rectangle(sna, op))
		offset &= ~1;
	gen5_emit_binding_table(sna, offset & ~1);
	if (gen5_emit_pipelined_pointers(sna, op, op->op, op->u.gen5.wm_kernel)) {
		DBG(("%s: changed blend state, flush required? %d\n",
		     __FUNCTION__, (offset & 1) && op->op > PictOpSrc));
		flush = (offset & 1) && op->op > PictOpSrc;
	}
	gen5_emit_vertex_elements(sna, op);

	if (ALWAYS_FLUSH || kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) {
		DBG(("%s: flushing dirty (%d, %d)\n", __FUNCTION__,
		     kgem_bo_is_dirty(op->src.bo),
		     kgem_bo_is_dirty(op->mask.bo)));
		OUT_BATCH(MI_FLUSH);
		kgem_clear_dirty(&sna->kgem);
		kgem_bo_mark_dirty(op->dst.bo);
		flush = false;
	}
	if (flush) {
		DBG(("%s: forcing flush\n", __FUNCTION__));
		gen5_emit_pipe_flush(sna);
	}
}

static void gen5_bind_surfaces(struct sna *sna,
			       const struct sna_composite_op *op)
{
	bool dirty = kgem_bo_is_dirty(op->dst.bo);
	uint32_t *binding_table;
	uint16_t offset;

	gen5_get_batch(sna, op);

	binding_table = gen5_composite_get_binding_table(sna, &offset);

	binding_table[0] =
		gen5_bind_bo(sna,
			     op->dst.bo, op->dst.width, op->dst.height,
			     gen5_get_dest_format(op->dst.format),
			     true);
	binding_table[1] =
		gen5_bind_bo(sna,
			     op->src.bo, op->src.width, op->src.height,
			     op->src.card_format,
			     false);
	if (op->mask.bo) {
		assert(op->u.gen5.ve_id >> 2);
		binding_table[2] =
			gen5_bind_bo(sna,
				     op->mask.bo,
				     op->mask.width,
				     op->mask.height,
				     op->mask.card_format,
				     false);
	}

	if (sna->kgem.surface == offset &&
	    *(uint64_t *)(sna->kgem.batch + sna->render_state.gen5.surface_table) == *(uint64_t*)binding_table &&
	    (op->mask.bo == NULL ||
	     sna->kgem.batch[sna->render_state.gen5.surface_table+2] == binding_table[2])) {
		sna->kgem.surface += sizeof(struct gen5_surface_state_padded) / sizeof(uint32_t);
		offset = sna->render_state.gen5.surface_table;
	}

	gen5_emit_state(sna, op, offset | dirty);
}

fastcall static void
gen5_render_composite_blt(struct sna *sna,
			  const struct sna_composite_op *op,
			  const struct sna_composite_rectangles *r)
{
	DBG(("%s: src=(%d, %d)+(%d, %d), mask=(%d, %d)+(%d, %d), dst=(%d, %d)+(%d, %d), size=(%d, %d)\n",
	     __FUNCTION__,
	     r->src.x, r->src.y, op->src.offset[0], op->src.offset[1],
	     r->mask.x, r->mask.y, op->mask.offset[0], op->mask.offset[1],
	     r->dst.x, r->dst.y, op->dst.x, op->dst.y,
	     r->width, r->height));

	gen5_get_rectangles(sna, op, 1, gen5_bind_surfaces);
	op->prim_emit(sna, op, r);
}

fastcall static void
gen5_render_composite_box(struct sna *sna,
			  const struct sna_composite_op *op,
			  const BoxRec *box)
{
	struct sna_composite_rectangles r;

	DBG(("  %s: (%d, %d), (%d, %d)\n",
	     __FUNCTION__,
	     box->x1, box->y1, box->x2, box->y2));

	gen5_get_rectangles(sna, op, 1, gen5_bind_surfaces);

	r.dst.x = box->x1;
	r.dst.y = box->y1;
	r.width  = box->x2 - box->x1;
	r.height = box->y2 - box->y1;
	r.mask = r.src = r.dst;

	op->prim_emit(sna, op, &r);
}

static void
gen5_render_composite_boxes__blt(struct sna *sna,
				 const struct sna_composite_op *op,
				 const BoxRec *box, int nbox)
{
	DBG(("%s(%d) delta=(%d, %d), src=(%d, %d)/(%d, %d), mask=(%d, %d)/(%d, %d)\n",
	     __FUNCTION__, nbox, op->dst.x, op->dst.y,
	     op->src.offset[0], op->src.offset[1],
	     op->src.width, op->src.height,
	     op->mask.offset[0], op->mask.offset[1],
	     op->mask.width, op->mask.height));

	do {
		int nbox_this_time;

		nbox_this_time = gen5_get_rectangles(sna, op, nbox,
						     gen5_bind_surfaces);
		nbox -= nbox_this_time;

		do {
			struct sna_composite_rectangles r;

			DBG(("  %s: (%d, %d), (%d, %d)\n",
			     __FUNCTION__,
			     box->x1, box->y1, box->x2, box->y2));

			r.dst.x = box->x1;
			r.dst.y = box->y1;
			r.width  = box->x2 - box->x1;
			r.height = box->y2 - box->y1;
			r.mask = r.src = r.dst;
			op->prim_emit(sna, op, &r);
			box++;
		} while (--nbox_this_time);
	} while (nbox);
}

static void
gen5_render_composite_boxes(struct sna *sna,
			    const struct sna_composite_op *op,
			    const BoxRec *box, int nbox)
{
	DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));

	do {
		int nbox_this_time;
		float *v;

		nbox_this_time = gen5_get_rectangles(sna, op, nbox,
						     gen5_bind_surfaces);
		assert(nbox_this_time);
		nbox -= nbox_this_time;

		v = sna->render.vertices + sna->render.vertex_used;
		sna->render.vertex_used += nbox_this_time * op->floats_per_rect;

		op->emit_boxes(op, box, nbox_this_time, v);
		box += nbox_this_time;
	} while (nbox);
}

static void
gen5_render_composite_boxes__thread(struct sna *sna,
				    const struct sna_composite_op *op,
				    const BoxRec *box, int nbox)
{
	DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));

	sna_vertex_lock(&sna->render);
	do {
		int nbox_this_time;
		float *v;

		nbox_this_time = gen5_get_rectangles(sna, op, nbox,
						     gen5_bind_surfaces);
		assert(nbox_this_time);
		nbox -= nbox_this_time;

		v = sna->render.vertices + sna->render.vertex_used;
		sna->render.vertex_used += nbox_this_time * op->floats_per_rect;

		sna_vertex_acquire__locked(&sna->render);
		sna_vertex_unlock(&sna->render);

		op->emit_boxes(op, box, nbox_this_time, v);
		box += nbox_this_time;

		sna_vertex_lock(&sna->render);
		sna_vertex_release__locked(&sna->render);
	} while (nbox);
	sna_vertex_unlock(&sna->render);
}

#ifndef MAX
#define MAX(a,b) ((a) > (b) ? (a) : (b))
#endif

static uint32_t gen5_bind_video_source(struct sna *sna,
				       struct kgem_bo *src_bo,
				       uint32_t src_offset,
				       int src_width,
				       int src_height,
				       int src_pitch,
				       uint32_t src_surf_format)
{
	struct gen5_surface_state *ss;

	sna->kgem.surface -= sizeof(struct gen5_surface_state_padded) / sizeof(uint32_t);

	ss = memset(sna->kgem.batch + sna->kgem.surface, 0, sizeof(*ss));
	ss->ss0.surface_type = GEN5_SURFACE_2D;
	ss->ss0.surface_format = src_surf_format;
	ss->ss0.color_blend = 1;

	ss->ss1.base_addr =
		kgem_add_reloc(&sna->kgem,
			       sna->kgem.surface + 1,
			       src_bo,
			       I915_GEM_DOMAIN_SAMPLER << 16,
			       src_offset);

	ss->ss2.width  = src_width - 1;
	ss->ss2.height = src_height - 1;
	ss->ss3.pitch  = src_pitch - 1;

	return sna->kgem.surface * sizeof(uint32_t);
}

static void gen5_video_bind_surfaces(struct sna *sna,
				     const struct sna_composite_op *op)
{
	bool dirty = kgem_bo_is_dirty(op->dst.bo);
	struct sna_video_frame *frame = op->priv;
	uint32_t src_surf_format[6];
	uint32_t src_surf_base[6];
	int src_width[6];
	int src_height[6];
	int src_pitch[6];
	uint32_t *binding_table;
	uint16_t offset;
	int n_src, n;

	src_surf_base[0] = 0;
	src_surf_base[1] = 0;
	src_surf_base[2] = frame->VBufOffset;
	src_surf_base[3] = frame->VBufOffset;
	src_surf_base[4] = frame->UBufOffset;
	src_surf_base[5] = frame->UBufOffset;

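	/* Plane ordering here is what the planar WM kernels expect (a
	 * sketch based on the bases above): surfaces 0-1 sample luma,
	 * 2-3 sample from VBufOffset and 4-5 from UBufOffset; the chroma
	 * surfaces are bound at half width/height below.
	 */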
	if (is_planar_fourcc(frame->id)) {
		for (n = 0; n < 2; n++) {
			src_surf_format[n] = GEN5_SURFACEFORMAT_R8_UNORM;
			src_width[n]  = frame->width;
			src_height[n] = frame->height;
			src_pitch[n]  = frame->pitch[1];
		}
		for (; n < 6; n++) {
			if (is_nv12_fourcc(frame->id))
				src_surf_format[n] = GEN5_SURFACEFORMAT_R8G8_UNORM;
			else
				src_surf_format[n] = GEN5_SURFACEFORMAT_R8_UNORM;
			src_width[n]  = frame->width / 2;
			src_height[n] = frame->height / 2;
			src_pitch[n]  = frame->pitch[0];
		}
		n_src = 6;
	} else {
		if (frame->id == FOURCC_UYVY)
			src_surf_format[0] = GEN5_SURFACEFORMAT_YCRCB_SWAPY;
		else
			src_surf_format[0] = GEN5_SURFACEFORMAT_YCRCB_NORMAL;

		src_width[0]  = frame->width;
		src_height[0] = frame->height;
		src_pitch[0]  = frame->pitch[0];
		n_src = 1;
	}

	gen5_get_batch(sna, op);

	binding_table = gen5_composite_get_binding_table(sna, &offset);
	binding_table[0] =
		gen5_bind_bo(sna,
			     op->dst.bo, op->dst.width, op->dst.height,
			     gen5_get_dest_format(op->dst.format),
			     true);
	for (n = 0; n < n_src; n++) {
		binding_table[1+n] =
			gen5_bind_video_source(sna,
					       frame->bo,
					       src_surf_base[n],
					       src_width[n],
					       src_height[n],
					       src_pitch[n],
					       src_surf_format[n]);
	}

	gen5_emit_state(sna, op, offset | dirty);
}

static unsigned select_video_kernel(const struct sna_video *video,
				    const struct sna_video_frame *frame)
{
	switch (frame->id) {
	case FOURCC_YV12:
	case FOURCC_I420:
	case FOURCC_XVMC:
		return video->colorspace ?
			WM_KERNEL_VIDEO_PLANAR_BT709 :
			WM_KERNEL_VIDEO_PLANAR_BT601;

	case FOURCC_NV12:
		return video->colorspace ?
			WM_KERNEL_VIDEO_NV12_BT709 :
			WM_KERNEL_VIDEO_NV12_BT601;

	default:
		return video->colorspace ?
			WM_KERNEL_VIDEO_PACKED_BT709 :
			WM_KERNEL_VIDEO_PACKED_BT601;
	}
}

static bool
gen5_render_video(struct sna *sna,
		  struct sna_video *video,
		  struct sna_video_frame *frame,
		  RegionPtr dstRegion,
		  PixmapPtr pixmap)
{
	struct sna_composite_op tmp;
	struct sna_pixmap *priv = sna_pixmap(pixmap);
	int dst_width = dstRegion->extents.x2 - dstRegion->extents.x1;
	int dst_height = dstRegion->extents.y2 - dstRegion->extents.y1;
	int src_width = frame->src.x2 - frame->src.x1;
	int src_height = frame->src.y2 - frame->src.y1;
	float src_offset_x, src_offset_y;
	float src_scale_x, src_scale_y;
	const BoxRec *box;
	int nbox;

	DBG(("%s: %dx%d -> %dx%d\n", __FUNCTION__,
	     src_width, src_height, dst_width, dst_height));

	assert(priv->gpu_bo);
	memset(&tmp, 0, sizeof(tmp));

	tmp.op = PictOpSrc;
	tmp.dst.pixmap = pixmap;
	tmp.dst.width  = pixmap->drawable.width;
	tmp.dst.height = pixmap->drawable.height;
	tmp.dst.format = sna_format_for_depth(pixmap->drawable.depth);
	tmp.dst.bo = priv->gpu_bo;

	if (src_width == dst_width && src_height == dst_height)
		tmp.src.filter = SAMPLER_FILTER_NEAREST;
	else
		tmp.src.filter = SAMPLER_FILTER_BILINEAR;
	tmp.src.repeat = SAMPLER_EXTEND_PAD;
	tmp.src.bo = frame->bo;
	tmp.mask.bo = NULL;
	tmp.u.gen5.wm_kernel = select_video_kernel(video, frame);
	tmp.u.gen5.ve_id = 2;
	tmp.is_affine = true;
	tmp.floats_per_vertex = 3;
	tmp.floats_per_rect = 9;
	tmp.priv = frame;

	if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL))
			return false;
	}

	gen5_align_vertex(sna, &tmp);
	gen5_video_bind_surfaces(sna, &tmp);

	src_scale_x = (float)src_width / dst_width / frame->width;
	src_offset_x = (float)frame->src.x1 / frame->width - dstRegion->extents.x1 * src_scale_x;

	src_scale_y = (float)src_height / dst_height / frame->height;
	src_offset_y = (float)frame->src.y1 / frame->height - dstRegion->extents.y1 * src_scale_y;
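
	/* The scale/offset pair maps destination pixels to normalised
	 * source coordinates, e.g. along x (illustrative arithmetic):
	 *
	 *   u(x) = x * src_scale_x + src_offset_x
	 *        = (frame->src.x1 + (x - extents.x1) * src_width / dst_width)
	 *          / frame->width
	 *
	 * so extents.x1 maps to frame->src.x1 and extents.x2 to frame->src.x2.
	 */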
1467 
1468 	box = region_rects(dstRegion);
1469 	nbox = region_num_rects(dstRegion);
1470 	while (nbox--) {
1471 		gen5_get_rectangles(sna, &tmp, 1, gen5_video_bind_surfaces);
1472 
1473 		OUT_VERTEX(box->x2, box->y2);
1474 		OUT_VERTEX_F(box->x2 * src_scale_x + src_offset_x);
1475 		OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y);
1476 
1477 		OUT_VERTEX(box->x1, box->y2);
1478 		OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x);
1479 		OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y);
1480 
1481 		OUT_VERTEX(box->x1, box->y1);
1482 		OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x);
1483 		OUT_VERTEX_F(box->y1 * src_scale_y + src_offset_y);
1484 
1485 		box++;
1486 	}
1487 	gen4_vertex_flush(sna);
1488 
1489 	if (!DAMAGE_IS_ALL(priv->gpu_damage))
1490 		sna_damage_add(&priv->gpu_damage, dstRegion);
1491 
1492 	return true;
1493 }
1494 
1495 static int
gen5_composite_picture(struct sna * sna,PicturePtr picture,struct sna_composite_channel * channel,int x,int y,int w,int h,int dst_x,int dst_y,bool precise)1496 gen5_composite_picture(struct sna *sna,
1497 		       PicturePtr picture,
1498 		       struct sna_composite_channel *channel,
1499 		       int x, int y,
1500 		       int w, int h,
1501 		       int dst_x, int dst_y,
1502 		       bool precise)
1503 {
1504 	PixmapPtr pixmap;
1505 	uint32_t color;
1506 	int16_t dx, dy;
1507 
1508 	DBG(("%s: (%d, %d)x(%d, %d), dst=(%d, %d)\n",
	     __FUNCTION__, x, y, w, h, dst_x, dst_y));

	channel->is_solid = false;
	channel->card_format = -1;

	if (sna_picture_is_solid(picture, &color))
		return gen4_channel_init_solid(sna, channel, color);

	if (picture->pDrawable == NULL) {
		int ret;

		if (picture->pSourcePict->type == SourcePictTypeLinear)
			return gen4_channel_init_linear(sna, picture, channel,
							x, y,
							w, h,
							dst_x, dst_y);

		DBG(("%s -- fixup, gradient\n", __FUNCTION__));
		ret = -1;
		if (!precise)
			ret = sna_render_picture_approximate_gradient(sna, picture, channel,
								      x, y, w, h, dst_x, dst_y);
		if (ret == -1)
			ret = sna_render_picture_fixup(sna, picture, channel,
						       x, y, w, h, dst_x, dst_y);
		return ret;
	}

	if (picture->alphaMap) {
		DBG(("%s -- fallback, alphamap\n", __FUNCTION__));
		return sna_render_picture_fixup(sna, picture, channel,
						x, y, w, h, dst_x, dst_y);
	}

	if (!gen5_check_repeat(picture))
		return sna_render_picture_fixup(sna, picture, channel,
						x, y, w, h, dst_x, dst_y);

	if (!gen5_check_filter(picture))
		return sna_render_picture_fixup(sna, picture, channel,
						x, y, w, h, dst_x, dst_y);

	channel->repeat = picture->repeat ? picture->repeatType : RepeatNone;
	channel->filter = picture->filter;

	pixmap = get_drawable_pixmap(picture->pDrawable);
	get_drawable_deltas(picture->pDrawable, pixmap, &dx, &dy);

	x += dx + picture->pDrawable->x;
	y += dy + picture->pDrawable->y;

	channel->is_affine = sna_transform_is_affine(picture->transform);
	if (sna_transform_is_imprecise_integer_translation(picture->transform, picture->filter, precise, &dx, &dy)) {
		DBG(("%s: integer translation (%d, %d), removing\n",
		     __FUNCTION__, dx, dy));
		x += dx;
		y += dy;
		channel->transform = NULL;
		channel->filter = PictFilterNearest;

		if (channel->repeat ||
		    (x >= 0 &&
		     y >= 0 &&
		     x + w <= pixmap->drawable.width &&
		     y + h <= pixmap->drawable.height)) {
			struct sna_pixmap *priv = sna_pixmap(pixmap);
			if (priv && priv->clear) {
				DBG(("%s: converting large pixmap source into solid [%08x]\n", __FUNCTION__, priv->clear_color));
				return gen4_channel_init_solid(sna, channel, solid_color(picture->format, priv->clear_color));
			}
		}
	} else
		channel->transform = picture->transform;

	channel->pict_format = picture->format;
	channel->card_format = gen5_get_card_format(picture->format);
	if (channel->card_format == -1)
		return sna_render_picture_convert(sna, picture, channel, pixmap,
						  x, y, w, h, dst_x, dst_y,
						  false);

	if (too_large(pixmap->drawable.width, pixmap->drawable.height))
		return sna_render_picture_extract(sna, picture, channel,
						  x, y, w, h, dst_x, dst_y);

	DBG(("%s: pixmap, repeat=%d, filter=%d, transform?=%d [affine? %d], format=%08x\n",
	     __FUNCTION__,
	     channel->repeat, channel->filter,
	     channel->transform != NULL, channel->is_affine,
	     channel->pict_format));
	if (channel->transform) {
		DBG(("%s: transform=[%f %f %f, %f %f %f, %f %f %f]\n",
		     __FUNCTION__,
		     channel->transform->matrix[0][0] / 65536.,
		     channel->transform->matrix[0][1] / 65536.,
		     channel->transform->matrix[0][2] / 65536.,
		     channel->transform->matrix[1][0] / 65536.,
		     channel->transform->matrix[1][1] / 65536.,
		     channel->transform->matrix[1][2] / 65536.,
		     channel->transform->matrix[2][0] / 65536.,
		     channel->transform->matrix[2][1] / 65536.,
		     channel->transform->matrix[2][2] / 65536.));
	}

	return sna_render_pixmap_bo(sna, channel, pixmap,
				    x, y, w, h, dst_x, dst_y);
}

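/* Translate the channel's Render repeat/filter values into gen5
 * sampler codes; the card format is recomputed only if it was left
 * unresolved (-1) by the picture setup above.
 */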
static void gen5_composite_channel_convert(struct sna_composite_channel *channel)
{
	channel->repeat = gen5_repeat(channel->repeat);
	channel->filter = gen5_filter(channel->filter);
	if (channel->card_format == (unsigned)-1)
		channel->card_format = gen5_get_card_format(channel->pict_format);
}

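/* Finish a composite: flush any pending vertices, run the second
 * blend pass required for component-alpha (the "magic CA pass"),
 * then drop the references taken on the source and mask bo.
 */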
static void
gen5_render_composite_done(struct sna *sna,
			   const struct sna_composite_op *op)
{
	if (sna->render.vertex_offset) {
		gen4_vertex_flush(sna);
		gen5_magic_ca_pass(sna, op);
	}

	DBG(("%s()\n", __FUNCTION__));

	if (op->mask.bo)
		kgem_bo_destroy(&sna->kgem, op->mask.bo);
	if (op->src.bo)
		kgem_bo_destroy(&sna->kgem, op->src.bo);

	sna_render_composite_redirect_done(sna, op);
}

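/* Resolve the destination picture to a bo for rendering. The hint
 * flags guide sna_drawable_use_bo: FORCE_GPU when the operation fits
 * in a single pass, and IGNORE_DAMAGE/REPLACES when the result fully
 * overwrites the existing contents.
 */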
static bool
gen5_composite_set_target(struct sna *sna,
			  struct sna_composite_op *op,
			  PicturePtr dst,
			  int x, int y, int w, int h,
			  bool partial)
{
	BoxRec box;
	unsigned hint;

	op->dst.pixmap = get_drawable_pixmap(dst->pDrawable);
	op->dst.width  = op->dst.pixmap->drawable.width;
	op->dst.height = op->dst.pixmap->drawable.height;
	op->dst.format = dst->format;
	if (w && h) {
		box.x1 = x;
		box.y1 = y;
		box.x2 = x + w;
		box.y2 = y + h;
	} else
		sna_render_picture_extents(dst, &box);

	hint = PREFER_GPU | RENDER_GPU;
	if (!need_tiling(sna, op->dst.width, op->dst.height))
		hint |= FORCE_GPU;
	if (!partial) {
		hint |= IGNORE_DAMAGE;
		if (w == op->dst.width && h == op->dst.height)
			hint |= REPLACES;
	}

	op->dst.bo = sna_drawable_use_bo(dst->pDrawable, hint, &box, &op->damage);
	if (op->dst.bo == NULL)
		return false;

	if (hint & REPLACES) {
		struct sna_pixmap *priv = sna_pixmap(op->dst.pixmap);
		kgem_bo_pair_undo(&sna->kgem, priv->gpu_bo, priv->cpu_bo);
	}

	get_drawable_deltas(dst->pDrawable, op->dst.pixmap,
			    &op->dst.x, &op->dst.y);

	DBG(("%s: pixmap=%ld, format=%08x, size=%dx%d, pitch=%d, delta=(%d,%d),damage=%p\n",
	     __FUNCTION__,
	     op->dst.pixmap->drawable.serialNumber, (int)op->dst.format,
	     op->dst.width, op->dst.height,
	     op->dst.bo->pitch,
	     op->dst.x, op->dst.y,
	     op->damage ? *op->damage : (void *)-1));

	assert(op->dst.bo->proxy == NULL);

	if (too_large(op->dst.width, op->dst.height) &&
	    !sna_render_composite_redirect(sna, op, x, y, w, h, partial))
		return false;

	return true;
}

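/* Only radial/conical gradients need a pixman fixup, and then only
 * for precise rendering; solid and linear sources are handled
 * natively, and imprecise gradients can be approximated on the GPU.
 */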
static bool
is_gradient(PicturePtr picture, bool precise)
{
	if (picture->pDrawable)
		return false;

	switch (picture->pSourcePict->type) {
	case SourcePictTypeSolidFill:
	case SourcePictTypeLinear:
		return false;
	default:
		return precise;
	}
}

static bool
has_alphamap(PicturePtr p)
{
	return p->alphaMap != NULL;
}

static bool
need_upload(struct sna *sna, PicturePtr p)
{
	return p->pDrawable && untransformed(p) &&
		!is_gpu(sna, p->pDrawable, PREFER_GPU_RENDER);
}

static bool
source_is_busy(PixmapPtr pixmap)
{
	struct sna_pixmap *priv = sna_pixmap(pixmap);
	if (priv == NULL)
		return false;

	if (priv->clear)
		return false;

	if (priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo))
		return true;

	if (priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo))
		return true;

	return priv->gpu_damage && !priv->cpu_damage;
}

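/* Decide whether a source picture should be rendered on the CPU:
 * unsupported gradients, repeat modes or formats always fall back,
 * while pictures whose backing bo is busy on the GPU are kept there
 * to avoid stalling on readback.
 */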
static bool
source_fallback(struct sna *sna, PicturePtr p, PixmapPtr pixmap, bool precise)
{
	if (sna_picture_is_solid(p, NULL))
		return false;

	if (is_gradient(p, precise) ||
	    !gen5_check_repeat(p) ||
	    !gen5_check_format(p->format))
		return true;

	if (pixmap && source_is_busy(pixmap))
		return false;

	return has_alphamap(p) || !gen5_check_filter(p) || need_upload(sna, p);
}

static bool
gen5_composite_fallback(struct sna *sna,
			PicturePtr src,
			PicturePtr mask,
			PicturePtr dst)
{
	PixmapPtr src_pixmap;
	PixmapPtr mask_pixmap;
	PixmapPtr dst_pixmap;
	bool src_fallback, mask_fallback;

	if (!gen5_check_dst_format(dst->format)) {
		DBG(("%s: unknown destination format: %d\n",
		     __FUNCTION__, dst->format));
		return true;
	}

	dst_pixmap = get_drawable_pixmap(dst->pDrawable);

	src_pixmap = src->pDrawable ? get_drawable_pixmap(src->pDrawable) : NULL;
	src_fallback = source_fallback(sna, src, src_pixmap,
				       dst->polyMode == PolyModePrecise);

	if (mask) {
		mask_pixmap = mask->pDrawable ? get_drawable_pixmap(mask->pDrawable) : NULL;
		mask_fallback = source_fallback(sna, mask, mask_pixmap,
						dst->polyMode == PolyModePrecise);
	} else {
		mask_pixmap = NULL;
		mask_fallback = false;
	}

	/* If we are using the destination as a source and need to
	 * readback in order to upload the source, do it all
	 * on the CPU.
	 */
	if (src_pixmap == dst_pixmap && src_fallback) {
		DBG(("%s: src is dst and will fallback\n", __FUNCTION__));
		return true;
	}
	if (mask_pixmap == dst_pixmap && mask_fallback) {
		DBG(("%s: mask is dst and will fallback\n", __FUNCTION__));
		return true;
	}

	/* If anything is on the GPU, push everything out to the GPU */
	if (dst_use_gpu(dst_pixmap)) {
		DBG(("%s: dst is already on the GPU, try to use GPU\n",
		     __FUNCTION__));
		return false;
	}

	if (src_pixmap && !src_fallback) {
		DBG(("%s: src is already on the GPU, try to use GPU\n",
		     __FUNCTION__));
		return false;
	}
	if (mask_pixmap && !mask_fallback) {
		DBG(("%s: mask is already on the GPU, try to use GPU\n",
		     __FUNCTION__));
		return false;
	}

	/* However, if the dst is not on the GPU and we need to
	 * render one of the sources using the CPU, we may
	 * as well do the entire operation in place on the CPU.
	 */
	if (src_fallback) {
		DBG(("%s: dst is on the CPU and src will fallback\n",
		     __FUNCTION__));
		return true;
	}

	if (mask_fallback) {
		DBG(("%s: dst is on the CPU and mask will fallback\n",
		     __FUNCTION__));
		return true;
	}

	if (too_large(dst_pixmap->drawable.width,
		      dst_pixmap->drawable.height) &&
	    dst_is_cpu(dst_pixmap)) {
		DBG(("%s: dst is on the CPU and too large\n", __FUNCTION__));
		return true;
	}

	DBG(("%s: dst is not on the GPU and the operation should not fallback\n",
	     __FUNCTION__));
	return dst_use_cpu(dst_pixmap);
}

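/* If the mask matches the source (same picture, or the same drawable
 * with compatible transform, repeat, filter and format), reuse the
 * already prepared source channel instead of binding the texture a
 * second time.
 */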
static int
reuse_source(struct sna *sna,
	     PicturePtr src, struct sna_composite_channel *sc, int src_x, int src_y,
	     PicturePtr mask, struct sna_composite_channel *mc, int msk_x, int msk_y)
{
	uint32_t color;

	if (src_x != msk_x || src_y != msk_y)
		return false;

	if (src == mask) {
		DBG(("%s: mask is source\n", __FUNCTION__));
		*mc = *sc;
		mc->bo = kgem_bo_reference(mc->bo);
		return true;
	}

	if (sna_picture_is_solid(mask, &color))
		return gen4_channel_init_solid(sna, mc, color);

	if (sc->is_solid)
		return false;

	if (src->pDrawable == NULL || mask->pDrawable != src->pDrawable)
		return false;

	DBG(("%s: mask reuses source drawable\n", __FUNCTION__));

	if (!sna_transform_equal(src->transform, mask->transform))
		return false;

	if (!sna_picture_alphamap_equal(src, mask))
		return false;

	if (!gen5_check_repeat(mask))
		return false;

	if (!gen5_check_filter(mask))
		return false;

	if (!gen5_check_format(mask->format))
		return false;

	DBG(("%s: reusing source channel for mask with a twist\n",
	     __FUNCTION__));

	*mc = *sc;
	mc->repeat = gen5_repeat(mask->repeat ? mask->repeatType : RepeatNone);
	mc->filter = gen5_filter(mask->filter);
	mc->pict_format = mask->format;
	mc->card_format = gen5_get_card_format(mask->format);
	mc->bo = kgem_bo_reference(mc->bo);
	return true;
}

static bool
gen5_render_composite(struct sna *sna,
		      uint8_t op,
		      PicturePtr src,
		      PicturePtr mask,
		      PicturePtr dst,
		      int16_t src_x, int16_t src_y,
		      int16_t msk_x, int16_t msk_y,
		      int16_t dst_x, int16_t dst_y,
		      int16_t width, int16_t height,
		      unsigned flags,
		      struct sna_composite_op *tmp)
{
	DBG(("%s: %dx%d, current mode=%d\n", __FUNCTION__,
	     width, height, sna->kgem.mode));

	if (op >= ARRAY_SIZE(gen5_blend_op)) {
		DBG(("%s: unhandled blend op %d\n", __FUNCTION__, op));
		return false;
	}

	if (mask == NULL &&
	    sna_blt_composite(sna, op,
			      src, dst,
			      src_x, src_y,
			      dst_x, dst_y,
			      width, height,
			      flags, tmp))
		return true;

	if (gen5_composite_fallback(sna, src, mask, dst))
		goto fallback;

	if (need_tiling(sna, width, height))
		return sna_tiling_composite(op, src, mask, dst,
					    src_x, src_y,
					    msk_x, msk_y,
					    dst_x, dst_y,
					    width, height,
					    tmp);

	if (!gen5_composite_set_target(sna, tmp, dst,
				       dst_x, dst_y, width, height,
				       flags & COMPOSITE_PARTIAL || op > PictOpSrc)) {
		DBG(("%s: failed to set composite target\n", __FUNCTION__));
		goto fallback;
	}

	DBG(("%s: preparing source\n", __FUNCTION__));
	tmp->op = op;
	switch (gen5_composite_picture(sna, src, &tmp->src,
				       src_x, src_y,
				       width, height,
				       dst_x, dst_y,
				       dst->polyMode == PolyModePrecise)) {
	case -1:
		DBG(("%s: failed to prepare source picture\n", __FUNCTION__));
		goto cleanup_dst;
	case 0:
		if (!gen4_channel_init_solid(sna, &tmp->src, 0))
			goto cleanup_dst;
		/* fall through */
	case 1:
		if (mask == NULL &&
		    sna_blt_composite__convert(sna,
					       dst_x, dst_y, width, height,
					       tmp))
			return true;

		gen5_composite_channel_convert(&tmp->src);
		break;
	}

	tmp->is_affine = tmp->src.is_affine;
	tmp->has_component_alpha = false;
	tmp->need_magic_ca_pass = false;

	if (mask) {
		if (mask->componentAlpha && PICT_FORMAT_RGB(mask->format)) {
			tmp->has_component_alpha = true;

			/* Check if it's component alpha that relies on a source alpha and on
			 * the source value.  We can only get one of those into the single
			 * source value that we get to blend with.
			 */
			if (gen5_blend_op[op].src_alpha &&
			    (gen5_blend_op[op].src_blend != GEN5_BLENDFACTOR_ZERO)) {
				if (op != PictOpOver) {
					DBG(("%s: unhandled CA blend op %d\n", __FUNCTION__, op));
					goto cleanup_src;
				}

				tmp->need_magic_ca_pass = true;
				tmp->op = PictOpOutReverse;
			}
		}

		if (!reuse_source(sna,
				  src, &tmp->src, src_x, src_y,
				  mask, &tmp->mask, msk_x, msk_y)) {
			DBG(("%s: preparing mask\n", __FUNCTION__));
			switch (gen5_composite_picture(sna, mask, &tmp->mask,
						       msk_x, msk_y,
						       width, height,
						       dst_x, dst_y,
						       dst->polyMode == PolyModePrecise)) {
			case -1:
				DBG(("%s: failed to prepare mask picture\n", __FUNCTION__));
				goto cleanup_src;
			case 0:
				if (!gen4_channel_init_solid(sna, &tmp->mask, 0))
					goto cleanup_src;
				/* fall through */
			case 1:
				gen5_composite_channel_convert(&tmp->mask);
				break;
			}
		}

		tmp->is_affine &= tmp->mask.is_affine;
	}

	tmp->u.gen5.wm_kernel =
		gen5_choose_composite_kernel(tmp->op,
					     tmp->mask.bo != NULL,
					     tmp->has_component_alpha,
					     tmp->is_affine);
	tmp->u.gen5.ve_id = gen4_choose_composite_emitter(sna, tmp);

	tmp->blt   = gen5_render_composite_blt;
	tmp->box   = gen5_render_composite_box;
	tmp->boxes = gen5_render_composite_boxes__blt;
	if (tmp->emit_boxes) {
		tmp->boxes = gen5_render_composite_boxes;
		tmp->thread_boxes = gen5_render_composite_boxes__thread;
	}
	tmp->done  = gen5_render_composite_done;

	if (!kgem_check_bo(&sna->kgem,
			   tmp->dst.bo, tmp->src.bo, tmp->mask.bo, NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem,
				   tmp->dst.bo, tmp->src.bo, tmp->mask.bo, NULL))
			goto cleanup_mask;
	}

	gen5_align_vertex(sna, tmp);
	gen5_bind_surfaces(sna, tmp);
	return true;

cleanup_mask:
	if (tmp->mask.bo) {
		kgem_bo_destroy(&sna->kgem, tmp->mask.bo);
		tmp->mask.bo = NULL;
	}
cleanup_src:
	if (tmp->src.bo) {
		kgem_bo_destroy(&sna->kgem, tmp->src.bo);
		tmp->src.bo = NULL;
	}
cleanup_dst:
	if (tmp->redirect.real_bo) {
		kgem_bo_destroy(&sna->kgem, tmp->dst.bo);
		tmp->redirect.real_bo = NULL;
	}
fallback:
	return (mask == NULL &&
		sna_blt_composite(sna, op,
				  src, dst,
				  src_x, src_y,
				  dst_x, dst_y,
				  width, height,
				  flags | COMPOSITE_FALLBACK, tmp));
}

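/* Composite spans: like composite, but each rectangle carries an
 * opacity that the chosen span emitters feed to the WM_KERNEL_OPACITY
 * shaders as extra per-vertex data.
 */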
#if !NO_COMPOSITE_SPANS
fastcall static void
gen5_render_composite_spans_box(struct sna *sna,
				const struct sna_composite_spans_op *op,
				const BoxRec *box, float opacity)
{
	DBG(("%s: src=+(%d, %d), opacity=%f, dst=+(%d, %d), box=(%d, %d) x (%d, %d)\n",
	     __FUNCTION__,
	     op->base.src.offset[0], op->base.src.offset[1],
	     opacity,
	     op->base.dst.x, op->base.dst.y,
	     box->x1, box->y1,
	     box->x2 - box->x1,
	     box->y2 - box->y1));

	gen5_get_rectangles(sna, &op->base, 1, gen5_bind_surfaces);
	op->prim_emit(sna, op, box, opacity);
}

static void
gen5_render_composite_spans_boxes(struct sna *sna,
				  const struct sna_composite_spans_op *op,
				  const BoxRec *box, int nbox,
				  float opacity)
{
	DBG(("%s: nbox=%d, src=+(%d, %d), opacity=%f, dst=+(%d, %d)\n",
	     __FUNCTION__, nbox,
	     op->base.src.offset[0], op->base.src.offset[1],
	     opacity,
	     op->base.dst.x, op->base.dst.y));

	do {
		int nbox_this_time;

		nbox_this_time = gen5_get_rectangles(sna, &op->base, nbox,
						     gen5_bind_surfaces);
		nbox -= nbox_this_time;

		do {
			DBG(("  %s: (%d, %d) x (%d, %d)\n", __FUNCTION__,
			     box->x1, box->y1,
			     box->x2 - box->x1,
			     box->y2 - box->y1));

			op->prim_emit(sna, op, box++, opacity);
		} while (--nbox_this_time);
	} while (nbox);
}

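/* Threaded variant: rectangles are reserved under the vertex lock,
 * then the lock is dropped (holding a reference on the vertex buffer)
 * so other threads may emit vertices into their own reservations
 * concurrently.
 */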
fastcall static void
gen5_render_composite_spans_boxes__thread(struct sna *sna,
					  const struct sna_composite_spans_op *op,
					  const struct sna_opacity_box *box,
					  int nbox)
{
	DBG(("%s: nbox=%d, src=+(%d, %d), dst=+(%d, %d)\n",
	     __FUNCTION__, nbox,
	     op->base.src.offset[0], op->base.src.offset[1],
	     op->base.dst.x, op->base.dst.y));

	sna_vertex_lock(&sna->render);
	do {
		int nbox_this_time;
		float *v;

		nbox_this_time = gen5_get_rectangles(sna, &op->base, nbox,
						     gen5_bind_surfaces);
		assert(nbox_this_time);
		nbox -= nbox_this_time;

		v = sna->render.vertices + sna->render.vertex_used;
		sna->render.vertex_used += nbox_this_time * op->base.floats_per_rect;

		sna_vertex_acquire__locked(&sna->render);
		sna_vertex_unlock(&sna->render);

		op->emit_boxes(op, box, nbox_this_time, v);
		box += nbox_this_time;

		sna_vertex_lock(&sna->render);
		sna_vertex_release__locked(&sna->render);
	} while (nbox);
	sna_vertex_unlock(&sna->render);
}


fastcall static void
gen5_render_composite_spans_done(struct sna *sna,
				 const struct sna_composite_spans_op *op)
{
	if (sna->render.vertex_offset)
		gen4_vertex_flush(sna);

	DBG(("%s()\n", __FUNCTION__));

	kgem_bo_destroy(&sna->kgem, op->base.src.bo);
	sna_render_composite_redirect_done(sna, &op->base);
}

static bool
gen5_check_composite_spans(struct sna *sna,
			   uint8_t op, PicturePtr src, PicturePtr dst,
			   int16_t width, int16_t height,
			   unsigned flags)
{
	DBG(("%s: op=%d, width=%d, height=%d, flags=%x\n",
	     __FUNCTION__, op, width, height, flags));

	if (op >= ARRAY_SIZE(gen5_blend_op))
		return false;

	if (gen5_composite_fallback(sna, src, NULL, dst)) {
		DBG(("%s: operation would fallback\n", __FUNCTION__));
		return false;
	}

	if (need_tiling(sna, width, height) &&
	    !is_gpu(sna, dst->pDrawable, PREFER_GPU_SPANS)) {
		DBG(("%s: fallback, tiled operation not on GPU\n",
		     __FUNCTION__));
		return false;
	}

	if ((flags & COMPOSITE_SPANS_RECTILINEAR) == 0) {
		struct sna_pixmap *priv = sna_pixmap_from_drawable(dst->pDrawable);
		assert(priv);

		if (priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo))
			return true;

		if (flags & COMPOSITE_SPANS_INPLACE_HINT)
			return false;

		if ((sna->render.prefer_gpu & PREFER_GPU_SPANS) == 0 &&
		    dst->format == PICT_a8)
			return false;

		return priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo);
	}

	return true;
}


static bool
gen5_render_composite_spans(struct sna *sna,
			    uint8_t op,
			    PicturePtr src,
			    PicturePtr dst,
			    int16_t src_x,  int16_t src_y,
			    int16_t dst_x,  int16_t dst_y,
			    int16_t width,  int16_t height,
			    unsigned flags,
			    struct sna_composite_spans_op *tmp)
{
	DBG(("%s: %dx%d with flags=%x, current mode=%d\n", __FUNCTION__,
	     width, height, flags, sna->kgem.ring));

	assert(gen5_check_composite_spans(sna, op, src, dst, width, height, flags));

	if (need_tiling(sna, width, height)) {
		DBG(("%s: tiling, operation (%dx%d) too wide for pipeline\n",
		     __FUNCTION__, width, height));
		return sna_tiling_composite_spans(op, src, dst,
						  src_x, src_y, dst_x, dst_y,
						  width, height, flags, tmp);
	}

	tmp->base.op = op;
	if (!gen5_composite_set_target(sna, &tmp->base, dst,
				       dst_x, dst_y, width, height,
				       true))
		return false;

	switch (gen5_composite_picture(sna, src, &tmp->base.src,
				       src_x, src_y,
				       width, height,
				       dst_x, dst_y,
				       dst->polyMode == PolyModePrecise)) {
	case -1:
		goto cleanup_dst;
	case 0:
		if (!gen4_channel_init_solid(sna, &tmp->base.src, 0))
			goto cleanup_dst;
		/* fall through */
	case 1:
		gen5_composite_channel_convert(&tmp->base.src);
		break;
	}

	tmp->base.mask.bo = NULL;

	tmp->base.is_affine = tmp->base.src.is_affine;
	tmp->base.has_component_alpha = false;
	tmp->base.need_magic_ca_pass = false;

	tmp->base.u.gen5.ve_id = gen4_choose_spans_emitter(sna, tmp);
	tmp->base.u.gen5.wm_kernel = WM_KERNEL_OPACITY | !tmp->base.is_affine;

	tmp->box   = gen5_render_composite_spans_box;
	tmp->boxes = gen5_render_composite_spans_boxes;
	if (tmp->emit_boxes)
		tmp->thread_boxes = gen5_render_composite_spans_boxes__thread;
	tmp->done  = gen5_render_composite_spans_done;

	if (!kgem_check_bo(&sna->kgem,
			   tmp->base.dst.bo, tmp->base.src.bo,
			   NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem,
				   tmp->base.dst.bo, tmp->base.src.bo,
				   NULL))
			goto cleanup_src;
	}

	gen5_align_vertex(sna, &tmp->base);
	gen5_bind_surfaces(sna, &tmp->base);
	return true;

cleanup_src:
	if (tmp->base.src.bo)
		kgem_bo_destroy(&sna->kgem, tmp->base.src.bo);
cleanup_dst:
	if (tmp->base.redirect.real_bo)
		kgem_bo_destroy(&sna->kgem, tmp->base.dst.bo);
	return false;
}
#endif

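/* Bind only destination and source for a copy. If the freshly written
 * binding table is identical to the previous one, reclaim the
 * just-allocated surface state space and reuse the cached table.
 */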
static void
gen5_copy_bind_surfaces(struct sna *sna,
			const struct sna_composite_op *op)
{
	bool dirty = kgem_bo_is_dirty(op->dst.bo);
	uint32_t *binding_table;
	uint16_t offset;

	gen5_get_batch(sna, op);

	binding_table = gen5_composite_get_binding_table(sna, &offset);

	binding_table[0] =
		gen5_bind_bo(sna,
			     op->dst.bo, op->dst.width, op->dst.height,
			     gen5_get_dest_format(op->dst.format),
			     true);
	binding_table[1] =
		gen5_bind_bo(sna,
			     op->src.bo, op->src.width, op->src.height,
			     op->src.card_format,
			     false);

	if (sna->kgem.surface == offset &&
	    *(uint64_t *)(sna->kgem.batch + sna->render_state.gen5.surface_table) == *(uint64_t *)binding_table) {
		sna->kgem.surface += sizeof(struct gen5_surface_state_padded) / sizeof(uint32_t);
		offset = sna->render_state.gen5.surface_table;
	}

	gen5_emit_state(sna, op, offset | dirty);
}

static bool
gen5_render_copy_boxes(struct sna *sna, uint8_t alu,
		       const DrawableRec *src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
		       const DrawableRec *dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
		       const BoxRec *box, int n, unsigned flags)
{
	struct sna_composite_op tmp;

	DBG(("%s alu=%d, src=%ld:handle=%d, dst=%ld:handle=%d boxes=%d x [((%d, %d), (%d, %d))...], flags=%x\n",
	     __FUNCTION__, alu,
	     src->serialNumber, src_bo->handle,
	     dst->serialNumber, dst_bo->handle,
	     n, box->x1, box->y1, box->x2, box->y2,
	     flags));

	if (sna_blt_compare_depth(src, dst) &&
	    sna_blt_copy_boxes(sna, alu,
			       src_bo, src_dx, src_dy,
			       dst_bo, dst_dx, dst_dy,
			       dst->bitsPerPixel,
			       box, n))
		return true;

	if (!(alu == GXcopy || alu == GXclear) || src_bo == dst_bo) {
fallback_blt:
		if (!sna_blt_compare_depth(src, dst))
			return false;

		return sna_blt_copy_boxes_fallback(sna, alu,
						   src, src_bo, src_dx, src_dy,
						   dst, dst_bo, dst_dx, dst_dy,
						   box, n);
	}

	memset(&tmp, 0, sizeof(tmp));

	if (dst->depth == src->depth) {
		tmp.dst.format = sna_render_format_for_depth(dst->depth);
		tmp.src.pict_format = tmp.dst.format;
	} else {
		tmp.dst.format = sna_format_for_depth(dst->depth);
		tmp.src.pict_format = sna_format_for_depth(src->depth);
	}
	if (!gen5_check_format(tmp.src.pict_format)) {
		DBG(("%s: unsupported source format, %x, use BLT\n",
		     __FUNCTION__, tmp.src.pict_format));
		goto fallback_blt;
	}

	DBG(("%s (%d, %d)->(%d, %d) x %d\n",
	     __FUNCTION__, src_dx, src_dy, dst_dx, dst_dy, n));

	tmp.op = alu == GXcopy ? PictOpSrc : PictOpClear;

	tmp.dst.pixmap = (PixmapPtr)dst;
	tmp.dst.width  = dst->width;
	tmp.dst.height = dst->height;
	tmp.dst.x = tmp.dst.y = 0;
	tmp.dst.bo = dst_bo;
	tmp.damage = NULL;

	sna_render_composite_redirect_init(&tmp);
	if (too_large(tmp.dst.width, tmp.dst.height)) {
		BoxRec extents = box[0];
		int i;

		for (i = 1; i < n; i++) {
			if (box[i].x1 < extents.x1)
				extents.x1 = box[i].x1;
			if (box[i].y1 < extents.y1)
				extents.y1 = box[i].y1;

			if (box[i].x2 > extents.x2)
				extents.x2 = box[i].x2;
			if (box[i].y2 > extents.y2)
				extents.y2 = box[i].y2;
		}
		if (!sna_render_composite_redirect(sna, &tmp,
						   extents.x1 + dst_dx,
						   extents.y1 + dst_dy,
						   extents.x2 - extents.x1,
						   extents.y2 - extents.y1,
						   n > 1))
			goto fallback_tiled;
	}

	tmp.src.filter = SAMPLER_FILTER_NEAREST;
	tmp.src.repeat = SAMPLER_EXTEND_NONE;
	tmp.src.card_format = gen5_get_card_format(tmp.src.pict_format);
	if (too_large(src->width, src->height)) {
		BoxRec extents = box[0];
		int i;

		for (i = 1; i < n; i++) {
			if (box[i].x1 < extents.x1)
				extents.x1 = box[i].x1;
			if (box[i].y1 < extents.y1)
				extents.y1 = box[i].y1;

			if (box[i].x2 > extents.x2)
				extents.x2 = box[i].x2;
			if (box[i].y2 > extents.y2)
				extents.y2 = box[i].y2;
		}

		if (!sna_render_pixmap_partial(sna, src, src_bo, &tmp.src,
					       extents.x1 + src_dx,
					       extents.y1 + src_dy,
					       extents.x2 - extents.x1,
					       extents.y2 - extents.y1))
			goto fallback_tiled_dst;
	} else {
		tmp.src.bo = kgem_bo_reference(src_bo);
		tmp.src.width  = src->width;
		tmp.src.height = src->height;
		tmp.src.offset[0] = tmp.src.offset[1] = 0;
		tmp.src.scale[0] = 1.f/src->width;
		tmp.src.scale[1] = 1.f/src->height;
	}

	tmp.is_affine = true;
	tmp.floats_per_vertex = 3;
	tmp.floats_per_rect = 9;
	tmp.u.gen5.wm_kernel = WM_KERNEL;
	tmp.u.gen5.ve_id = 2;

	if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) {
			DBG(("%s: aperture check failed\n", __FUNCTION__));
			kgem_bo_destroy(&sna->kgem, tmp.src.bo);
			if (tmp.redirect.real_bo)
				kgem_bo_destroy(&sna->kgem, tmp.dst.bo);

			goto fallback_blt;
		}
	}

	dst_dx += tmp.dst.x;
	dst_dy += tmp.dst.y;
	tmp.dst.x = tmp.dst.y = 0;

	src_dx += tmp.src.offset[0];
	src_dy += tmp.src.offset[1];

	gen5_align_vertex(sna, &tmp);
	gen5_copy_bind_surfaces(sna, &tmp);

	do {
		int n_this_time;

		n_this_time = gen5_get_rectangles(sna, &tmp, n,
						  gen5_copy_bind_surfaces);
		n -= n_this_time;

		do {
			DBG(("	(%d, %d) -> (%d, %d) + (%d, %d)\n",
			     box->x1 + src_dx, box->y1 + src_dy,
			     box->x1 + dst_dx, box->y1 + dst_dy,
			     box->x2 - box->x1, box->y2 - box->y1));
			OUT_VERTEX(box->x2 + dst_dx, box->y2 + dst_dy);
			OUT_VERTEX_F((box->x2 + src_dx) * tmp.src.scale[0]);
			OUT_VERTEX_F((box->y2 + src_dy) * tmp.src.scale[1]);

			OUT_VERTEX(box->x1 + dst_dx, box->y2 + dst_dy);
			OUT_VERTEX_F((box->x1 + src_dx) * tmp.src.scale[0]);
			OUT_VERTEX_F((box->y2 + src_dy) * tmp.src.scale[1]);

			OUT_VERTEX(box->x1 + dst_dx, box->y1 + dst_dy);
			OUT_VERTEX_F((box->x1 + src_dx) * tmp.src.scale[0]);
			OUT_VERTEX_F((box->y1 + src_dy) * tmp.src.scale[1]);

			box++;
		} while (--n_this_time);
	} while (n);

	gen4_vertex_flush(sna);
	sna_render_composite_redirect_done(sna, &tmp);
	kgem_bo_destroy(&sna->kgem, tmp.src.bo);
	return true;

fallback_tiled_dst:
	if (tmp.redirect.real_bo)
		kgem_bo_destroy(&sna->kgem, tmp.dst.bo);
fallback_tiled:
	if (sna_blt_compare_depth(src, dst) &&
	    sna_blt_copy_boxes(sna, alu,
			       src_bo, src_dx, src_dy,
			       dst_bo, dst_dx, dst_dy,
			       dst->bitsPerPixel,
			       box, n))
		return true;

	DBG(("%s: tiled fallback\n", __FUNCTION__));
	return sna_tiling_copy_boxes(sna, alu,
				     src, src_bo, src_dx, src_dy,
				     dst, dst_bo, dst_dx, dst_dy,
				     box, n);
}

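/* Emit a single copy rectangle: only three vertices are written per
 * RECTLIST rectangle (the hardware infers the fourth corner), each
 * carrying a normalised source texture coordinate.
 */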
static void
gen5_render_copy_blt(struct sna *sna,
		     const struct sna_copy_op *op,
		     int16_t sx, int16_t sy,
		     int16_t w,  int16_t h,
		     int16_t dx, int16_t dy)
{
	DBG(("%s: src=(%d, %d), dst=(%d, %d), size=(%d, %d)\n", __FUNCTION__,
	     sx, sy, dx, dy, w, h));

	gen5_get_rectangles(sna, &op->base, 1, gen5_copy_bind_surfaces);

	OUT_VERTEX(dx+w, dy+h);
	OUT_VERTEX_F((sx+w)*op->base.src.scale[0]);
	OUT_VERTEX_F((sy+h)*op->base.src.scale[1]);

	OUT_VERTEX(dx, dy+h);
	OUT_VERTEX_F(sx*op->base.src.scale[0]);
	OUT_VERTEX_F((sy+h)*op->base.src.scale[1]);

	OUT_VERTEX(dx, dy);
	OUT_VERTEX_F(sx*op->base.src.scale[0]);
	OUT_VERTEX_F(sy*op->base.src.scale[1]);
}

static void
gen5_render_copy_done(struct sna *sna,
		      const struct sna_copy_op *op)
{
	if (sna->render.vertex_offset)
		gen4_vertex_flush(sna);

	DBG(("%s()\n", __FUNCTION__));
}

static bool
gen5_render_copy(struct sna *sna, uint8_t alu,
		 PixmapPtr src, struct kgem_bo *src_bo,
		 PixmapPtr dst, struct kgem_bo *dst_bo,
		 struct sna_copy_op *op)
{
	DBG(("%s (alu=%d)\n", __FUNCTION__, alu));

	if (sna_blt_compare_depth(&src->drawable, &dst->drawable) &&
	    sna_blt_copy(sna, alu,
			 src_bo, dst_bo,
			 dst->drawable.bitsPerPixel,
			 op))
		return true;

	if (!(alu == GXcopy || alu == GXclear) || src_bo == dst_bo ||
	    too_large(src->drawable.width, src->drawable.height) ||
	    too_large(dst->drawable.width, dst->drawable.height)) {
fallback:
		if (!sna_blt_compare_depth(&src->drawable, &dst->drawable))
			return false;

		return sna_blt_copy(sna, alu, src_bo, dst_bo,
				    dst->drawable.bitsPerPixel,
				    op);
	}

	if (dst->drawable.depth == src->drawable.depth) {
		op->base.dst.format = sna_render_format_for_depth(dst->drawable.depth);
		op->base.src.pict_format = op->base.dst.format;
	} else {
		op->base.dst.format = sna_format_for_depth(dst->drawable.depth);
		op->base.src.pict_format = sna_format_for_depth(src->drawable.depth);
	}
	if (!gen5_check_format(op->base.src.pict_format))
		goto fallback;

	op->base.op = alu == GXcopy ? PictOpSrc : PictOpClear;

	op->base.dst.pixmap = dst;
	op->base.dst.width  = dst->drawable.width;
	op->base.dst.height = dst->drawable.height;
	op->base.dst.bo = dst_bo;

	op->base.src.bo = src_bo;
	op->base.src.card_format =
		gen5_get_card_format(op->base.src.pict_format);
	op->base.src.width  = src->drawable.width;
	op->base.src.height = src->drawable.height;
	op->base.src.scale[0] = 1.f/src->drawable.width;
	op->base.src.scale[1] = 1.f/src->drawable.height;
	op->base.src.filter = SAMPLER_FILTER_NEAREST;
	op->base.src.repeat = SAMPLER_EXTEND_NONE;

	op->base.is_affine = true;
	op->base.floats_per_vertex = 3;
	op->base.floats_per_rect = 9;
	op->base.u.gen5.wm_kernel = WM_KERNEL;
	op->base.u.gen5.ve_id = 2;

	if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL))
			goto fallback;
	}

	if (kgem_bo_is_dirty(src_bo)) {
		if (sna_blt_compare_depth(&src->drawable, &dst->drawable) &&
		    sna_blt_copy(sna, alu,
				 src_bo, dst_bo,
				 dst->drawable.bitsPerPixel,
				 op))
			return true;
	}

	gen5_align_vertex(sna, &op->base);
	gen5_copy_bind_surfaces(sna, &op->base);

	op->blt  = gen5_render_copy_blt;
	op->done = gen5_render_copy_done;
	return true;
}


static void
gen5_fill_bind_surfaces(struct sna *sna,
			const struct sna_composite_op *op)
{
	bool dirty = kgem_bo_is_dirty(op->dst.bo);
	uint32_t *binding_table;
	uint16_t offset;

	gen5_get_batch(sna, op);

	binding_table = gen5_composite_get_binding_table(sna, &offset);

	binding_table[0] =
		gen5_bind_bo(sna,
			     op->dst.bo, op->dst.width, op->dst.height,
			     gen5_get_dest_format(op->dst.format),
			     true);
	binding_table[1] =
		gen5_bind_bo(sna,
			     op->src.bo, 1, 1,
			     GEN5_SURFACEFORMAT_B8G8R8A8_UNORM,
			     false);

	if (sna->kgem.surface == offset &&
	    *(uint64_t *)(sna->kgem.batch + sna->render_state.gen5.surface_table) == *(uint64_t *)binding_table) {
		sna->kgem.surface +=
			sizeof(struct gen5_surface_state_padded)/sizeof(uint32_t);
		offset = sna->render_state.gen5.surface_table;
	}

	gen5_emit_state(sna, op, offset | dirty);
}

static inline bool prefer_blt_fill(struct sna *sna)
{
#if PREFER_BLT_FILL
	return true;
#else
	return sna->kgem.mode != KGEM_RENDER;
#endif
}

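/* Fill boxes with a solid colour through the render pipeline. Simple
 * PictOpSrc/PictOpClear fills are first offered to the BLT; otherwise
 * the colour is converted to a PICT_a8r8g8b8 pixel, fetched as a
 * cached solid bo and drawn as rectangles with a constant texcoord.
 */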
static bool
gen5_render_fill_boxes(struct sna *sna,
		       CARD8 op,
		       PictFormat format,
		       const xRenderColor *color,
		       const DrawableRec *dst, struct kgem_bo *dst_bo,
		       const BoxRec *box, int n)
{
	struct sna_composite_op tmp;
	uint32_t pixel;

	DBG(("%s op=%x, color=(%04x,%04x,%04x,%04x), boxes=%d x [((%d, %d), (%d, %d))...]\n",
	     __FUNCTION__, op,
	     color->red, color->green, color->blue, color->alpha,
	     n, box->x1, box->y1, box->x2, box->y2));

	if (op >= ARRAY_SIZE(gen5_blend_op)) {
		DBG(("%s: fallback due to unhandled blend op: %d\n",
		     __FUNCTION__, op));
		return false;
	}

	if (op <= PictOpSrc &&
	    (prefer_blt_fill(sna) ||
	     too_large(dst->width, dst->height) ||
	     !gen5_check_dst_format(format))) {
		uint8_t alu = GXinvalid;

		pixel = 0;
		if (op == PictOpClear)
			alu = GXclear;
		else if (sna_get_pixel_from_rgba(&pixel,
						 color->red,
						 color->green,
						 color->blue,
						 color->alpha,
						 format))
			alu = GXcopy;

		if (alu != GXinvalid &&
		    sna_blt_fill_boxes(sna, alu,
				       dst_bo, dst->bitsPerPixel,
				       pixel, box, n))
			return true;

		if (!gen5_check_dst_format(format))
			return false;

		if (too_large(dst->width, dst->height))
			return sna_tiling_fill_boxes(sna, op, format, color,
						     dst, dst_bo, box, n);
	}

	if (op == PictOpClear) {
		pixel = 0;
		op = PictOpSrc;
	} else if (!sna_get_pixel_from_rgba(&pixel,
					    color->red,
					    color->green,
					    color->blue,
					    color->alpha,
					    PICT_a8r8g8b8))
		return false;

	DBG(("%s(%08x x %d)\n", __FUNCTION__, pixel, n));

	memset(&tmp, 0, sizeof(tmp));

	tmp.op = op;

	tmp.dst.pixmap = (PixmapPtr)dst;
	tmp.dst.width  = dst->width;
	tmp.dst.height = dst->height;
	tmp.dst.format = format;
	tmp.dst.bo = dst_bo;

	if (too_large(dst->width, dst->height)) {
		BoxRec extents;

		boxes_extents(box, n, &extents);
		if (!sna_render_composite_redirect(sna, &tmp,
						   extents.x1, extents.y1,
						   extents.x2 - extents.x1,
						   extents.y2 - extents.y1,
						   n > 1))
			return sna_tiling_fill_boxes(sna, op, format, color,
						     dst, dst_bo, box, n);
	}

	tmp.src.bo = sna_render_get_solid(sna, pixel);
	tmp.src.filter = SAMPLER_FILTER_NEAREST;
	tmp.src.repeat = SAMPLER_EXTEND_REPEAT;

	tmp.is_affine = true;
	tmp.floats_per_vertex = 2;
	tmp.floats_per_rect = 6;
	tmp.u.gen5.wm_kernel = WM_KERNEL;
	tmp.u.gen5.ve_id = 1;

	if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
			kgem_bo_destroy(&sna->kgem, tmp.src.bo);
			return false;
		}
	}

	gen5_align_vertex(sna, &tmp);
	gen5_fill_bind_surfaces(sna, &tmp);

	do {
		int n_this_time;

		n_this_time = gen5_get_rectangles(sna, &tmp, n,
						  gen5_fill_bind_surfaces);
		n -= n_this_time;

		do {
			DBG(("	(%d, %d), (%d, %d)\n",
			     box->x1, box->y1, box->x2, box->y2));
			OUT_VERTEX(box->x2, box->y2);
			OUT_VERTEX_F(.5);

			OUT_VERTEX(box->x1, box->y2);
			OUT_VERTEX_F(.5);

			OUT_VERTEX(box->x1, box->y1);
			OUT_VERTEX_F(.5);

			box++;
		} while (--n_this_time);
	} while (n);

	gen4_vertex_flush(sna);
	kgem_bo_destroy(&sna->kgem, tmp.src.bo);
	sna_render_composite_redirect_done(sna, &tmp);
	return true;
}


static void
gen5_render_fill_op_blt(struct sna *sna,
			const struct sna_fill_op *op,
			int16_t x, int16_t y, int16_t w, int16_t h)
{
	DBG(("%s (%d, %d)x(%d, %d)\n", __FUNCTION__, x, y, w, h));

	gen5_get_rectangles(sna, &op->base, 1, gen5_fill_bind_surfaces);

	OUT_VERTEX(x+w, y+h);
	OUT_VERTEX_F(.5);

	OUT_VERTEX(x, y+h);
	OUT_VERTEX_F(.5);

	OUT_VERTEX(x, y);
	OUT_VERTEX_F(.5);
}

fastcall static void
gen5_render_fill_op_box(struct sna *sna,
			const struct sna_fill_op *op,
			const BoxRec *box)
{
	DBG(("%s: (%d, %d),(%d, %d)\n", __FUNCTION__,
	     box->x1, box->y1, box->x2, box->y2));

	gen5_get_rectangles(sna, &op->base, 1, gen5_fill_bind_surfaces);

	OUT_VERTEX(box->x2, box->y2);
	OUT_VERTEX_F(.5);

	OUT_VERTEX(box->x1, box->y2);
	OUT_VERTEX_F(.5);

	OUT_VERTEX(box->x1, box->y1);
	OUT_VERTEX_F(.5);
}

fastcall static void
gen5_render_fill_op_boxes(struct sna *sna,
			  const struct sna_fill_op *op,
			  const BoxRec *box,
			  int nbox)
{
	DBG(("%s: (%d, %d),(%d, %d)... x %d\n", __FUNCTION__,
	     box->x1, box->y1, box->x2, box->y2, nbox));

	do {
		int nbox_this_time;

		nbox_this_time = gen5_get_rectangles(sna, &op->base, nbox,
						     gen5_fill_bind_surfaces);
		nbox -= nbox_this_time;

		do {
			OUT_VERTEX(box->x2, box->y2);
			OUT_VERTEX_F(.5);

			OUT_VERTEX(box->x1, box->y2);
			OUT_VERTEX_F(.5);

			OUT_VERTEX(box->x1, box->y1);
			OUT_VERTEX_F(.5);
			box++;
		} while (--nbox_this_time);
	} while (nbox);
}

static void
gen5_render_fill_op_done(struct sna *sna,
			 const struct sna_fill_op *op)
{
	if (sna->render.vertex_offset)
		gen4_vertex_flush(sna);
	kgem_bo_destroy(&sna->kgem, op->base.src.bo);

	DBG(("%s()\n", __FUNCTION__));
}


static bool
gen5_render_fill(struct sna *sna, uint8_t alu,
		 PixmapPtr dst, struct kgem_bo *dst_bo,
		 uint32_t color, unsigned flags,
		 struct sna_fill_op *op)
{
	DBG(("%s(alu=%d, color=%08x)\n", __FUNCTION__, alu, color));

	if (prefer_blt_fill(sna) &&
	    sna_blt_fill(sna, alu,
			 dst_bo, dst->drawable.bitsPerPixel,
			 color,
			 op))
		return true;

	if (!(alu == GXcopy || alu == GXclear) ||
	    too_large(dst->drawable.width, dst->drawable.height))
		return sna_blt_fill(sna, alu,
				    dst_bo, dst->drawable.bitsPerPixel,
				    color,
				    op);

	if (alu == GXclear)
		color = 0;

	op->base.op = color == 0 ? PictOpClear : PictOpSrc;

	op->base.dst.pixmap = dst;
	op->base.dst.width  = dst->drawable.width;
	op->base.dst.height = dst->drawable.height;
	op->base.dst.format = sna_format_for_depth(dst->drawable.depth);
	op->base.dst.bo = dst_bo;
	op->base.dst.x = op->base.dst.y = 0;

	op->base.need_magic_ca_pass = 0;
	op->base.has_component_alpha = 0;

	op->base.src.bo =
		sna_render_get_solid(sna,
				     sna_rgba_for_color(color,
							dst->drawable.depth));
	op->base.src.filter = SAMPLER_FILTER_NEAREST;
	op->base.src.repeat = SAMPLER_EXTEND_REPEAT;

	op->base.mask.bo = NULL;
	op->base.mask.filter = SAMPLER_FILTER_NEAREST;
	op->base.mask.repeat = SAMPLER_EXTEND_NONE;

	op->base.is_affine = true;
	op->base.floats_per_vertex = 2;
	op->base.floats_per_rect = 6;
	op->base.u.gen5.wm_kernel = WM_KERNEL;
	op->base.u.gen5.ve_id = 1;

	if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
			kgem_bo_destroy(&sna->kgem, op->base.src.bo);
			return false;
		}
	}

	gen5_align_vertex(sna, &op->base);
	gen5_fill_bind_surfaces(sna, &op->base);

	op->blt   = gen5_render_fill_op_blt;
	op->box   = gen5_render_fill_op_box;
	op->boxes = gen5_render_fill_op_boxes;
	op->points = NULL;
	op->done  = gen5_render_fill_op_done;
	return true;
}


static bool
gen5_render_fill_one_try_blt(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
			     uint32_t color,
			     int16_t x1, int16_t y1, int16_t x2, int16_t y2,
			     uint8_t alu)
{
	BoxRec box;

	box.x1 = x1;
	box.y1 = y1;
	box.x2 = x2;
	box.y2 = y2;

	return sna_blt_fill_boxes(sna, alu,
				  bo, dst->drawable.bitsPerPixel,
				  color, &box, 1);
}

static bool
gen5_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
		     uint32_t color,
		     int16_t x1, int16_t y1,
		     int16_t x2, int16_t y2,
		     uint8_t alu)
{
	struct sna_composite_op tmp;

#if NO_FILL_ONE
	return gen5_render_fill_one_try_blt(sna, dst, bo, color,
					    x1, y1, x2, y2, alu);
#endif

	/* Prefer to use the BLT if already engaged */
	if (prefer_blt_fill(sna) &&
	    gen5_render_fill_one_try_blt(sna, dst, bo, color,
					 x1, y1, x2, y2, alu))
		return true;

	/* Must use the BLT if we can't RENDER... */
	if (!(alu == GXcopy || alu == GXclear) ||
	    too_large(dst->drawable.width, dst->drawable.height))
		return gen5_render_fill_one_try_blt(sna, dst, bo, color,
						    x1, y1, x2, y2, alu);

	if (alu == GXclear)
		color = 0;

	tmp.op = color == 0 ? PictOpClear : PictOpSrc;

	tmp.dst.pixmap = dst;
	tmp.dst.width  = dst->drawable.width;
	tmp.dst.height = dst->drawable.height;
	tmp.dst.format = sna_format_for_depth(dst->drawable.depth);
	tmp.dst.bo = bo;
	tmp.dst.x = tmp.dst.y = 0;

	tmp.src.bo =
		sna_render_get_solid(sna,
				     sna_rgba_for_color(color,
							dst->drawable.depth));
	tmp.src.filter = SAMPLER_FILTER_NEAREST;
	tmp.src.repeat = SAMPLER_EXTEND_REPEAT;

	tmp.mask.bo = NULL;
	tmp.mask.filter = SAMPLER_FILTER_NEAREST;
	tmp.mask.repeat = SAMPLER_EXTEND_NONE;

	tmp.is_affine = true;
	tmp.floats_per_vertex = 2;
	tmp.floats_per_rect = 6;
	tmp.has_component_alpha = 0;
	tmp.need_magic_ca_pass = false;

	tmp.u.gen5.wm_kernel = WM_KERNEL;
	tmp.u.gen5.ve_id = 1;

	if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
			kgem_bo_destroy(&sna->kgem, tmp.src.bo);
			return false;
		}
	}

	gen5_align_vertex(sna, &tmp);
	gen5_fill_bind_surfaces(sna, &tmp);

	gen5_get_rectangles(sna, &tmp, 1, gen5_fill_bind_surfaces);

	DBG(("	(%d, %d), (%d, %d)\n", x1, y1, x2, y2));
	OUT_VERTEX(x2, y2);
	OUT_VERTEX_F(.5);

	OUT_VERTEX(x1, y2);
	OUT_VERTEX_F(.5);

	OUT_VERTEX(x1, y1);
	OUT_VERTEX_F(.5);

	gen4_vertex_flush(sna);
	kgem_bo_destroy(&sna->kgem, tmp.src.bo);

	return true;
}

static void
gen5_render_context_switch(struct kgem *kgem,
			   int new_mode)
{
	if (!kgem->nbatch)
		return;

	/* WaNonPipelinedStateCommandFlush
	 *
	 * Ironlake has a limitation that a 3D or Media command can't
	 * be the first command after a BLT, unless it's
	 * non-pipelined.
	 *
	 * We do this by ensuring that the non-pipelined drawrect
	 * is always emitted first following a switch from BLT.
	 */
	if (kgem->mode == KGEM_BLT) {
		struct sna *sna = to_sna_from_kgem(kgem);
		DBG(("%s: forcing drawrect on next state emission\n",
		     __FUNCTION__));
		sna->render_state.gen5.drawrect_limit = -1;
	}

	if (kgem_ring_is_idle(kgem, kgem->ring)) {
		DBG(("%s: GPU idle, flushing\n", __FUNCTION__));
		_kgem_submit(kgem);
	}
}

static void gen5_render_reset(struct sna *sna)
{
	sna->render_state.gen5.needs_invariant = true;
	sna->render_state.gen5.ve_id = -1;
	sna->render_state.gen5.last_primitive = -1;
	sna->render_state.gen5.last_pipelined_pointers = 0;

	sna->render_state.gen5.drawrect_offset = -1;
	sna->render_state.gen5.drawrect_limit = -1;
	sna->render_state.gen5.surface_table = -1;

	if (sna->render.vbo && !kgem_bo_can_map(&sna->kgem, sna->render.vbo)) {
		DBG(("%s: discarding unmappable vbo\n", __FUNCTION__));
		discard_vbo(sna);
	}

	sna->render.vertex_offset = 0;
	sna->render.nvertex_reloc = 0;
	sna->render.vb_id = 0;
}

static void gen5_render_fini(struct sna *sna)
{
	kgem_bo_destroy(&sna->kgem, sna->render_state.gen5.general_bo);
}

static uint32_t gen5_create_vs_unit_state(struct sna_static_stream *stream)
{
	struct gen5_vs_unit_state *vs = sna_static_stream_map(stream, sizeof(*vs), 32);

	/* Set up the vertex shader to be disabled (passthrough) */
	vs->thread4.nr_urb_entries = URB_VS_ENTRIES >> 2;
	vs->thread4.urb_entry_allocation_size = URB_VS_ENTRY_SIZE - 1;
	vs->vs6.vs_enable = 0;
	vs->vs6.vert_cache_disable = 1;

	return sna_static_stream_offsetof(stream, vs);
}

static uint32_t gen5_create_sf_state(struct sna_static_stream *stream,
				     uint32_t kernel)
{
	struct gen5_sf_unit_state *sf_state;

	sf_state = sna_static_stream_map(stream, sizeof(*sf_state), 32);

	sf_state->thread0.grf_reg_count = GEN5_GRF_BLOCKS(SF_KERNEL_NUM_GRF);
	sf_state->thread0.kernel_start_pointer = kernel >> 6;

	sf_state->thread3.const_urb_entry_read_length = 0;	/* no const URBs */
	sf_state->thread3.const_urb_entry_read_offset = 0;	/* no const URBs */
	sf_state->thread3.urb_entry_read_length = 1;	/* 1 URB per vertex */
	/* don't smash vertex header, read start from dw8 */
	sf_state->thread3.urb_entry_read_offset = 1;
	sf_state->thread3.dispatch_grf_start_reg = 3;
	sf_state->thread4.max_threads = SF_MAX_THREADS - 1;
	sf_state->thread4.urb_entry_allocation_size = URB_SF_ENTRY_SIZE - 1;
	sf_state->thread4.nr_urb_entries = URB_SF_ENTRIES;
	sf_state->sf5.viewport_transform = false;	/* skip viewport */
	sf_state->sf6.cull_mode = GEN5_CULLMODE_NONE;
	sf_state->sf6.scissor = 0;
	sf_state->sf7.trifan_pv = 2;
	sf_state->sf6.dest_org_vbias = 0x8;
	sf_state->sf6.dest_org_hbias = 0x8;

	return sna_static_stream_offsetof(stream, sf_state);
}

static uint32_t gen5_create_sampler_state(struct sna_static_stream *stream,
					  sampler_filter_t src_filter,
					  sampler_extend_t src_extend,
					  sampler_filter_t mask_filter,
					  sampler_extend_t mask_extend)
{
	struct gen5_sampler_state *sampler_state;

	sampler_state = sna_static_stream_map(stream,
					      sizeof(struct gen5_sampler_state) * 2,
					      32);
	sampler_state_init(&sampler_state[0], src_filter, src_extend);
	sampler_state_init(&sampler_state[1], mask_filter, mask_extend);

	return sna_static_stream_offsetof(stream, sampler_state);
}
3217 
gen5_init_wm_state(struct gen5_wm_unit_state * state,bool has_mask,uint32_t kernel,uint32_t sampler)3218 static void gen5_init_wm_state(struct gen5_wm_unit_state *state,
3219 			       bool has_mask,
3220 			       uint32_t kernel,
3221 			       uint32_t sampler)
3222 {
3223 	state->thread0.grf_reg_count = GEN5_GRF_BLOCKS(PS_KERNEL_NUM_GRF);
3224 	state->thread0.kernel_start_pointer = kernel >> 6;
3225 
3226 	state->thread1.single_program_flow = 0;
3227 
3228 	/* scratch space is not used in our kernel */
3229 	state->thread2.scratch_space_base_pointer = 0;
3230 	state->thread2.per_thread_scratch_space = 0;
3231 
3232 	state->thread3.const_urb_entry_read_length = 0;
3233 	state->thread3.const_urb_entry_read_offset = 0;
3234 
3235 	state->thread3.urb_entry_read_offset = 0;
3236 	/* wm kernel use urb from 3, see wm_program in compiler module */
3237 	state->thread3.dispatch_grf_start_reg = 3;	/* must match kernel */
3238 
3239 	state->wm4.sampler_count = 0;	/* hardware requirement */
3240 
3241 	state->wm4.sampler_state_pointer = sampler >> 5;
3242 	state->wm5.max_threads = PS_MAX_THREADS - 1;
3243 	state->wm5.transposed_urb_read = 0;
3244 	state->wm5.thread_dispatch_enable = 1;
3245 	/* just use 16-pixel dispatch (4 subspans), don't need to change kernel
3246 	 * start point
3247 	 */
3248 	state->wm5.enable_16_pix = 1;
3249 	state->wm5.enable_8_pix = 0;
3250 	state->wm5.early_depth_test = 1;
3251 
3252 	/* Each pair of attributes (src/mask coords) is two URB entries */
3253 	if (has_mask) {
3254 		state->thread1.binding_table_entry_count = 3;	/* 2 tex and fb */
3255 		state->thread3.urb_entry_read_length = 4;
3256 	} else {
3257 		state->thread1.binding_table_entry_count = 2;	/* 1 tex and fb */
3258 		state->thread3.urb_entry_read_length = 2;
3259 	}
3260 
3261 	/* binding table entry count is only used for prefetching,
3262 	 * and it has to be set 0 for Ironlake
3263 	 */
3264 	state->thread1.binding_table_entry_count = 0;
3265 }

static uint32_t gen5_create_cc_unit_state(struct sna_static_stream *stream)
{
	uint8_t *ptr, *base;
	int i, j;

	base = ptr =
		sna_static_stream_map(stream,
				      GEN5_BLENDFACTOR_COUNT*GEN5_BLENDFACTOR_COUNT*64,
				      64);

	for (i = 0; i < GEN5_BLENDFACTOR_COUNT; i++) {
		for (j = 0; j < GEN5_BLENDFACTOR_COUNT; j++) {
			struct gen5_cc_unit_state *state =
				(struct gen5_cc_unit_state *)ptr;

			state->cc3.blend_enable =
				!(j == GEN5_BLENDFACTOR_ZERO && i == GEN5_BLENDFACTOR_ONE);

			state->cc5.logicop_func = 0xc;	/* COPY */
			state->cc5.ia_blend_function = GEN5_BLENDFUNCTION_ADD;
			/* Fill in the alpha blend factors the same as color,
			 * for future use.
			 */
			state->cc5.ia_src_blend_factor = i;
			state->cc5.ia_dest_blend_factor = j;

			state->cc6.blend_function = GEN5_BLENDFUNCTION_ADD;
			state->cc6.clamp_post_alpha_blend = 1;
			state->cc6.clamp_pre_alpha_blend = 1;
			state->cc6.src_blend_factor = i;
			state->cc6.dest_blend_factor = j;

			ptr += 64;
		}
	}

	return sna_static_stream_offsetof(stream, base);
}
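
/* Illustrative sketch, not part of the original driver: given the layout
 * built above (one 64-byte entry per pair, src factor major, dst factor
 * minor), the CC state for a particular blend-factor pair can be located
 * with a simple offset computation.  The helper name is an assumption
 * added for illustration; only state->cc and the table layout come from
 * the code above.
 */
static inline uint32_t
gen5_lookup_cc_state(const struct gen5_render_state *state,
		     int src_blend_factor, int dst_blend_factor)
{
	/* one 64-byte gen5_cc_unit_state per (src, dst) pair */
	return state->cc +
		(src_blend_factor * GEN5_BLENDFACTOR_COUNT +
		 dst_blend_factor) * 64;
}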

static bool gen5_render_setup(struct sna *sna)
{
	struct gen5_render_state *state = &sna->render_state.gen5;
	struct sna_static_stream general;
	struct gen5_wm_unit_state_padded *wm_state;
	uint32_t sf[2], wm[KERNEL_COUNT];
	int i, j, k, l, m;

	sna_static_stream_init(&general);

	/* Zero pad the start. If you see an offset of 0x0 in the batchbuffer
	 * dumps, you know it points to zero.
	 */
	null_create(&general);

	/* Set up the two SF states (one for blending with a mask, one without) */
	sf[0] = sna_static_stream_compile_sf(sna, &general, brw_sf_kernel__nomask);
	sf[1] = sna_static_stream_compile_sf(sna, &general, brw_sf_kernel__mask);

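	/* Copy in each precompiled WM kernel binary (size != 0); otherwise
	 * assemble the kernel at runtime for 16-pixel dispatch (matching
	 * wm5.enable_16_pix in gen5_init_wm_state).
	 */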
	for (m = 0; m < KERNEL_COUNT; m++) {
		if (wm_kernels[m].size) {
			wm[m] = sna_static_stream_add(&general,
						      wm_kernels[m].data,
						      wm_kernels[m].size,
						      64);
		} else {
			wm[m] = sna_static_stream_compile_wm(sna, &general,
							     wm_kernels[m].data,
							     16);
		}
		assert(wm[m]);
	}

	state->vs = gen5_create_vs_unit_state(&general);

	state->sf[0] = gen5_create_sf_state(&general, sf[0]);
	state->sf[1] = gen5_create_sf_state(&general, sf[1]);

	/* Set up the WM states: one for every combination of filter/extend
	 * for source and mask, per kernel.
	 */
	wm_state = sna_static_stream_map(&general,
					  sizeof(*wm_state) * KERNEL_COUNT *
					  FILTER_COUNT * EXTEND_COUNT *
					  FILTER_COUNT * EXTEND_COUNT,
					  64);
	state->wm = sna_static_stream_offsetof(&general, wm_state);
	for (i = 0; i < FILTER_COUNT; i++) {
		for (j = 0; j < EXTEND_COUNT; j++) {
			for (k = 0; k < FILTER_COUNT; k++) {
				for (l = 0; l < EXTEND_COUNT; l++) {
					uint32_t sampler_state;

					sampler_state =
						gen5_create_sampler_state(&general,
									  i, j,
									  k, l);

					for (m = 0; m < KERNEL_COUNT; m++) {
						gen5_init_wm_state(&wm_state->state,
								   wm_kernels[m].has_mask,
								   wm[m], sampler_state);
						wm_state++;
					}
				}
			}
		}
	}

	state->cc = gen5_create_cc_unit_state(&general);

	state->general_bo = sna_static_stream_fini(sna, &general);
	return state->general_bo != NULL;
}
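
/* Illustrative sketch, not part of the original driver: the WM table built
 * in gen5_render_setup() is indexed in exactly the order the loops above
 * filled it -- src filter, src extend, mask filter, mask extend, then
 * kernel.  The helper name is an assumption added for illustration; the
 * layout itself comes from the loops above.
 */
static inline uint32_t
gen5_lookup_wm_state(const struct gen5_render_state *state,
		     int src_filter, int src_extend,
		     int mask_filter, int mask_extend,
		     int kernel)
{
	unsigned idx = src_filter;

	idx = idx * EXTEND_COUNT + src_extend;
	idx = idx * FILTER_COUNT + mask_filter;
	idx = idx * EXTEND_COUNT + mask_extend;
	idx = idx * KERNEL_COUNT + kernel;

	return state->wm + idx * sizeof(struct gen5_wm_unit_state_padded);
}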

const char *gen5_render_init(struct sna *sna, const char *backend)
{
	if (!gen5_render_setup(sna))
		return backend;

	sna->kgem.context_switch = gen5_render_context_switch;
	sna->kgem.retire = gen4_render_retire;
	sna->kgem.expire = gen4_render_expire;

#if !NO_COMPOSITE
	sna->render.composite = gen5_render_composite;
	sna->render.prefer_gpu |= PREFER_GPU_RENDER;
#endif
#if !NO_COMPOSITE_SPANS
	sna->render.check_composite_spans = gen5_check_composite_spans;
	sna->render.composite_spans = gen5_render_composite_spans;
	if (intel_get_device_id(sna->dev) == 0x0044)
		sna->render.prefer_gpu |= PREFER_GPU_SPANS;
#endif
	sna->render.video = gen5_render_video;

	sna->render.copy_boxes = gen5_render_copy_boxes;
	sna->render.copy = gen5_render_copy;

	sna->render.fill_boxes = gen5_render_fill_boxes;
	sna->render.fill = gen5_render_fill;
	sna->render.fill_one = gen5_render_fill_one;

	sna->render.flush = gen4_render_flush;
	sna->render.reset = gen5_render_reset;
	sna->render.fini = gen5_render_fini;

	sna->render.max_3d_size = MAX_3D_SIZE;
	sna->render.max_3d_pitch = 1 << 18;
	return "Ironlake (gen5)";
}
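
/* Illustrative note, not part of the original driver: gen5_render_init()
 * is intended to be called from the per-generation dispatch during render
 * initialisation.  A minimal sketch of such a call site, assuming a
 * hypothetical is_gen5() predicate (the real dispatch tests the device
 * generation):
 *
 *	const char *backend = "none";
 *	if (is_gen5(sna))
 *		backend = gen5_render_init(sna, backend);
 *
 * On success the returned string names the backend ("Ironlake (gen5)");
 * on failure the fallback name passed in is returned unchanged.
 */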