/*
 * Copyright © 2006,2008,2011 Intel Corporation
 * Copyright © 2007 Red Hat, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Wang Zhenyu <zhenyu.z.wang@sna.com>
 *    Eric Anholt <eric@anholt.net>
 *    Carl Worth <cworth@redhat.com>
 *    Keith Packard <keithp@keithp.com>
 *    Chris Wilson <chris@chris-wilson.co.uk>
 *
 */

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include "sna.h"
#include "sna_reg.h"
#include "sna_render.h"
#include "sna_render_inline.h"
#include "sna_video.h"

#include "brw/brw.h"
#include "gen4_common.h"
#include "gen4_render.h"
#include "gen4_source.h"
#include "gen4_vertex.h"

/* gen4 has a serious issue with its shaders that we need to flush
 * after every rectangle... So until that is resolved, prefer
 * the BLT engine.
 */
#define FORCE_SPANS 0
#define FORCE_NONRECTILINEAR_SPANS -1
#define FORCE_FLUSH 1 /* https://bugs.freedesktop.org/show_bug.cgi?id=55500 */

#define ALWAYS_FLUSH 1

#define NO_COMPOSITE 0
#define NO_COMPOSITE_SPANS 0
#define NO_COPY 0
#define NO_COPY_BOXES 0
#define NO_FILL 0
#define NO_FILL_ONE 0
#define NO_FILL_BOXES 0
#define NO_VIDEO 0

#define MAX_FLUSH_VERTICES 1 /* was 6, https://bugs.freedesktop.org/show_bug.cgi?id=55500 */

#define GEN4_GRF_BLOCKS(nreg)    ((nreg + 15) / 16 - 1)
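
/* Worked example (added for clarity, not in the original source):
 * GEN4_GRF_BLOCKS() rounds a register count up to whole 16-register
 * blocks and returns the count 0-based, as the dispatch words expect:
 *   GEN4_GRF_BLOCKS(16) == (16 + 15)/16 - 1 == 0
 *   GEN4_GRF_BLOCKS(17) == (17 + 15)/16 - 1 == 1
 *   GEN4_GRF_BLOCKS(32) == (32 + 15)/16 - 1 == 1
 */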

/* Set up a default static partitioning of the URB, which is supposed to
 * allow anything we would want to do, at potentially lower performance.
 */
#define URB_CS_ENTRY_SIZE     1
#define URB_CS_ENTRIES	      0

#define URB_VS_ENTRY_SIZE     1
#define URB_VS_ENTRIES	      32

#define URB_GS_ENTRY_SIZE     0
#define URB_GS_ENTRIES	      0

#define URB_CL_ENTRY_SIZE   0
#define URB_CL_ENTRIES      0

#define URB_SF_ENTRY_SIZE     2
#define URB_SF_ENTRIES	      64
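
/* Worked example (added for clarity): the URB fences programmed by
 * gen4_emit_urb() below are cumulative, so with the sizes above they
 * land at
 *   urb_vs_end =        32 * 1 =  32
 *   urb_gs_end =  32 +   0 * 0 =  32
 *   urb_cl_end =  32 +   0 * 0 =  32
 *   urb_sf_end =  32 +  64 * 2 = 160
 *   urb_cs_end = 160 +   0 * 1 = 160
 * which fits comfortably within the 256 rows asserted there.
 */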

/*
 * this program computes dA/dx and dA/dy for the texture coordinates along
 * with the base texture coordinate. It was extracted from the Mesa driver
 */

#define SF_KERNEL_NUM_GRF 16
#define PS_KERNEL_NUM_GRF 32

#define GEN4_MAX_SF_THREADS 24
#define GEN4_MAX_WM_THREADS 32
#define G4X_MAX_WM_THREADS 50

static const uint32_t ps_kernel_packed_bt601_static[][4] = {
#include "exa_wm_xy.g4b"
#include "exa_wm_src_affine.g4b"
#include "exa_wm_src_sample_argb.g4b"
#include "exa_wm_yuv_rgb_bt601.g4b"
#include "exa_wm_write.g4b"
};

static const uint32_t ps_kernel_planar_bt601_static[][4] = {
#include "exa_wm_xy.g4b"
#include "exa_wm_src_affine.g4b"
#include "exa_wm_src_sample_planar.g4b"
#include "exa_wm_yuv_rgb_bt601.g4b"
#include "exa_wm_write.g4b"
};

static const uint32_t ps_kernel_nv12_bt601_static[][4] = {
#include "exa_wm_xy.g4b"
#include "exa_wm_src_affine.g4b"
#include "exa_wm_src_sample_nv12.g4b"
#include "exa_wm_yuv_rgb_bt601.g4b"
#include "exa_wm_write.g4b"
};

static const uint32_t ps_kernel_packed_bt709_static[][4] = {
#include "exa_wm_xy.g4b"
#include "exa_wm_src_affine.g4b"
#include "exa_wm_src_sample_argb.g4b"
#include "exa_wm_yuv_rgb_bt709.g4b"
#include "exa_wm_write.g4b"
};

static const uint32_t ps_kernel_planar_bt709_static[][4] = {
#include "exa_wm_xy.g4b"
#include "exa_wm_src_affine.g4b"
#include "exa_wm_src_sample_planar.g4b"
#include "exa_wm_yuv_rgb_bt709.g4b"
#include "exa_wm_write.g4b"
};

static const uint32_t ps_kernel_nv12_bt709_static[][4] = {
#include "exa_wm_xy.g4b"
#include "exa_wm_src_affine.g4b"
#include "exa_wm_src_sample_nv12.g4b"
#include "exa_wm_yuv_rgb_bt709.g4b"
#include "exa_wm_write.g4b"
};

#define NOKERNEL(kernel_enum, func, masked) \
    [kernel_enum] = {func, 0, masked}
#define KERNEL(kernel_enum, kernel, masked) \
    [kernel_enum] = {&kernel, sizeof(kernel), masked}
static const struct wm_kernel_info {
	const void *data;
	unsigned int size;
	bool has_mask;
} wm_kernels[] = {
	NOKERNEL(WM_KERNEL, brw_wm_kernel__affine, false),
	NOKERNEL(WM_KERNEL_P, brw_wm_kernel__projective, false),

	NOKERNEL(WM_KERNEL_MASK, brw_wm_kernel__affine_mask, true),
	NOKERNEL(WM_KERNEL_MASK_P, brw_wm_kernel__projective_mask, true),

	NOKERNEL(WM_KERNEL_MASKCA, brw_wm_kernel__affine_mask_ca, true),
	NOKERNEL(WM_KERNEL_MASKCA_P, brw_wm_kernel__projective_mask_ca, true),

	NOKERNEL(WM_KERNEL_MASKSA, brw_wm_kernel__affine_mask_sa, true),
	NOKERNEL(WM_KERNEL_MASKSA_P, brw_wm_kernel__projective_mask_sa, true),

	NOKERNEL(WM_KERNEL_OPACITY, brw_wm_kernel__affine_opacity, true),
	NOKERNEL(WM_KERNEL_OPACITY_P, brw_wm_kernel__projective_opacity, true),

	KERNEL(WM_KERNEL_VIDEO_PLANAR_BT601, ps_kernel_planar_bt601_static, false),
	KERNEL(WM_KERNEL_VIDEO_NV12_BT601, ps_kernel_nv12_bt601_static, false),
	KERNEL(WM_KERNEL_VIDEO_PACKED_BT601, ps_kernel_packed_bt601_static, false),

	KERNEL(WM_KERNEL_VIDEO_PLANAR_BT709, ps_kernel_planar_bt709_static, false),
	KERNEL(WM_KERNEL_VIDEO_NV12_BT709, ps_kernel_nv12_bt709_static, false),
	KERNEL(WM_KERNEL_VIDEO_PACKED_BT709, ps_kernel_packed_bt709_static, false),
};
#undef KERNEL

static const struct blendinfo {
	bool src_alpha;
	uint32_t src_blend;
	uint32_t dst_blend;
} gen4_blend_op[] = {
	/* Clear */	{0, GEN4_BLENDFACTOR_ZERO, GEN4_BLENDFACTOR_ZERO},
	/* Src */	{0, GEN4_BLENDFACTOR_ONE, GEN4_BLENDFACTOR_ZERO},
	/* Dst */	{0, GEN4_BLENDFACTOR_ZERO, GEN4_BLENDFACTOR_ONE},
	/* Over */	{1, GEN4_BLENDFACTOR_ONE, GEN4_BLENDFACTOR_INV_SRC_ALPHA},
	/* OverReverse */ {0, GEN4_BLENDFACTOR_INV_DST_ALPHA, GEN4_BLENDFACTOR_ONE},
	/* In */	{0, GEN4_BLENDFACTOR_DST_ALPHA, GEN4_BLENDFACTOR_ZERO},
	/* InReverse */	{1, GEN4_BLENDFACTOR_ZERO, GEN4_BLENDFACTOR_SRC_ALPHA},
	/* Out */	{0, GEN4_BLENDFACTOR_INV_DST_ALPHA, GEN4_BLENDFACTOR_ZERO},
	/* OutReverse */ {1, GEN4_BLENDFACTOR_ZERO, GEN4_BLENDFACTOR_INV_SRC_ALPHA},
	/* Atop */	{1, GEN4_BLENDFACTOR_DST_ALPHA, GEN4_BLENDFACTOR_INV_SRC_ALPHA},
	/* AtopReverse */ {1, GEN4_BLENDFACTOR_INV_DST_ALPHA, GEN4_BLENDFACTOR_SRC_ALPHA},
	/* Xor */	{1, GEN4_BLENDFACTOR_INV_DST_ALPHA, GEN4_BLENDFACTOR_INV_SRC_ALPHA},
	/* Add */	{0, GEN4_BLENDFACTOR_ONE, GEN4_BLENDFACTOR_ONE},
};

/**
 * Highest-valued BLENDFACTOR used in gen4_blend_op.
 *
 * This leaves out GEN4_BLENDFACTOR_INV_DST_COLOR,
 * GEN4_BLENDFACTOR_INV_CONST_{COLOR,ALPHA},
 * GEN4_BLENDFACTOR_INV_SRC1_{COLOR,ALPHA}
 */
#define GEN4_BLENDFACTOR_COUNT (GEN4_BLENDFACTOR_INV_DST_ALPHA + 1)

#define BLEND_OFFSET(s, d) \
	(((s) * GEN4_BLENDFACTOR_COUNT + (d)) * 64)
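
/* Example (added for clarity): PictOpOver uses src=GEN4_BLENDFACTOR_ONE
 * and dst=GEN4_BLENDFACTOR_INV_SRC_ALPHA per gen4_blend_op[] above, so
 * its precomputed blend state sits at
 *   (GEN4_BLENDFACTOR_ONE * GEN4_BLENDFACTOR_COUNT +
 *    GEN4_BLENDFACTOR_INV_SRC_ALPHA) * 64
 * bytes into the static state array; each entry is padded out to 64
 * bytes so every offset stays 64-byte aligned.
 */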

#define SAMPLER_OFFSET(sf, se, mf, me, k) \
	((((((sf) * EXTEND_COUNT + (se)) * FILTER_COUNT + (mf)) * EXTEND_COUNT + (me)) * KERNEL_COUNT + (k)) * 64)
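
/* Example (added for clarity): SAMPLER_OFFSET() indexes a precomputed
 * five-dimensional table of sampler state, 64 bytes per entry. E.g. a
 * bilinear/pad source with a nearest/none mask for kernel k expands to
 *   ((((SAMPLER_FILTER_BILINEAR * EXTEND_COUNT + SAMPLER_EXTEND_PAD)
 *        * FILTER_COUNT + SAMPLER_FILTER_NEAREST)
 *        * EXTEND_COUNT + SAMPLER_EXTEND_NONE)
 *        * KERNEL_COUNT + k) * 64
 */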

static void
gen4_emit_pipelined_pointers(struct sna *sna,
			     const struct sna_composite_op *op,
			     int blend, int kernel);

#define OUT_BATCH(v) batch_emit(sna, v)
#define OUT_VERTEX(x,y) vertex_emit_2s(sna, x,y)
#define OUT_VERTEX_F(v) vertex_emit(sna, v)

#define GEN4_MAX_3D_SIZE 8192

static inline bool too_large(int width, int height)
{
	return width > GEN4_MAX_3D_SIZE || height > GEN4_MAX_3D_SIZE;
}

static int
gen4_choose_composite_kernel(int op, bool has_mask, bool is_ca, bool is_affine)
{
	int base;

	if (has_mask) {
		if (is_ca) {
			if (gen4_blend_op[op].src_alpha)
				base = WM_KERNEL_MASKSA;
			else
				base = WM_KERNEL_MASKCA;
		} else
			base = WM_KERNEL_MASK;
	} else
		base = WM_KERNEL;

	return base + !is_affine;
}
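
/* Note (added for clarity): the "base + !is_affine" above relies on
 * each projective kernel enum immediately following its affine
 * counterpart, e.g. WM_KERNEL_MASK_P == WM_KERNEL_MASK + 1, as the
 * pairing in wm_kernels[] reflects.
 */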

static bool gen4_magic_ca_pass(struct sna *sna,
			       const struct sna_composite_op *op)
{
	struct gen4_render_state *state = &sna->render_state.gen4;

	if (!op->need_magic_ca_pass)
		return false;

	assert(sna->render.vertex_index > sna->render.vertex_start);

	DBG(("%s: CA fixup\n", __FUNCTION__));
	assert(op->mask.bo != NULL);
	assert(op->has_component_alpha);

	gen4_emit_pipelined_pointers(sna, op, PictOpAdd,
				     gen4_choose_composite_kernel(PictOpAdd,
								  true, true, op->is_affine));

	OUT_BATCH(GEN4_3DPRIMITIVE |
		  GEN4_3DPRIMITIVE_VERTEX_SEQUENTIAL |
		  (_3DPRIM_RECTLIST << GEN4_3DPRIMITIVE_TOPOLOGY_SHIFT) |
		  (0 << 9) |
		  4);
	OUT_BATCH(sna->render.vertex_index - sna->render.vertex_start);
	OUT_BATCH(sna->render.vertex_start);
	OUT_BATCH(1);	/* single instance */
	OUT_BATCH(0);	/* start instance location */
	OUT_BATCH(0);	/* index buffer offset, ignored */

	state->last_primitive = sna->kgem.nbatch;
	return true;
}

static uint32_t gen4_get_blend(int op,
			       bool has_component_alpha,
			       uint32_t dst_format)
{
	uint32_t src, dst;

	src = gen4_blend_op[op].src_blend;
	dst = gen4_blend_op[op].dst_blend;

	/* If there's no dst alpha channel, adjust the blend op so that we'll treat
	 * it as always 1.
	 */
	if (PICT_FORMAT_A(dst_format) == 0) {
		if (src == GEN4_BLENDFACTOR_DST_ALPHA)
			src = GEN4_BLENDFACTOR_ONE;
		else if (src == GEN4_BLENDFACTOR_INV_DST_ALPHA)
			src = GEN4_BLENDFACTOR_ZERO;
	}

	/* If the source alpha is being used, then we should only be in a
	 * case where the source blend factor is 0, and the source blend
	 * value is the mask channels multiplied by the source picture's alpha.
	 */
	if (has_component_alpha && gen4_blend_op[op].src_alpha) {
		if (dst == GEN4_BLENDFACTOR_SRC_ALPHA)
			dst = GEN4_BLENDFACTOR_SRC_COLOR;
		else if (dst == GEN4_BLENDFACTOR_INV_SRC_ALPHA)
			dst = GEN4_BLENDFACTOR_INV_SRC_COLOR;
	}

	DBG(("blend op=%d, dst=%x [A=%d] => src=%d, dst=%d => offset=%x\n",
	     op, dst_format, PICT_FORMAT_A(dst_format),
	     src, dst, BLEND_OFFSET(src, dst)));
	return BLEND_OFFSET(src, dst);
}

static uint32_t gen4_get_card_format(PictFormat format)
{
	switch (format) {
	default:
		return -1;
	case PICT_a8r8g8b8:
		return GEN4_SURFACEFORMAT_B8G8R8A8_UNORM;
	case PICT_x8r8g8b8:
		return GEN4_SURFACEFORMAT_B8G8R8X8_UNORM;
	case PICT_a8b8g8r8:
		return GEN4_SURFACEFORMAT_R8G8B8A8_UNORM;
	case PICT_x8b8g8r8:
		return GEN4_SURFACEFORMAT_R8G8B8X8_UNORM;
#if XORG_VERSION_CURRENT >= XORG_VERSION_NUMERIC(1,6,99,900,0)
	case PICT_a2r10g10b10:
		return GEN4_SURFACEFORMAT_B10G10R10A2_UNORM;
	case PICT_x2r10g10b10:
		return GEN4_SURFACEFORMAT_B10G10R10X2_UNORM;
#endif
	case PICT_r8g8b8:
		return GEN4_SURFACEFORMAT_R8G8B8_UNORM;
	case PICT_r5g6b5:
		return GEN4_SURFACEFORMAT_B5G6R5_UNORM;
	case PICT_a1r5g5b5:
		return GEN4_SURFACEFORMAT_B5G5R5A1_UNORM;
	case PICT_a8:
		return GEN4_SURFACEFORMAT_A8_UNORM;
	case PICT_a4r4g4b4:
		return GEN4_SURFACEFORMAT_B4G4R4A4_UNORM;
	}
}

static uint32_t gen4_get_dest_format(PictFormat format)
{
	switch (format) {
	default:
		return -1;
	case PICT_a8r8g8b8:
	case PICT_x8r8g8b8:
		return GEN4_SURFACEFORMAT_B8G8R8A8_UNORM;
	case PICT_a8b8g8r8:
	case PICT_x8b8g8r8:
		return GEN4_SURFACEFORMAT_R8G8B8A8_UNORM;
#if XORG_VERSION_CURRENT >= XORG_VERSION_NUMERIC(1,6,99,900,0)
	case PICT_a2r10g10b10:
	case PICT_x2r10g10b10:
		return GEN4_SURFACEFORMAT_B10G10R10A2_UNORM;
#endif
	case PICT_r5g6b5:
		return GEN4_SURFACEFORMAT_B5G6R5_UNORM;
	case PICT_x1r5g5b5:
	case PICT_a1r5g5b5:
		return GEN4_SURFACEFORMAT_B5G5R5A1_UNORM;
	case PICT_a8:
		return GEN4_SURFACEFORMAT_A8_UNORM;
	case PICT_a4r4g4b4:
	case PICT_x4r4g4b4:
		return GEN4_SURFACEFORMAT_B4G4R4A4_UNORM;
	}
}

static bool gen4_check_dst_format(PictFormat format)
{
	if (gen4_get_dest_format(format) != -1)
		return true;

	DBG(("%s: unhandled format: %x\n", __FUNCTION__, (int)format));
	return false;
}

static bool gen4_check_format(uint32_t format)
{
	if (gen4_get_card_format(format) != -1)
		return true;

	DBG(("%s: unhandled format: %x\n", __FUNCTION__, (int)format));
	return false;
}

typedef struct gen4_surface_state_padded {
	struct gen4_surface_state state;
	char pad[32 - sizeof(struct gen4_surface_state)];
} gen4_surface_state_padded;

static void null_create(struct sna_static_stream *stream)
{
	/* A bunch of zeros useful for legacy border color and depth-stencil */
	sna_static_stream_map(stream, 64, 64);
}

static void
sampler_state_init(struct gen4_sampler_state *sampler_state,
		   sampler_filter_t filter,
		   sampler_extend_t extend)
{
	sampler_state->ss0.lod_preclamp = 1;	/* GL mode */

	/* We use the legacy mode to get the semantics specified by
	 * the Render extension. */
	sampler_state->ss0.border_color_mode = GEN4_BORDER_COLOR_MODE_LEGACY;

	switch (filter) {
	default:
	case SAMPLER_FILTER_NEAREST:
		sampler_state->ss0.min_filter = GEN4_MAPFILTER_NEAREST;
		sampler_state->ss0.mag_filter = GEN4_MAPFILTER_NEAREST;
		break;
	case SAMPLER_FILTER_BILINEAR:
		sampler_state->ss0.min_filter = GEN4_MAPFILTER_LINEAR;
		sampler_state->ss0.mag_filter = GEN4_MAPFILTER_LINEAR;
		break;
	}

	switch (extend) {
	default:
	case SAMPLER_EXTEND_NONE:
		sampler_state->ss1.r_wrap_mode = GEN4_TEXCOORDMODE_CLAMP_BORDER;
		sampler_state->ss1.s_wrap_mode = GEN4_TEXCOORDMODE_CLAMP_BORDER;
		sampler_state->ss1.t_wrap_mode = GEN4_TEXCOORDMODE_CLAMP_BORDER;
		break;
	case SAMPLER_EXTEND_REPEAT:
		sampler_state->ss1.r_wrap_mode = GEN4_TEXCOORDMODE_WRAP;
		sampler_state->ss1.s_wrap_mode = GEN4_TEXCOORDMODE_WRAP;
		sampler_state->ss1.t_wrap_mode = GEN4_TEXCOORDMODE_WRAP;
		break;
	case SAMPLER_EXTEND_PAD:
		sampler_state->ss1.r_wrap_mode = GEN4_TEXCOORDMODE_CLAMP;
		sampler_state->ss1.s_wrap_mode = GEN4_TEXCOORDMODE_CLAMP;
		sampler_state->ss1.t_wrap_mode = GEN4_TEXCOORDMODE_CLAMP;
		break;
	case SAMPLER_EXTEND_REFLECT:
		sampler_state->ss1.r_wrap_mode = GEN4_TEXCOORDMODE_MIRROR;
		sampler_state->ss1.s_wrap_mode = GEN4_TEXCOORDMODE_MIRROR;
		sampler_state->ss1.t_wrap_mode = GEN4_TEXCOORDMODE_MIRROR;
		break;
	}
}

static uint32_t gen4_filter(uint32_t filter)
{
	switch (filter) {
	default:
		assert(0);
	case PictFilterNearest:
		return SAMPLER_FILTER_NEAREST;
	case PictFilterBilinear:
		return SAMPLER_FILTER_BILINEAR;
	}
}

static uint32_t gen4_check_filter(PicturePtr picture)
{
	switch (picture->filter) {
	case PictFilterNearest:
	case PictFilterBilinear:
		return true;
	default:
		DBG(("%s: unknown filter: %s [%d]\n",
		     __FUNCTION__,
		     PictureGetFilterName(picture->filter),
		     picture->filter));
		return false;
	}
}

static uint32_t gen4_repeat(uint32_t repeat)
{
	switch (repeat) {
	default:
		assert(0);
	case RepeatNone:
		return SAMPLER_EXTEND_NONE;
	case RepeatNormal:
		return SAMPLER_EXTEND_REPEAT;
	case RepeatPad:
		return SAMPLER_EXTEND_PAD;
	case RepeatReflect:
		return SAMPLER_EXTEND_REFLECT;
	}
}

static bool gen4_check_repeat(PicturePtr picture)
{
	if (!picture->repeat)
		return true;

	switch (picture->repeatType) {
	case RepeatNone:
	case RepeatNormal:
	case RepeatPad:
	case RepeatReflect:
		return true;
	default:
		DBG(("%s: unknown repeat: %d\n",
		     __FUNCTION__, picture->repeatType));
		return false;
	}
}

static uint32_t
gen4_tiling_bits(uint32_t tiling)
{
	switch (tiling) {
	default: assert(0);
	case I915_TILING_NONE: return 0;
	case I915_TILING_X: return GEN4_SURFACE_TILED;
	case I915_TILING_Y: return GEN4_SURFACE_TILED | GEN4_SURFACE_TILED_Y;
	}
}

/**
 * Sets up the common fields for a surface state buffer for the given
 * picture in the given surface state buffer.
 */
static uint32_t
gen4_bind_bo(struct sna *sna,
	     struct kgem_bo *bo,
	     uint32_t width,
	     uint32_t height,
	     uint32_t format,
	     bool is_dst)
{
	uint32_t domains;
	uint16_t offset;
	uint32_t *ss;

	assert(sna->kgem.gen != 040 || !kgem_bo_is_snoop(bo));

	/* After the first bind, we manage the cache domains within the batch */
	offset = kgem_bo_get_binding(bo, format | is_dst << 31);
	if (offset) {
		assert(offset >= sna->kgem.surface);
		if (is_dst)
			kgem_bo_mark_dirty(bo);
		return offset * sizeof(uint32_t);
	}

	offset = sna->kgem.surface -=
		sizeof(struct gen4_surface_state_padded) / sizeof(uint32_t);
	ss = sna->kgem.batch + offset;

	ss[0] = (GEN4_SURFACE_2D << GEN4_SURFACE_TYPE_SHIFT |
		 GEN4_SURFACE_BLEND_ENABLED |
		 format << GEN4_SURFACE_FORMAT_SHIFT);

	if (is_dst) {
		ss[0] |= GEN4_SURFACE_RC_READ_WRITE;
		domains = I915_GEM_DOMAIN_RENDER << 16 | I915_GEM_DOMAIN_RENDER;
	} else
		domains = I915_GEM_DOMAIN_SAMPLER << 16;
	ss[1] = kgem_add_reloc(&sna->kgem, offset + 1, bo, domains, 0);

	ss[2] = ((width - 1)  << GEN4_SURFACE_WIDTH_SHIFT |
		 (height - 1) << GEN4_SURFACE_HEIGHT_SHIFT);
	ss[3] = (gen4_tiling_bits(bo->tiling) |
		 (bo->pitch - 1) << GEN4_SURFACE_PITCH_SHIFT);
	ss[4] = 0;
	ss[5] = 0;

	kgem_bo_set_binding(bo, format | is_dst << 31, offset);

	DBG(("[%x] bind bo(handle=%d, addr=%d), format=%d, width=%d, height=%d, pitch=%d, tiling=%d -> %s\n",
	     offset, bo->handle, ss[1],
	     format, width, height, bo->pitch, bo->tiling,
	     domains & 0xffff ? "render" : "sampler"));

	return offset * sizeof(uint32_t);
}

static void gen4_emit_vertex_buffer(struct sna *sna,
				    const struct sna_composite_op *op)
{
	int id = op->u.gen4.ve_id;

	assert((sna->render.vb_id & (1 << id)) == 0);

	OUT_BATCH(GEN4_3DSTATE_VERTEX_BUFFERS | 3);
	OUT_BATCH((id << VB0_BUFFER_INDEX_SHIFT) | VB0_VERTEXDATA |
		  (4*op->floats_per_vertex << VB0_BUFFER_PITCH_SHIFT));
	assert(sna->render.nvertex_reloc < ARRAY_SIZE(sna->render.vertex_reloc));
	sna->render.vertex_reloc[sna->render.nvertex_reloc++] = sna->kgem.nbatch;
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);

	sna->render.vb_id |= 1 << id;
}

inline static void
gen4_emit_pipe_flush(struct sna *sna)
{
#if 1
	OUT_BATCH(GEN4_PIPE_CONTROL |
		  GEN4_PIPE_CONTROL_WC_FLUSH |
		  (4 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
#else
	OUT_BATCH(MI_FLUSH | MI_INHIBIT_RENDER_CACHE_FLUSH);
#endif
}

inline static void
gen4_emit_pipe_break(struct sna *sna)
{
#if !ALWAYS_FLUSH
	OUT_BATCH(GEN4_PIPE_CONTROL | (4 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
#else
	OUT_BATCH(MI_FLUSH | MI_INHIBIT_RENDER_CACHE_FLUSH);
#endif
}

inline static void
gen4_emit_pipe_invalidate(struct sna *sna)
{
#if 0
	OUT_BATCH(GEN4_PIPE_CONTROL |
		  GEN4_PIPE_CONTROL_WC_FLUSH |
		  (sna->kgem.gen >= 045 ? GEN4_PIPE_CONTROL_TC_FLUSH : 0) |
		  (4 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
#else
	OUT_BATCH(MI_FLUSH);
#endif
}

static void gen4_emit_primitive(struct sna *sna)
{
	if (sna->kgem.nbatch == sna->render_state.gen4.last_primitive) {
		sna->render.vertex_offset = sna->kgem.nbatch - 5;
		return;
	}

	OUT_BATCH(GEN4_3DPRIMITIVE |
		  GEN4_3DPRIMITIVE_VERTEX_SEQUENTIAL |
		  (_3DPRIM_RECTLIST << GEN4_3DPRIMITIVE_TOPOLOGY_SHIFT) |
		  (0 << 9) |
		  4);
	sna->render.vertex_offset = sna->kgem.nbatch;
	OUT_BATCH(0);	/* vertex count, to be filled in later */
	OUT_BATCH(sna->render.vertex_index);
	OUT_BATCH(1);	/* single instance */
	OUT_BATCH(0);	/* start instance location */
	OUT_BATCH(0);	/* index buffer offset, ignored */
	sna->render.vertex_start = sna->render.vertex_index;

	sna->render_state.gen4.last_primitive = sna->kgem.nbatch;
}

static bool gen4_rectangle_begin(struct sna *sna,
				 const struct sna_composite_op *op)
{
	unsigned int id = 1 << op->u.gen4.ve_id;
	int ndwords;

	if (sna_vertex_wait__locked(&sna->render) && sna->render.vertex_offset)
		return true;

	/* 7xpipelined pointers + 6xprimitive + 1xflush */
	ndwords = op->need_magic_ca_pass? 19 : 6;
	if ((sna->render.vb_id & id) == 0)
		ndwords += 5;
	ndwords += 8*FORCE_FLUSH;

	if (!kgem_check_batch(&sna->kgem, ndwords))
		return false;

	if ((sna->render.vb_id & id) == 0)
		gen4_emit_vertex_buffer(sna, op);
	if (sna->render.vertex_offset == 0)
		gen4_emit_primitive(sna);

	return true;
}

static int gen4_get_rectangles__flush(struct sna *sna,
				      const struct sna_composite_op *op)
{
	/* Preventing discarding new vbo after lock contention */
	if (sna_vertex_wait__locked(&sna->render)) {
		int rem = vertex_space(sna);
		if (rem > op->floats_per_rect)
			return rem;
	}

	if (!kgem_check_batch(&sna->kgem,
			      8*FORCE_FLUSH + (op->need_magic_ca_pass ? 2*19+6 : 6)))
		return 0;
	if (!kgem_check_reloc_and_exec(&sna->kgem, 2))
		return 0;

	if (sna->render.vertex_offset) {
		gen4_vertex_flush(sna);
		if (gen4_magic_ca_pass(sna, op))
			gen4_emit_pipelined_pointers(sna, op, op->op,
						     op->u.gen4.wm_kernel);
	}

	return gen4_vertex_finish(sna);
}

inline static int gen4_get_rectangles(struct sna *sna,
				      const struct sna_composite_op *op,
				      int want,
				      void (*emit_state)(struct sna *sna, const struct sna_composite_op *op))
{
	int rem;

	assert(want);
#if FORCE_FLUSH
	rem = sna->render.vertex_offset;
	if (sna->kgem.nbatch == sna->render_state.gen4.last_primitive)
		rem = sna->kgem.nbatch - 5;
	if (rem) {
		rem = MAX_FLUSH_VERTICES - (sna->render.vertex_index - sna->render.vertex_start) / 3;
		if (rem <= 0) {
			if (sna->render.vertex_offset) {
				gen4_vertex_flush(sna);
				if (gen4_magic_ca_pass(sna, op)) {
					if (kgem_check_batch(&sna->kgem, 19+6))
						gen4_emit_pipelined_pointers(sna, op, op->op,
									     op->u.gen4.wm_kernel);
				}
			}
			gen4_emit_pipe_break(sna);
			rem = MAX_FLUSH_VERTICES;
		}
	} else
		rem = MAX_FLUSH_VERTICES;
	if (want > rem)
		want = rem;
#endif

start:
	rem = vertex_space(sna);
	if (unlikely(rem < op->floats_per_rect)) {
		DBG(("flushing vbo for %s: %d < %d\n",
		     __FUNCTION__, rem, op->floats_per_rect));
		rem = gen4_get_rectangles__flush(sna, op);
		if (unlikely(rem == 0))
			goto flush;
	}

	if (unlikely(sna->render.vertex_offset == 0)) {
		if (!gen4_rectangle_begin(sna, op))
			goto flush;
		else
			goto start;
	}

	assert(rem <= vertex_space(sna));
	assert(op->floats_per_rect <= rem);
	if (want > 1 && want * op->floats_per_rect > rem)
		want = rem / op->floats_per_rect;

	sna->render.vertex_index += 3*want;
	return want;

flush:
	if (sna->render.vertex_offset) {
		gen4_vertex_flush(sna);
		gen4_magic_ca_pass(sna, op);
	}
	sna_vertex_wait__locked(&sna->render);
	_kgem_submit(&sna->kgem);
	emit_state(sna, op);
	goto start;
}

static uint32_t *
gen4_composite_get_binding_table(struct sna *sna, uint16_t *offset)
{
	sna->kgem.surface -=
		sizeof(struct gen4_surface_state_padded) / sizeof(uint32_t);

	DBG(("%s(%x)\n", __FUNCTION__, 4*sna->kgem.surface));

	/* Clear all surplus entries to zero in case of prefetch */
	*offset = sna->kgem.surface;
	return memset(sna->kgem.batch + sna->kgem.surface,
		      0, sizeof(struct gen4_surface_state_padded));
}

static void
gen4_emit_urb(struct sna *sna)
{
	int urb_vs_end;
	int urb_gs_end;
	int urb_cl_end;
	int urb_sf_end;
	int urb_cs_end;

	if (!sna->render_state.gen4.needs_urb)
		return;

	urb_vs_end =              URB_VS_ENTRIES * URB_VS_ENTRY_SIZE;
	urb_gs_end = urb_vs_end + URB_GS_ENTRIES * URB_GS_ENTRY_SIZE;
	urb_cl_end = urb_gs_end + URB_CL_ENTRIES * URB_CL_ENTRY_SIZE;
	urb_sf_end = urb_cl_end + URB_SF_ENTRIES * URB_SF_ENTRY_SIZE;
	urb_cs_end = urb_sf_end + URB_CS_ENTRIES * URB_CS_ENTRY_SIZE;
	assert(urb_cs_end <= 256);

	while ((sna->kgem.nbatch & 15) > 12)
		OUT_BATCH(MI_NOOP);

	OUT_BATCH(GEN4_URB_FENCE |
		  UF0_CS_REALLOC |
		  UF0_SF_REALLOC |
		  UF0_CLIP_REALLOC |
		  UF0_GS_REALLOC |
		  UF0_VS_REALLOC |
		  1);
	OUT_BATCH(urb_cl_end << UF1_CLIP_FENCE_SHIFT |
		  urb_gs_end << UF1_GS_FENCE_SHIFT |
		  urb_vs_end << UF1_VS_FENCE_SHIFT);
	OUT_BATCH(urb_cs_end << UF2_CS_FENCE_SHIFT |
		  urb_sf_end << UF2_SF_FENCE_SHIFT);

	/* Constant buffer state */
	OUT_BATCH(GEN4_CS_URB_STATE | 0);
	OUT_BATCH((URB_CS_ENTRY_SIZE - 1) << 4 | URB_CS_ENTRIES << 0);

	sna->render_state.gen4.needs_urb = false;
}

static void
gen4_emit_state_base_address(struct sna *sna)
{
	assert(sna->render_state.gen4.general_bo->proxy == NULL);
	OUT_BATCH(GEN4_STATE_BASE_ADDRESS | 4);
	OUT_BATCH(kgem_add_reloc(&sna->kgem, /* general */
				 sna->kgem.nbatch,
				 sna->render_state.gen4.general_bo,
				 I915_GEM_DOMAIN_INSTRUCTION << 16,
				 BASE_ADDRESS_MODIFY));
	OUT_BATCH(kgem_add_reloc(&sna->kgem, /* surface */
				 sna->kgem.nbatch,
				 NULL,
				 I915_GEM_DOMAIN_INSTRUCTION << 16,
				 BASE_ADDRESS_MODIFY));
	OUT_BATCH(0); /* media */

	/* upper bounds, all disabled */
	OUT_BATCH(BASE_ADDRESS_MODIFY);
	OUT_BATCH(0);
}

static void
gen4_emit_invariant(struct sna *sna)
{
	assert(sna->kgem.surface == sna->kgem.batch_size);

	if (sna->kgem.gen >= 045)
		OUT_BATCH(NEW_PIPELINE_SELECT | PIPELINE_SELECT_3D);
	else
		OUT_BATCH(GEN4_PIPELINE_SELECT | PIPELINE_SELECT_3D);

	OUT_BATCH(GEN4_CONSTANT_BUFFER);
	OUT_BATCH(0);

	gen4_emit_state_base_address(sna);

	sna->render_state.gen4.needs_invariant = false;
}

static void
gen4_get_batch(struct sna *sna, const struct sna_composite_op *op)
{
	kgem_set_mode(&sna->kgem, KGEM_RENDER, op->dst.bo);

	if (!kgem_check_batch_with_surfaces(&sna->kgem, 150 + 50*FORCE_FLUSH, 4)) {
		DBG(("%s: flushing batch: %d < %d+%d\n",
		     __FUNCTION__, sna->kgem.surface - sna->kgem.nbatch,
		     150, 4*8));
		kgem_submit(&sna->kgem);
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	if (sna->render_state.gen4.needs_invariant)
		gen4_emit_invariant(sna);
}

static void
gen4_align_vertex(struct sna *sna, const struct sna_composite_op *op)
{
	assert(op->floats_per_rect == 3*op->floats_per_vertex);
	if (op->floats_per_vertex != sna->render_state.gen4.floats_per_vertex) {
		DBG(("aligning vertex: was %d, now %d floats per vertex\n",
		     sna->render_state.gen4.floats_per_vertex,
		     op->floats_per_vertex));
		gen4_vertex_align(sna, op);
		sna->render_state.gen4.floats_per_vertex = op->floats_per_vertex;
	}
}

static void
gen4_emit_binding_table(struct sna *sna, uint16_t offset)
{
	if (sna->render_state.gen4.surface_table == offset)
		return;

	sna->render_state.gen4.surface_table = offset;

	/* Binding table pointers */
	OUT_BATCH(GEN4_3DSTATE_BINDING_TABLE_POINTERS | 4);
	OUT_BATCH(0);		/* vs */
	OUT_BATCH(0);		/* gs */
	OUT_BATCH(0);		/* clip */
	OUT_BATCH(0);		/* sf */
	/* Only the PS uses the binding table */
	OUT_BATCH(offset*4);
}

static void
gen4_emit_pipelined_pointers(struct sna *sna,
			     const struct sna_composite_op *op,
			     int blend, int kernel)
{
	uint16_t sp, bp;
	uint32_t key;

	DBG(("%s: has_mask=%d, src=(%d, %d), mask=(%d, %d),kernel=%d, blend=%d, ca=%d, format=%x\n",
	     __FUNCTION__, op->u.gen4.ve_id & 2,
	     op->src.filter, op->src.repeat,
	     op->mask.filter, op->mask.repeat,
	     kernel, blend, op->has_component_alpha, (int)op->dst.format));

	sp = SAMPLER_OFFSET(op->src.filter, op->src.repeat,
			    op->mask.filter, op->mask.repeat,
			    kernel);
	bp = gen4_get_blend(blend, op->has_component_alpha, op->dst.format);

	DBG(("%s: sp=%d, bp=%d\n", __FUNCTION__, sp, bp));
	key = sp | (uint32_t)bp << 16;
	if (key == sna->render_state.gen4.last_pipelined_pointers)
		return;

	OUT_BATCH(GEN4_3DSTATE_PIPELINED_POINTERS | 5);
	OUT_BATCH(sna->render_state.gen4.vs);
	OUT_BATCH(GEN4_GS_DISABLE); /* passthrough */
	OUT_BATCH(GEN4_CLIP_DISABLE); /* passthrough */
	OUT_BATCH(sna->render_state.gen4.sf);
	OUT_BATCH(sna->render_state.gen4.wm + sp);
	OUT_BATCH(sna->render_state.gen4.cc + bp);

	sna->render_state.gen4.last_pipelined_pointers = key;
	gen4_emit_urb(sna);
}

static bool
gen4_emit_drawing_rectangle(struct sna *sna, const struct sna_composite_op *op)
{
	uint32_t limit = (op->dst.height - 1) << 16 | (op->dst.width - 1);
	uint32_t offset = (uint16_t)op->dst.y << 16 | (uint16_t)op->dst.x;

	assert(!too_large(abs(op->dst.x), abs(op->dst.y)));
	assert(!too_large(op->dst.width, op->dst.height));

	if (sna->render_state.gen4.drawrect_limit == limit &&
	    sna->render_state.gen4.drawrect_offset == offset)
		return true;

	sna->render_state.gen4.drawrect_offset = offset;
	sna->render_state.gen4.drawrect_limit = limit;

	OUT_BATCH(GEN4_3DSTATE_DRAWING_RECTANGLE | (4 - 2));
	OUT_BATCH(0);
	OUT_BATCH(limit);
	OUT_BATCH(offset);
	return false;
}

static void
gen4_emit_vertex_elements(struct sna *sna,
			  const struct sna_composite_op *op)
{
	/*
	 * vertex data in vertex buffer
	 *    position: (x, y)
	 *    texture coordinate 0: (u0, v0) if (is_affine is true) else (u0, v0, w0)
	 *    texture coordinate 1 if (has_mask is true): same as above
	 */
	struct gen4_render_state *render = &sna->render_state.gen4;
	uint32_t src_format, dw;
	int id = op->u.gen4.ve_id;

	if (render->ve_id == id)
		return;
	render->ve_id = id;

	/* The VUE layout
	 *    dword 0-3: position (x, y, 1.0, 1.0),
	 *    dword 4-7: texture coordinate 0 (u0, v0, w0, 1.0)
	 *    [optional] dword 8-11: texture coordinate 1 (u1, v1, w1, 1.0)
	 */
	OUT_BATCH(GEN4_3DSTATE_VERTEX_ELEMENTS | (2 * (1 + 2) - 1));

	/* x,y */
	OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
		  GEN4_SURFACEFORMAT_R16G16_SSCALED << VE0_FORMAT_SHIFT |
		  0 << VE0_OFFSET_SHIFT);
	OUT_BATCH(VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT |
		  VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT |
		  VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT |
		  VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT |
		  (1*4) << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT);
	/* u0, v0, w0 */
	DBG(("%s: first channel %d floats, offset=4b\n", __FUNCTION__, id & 3));
	dw = VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT;
	switch (id & 3) {
	default:
		assert(0);
	case 0:
		src_format = GEN4_SURFACEFORMAT_R16G16_SSCALED;
		dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
		dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
		dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT;
		break;
	case 1:
		src_format = GEN4_SURFACEFORMAT_R32_FLOAT;
		dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
		dw |= VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT;
		dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT;
		break;
	case 2:
		src_format = GEN4_SURFACEFORMAT_R32G32_FLOAT;
		dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
		dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
		dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT;
		break;
	case 3:
		src_format = GEN4_SURFACEFORMAT_R32G32B32_FLOAT;
		dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
		dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
		dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_2_SHIFT;
		break;
	}
	OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
		  src_format << VE0_FORMAT_SHIFT |
		  4 << VE0_OFFSET_SHIFT);
	OUT_BATCH(dw | 8 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT);

	/* u1, v1, w1 */
	if (id >> 2) {
		unsigned src_offset = 4 + ((id & 3) ?: 1) * sizeof(float);
		DBG(("%s: second channel %d floats, offset=%db\n", __FUNCTION__,
		     id >> 2, src_offset));
		dw = VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT;
		switch (id >> 2) {
		case 1:
			src_format = GEN4_SURFACEFORMAT_R32_FLOAT;
			dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
			dw |= VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT;
			dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT;
			break;
		default:
			assert(0);
		case 2:
			src_format = GEN4_SURFACEFORMAT_R32G32_FLOAT;
			dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
			dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
			dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT;
			break;
		case 3:
			src_format = GEN4_SURFACEFORMAT_R32G32B32_FLOAT;
			dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
			dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
			dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_2_SHIFT;
			break;
		}
		OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
			  src_format << VE0_FORMAT_SHIFT |
			  src_offset << VE0_OFFSET_SHIFT);
		OUT_BATCH(dw | 12 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT);
	} else {
		OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
			  GEN4_SURFACEFORMAT_R16G16_SSCALED << VE0_FORMAT_SHIFT |
			  0 << VE0_OFFSET_SHIFT);
		OUT_BATCH(VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_0_SHIFT |
			  VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT |
			  VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT |
			  VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT |
			  12 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT);
	}
}
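
/* Note (added for clarity, a reading of the switch above): ve_id
 * encodes the vertex layout. The low two bits select the source
 * texcoord storage (0: 16-bit integer coordinates, 1: one float u,
 * 2: two floats (u, v), 3: three floats (u, v, w)) and the bits above
 * do the same for the mask channel (0 meaning no mask), so e.g. an
 * affine source with an affine mask would use ve_id = 2 | 2 << 2.
 */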

static void
gen4_emit_state(struct sna *sna,
		const struct sna_composite_op *op,
		uint16_t wm_binding_table)
{
	bool flush;

	assert(op->dst.bo->exec);

	flush = wm_binding_table & 1;
	wm_binding_table &= ~1;

	if (ALWAYS_FLUSH || kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) {
		DBG(("%s: flushing dirty (%d, %d), forced? %d\n", __FUNCTION__,
		     kgem_bo_is_dirty(op->src.bo),
		     kgem_bo_is_dirty(op->mask.bo),
		     flush));
		gen4_emit_pipe_invalidate(sna);
		kgem_clear_dirty(&sna->kgem);
		kgem_bo_mark_dirty(op->dst.bo);
		flush = false;
	}
	flush &= gen4_emit_drawing_rectangle(sna, op);
	if (flush && op->op > PictOpSrc)
		gen4_emit_pipe_flush(sna);

	gen4_emit_binding_table(sna, wm_binding_table);
	gen4_emit_pipelined_pointers(sna, op, op->op, op->u.gen4.wm_kernel);
	gen4_emit_vertex_elements(sna, op);
}

static void
gen4_bind_surfaces(struct sna *sna,
		   const struct sna_composite_op *op)
{
	uint32_t *binding_table;
	uint16_t offset, dirty;

	gen4_get_batch(sna, op);
	dirty = kgem_bo_is_dirty(op->dst.bo);

	binding_table = gen4_composite_get_binding_table(sna, &offset);

	binding_table[0] =
		gen4_bind_bo(sna,
			    op->dst.bo, op->dst.width, op->dst.height,
			    gen4_get_dest_format(op->dst.format),
			    true);
	binding_table[1] =
		gen4_bind_bo(sna,
			     op->src.bo, op->src.width, op->src.height,
			     op->src.card_format,
			     false);
	if (op->mask.bo) {
		assert(op->u.gen4.ve_id >> 2);
		binding_table[2] =
			gen4_bind_bo(sna,
				     op->mask.bo,
				     op->mask.width,
				     op->mask.height,
				     op->mask.card_format,
				     false);
	}

	if (sna->kgem.surface == offset &&
	    *(uint64_t *)(sna->kgem.batch + sna->render_state.gen4.surface_table) == *(uint64_t*)binding_table &&
	    (op->mask.bo == NULL ||
	     sna->kgem.batch[sna->render_state.gen4.surface_table+2] == binding_table[2])) {
		sna->kgem.surface += sizeof(struct gen4_surface_state_padded) / sizeof(uint32_t);
		offset = sna->render_state.gen4.surface_table;
	}

	if (!ALWAYS_FLUSH && sna->kgem.batch[sna->render_state.gen4.surface_table] == binding_table[0])
		dirty = 0;

	gen4_emit_state(sna, op, offset | dirty);
}

fastcall static void
gen4_render_composite_blt(struct sna *sna,
			  const struct sna_composite_op *op,
			  const struct sna_composite_rectangles *r)
{
	DBG(("%s: src=(%d, %d)+(%d, %d), mask=(%d, %d)+(%d, %d), dst=(%d, %d)+(%d, %d), size=(%d, %d)\n",
	     __FUNCTION__,
	     r->src.x, r->src.y, op->src.offset[0], op->src.offset[1],
	     r->mask.x, r->mask.y, op->mask.offset[0], op->mask.offset[1],
	     r->dst.x, r->dst.y, op->dst.x, op->dst.y,
	     r->width, r->height));

	gen4_get_rectangles(sna, op, 1, gen4_bind_surfaces);
	op->prim_emit(sna, op, r);
}

fastcall static void
gen4_render_composite_box(struct sna *sna,
			  const struct sna_composite_op *op,
			  const BoxRec *box)
{
	struct sna_composite_rectangles r;

	DBG(("  %s: (%d, %d), (%d, %d)\n",
	     __FUNCTION__,
	     box->x1, box->y1, box->x2, box->y2));

	gen4_get_rectangles(sna, op, 1, gen4_bind_surfaces);

	r.dst.x = box->x1;
	r.dst.y = box->y1;
	r.width  = box->x2 - box->x1;
	r.height = box->y2 - box->y1;
	r.mask = r.src = r.dst;

	op->prim_emit(sna, op, &r);
}

static void
gen4_render_composite_boxes__blt(struct sna *sna,
				 const struct sna_composite_op *op,
				 const BoxRec *box, int nbox)
{
	DBG(("%s(%d) delta=(%d, %d), src=(%d, %d)/(%d, %d), mask=(%d, %d)/(%d, %d)\n",
	     __FUNCTION__, nbox, op->dst.x, op->dst.y,
	     op->src.offset[0], op->src.offset[1],
	     op->src.width, op->src.height,
	     op->mask.offset[0], op->mask.offset[1],
	     op->mask.width, op->mask.height));

	do {
		int nbox_this_time;

		nbox_this_time = gen4_get_rectangles(sna, op, nbox,
						     gen4_bind_surfaces);
		nbox -= nbox_this_time;

		do {
			struct sna_composite_rectangles r;

			DBG(("  %s: (%d, %d), (%d, %d)\n",
			     __FUNCTION__,
			     box->x1, box->y1, box->x2, box->y2));

			r.dst.x = box->x1;
			r.dst.y = box->y1;
			r.width  = box->x2 - box->x1;
			r.height = box->y2 - box->y1;
			r.mask = r.src = r.dst;
			op->prim_emit(sna, op, &r);
			box++;
		} while (--nbox_this_time);
	} while (nbox);
}

static void
gen4_render_composite_boxes(struct sna *sna,
			    const struct sna_composite_op *op,
			    const BoxRec *box, int nbox)
{
	DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));

	do {
		int nbox_this_time;
		float *v;

		nbox_this_time = gen4_get_rectangles(sna, op, nbox,
						     gen4_bind_surfaces);
		assert(nbox_this_time);
		nbox -= nbox_this_time;

		v = sna->render.vertices + sna->render.vertex_used;
		sna->render.vertex_used += nbox_this_time * op->floats_per_rect;

		op->emit_boxes(op, box, nbox_this_time, v);
		box += nbox_this_time;
	} while (nbox);
}

#if !FORCE_FLUSH
static void
gen4_render_composite_boxes__thread(struct sna *sna,
				    const struct sna_composite_op *op,
				    const BoxRec *box, int nbox)
{
	DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));

	sna_vertex_lock(&sna->render);
	do {
		int nbox_this_time;
		float *v;

		nbox_this_time = gen4_get_rectangles(sna, op, nbox,
						     gen4_bind_surfaces);
		assert(nbox_this_time);
		nbox -= nbox_this_time;

		v = sna->render.vertices + sna->render.vertex_used;
		sna->render.vertex_used += nbox_this_time * op->floats_per_rect;

		sna_vertex_acquire__locked(&sna->render);
		sna_vertex_unlock(&sna->render);

		op->emit_boxes(op, box, nbox_this_time, v);
		box += nbox_this_time;

		sna_vertex_lock(&sna->render);
		sna_vertex_release__locked(&sna->render);
	} while (nbox);
	sna_vertex_unlock(&sna->render);
}
#endif

#ifndef MAX
#define MAX(a,b) ((a) > (b) ? (a) : (b))
#endif

static uint32_t gen4_bind_video_source(struct sna *sna,
				       struct kgem_bo *src_bo,
				       uint32_t src_offset,
				       int src_width,
				       int src_height,
				       int src_pitch,
				       uint32_t src_surf_format)
{
	struct gen4_surface_state *ss;

	sna->kgem.surface -= sizeof(struct gen4_surface_state_padded) / sizeof(uint32_t);

	ss = memset(sna->kgem.batch + sna->kgem.surface, 0, sizeof(*ss));
	ss->ss0.surface_type = GEN4_SURFACE_2D;
	ss->ss0.surface_format = src_surf_format;
	ss->ss0.color_blend = 1;

	ss->ss1.base_addr =
		kgem_add_reloc(&sna->kgem,
			       sna->kgem.surface + 1,
			       src_bo,
			       I915_GEM_DOMAIN_SAMPLER << 16,
			       src_offset);

	ss->ss2.width  = src_width - 1;
	ss->ss2.height = src_height - 1;
	ss->ss3.pitch  = src_pitch - 1;

	return sna->kgem.surface * sizeof(uint32_t);
}

static void gen4_video_bind_surfaces(struct sna *sna,
				     const struct sna_composite_op *op)
{
	struct sna_video_frame *frame = op->priv;
	uint32_t src_surf_format[6];
	uint32_t src_surf_base[6];
	int src_width[6];
	int src_height[6];
	int src_pitch[6];
	uint32_t *binding_table;
	uint16_t offset, dirty;
	int n_src, n;

	src_surf_base[0] = 0;
	src_surf_base[1] = 0;
	src_surf_base[2] = frame->VBufOffset;
	src_surf_base[3] = frame->VBufOffset;
	src_surf_base[4] = frame->UBufOffset;
	src_surf_base[5] = frame->UBufOffset;

	if (is_planar_fourcc(frame->id)) {
		for (n = 0; n < 2; n++) {
			src_surf_format[n] = GEN4_SURFACEFORMAT_R8_UNORM;
			src_width[n]  = frame->width;
			src_height[n] = frame->height;
			src_pitch[n]  = frame->pitch[1];
		}
		for (; n < 6; n++) {
			if (is_nv12_fourcc(frame->id))
				src_surf_format[n] = GEN4_SURFACEFORMAT_R8G8_UNORM;
			else
				src_surf_format[n] = GEN4_SURFACEFORMAT_R8_UNORM;
			src_width[n]  = frame->width / 2;
			src_height[n] = frame->height / 2;
			src_pitch[n]  = frame->pitch[0];
		}
		n_src = 6;
	} else {
		if (frame->id == FOURCC_UYVY)
			src_surf_format[0] = GEN4_SURFACEFORMAT_YCRCB_SWAPY;
		else
			src_surf_format[0] = GEN4_SURFACEFORMAT_YCRCB_NORMAL;

		src_width[0]  = frame->width;
		src_height[0] = frame->height;
		src_pitch[0]  = frame->pitch[0];
		n_src = 1;
	}

	gen4_get_batch(sna, op);
	dirty = kgem_bo_is_dirty(op->dst.bo);

	binding_table = gen4_composite_get_binding_table(sna, &offset);
	binding_table[0] =
		gen4_bind_bo(sna,
			     op->dst.bo, op->dst.width, op->dst.height,
			     gen4_get_dest_format(op->dst.format),
			     true);
	for (n = 0; n < n_src; n++) {
		binding_table[1+n] =
			gen4_bind_video_source(sna,
					       frame->bo,
					       src_surf_base[n],
					       src_width[n],
					       src_height[n],
					       src_pitch[n],
					       src_surf_format[n]);
	}

	if (!ALWAYS_FLUSH && sna->kgem.batch[sna->render_state.gen4.surface_table] == binding_table[0])
		dirty = 0;

	gen4_emit_state(sna, op, offset | dirty);
}

static unsigned select_video_kernel(const struct sna_video *video,
				    const struct sna_video_frame *frame)
{
	switch (frame->id) {
	case FOURCC_YV12:
	case FOURCC_I420:
	case FOURCC_XVMC:
		return video->colorspace ?
			WM_KERNEL_VIDEO_PLANAR_BT709 :
			WM_KERNEL_VIDEO_PLANAR_BT601;

	case FOURCC_NV12:
		return video->colorspace ?
			WM_KERNEL_VIDEO_NV12_BT709 :
			WM_KERNEL_VIDEO_NV12_BT601;

	default:
		return video->colorspace ?
			WM_KERNEL_VIDEO_PACKED_BT709 :
			WM_KERNEL_VIDEO_PACKED_BT601;
	}
}

static bool
gen4_render_video(struct sna *sna,
		  struct sna_video *video,
		  struct sna_video_frame *frame,
		  RegionPtr dstRegion,
		  PixmapPtr pixmap)
{
	struct sna_composite_op tmp;
	struct sna_pixmap *priv = sna_pixmap(pixmap);
	int dst_width = dstRegion->extents.x2 - dstRegion->extents.x1;
	int dst_height = dstRegion->extents.y2 - dstRegion->extents.y1;
	int src_width = frame->src.x2 - frame->src.x1;
	int src_height = frame->src.y2 - frame->src.y1;
	float src_offset_x, src_offset_y;
	float src_scale_x, src_scale_y;
	const BoxRec *box;
	int nbox;

	DBG(("%s: %dx%d -> %dx%d\n", __FUNCTION__,
	     src_width, src_height, dst_width, dst_height));

	assert(priv->gpu_bo);
	memset(&tmp, 0, sizeof(tmp));

	tmp.op = PictOpSrc;
	tmp.dst.pixmap = pixmap;
	tmp.dst.width  = pixmap->drawable.width;
	tmp.dst.height = pixmap->drawable.height;
	tmp.dst.format = sna_format_for_depth(pixmap->drawable.depth);
	tmp.dst.bo = priv->gpu_bo;

	if (src_width == dst_width && src_height == dst_height)
		tmp.src.filter = SAMPLER_FILTER_NEAREST;
	else
		tmp.src.filter = SAMPLER_FILTER_BILINEAR;
	tmp.src.repeat = SAMPLER_EXTEND_PAD;
	tmp.src.bo = frame->bo;
	tmp.mask.bo = NULL;
	tmp.u.gen4.wm_kernel = select_video_kernel(video, frame);
	tmp.u.gen4.ve_id = 2;
	tmp.is_affine = true;
	tmp.floats_per_vertex = 3;
	tmp.floats_per_rect = 9;
	tmp.priv = frame;

	if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL))
			return false;
	}

	gen4_align_vertex(sna, &tmp);
	gen4_video_bind_surfaces(sna, &tmp);

	src_scale_x = (float)src_width / dst_width / frame->width;
	src_offset_x = (float)frame->src.x1 / frame->width - dstRegion->extents.x1 * src_scale_x;

	src_scale_y = (float)src_height / dst_height / frame->height;
	src_offset_y = (float)frame->src.y1 / frame->height - dstRegion->extents.y1 * src_scale_y;

	box = region_rects(dstRegion);
	nbox = region_num_rects(dstRegion);
	do {
		int n;

		n = gen4_get_rectangles(sna, &tmp, nbox,
					gen4_video_bind_surfaces);
		assert(n);
		nbox -= n;

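		/* Note (added for clarity): each box is drawn as a RECTLIST
		 * primitive, emitting only three corners (bottom-right,
		 * bottom-left, top-left); the hardware infers the fourth.
		 */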
1531 		do {
1532 			OUT_VERTEX(box->x2, box->y2);
1533 			OUT_VERTEX_F(box->x2 * src_scale_x + src_offset_x);
1534 			OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y);
1535 
1536 			OUT_VERTEX(box->x1, box->y2);
1537 			OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x);
1538 			OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y);
1539 
1540 			OUT_VERTEX(box->x1, box->y1);
1541 			OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x);
1542 			OUT_VERTEX_F(box->y1 * src_scale_y + src_offset_y);
1543 
1544 			box++;
1545 		} while (--n);
1546 	} while (nbox);
1547 	gen4_vertex_flush(sna);
1548 
1549 	if (!DAMAGE_IS_ALL(priv->gpu_damage))
1550 		sna_damage_add(&priv->gpu_damage, dstRegion);
1551 
1552 	return true;
1553 }
1554 
1555 static int
gen4_composite_picture(struct sna * sna,PicturePtr picture,struct sna_composite_channel * channel,int x,int y,int w,int h,int dst_x,int dst_y,bool precise)1556 gen4_composite_picture(struct sna *sna,
1557 		       PicturePtr picture,
1558 		       struct sna_composite_channel *channel,
1559 		       int x, int y,
1560 		       int w, int h,
1561 		       int dst_x, int dst_y,
1562 		       bool precise)
1563 {
1564 	PixmapPtr pixmap;
1565 	uint32_t color;
1566 	int16_t dx, dy;
1567 
1568 	DBG(("%s: (%d, %d)x(%d, %d), dst=(%d, %d)\n",
1569 	     __FUNCTION__, x, y, w, h, dst_x, dst_y));
1570 
1571 	channel->is_solid = false;
1572 	channel->card_format = -1;
1573 
1574 	if (sna_picture_is_solid(picture, &color))
1575 		return gen4_channel_init_solid(sna, channel, color);
1576 
1577 	if (picture->pDrawable == NULL) {
1578 		int ret;
1579 
1580 		if (picture->pSourcePict->type == SourcePictTypeLinear)
1581 			return gen4_channel_init_linear(sna, picture, channel,
1582 							x, y,
1583 							w, h,
1584 							dst_x, dst_y);
1585 
1586 		DBG(("%s -- fixup, gradient\n", __FUNCTION__));
1587 		ret = -1;
1588 		if (!precise)
1589 			ret = sna_render_picture_approximate_gradient(sna, picture, channel,
1590 								      x, y, w, h, dst_x, dst_y);
1591 		if (ret == -1)
1592 			ret = sna_render_picture_fixup(sna, picture, channel,
1593 						       x, y, w, h, dst_x, dst_y);
1594 		return ret;
1595 	}
1596 
1597 	if (picture->alphaMap) {
1598 		DBG(("%s -- fallback, alphamap\n", __FUNCTION__));
1599 		return sna_render_picture_fixup(sna, picture, channel,
1600 						x, y, w, h, dst_x, dst_y);
1601 	}
1602 
1603 	if (!gen4_check_repeat(picture)) {
1604 		DBG(("%s: unknown repeat mode fixup\n", __FUNCTION__));
1605 		return sna_render_picture_fixup(sna, picture, channel,
1606 						x, y, w, h, dst_x, dst_y);
1607 	}
1608 
1609 	if (!gen4_check_filter(picture)) {
1610 		DBG(("%s: unhandled filter fixup\n", __FUNCTION__));
1611 		return sna_render_picture_fixup(sna, picture, channel,
1612 						x, y, w, h, dst_x, dst_y);
1613 	}
1614 
1615 	channel->repeat = picture->repeat ? picture->repeatType : RepeatNone;
1616 	channel->filter = picture->filter;
1617 
1618 	pixmap = get_drawable_pixmap(picture->pDrawable);
1619 	get_drawable_deltas(picture->pDrawable, pixmap, &dx, &dy);
1620 
1621 	x += dx + picture->pDrawable->x;
1622 	y += dy + picture->pDrawable->y;
1623 
1624 	channel->is_affine = sna_transform_is_affine(picture->transform);
1625 	if (sna_transform_is_imprecise_integer_translation(picture->transform, picture->filter, precise, &dx, &dy)) {
1626 		DBG(("%s: integer translation (%d, %d), removing\n",
1627 		     __FUNCTION__, dx, dy));
1628 		x += dx;
1629 		y += dy;
1630 		channel->transform = NULL;
1631 		channel->filter = PictFilterNearest;
1632 
1633 		if (channel->repeat &&
1634 		    (x >= 0 &&
1635 		     y >= 0 &&
1636 		     x + w <= pixmap->drawable.width &&
1637 		     y + h <= pixmap->drawable.height)) {
1638 			struct sna_pixmap *priv = sna_pixmap(pixmap);
1639 			if (priv && priv->clear) {
1640 				DBG(("%s: converting large pixmap source into solid [%08x]\n", __FUNCTION__, priv->clear_color));
1641 				return gen4_channel_init_solid(sna, channel,
1642 							       solid_color(picture->format,
1643 									   priv->clear_color));
1644 			}
1645 		}
1646 	} else
1647 		channel->transform = picture->transform;
1648 
1649 	channel->pict_format = picture->format;
1650 	channel->card_format = gen4_get_card_format(picture->format);
1651 	if (channel->card_format == -1)
1652 		return sna_render_picture_convert(sna, picture, channel, pixmap,
1653 						  x, y, w, h, dst_x, dst_y,
1654 						  false);
1655 
1656 	if (too_large(pixmap->drawable.width, pixmap->drawable.height))
1657 		return sna_render_picture_extract(sna, picture, channel,
1658 						  x, y, w, h, dst_x, dst_y);
1659 
1660 	return sna_render_pixmap_bo(sna, channel, pixmap,
1661 				    x, y, w, h, dst_x, dst_y);
1662 }
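
/* The channel initialisers share a small return convention: -1 means
 * failure (the caller should fall back), 0 means the channel is degenerate
 * and may be replaced by a transparent solid, and 1 means the channel is
 * ready for use.  A sketch of the caller side, mirroring the switches in
 * gen4_render_composite() below:
 *
 *	switch (gen4_composite_picture(sna, src, &tmp->src, ...)) {
 *	case -1:
 *		goto cleanup_dst;
 *	case 0:
 *		if (!gen4_channel_init_solid(sna, &tmp->src, 0))
 *			goto cleanup_dst;
 *		// fall through
 *	case 1:
 *		gen4_composite_channel_convert(&tmp->src);
 *		break;
 *	}
 */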
1663 
1664 static void gen4_composite_channel_convert(struct sna_composite_channel *channel)
1665 {
1666 	DBG(("%s: repeat %d -> %d, filter %d -> %d\n",
1667 	     __FUNCTION__,
1668 	     channel->repeat, gen4_repeat(channel->repeat),
1669 	     channel->filter, gen4_filter(channel->filter)));
1670 	channel->repeat = gen4_repeat(channel->repeat);
1671 	channel->filter = gen4_filter(channel->filter);
1672 	if (channel->card_format == (unsigned)-1)
1673 		channel->card_format = gen4_get_card_format(channel->pict_format);
1674 }
1675 
1676 static void
1677 gen4_render_composite_done(struct sna *sna,
1678 			   const struct sna_composite_op *op)
1679 {
1680 	DBG(("%s()\n", __FUNCTION__));
1681 
1682 	if (sna->render.vertex_offset) {
1683 		gen4_vertex_flush(sna);
1684 		gen4_magic_ca_pass(sna, op);
1685 	}
1686 
1687 	if (op->mask.bo)
1688 		kgem_bo_destroy(&sna->kgem, op->mask.bo);
1689 	if (op->src.bo)
1690 		kgem_bo_destroy(&sna->kgem, op->src.bo);
1691 
1692 	sna_render_composite_redirect_done(sna, op);
1693 }
1694 
1695 static bool
1696 gen4_composite_set_target(struct sna *sna,
1697 			  struct sna_composite_op *op,
1698 			  PicturePtr dst,
1699 			  int x, int y, int w, int h,
1700 			  bool partial)
1701 {
1702 	BoxRec box;
1703 	unsigned hint;
1704 
1705 	op->dst.pixmap = get_drawable_pixmap(dst->pDrawable);
1706 	op->dst.width  = op->dst.pixmap->drawable.width;
1707 	op->dst.height = op->dst.pixmap->drawable.height;
1708 	op->dst.format = dst->format;
1709 	if (w && h) {
1710 		box.x1 = x;
1711 		box.y1 = y;
1712 		box.x2 = x + w;
1713 		box.y2 = y + h;
1714 	} else
1715 		sna_render_picture_extents(dst, &box);
1716 
1717 	hint = PREFER_GPU | RENDER_GPU;
1718 	if (!need_tiling(sna, op->dst.width, op->dst.height))
1719 		hint |= FORCE_GPU;
1720 	if (!partial) {
1721 		hint |= IGNORE_DAMAGE;
1722 		if (w == op->dst.width && h == op->dst.height)
1723 			hint |= REPLACES;
1724 	}
1725 
1726 	op->dst.bo = sna_drawable_use_bo(dst->pDrawable, hint, &box, &op->damage);
1727 	if (op->dst.bo == NULL)
1728 		return false;
1729 
1730 	if (hint & REPLACES) {
1731 		struct sna_pixmap *priv = sna_pixmap(op->dst.pixmap);
1732 		kgem_bo_pair_undo(&sna->kgem, priv->gpu_bo, priv->cpu_bo);
1733 	}
1734 
1735 	get_drawable_deltas(dst->pDrawable, op->dst.pixmap,
1736 			    &op->dst.x, &op->dst.y);
1737 
1738 	DBG(("%s: pixmap=%ld, format=%08x, size=%dx%d, pitch=%d, delta=(%d,%d),damage=%p\n",
1739 	     __FUNCTION__,
1740 	     op->dst.pixmap->drawable.serialNumber, (int)op->dst.format,
1741 	     op->dst.width, op->dst.height,
1742 	     op->dst.bo->pitch,
1743 	     op->dst.x, op->dst.y,
1744 	     op->damage ? *op->damage : (void *)-1));
1745 
1746 	assert(op->dst.bo->proxy == NULL);
1747 
1748 	if (too_large(op->dst.width, op->dst.height) &&
1749 	    !sna_render_composite_redirect(sna, op, x, y, w, h, partial))
1750 		return false;
1751 
1752 	return true;
1753 }
1754 
1755 static bool
1756 check_gradient(PicturePtr picture, bool precise)
1757 {
1758 	switch (picture->pSourcePict->type) {
1759 	case SourcePictTypeSolidFill:
1760 	case SourcePictTypeLinear:
1761 		return false;
1762 	default:
1763 		return precise;
1764 	}
1765 }
1766 
1767 static bool
1768 has_alphamap(PicturePtr p)
1769 {
1770 	return p->alphaMap != NULL;
1771 }
1772 
1773 static bool
1774 need_upload(struct sna *sna, PicturePtr p)
1775 {
1776 	return p->pDrawable && untransformed(p) &&
1777 		!is_gpu(sna, p->pDrawable, PREFER_GPU_RENDER);
1778 }
1779 
1780 static bool
1781 source_is_busy(PixmapPtr pixmap)
1782 {
1783 	struct sna_pixmap *priv = sna_pixmap(pixmap);
1784 	if (priv == NULL)
1785 		return false;
1786 
1787 	if (priv->clear)
1788 		return false;
1789 
1790 	if (priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo))
1791 		return true;
1792 
1793 	if (priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo))
1794 		return true;
1795 
1796 	return priv->gpu_damage && !priv->cpu_damage;
1797 }
1798 
1799 static bool
1800 source_fallback(struct sna *sna, PicturePtr p, PixmapPtr pixmap, bool precise)
1801 {
1802 	if (sna_picture_is_solid(p, NULL))
1803 		return false;
1804 
1805 	if (p->pSourcePict)
1806 		return check_gradient(p, precise);
1807 
1808 	if (!gen4_check_repeat(p) || !gen4_check_format(p->format))
1809 		return true;
1810 
1811 	/* soft errors: prefer to upload/compute rather than read back */
1812 	if (pixmap && source_is_busy(pixmap))
1813 		return false;
1814 
1815 	return has_alphamap(p) || !gen4_check_filter(p) || need_upload(sna, p);
1816 }
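
/* Summary of the heuristics above: only hard cases (an alpha map, an
 * unhandled filter, repeat or format, or an imprecise gradient) force a
 * fallback outright; a source that is merely inconvenient but already busy
 * on the GPU is kept there, since uploading or recomputing is cheaper than
 * stalling for a readback.
 */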
1817 
1818 static bool
1819 gen4_composite_fallback(struct sna *sna,
1820 			PicturePtr src,
1821 			PicturePtr mask,
1822 			PicturePtr dst)
1823 {
1824 	PixmapPtr src_pixmap;
1825 	PixmapPtr mask_pixmap;
1826 	PixmapPtr dst_pixmap;
1827 	bool src_fallback, mask_fallback;
1828 
1829 	if (!gen4_check_dst_format(dst->format)) {
1830 		DBG(("%s: unknown destination format: %d\n",
1831 		     __FUNCTION__, dst->format));
1832 		return true;
1833 	}
1834 
1835 	dst_pixmap = get_drawable_pixmap(dst->pDrawable);
1836 
1837 	src_pixmap = src->pDrawable ? get_drawable_pixmap(src->pDrawable) : NULL;
1838 	src_fallback = source_fallback(sna, src, src_pixmap,
1839 				       dst->polyMode == PolyModePrecise);
1840 
1841 	if (mask) {
1842 		mask_pixmap = mask->pDrawable ? get_drawable_pixmap(mask->pDrawable) : NULL;
1843 		mask_fallback = source_fallback(sna, mask, mask_pixmap,
1844 						dst->polyMode == PolyModePrecise);
1845 	} else {
1846 		mask_pixmap = NULL;
1847 		mask_fallback = false;
1848 	}
1849 
1850 	/* If we are using the destination as a source and need to
1851 	 * read it back in order to upload the source, do it all
1852 	 * on the CPU.
1853 	 */
1854 	if (src_pixmap == dst_pixmap && src_fallback) {
1855 		DBG(("%s: src is dst and will fallback\n",__FUNCTION__));
1856 		return true;
1857 	}
1858 	if (mask_pixmap == dst_pixmap && mask_fallback) {
1859 		DBG(("%s: mask is dst and will fallback\n",__FUNCTION__));
1860 		return true;
1861 	}
1862 
1863 	/* If anything is on the GPU, push everything out to the GPU */
1864 	if (dst_use_gpu(dst_pixmap)) {
1865 		DBG(("%s: dst is already on the GPU, try to use GPU\n",
1866 		     __FUNCTION__));
1867 		return false;
1868 	}
1869 
1870 	if (src_pixmap && !src_fallback) {
1871 		DBG(("%s: src is already on the GPU, try to use GPU\n",
1872 		     __FUNCTION__));
1873 		return false;
1874 	}
1875 	if (mask_pixmap && !mask_fallback) {
1876 		DBG(("%s: mask is already on the GPU, try to use GPU\n",
1877 		     __FUNCTION__));
1878 		return false;
1879 	}
1880 
1881 	/* However, if the dst is not on the GPU and we need to
1882 	 * render one of the sources using the CPU, we may
1883 	 * as well do the entire operation in place on the CPU.
1884 	 */
1885 	if (src_fallback) {
1886 		DBG(("%s: dst is on the CPU and src will fallback\n",
1887 		     __FUNCTION__));
1888 		return true;
1889 	}
1890 
1891 	if (mask_fallback) {
1892 		DBG(("%s: dst is on the CPU and mask will fallback\n",
1893 		     __FUNCTION__));
1894 		return true;
1895 	}
1896 
1897 	if (too_large(dst_pixmap->drawable.width,
1898 		      dst_pixmap->drawable.height) &&
1899 	    dst_is_cpu(dst_pixmap)) {
1900 		DBG(("%s: dst is on the CPU and too large\n", __FUNCTION__));
1901 		return true;
1902 	}
1903 
1904 	DBG(("%s: dst is not on the GPU and the operation should not fallback\n",
1905 	     __FUNCTION__));
1906 	return dst_use_cpu(dst_pixmap);
1907 }
1908 
1909 static int
1910 reuse_source(struct sna *sna,
1911 	     PicturePtr src, struct sna_composite_channel *sc, int src_x, int src_y,
1912 	     PicturePtr mask, struct sna_composite_channel *mc, int msk_x, int msk_y)
1913 {
1914 	uint32_t color;
1915 
1916 	if (src_x != msk_x || src_y != msk_y)
1917 		return false;
1918 
1919 	if (src == mask) {
1920 		DBG(("%s: mask is source\n", __FUNCTION__));
1921 		*mc = *sc;
1922 		mc->bo = kgem_bo_reference(mc->bo);
1923 		return true;
1924 	}
1925 
1926 	if (sna_picture_is_solid(mask, &color))
1927 		return gen4_channel_init_solid(sna, mc, color);
1928 
1929 	if (sc->is_solid)
1930 		return false;
1931 
1932 	if (src->pDrawable == NULL || mask->pDrawable != src->pDrawable)
1933 		return false;
1934 
1935 	DBG(("%s: mask reuses source drawable\n", __FUNCTION__));
1936 
1937 	if (!sna_transform_equal(src->transform, mask->transform))
1938 		return false;
1939 
1940 	if (!sna_picture_alphamap_equal(src, mask))
1941 		return false;
1942 
1943 	if (!gen4_check_repeat(mask))
1944 		return false;
1945 
1946 	if (!gen4_check_filter(mask))
1947 		return false;
1948 
1949 	if (!gen4_check_format(mask->format))
1950 		return false;
1951 
1952 	DBG(("%s: reusing source channel for mask with a twist\n",
1953 	     __FUNCTION__));
1954 
1955 	*mc = *sc;
1956 	mc->repeat = gen4_repeat(mask->repeat ? mask->repeatType : RepeatNone);
1957 	mc->filter = gen4_filter(mask->filter);
1958 	mc->pict_format = mask->format;
1959 	mc->card_format = gen4_get_card_format(mask->format);
1960 	mc->bo = kgem_bo_reference(mc->bo);
1961 	return true;
1962 }
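
/* reuse_source() covers the common "mask is the source" composites: when
 * both channels resolve to the same drawable with identical transforms and
 * sampling, the mask channel simply aliases the source bo (taking an extra
 * reference, so the usual destroy path in gen4_render_composite_done()
 * stays balanced).
 */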
1963 
1964 static bool
1965 gen4_render_composite(struct sna *sna,
1966 		      uint8_t op,
1967 		      PicturePtr src,
1968 		      PicturePtr mask,
1969 		      PicturePtr dst,
1970 		      int16_t src_x, int16_t src_y,
1971 		      int16_t msk_x, int16_t msk_y,
1972 		      int16_t dst_x, int16_t dst_y,
1973 		      int16_t width, int16_t height,
1974 		      unsigned flags,
1975 		      struct sna_composite_op *tmp)
1976 {
1977 	DBG(("%s: %dx%d, current mode=%d\n", __FUNCTION__,
1978 	     width, height, sna->kgem.mode));
1979 
1980 	if (op >= ARRAY_SIZE(gen4_blend_op))
1981 		return false;
1982 
1983 	if (mask == NULL &&
1984 	    sna_blt_composite(sna, op,
1985 			      src, dst,
1986 			      src_x, src_y,
1987 			      dst_x, dst_y,
1988 			      width, height,
1989 			      flags, tmp))
1990 		return true;
1991 
1992 	if (gen4_composite_fallback(sna, src, mask, dst))
1993 		goto fallback;
1994 
1995 	if (need_tiling(sna, width, height))
1996 		return sna_tiling_composite(op, src, mask, dst,
1997 					    src_x, src_y,
1998 					    msk_x, msk_y,
1999 					    dst_x, dst_y,
2000 					    width, height,
2001 					    tmp);
2002 
2003 	if (!gen4_composite_set_target(sna, tmp, dst,
2004 				       dst_x, dst_y, width, height,
2005 				       flags & COMPOSITE_PARTIAL || op > PictOpSrc)) {
2006 		DBG(("%s: failed to set composite target\n", __FUNCTION__));
2007 		goto fallback;
2008 	}
2009 
2010 	tmp->op = op;
2011 	switch (gen4_composite_picture(sna, src, &tmp->src,
2012 				       src_x, src_y,
2013 				       width, height,
2014 				       dst_x, dst_y,
2015 				       dst->polyMode == PolyModePrecise)) {
2016 	case -1:
2017 		DBG(("%s: failed to prepare source\n", __FUNCTION__));
2018 		goto cleanup_dst;
2019 	case 0:
2020 		if (!gen4_channel_init_solid(sna, &tmp->src, 0))
2021 			goto cleanup_dst;
2022 		/* fall through */
2023 	case 1:
2024 		if (mask == NULL &&
2025 		    sna_blt_composite__convert(sna,
2026 					       dst_x, dst_y, width, height,
2027 					       tmp))
2028 			return true;
2029 
2030 		gen4_composite_channel_convert(&tmp->src);
2031 		break;
2032 	}
2033 
2034 	tmp->is_affine = tmp->src.is_affine;
2035 	tmp->has_component_alpha = false;
2036 	tmp->need_magic_ca_pass = false;
2037 
2038 	if (mask) {
2039 		if (mask->componentAlpha && PICT_FORMAT_RGB(mask->format)) {
2040 			tmp->has_component_alpha = true;
2041 
2042 			/* Check if it's component alpha that relies on a source alpha and on
2043 			 * the source value.  We can only get one of those into the single
2044 			 * source value that we get to blend with.
2045 			 */
2046 			if (gen4_blend_op[op].src_alpha &&
2047 			    (gen4_blend_op[op].src_blend != GEN4_BLENDFACTOR_ZERO)) {
2048 				if (op != PictOpOver) {
2049 					DBG(("%s -- fallback: unhandled component alpha blend\n",
2050 					     __FUNCTION__));
2051 
2052 					goto cleanup_src;
2053 				}
2054 
2055 				tmp->need_magic_ca_pass = true;
2056 				tmp->op = PictOpOutReverse;
2057 			}
2058 		}
2059 
2060 		if (!reuse_source(sna,
2061 				  src, &tmp->src, src_x, src_y,
2062 				  mask, &tmp->mask, msk_x, msk_y)) {
2063 			switch (gen4_composite_picture(sna, mask, &tmp->mask,
2064 						       msk_x, msk_y,
2065 						       width, height,
2066 						       dst_x, dst_y,
2067 						       dst->polyMode == PolyModePrecise)) {
2068 			case -1:
2069 				DBG(("%s: failed to prepare mask\n", __FUNCTION__));
2070 				goto cleanup_src;
2071 			case 0:
2072 				if (!gen4_channel_init_solid(sna, &tmp->mask, 0))
2073 					goto cleanup_src;
2074 				/* fall through */
2075 			case 1:
2076 				gen4_composite_channel_convert(&tmp->mask);
2077 				break;
2078 			}
2079 		}
2080 
2081 		tmp->is_affine &= tmp->mask.is_affine;
2082 	}
2083 
2084 	tmp->u.gen4.wm_kernel =
2085 		gen4_choose_composite_kernel(tmp->op,
2086 					     tmp->mask.bo != NULL,
2087 					     tmp->has_component_alpha,
2088 					     tmp->is_affine);
2089 	tmp->u.gen4.ve_id = gen4_choose_composite_emitter(sna, tmp);
2090 
2091 	tmp->blt   = gen4_render_composite_blt;
2092 	tmp->box   = gen4_render_composite_box;
2093 	tmp->boxes = gen4_render_composite_boxes__blt;
2094 	if (tmp->emit_boxes) {
2095 		tmp->boxes = gen4_render_composite_boxes;
2096 #if !FORCE_FLUSH
2097 		tmp->thread_boxes = gen4_render_composite_boxes__thread;
2098 #endif
2099 	}
2100 	tmp->done  = gen4_render_composite_done;
2101 
2102 	if (!kgem_check_bo(&sna->kgem,
2103 			   tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
2104 			   NULL)) {
2105 		kgem_submit(&sna->kgem);
2106 		if (!kgem_check_bo(&sna->kgem,
2107 				     tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
2108 				     NULL))
2109 			goto cleanup_mask;
2110 	}
2111 
2112 	gen4_align_vertex(sna, tmp);
2113 	gen4_bind_surfaces(sna, tmp);
2114 	return true;
2115 
2116 cleanup_mask:
2117 	if (tmp->mask.bo) {
2118 		kgem_bo_destroy(&sna->kgem, tmp->mask.bo);
2119 		tmp->mask.bo = NULL;
2120 	}
2121 cleanup_src:
2122 	if (tmp->src.bo) {
2123 		kgem_bo_destroy(&sna->kgem, tmp->src.bo);
2124 		tmp->src.bo = NULL;
2125 	}
2126 cleanup_dst:
2127 	if (tmp->redirect.real_bo) {
2128 		kgem_bo_destroy(&sna->kgem, tmp->dst.bo);
2129 		tmp->redirect.real_bo = NULL;
2130 	}
2131 fallback:
2132 	return (mask == NULL &&
2133 		sna_blt_composite(sna, op,
2134 				  src, dst,
2135 				  src_x, src_y,
2136 				  dst_x, dst_y,
2137 				  width, height,
2138 				  flags | COMPOSITE_FALLBACK, tmp));
2139 }
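
/* A note on need_magic_ca_pass: component-alpha Over needs both src.alpha
 * and src.color per channel, which a single fixed-function blend cannot
 * provide.  The operation is therefore split in two: the rectangles are
 * first drawn with OutReverse (per channel, dst *= 1 - src.alpha * mask)
 * and gen4_magic_ca_pass() then replays them with an Add kernel
 * (dst += src * mask), which together compute Over.  This is a sketch of
 * the intent; the mechanics live in the ca-pass helper.
 */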
2140 
2141 #if !NO_COMPOSITE_SPANS
2142 fastcall static void
2143 gen4_render_composite_spans_box(struct sna *sna,
2144 				const struct sna_composite_spans_op *op,
2145 				const BoxRec *box, float opacity)
2146 {
2147 	DBG(("%s: src=+(%d, %d), opacity=%f, dst=+(%d, %d), box=(%d, %d) x (%d, %d)\n",
2148 	     __FUNCTION__,
2149 	     op->base.src.offset[0], op->base.src.offset[1],
2150 	     opacity,
2151 	     op->base.dst.x, op->base.dst.y,
2152 	     box->x1, box->y1,
2153 	     box->x2 - box->x1,
2154 	     box->y2 - box->y1));
2155 
2156 	gen4_get_rectangles(sna, &op->base, 1, gen4_bind_surfaces);
2157 	op->prim_emit(sna, op, box, opacity);
2158 }
2159 
2160 static void
2161 gen4_render_composite_spans_boxes(struct sna *sna,
2162 				  const struct sna_composite_spans_op *op,
2163 				  const BoxRec *box, int nbox,
2164 				  float opacity)
2165 {
2166 	DBG(("%s: nbox=%d, src=+(%d, %d), opacity=%f, dst=+(%d, %d)\n",
2167 	     __FUNCTION__, nbox,
2168 	     op->base.src.offset[0], op->base.src.offset[1],
2169 	     opacity,
2170 	     op->base.dst.x, op->base.dst.y));
2171 
2172 	do {
2173 		int nbox_this_time;
2174 
2175 		nbox_this_time = gen4_get_rectangles(sna, &op->base, nbox,
2176 						     gen4_bind_surfaces);
2177 		nbox -= nbox_this_time;
2178 
2179 		do {
2180 			DBG(("  %s: (%d, %d) x (%d, %d)\n", __FUNCTION__,
2181 			     box->x1, box->y1,
2182 			     box->x2 - box->x1,
2183 			     box->y2 - box->y1));
2184 
2185 			op->prim_emit(sna, op, box++, opacity);
2186 		} while (--nbox_this_time);
2187 	} while (nbox);
2188 }
2189 
2190 fastcall static void
2191 gen4_render_composite_spans_boxes__thread(struct sna *sna,
2192 					  const struct sna_composite_spans_op *op,
2193 					  const struct sna_opacity_box *box,
2194 					  int nbox)
2195 {
2196 	DBG(("%s: nbox=%d, src=+(%d, %d), dst=+(%d, %d)\n",
2197 	     __FUNCTION__, nbox,
2198 	     op->base.src.offset[0], op->base.src.offset[1],
2199 	     op->base.dst.x, op->base.dst.y));
2200 	assert(nbox);
2201 
2202 	sna_vertex_lock(&sna->render);
2203 	do {
2204 		int nbox_this_time;
2205 		float *v;
2206 
2207 		nbox_this_time = gen4_get_rectangles(sna, &op->base, nbox,
2208 						     gen4_bind_surfaces);
2209 		assert(nbox_this_time);
2210 		nbox -= nbox_this_time;
2211 
2212 		v = sna->render.vertices + sna->render.vertex_used;
2213 		sna->render.vertex_used += nbox_this_time * op->base.floats_per_rect;
2214 
2215 		sna_vertex_acquire__locked(&sna->render);
2216 		sna_vertex_unlock(&sna->render);
2217 
2218 		op->emit_boxes(op, box, nbox_this_time, v);
2219 		box += nbox_this_time;
2220 
2221 		sna_vertex_lock(&sna->render);
2222 		sna_vertex_release__locked(&sna->render);
2223 	} while (nbox);
2224 	sna_vertex_unlock(&sna->render);
2225 }
2226 
2227 fastcall static void
2228 gen4_render_composite_spans_done(struct sna *sna,
2229 				 const struct sna_composite_spans_op *op)
2230 {
2231 	if (sna->render.vertex_offset)
2232 		gen4_vertex_flush(sna);
2233 
2234 	DBG(("%s()\n", __FUNCTION__));
2235 
2236 	kgem_bo_destroy(&sna->kgem, op->base.src.bo);
2237 	sna_render_composite_redirect_done(sna, &op->base);
2238 }
2239 
2240 static bool
2241 gen4_check_composite_spans(struct sna *sna,
2242 			   uint8_t op, PicturePtr src, PicturePtr dst,
2243 			   int16_t width, int16_t height,
2244 			   unsigned flags)
2245 {
2246 	DBG(("%s: op=%d, width=%d, height=%d, flags=%x\n",
2247 	     __FUNCTION__, op, width, height, flags));
2248 
2249 	if (op >= ARRAY_SIZE(gen4_blend_op))
2250 		return false;
2251 
2252 	if (gen4_composite_fallback(sna, src, NULL, dst)) {
2253 		DBG(("%s: operation would fallback\n", __FUNCTION__));
2254 		return false;
2255 	}
2256 
2257 	if (need_tiling(sna, width, height) &&
2258 	    !is_gpu(sna, dst->pDrawable, PREFER_GPU_SPANS)) {
2259 		DBG(("%s: fallback, tiled operation not on GPU\n",
2260 		     __FUNCTION__));
2261 		return false;
2262 	}
2263 
2264 	if (FORCE_SPANS)
2265 		return FORCE_SPANS > 0;
2266 
2267 	if ((flags & COMPOSITE_SPANS_RECTILINEAR) == 0) {
2268 		struct sna_pixmap *priv;
2269 
2270 		if (FORCE_NONRECTILINEAR_SPANS)
2271 			return FORCE_NONRECTILINEAR_SPANS > 0;
2272 
2273 		if ((sna->render.prefer_gpu & PREFER_GPU_SPANS) == 0)
2274 			return false;
2275 
2276 		priv = sna_pixmap_from_drawable(dst->pDrawable);
2277 		assert(priv);
2278 
2279 		if (priv->cpu_bo &&
2280 		    __kgem_bo_is_busy(&sna->kgem, priv->cpu_bo))
2281 			return true;
2282 
2283 		if (flags & COMPOSITE_SPANS_INPLACE_HINT)
2284 			return false;
2285 
2286 		return priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo);
2287 	}
2288 
2289 	return true;
2290 }
2291 
2292 static bool
2293 gen4_render_composite_spans(struct sna *sna,
2294 			    uint8_t op,
2295 			    PicturePtr src,
2296 			    PicturePtr dst,
2297 			    int16_t src_x,  int16_t src_y,
2298 			    int16_t dst_x,  int16_t dst_y,
2299 			    int16_t width,  int16_t height,
2300 			    unsigned flags,
2301 			    struct sna_composite_spans_op *tmp)
2302 {
2303 	DBG(("%s: %dx%d with flags=%x, current mode=%d\n", __FUNCTION__,
2304 	     width, height, flags, sna->kgem.ring));
2305 
2306 	assert(gen4_check_composite_spans(sna, op, src, dst, width, height, flags));
2307 
2308 	if (need_tiling(sna, width, height)) {
2309 		DBG(("%s: tiling, operation (%dx%d) too wide for pipeline\n",
2310 		     __FUNCTION__, width, height));
2311 		return sna_tiling_composite_spans(op, src, dst,
2312 						  src_x, src_y, dst_x, dst_y,
2313 						  width, height, flags, tmp);
2314 	}
2315 
2316 	tmp->base.op = op;
2317 	if (!gen4_composite_set_target(sna, &tmp->base, dst,
2318 				       dst_x, dst_y, width, height, true))
2319 		return false;
2320 
2321 	switch (gen4_composite_picture(sna, src, &tmp->base.src,
2322 				       src_x, src_y,
2323 				       width, height,
2324 				       dst_x, dst_y,
2325 				       dst->polyMode == PolyModePrecise)) {
2326 	case -1:
2327 		goto cleanup_dst;
2328 	case 0:
2329 		if (!gen4_channel_init_solid(sna, &tmp->base.src, 0))
2330 			goto cleanup_dst;
2331 		/* fall through */
2332 	case 1:
2333 		gen4_composite_channel_convert(&tmp->base.src);
2334 		break;
2335 	}
2336 
2337 	tmp->base.mask.bo = NULL;
2338 	tmp->base.mask.filter = SAMPLER_FILTER_NEAREST;
2339 	tmp->base.mask.repeat = SAMPLER_EXTEND_NONE;
2340 
2341 	tmp->base.is_affine = tmp->base.src.is_affine;
2342 	tmp->base.has_component_alpha = false;
2343 	tmp->base.need_magic_ca_pass = false;
2344 
2345 	tmp->base.u.gen4.ve_id = gen4_choose_spans_emitter(sna, tmp);
2346 	tmp->base.u.gen4.wm_kernel = WM_KERNEL_OPACITY | !tmp->base.is_affine;
2347 
2348 	tmp->box   = gen4_render_composite_spans_box;
2349 	tmp->boxes = gen4_render_composite_spans_boxes;
2350 	if (tmp->emit_boxes)
2351 		tmp->thread_boxes = gen4_render_composite_spans_boxes__thread;
2352 	tmp->done  = gen4_render_composite_spans_done;
2353 
2354 	if (!kgem_check_bo(&sna->kgem,
2355 			   tmp->base.dst.bo, tmp->base.src.bo,
2356 			   NULL))  {
2357 		kgem_submit(&sna->kgem);
2358 		if (!kgem_check_bo(&sna->kgem,
2359 				   tmp->base.dst.bo, tmp->base.src.bo,
2360 				   NULL))
2361 			goto cleanup_src;
2362 	}
2363 
2364 	gen4_align_vertex(sna, &tmp->base);
2365 	gen4_bind_surfaces(sna, &tmp->base);
2366 	return true;
2367 
2368 cleanup_src:
2369 	if (tmp->base.src.bo)
2370 		kgem_bo_destroy(&sna->kgem, tmp->base.src.bo);
2371 cleanup_dst:
2372 	if (tmp->base.redirect.real_bo)
2373 		kgem_bo_destroy(&sna->kgem, tmp->base.dst.bo);
2374 	return false;
2375 }
2376 #endif
2377 
2378 static void
2379 gen4_copy_bind_surfaces(struct sna *sna, const struct sna_composite_op *op)
2380 {
2381 	uint32_t *binding_table;
2382 	uint16_t offset, dirty;
2383 
2384 	gen4_get_batch(sna, op);
2385 	dirty = kgem_bo_is_dirty(op->dst.bo);
2386 
2387 	binding_table = gen4_composite_get_binding_table(sna, &offset);
2388 
2389 	binding_table[0] =
2390 		gen4_bind_bo(sna,
2391 			     op->dst.bo, op->dst.width, op->dst.height,
2392 			     gen4_get_dest_format(op->dst.format),
2393 			     true);
2394 	binding_table[1] =
2395 		gen4_bind_bo(sna,
2396 			     op->src.bo, op->src.width, op->src.height,
2397 			     op->src.card_format,
2398 			     false);
2399 
2400 	if (sna->kgem.surface == offset &&
2401 	    *(uint64_t *)(sna->kgem.batch + sna->render_state.gen4.surface_table) == *(uint64_t*)binding_table) {
2402 		sna->kgem.surface += sizeof(struct gen4_surface_state_padded) / sizeof(uint32_t);
2403 		offset = sna->render_state.gen4.surface_table;
2404 	}
2405 
2406 	if (!ALWAYS_FLUSH && sna->kgem.batch[sna->render_state.gen4.surface_table] == binding_table[0])
2407 		dirty = 0;
2408 
2409 	gen4_emit_state(sna, op, offset | dirty);
2410 }
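
/* Two small optimisations above: if the freshly written binding table is
 * identical to the previous one, the surface-state allocation is rewound
 * and the old table offset reused; and if the destination binding is
 * unchanged, the dirty flag is dropped so gen4_emit_state() can skip the
 * flush.  (ALWAYS_FLUSH defeats the latter as a workaround.)
 */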
2411 
2412 static void
2413 gen4_render_copy_one(struct sna *sna,
2414 		     const struct sna_composite_op *op,
2415 		     int sx, int sy,
2416 		     int w, int h,
2417 		     int dx, int dy)
2418 {
2419 	gen4_get_rectangles(sna, op, 1, gen4_copy_bind_surfaces);
2420 
2421 	OUT_VERTEX(dx+w, dy+h);
2422 	OUT_VERTEX_F((sx+w)*op->src.scale[0]);
2423 	OUT_VERTEX_F((sy+h)*op->src.scale[1]);
2424 
2425 	OUT_VERTEX(dx, dy+h);
2426 	OUT_VERTEX_F(sx*op->src.scale[0]);
2427 	OUT_VERTEX_F((sy+h)*op->src.scale[1]);
2428 
2429 	OUT_VERTEX(dx, dy);
2430 	OUT_VERTEX_F(sx*op->src.scale[0]);
2431 	OUT_VERTEX_F(sy*op->src.scale[1]);
2432 }
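
/* The sampler consumes normalised coordinates, hence op->src.scale[] is
 * set to 1/width and 1/height of the source surface (see
 * gen4_render_copy_boxes() below).  E.g. copying from (64, 32) of a
 * 1024x768 source emits a top-left texcoord of (64/1024.f, 32/768.f).
 */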
2433 
2434 static bool
2435 gen4_render_copy_boxes(struct sna *sna, uint8_t alu,
2436 		       const DrawableRec *src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
2437 		       const DrawableRec *dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
2438 		       const BoxRec *box, int n, unsigned flags)
2439 {
2440 	struct sna_composite_op tmp;
2441 
2442 	DBG(("%s x %d\n", __FUNCTION__, n));
2443 
2444 	if (sna_blt_compare_depth(src, dst) &&
2445 	    sna_blt_copy_boxes(sna, alu,
2446 			       src_bo, src_dx, src_dy,
2447 			       dst_bo, dst_dx, dst_dy,
2448 			       dst->bitsPerPixel,
2449 			       box, n))
2450 		return true;
2451 
2452 	if (!(alu == GXcopy || alu == GXclear) || src_bo == dst_bo) {
2453 fallback_blt:
2454 		if (!sna_blt_compare_depth(src, dst))
2455 			return false;
2456 
2457 		return sna_blt_copy_boxes_fallback(sna, alu,
2458 						   src, src_bo, src_dx, src_dy,
2459 						   dst, dst_bo, dst_dx, dst_dy,
2460 						   box, n);
2461 	}
2462 
2463 	memset(&tmp, 0, sizeof(tmp));
2464 
2465 	DBG(("%s (%d, %d)->(%d, %d) x %d\n",
2466 	     __FUNCTION__, src_dx, src_dy, dst_dx, dst_dy, n));
2467 
2468 	if (dst->depth == src->depth) {
2469 		tmp.dst.format = sna_render_format_for_depth(dst->depth);
2470 		tmp.src.pict_format = tmp.dst.format;
2471 	} else {
2472 		tmp.dst.format = sna_format_for_depth(dst->depth);
2473 		tmp.src.pict_format = sna_format_for_depth(src->depth);
2474 	}
2475 	if (!gen4_check_format(tmp.src.pict_format))
2476 		goto fallback_blt;
2477 
2478 	tmp.op = alu == GXcopy ? PictOpSrc : PictOpClear;
2479 
2480 	tmp.dst.pixmap = (PixmapPtr)dst;
2481 	tmp.dst.width  = dst->width;
2482 	tmp.dst.height = dst->height;
2483 	tmp.dst.x = tmp.dst.y = 0;
2484 	tmp.dst.bo = dst_bo;
2485 	tmp.damage = NULL;
2486 
2487 	sna_render_composite_redirect_init(&tmp);
2488 	if (too_large(tmp.dst.width, tmp.dst.height)) {
2489 		BoxRec extents = box[0];
2490 		int i;
2491 
2492 		for (i = 1; i < n; i++) {
2493 			if (box[i].x1 < extents.x1)
2494 				extents.x1 = box[i].x1;
2495 			if (box[i].y1 < extents.y1)
2496 				extents.y1 = box[i].y1;
2497 
2498 			if (box[i].x2 > extents.x2)
2499 				extents.x2 = box[i].x2;
2500 			if (box[i].y2 > extents.y2)
2501 				extents.y2 = box[i].y2;
2502 		}
2503 		if (!sna_render_composite_redirect(sna, &tmp,
2504 						   extents.x1 + dst_dx,
2505 						   extents.y1 + dst_dy,
2506 						   extents.x2 - extents.x1,
2507 						   extents.y2 - extents.y1,
2508 						   n > 1))
2509 			goto fallback_tiled;
2510 	}
2511 
2512 	tmp.src.filter = SAMPLER_FILTER_NEAREST;
2513 	tmp.src.repeat = SAMPLER_EXTEND_NONE;
2514 	tmp.src.card_format = gen4_get_card_format(tmp.src.pict_format);
2515 	if (too_large(src->width, src->height)) {
2516 		BoxRec extents = box[0];
2517 		int i;
2518 
2519 		for (i = 1; i < n; i++) {
2520 			if (box[i].x1 < extents.x1)
2521 				extents.x1 = box[i].x1;
2522 			if (box[i].y1 < extents.y1)
2523 				extents.y1 = box[i].y1;
2524 
2525 			if (box[i].x2 > extents.x2)
2526 				extents.x2 = box[i].x2;
2527 			if (box[i].y2 > extents.y2)
2528 				extents.y2 = box[i].y2;
2529 		}
2530 
2531 		if (!sna_render_pixmap_partial(sna, src, src_bo, &tmp.src,
2532 					       extents.x1 + src_dx,
2533 					       extents.y1 + src_dy,
2534 					       extents.x2 - extents.x1,
2535 					       extents.y2 - extents.y1))
2536 			goto fallback_tiled_dst;
2537 	} else {
2538 		tmp.src.bo = kgem_bo_reference(src_bo);
2539 		tmp.src.width  = src->width;
2540 		tmp.src.height = src->height;
2541 		tmp.src.offset[0] = tmp.src.offset[1] = 0;
2542 		tmp.src.scale[0] = 1.f/src->width;
2543 		tmp.src.scale[1] = 1.f/src->height;
2544 	}
2545 
2546 	tmp.is_affine = true;
2547 	tmp.floats_per_vertex = 3;
2548 	tmp.floats_per_rect = 9;
2549 	tmp.u.gen4.wm_kernel = WM_KERNEL;
2550 	tmp.u.gen4.ve_id = 2;
2551 
2552 	if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) {
2553 		kgem_submit(&sna->kgem);
2554 		if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) {
2555 			kgem_bo_destroy(&sna->kgem, tmp.src.bo);
2556 			if (tmp.redirect.real_bo)
2557 				kgem_bo_destroy(&sna->kgem, tmp.dst.bo);
2558 
2559 			goto fallback_blt;
2560 		}
2561 	}
2562 
2563 	dst_dx += tmp.dst.x;
2564 	dst_dy += tmp.dst.y;
2565 	tmp.dst.x = tmp.dst.y = 0;
2566 
2567 	src_dx += tmp.src.offset[0];
2568 	src_dy += tmp.src.offset[1];
2569 
2570 	gen4_align_vertex(sna, &tmp);
2571 	gen4_copy_bind_surfaces(sna, &tmp);
2572 
2573 	do {
2574 		gen4_render_copy_one(sna, &tmp,
2575 				     box->x1 + src_dx, box->y1 + src_dy,
2576 				     box->x2 - box->x1, box->y2 - box->y1,
2577 				     box->x1 + dst_dx, box->y1 + dst_dy);
2578 		box++;
2579 	} while (--n);
2580 
2581 	gen4_vertex_flush(sna);
2582 	sna_render_composite_redirect_done(sna, &tmp);
2583 	kgem_bo_destroy(&sna->kgem, tmp.src.bo);
2584 	return true;
2585 
2586 fallback_tiled_dst:
2587 	if (tmp.redirect.real_bo)
2588 		kgem_bo_destroy(&sna->kgem, tmp.dst.bo);
2589 fallback_tiled:
2590 	if (sna_blt_compare_depth(src, dst) &&
2591 	    sna_blt_copy_boxes(sna, alu,
2592 			       src_bo, src_dx, src_dy,
2593 			       dst_bo, dst_dx, dst_dy,
2594 			       dst->bitsPerPixel,
2595 			       box, n))
2596 		return true;
2597 
2598 	return sna_tiling_copy_boxes(sna, alu,
2599 				     src, src_bo, src_dx, src_dy,
2600 				     dst, dst_bo, dst_dx, dst_dy,
2601 				     box, n);
2602 }
2603 
2604 static void
2605 gen4_render_copy_blt(struct sna *sna,
2606 		     const struct sna_copy_op *op,
2607 		     int16_t sx, int16_t sy,
2608 		     int16_t w,  int16_t h,
2609 		     int16_t dx, int16_t dy)
2610 {
2611 	gen4_render_copy_one(sna, &op->base, sx, sy, w, h, dx, dy);
2612 }
2613 
2614 static void
2615 gen4_render_copy_done(struct sna *sna, const struct sna_copy_op *op)
2616 {
2617 	if (sna->render.vertex_offset)
2618 		gen4_vertex_flush(sna);
2619 }
2620 
2621 static bool
2622 gen4_render_copy(struct sna *sna, uint8_t alu,
2623 		 PixmapPtr src, struct kgem_bo *src_bo,
2624 		 PixmapPtr dst, struct kgem_bo *dst_bo,
2625 		 struct sna_copy_op *op)
2626 {
2627 	DBG(("%s: src=%ld, dst=%ld, alu=%d\n",
2628 	     __FUNCTION__,
2629 	     src->drawable.serialNumber,
2630 	     dst->drawable.serialNumber,
2631 	     alu));
2632 
2633 	if (sna_blt_compare_depth(&src->drawable, &dst->drawable) &&
2634 	    sna_blt_copy(sna, alu,
2635 			 src_bo, dst_bo,
2636 			 dst->drawable.bitsPerPixel,
2637 			 op))
2638 		return true;
2639 
2640 	if (!(alu == GXcopy || alu == GXclear) || src_bo == dst_bo ||
2641 	    too_large(src->drawable.width, src->drawable.height) ||
2642 	    too_large(dst->drawable.width, dst->drawable.height)) {
2643 fallback:
2644 		if (!sna_blt_compare_depth(&src->drawable, &dst->drawable))
2645 			return false;
2646 
2647 		return sna_blt_copy(sna, alu, src_bo, dst_bo,
2648 				    dst->drawable.bitsPerPixel,
2649 				    op);
2650 	}
2651 
2652 	if (dst->drawable.depth == src->drawable.depth) {
2653 		op->base.dst.format = sna_render_format_for_depth(dst->drawable.depth);
2654 		op->base.src.pict_format = op->base.dst.format;
2655 	} else {
2656 		op->base.dst.format = sna_format_for_depth(dst->drawable.depth);
2657 		op->base.src.pict_format = sna_format_for_depth(src->drawable.depth);
2658 	}
2659 	if (!gen4_check_format(op->base.src.pict_format))
2660 		goto fallback;
2661 
2662 	op->base.op = alu == GXcopy ? PictOpSrc : PictOpClear;
2663 
2664 	op->base.dst.pixmap = dst;
2665 	op->base.dst.width  = dst->drawable.width;
2666 	op->base.dst.height = dst->drawable.height;
2667 	op->base.dst.bo = dst_bo;
2668 
2669 	op->base.src.bo = src_bo;
2670 	op->base.src.card_format =
2671 		gen4_get_card_format(op->base.src.pict_format);
2672 	op->base.src.width  = src->drawable.width;
2673 	op->base.src.height = src->drawable.height;
2674 	op->base.src.scale[0] = 1.f/src->drawable.width;
2675 	op->base.src.scale[1] = 1.f/src->drawable.height;
2676 	op->base.src.filter = SAMPLER_FILTER_NEAREST;
2677 	op->base.src.repeat = SAMPLER_EXTEND_NONE;
2678 
2679 	op->base.is_affine = true;
2680 	op->base.floats_per_vertex = 3;
2681 	op->base.floats_per_rect = 9;
2682 	op->base.u.gen4.wm_kernel = WM_KERNEL;
2683 	op->base.u.gen4.ve_id = 2;
2684 
2685 	if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) {
2686 		kgem_submit(&sna->kgem);
2687 		if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL))
2688 			goto fallback;
2689 	}
2690 
2691 	if (kgem_bo_is_dirty(src_bo)) {
2692 		if (sna_blt_compare_depth(&src->drawable, &dst->drawable) &&
2693 		    sna_blt_copy(sna, alu,
2694 				 src_bo, dst_bo,
2695 				 dst->drawable.bitsPerPixel,
2696 				 op))
2697 			return true;
2698 	}
2699 
2700 	gen4_align_vertex(sna, &op->base);
2701 	gen4_copy_bind_surfaces(sna, &op->base);
2702 
2703 	op->blt  = gen4_render_copy_blt;
2704 	op->done = gen4_render_copy_done;
2705 	return true;
2706 }
2707 
2708 static void
2709 gen4_render_fill_rectangle(struct sna *sna,
2710 			   const struct sna_composite_op *op,
2711 			   int x, int y, int w, int h)
2712 {
2713 	gen4_get_rectangles(sna, op, 1, gen4_bind_surfaces);
2714 
2715 	OUT_VERTEX(x+w, y+h);
2716 	OUT_VERTEX_F(.5);
2717 
2718 	OUT_VERTEX(x, y+h);
2719 	OUT_VERTEX_F(.5);
2720 
2721 	OUT_VERTEX(x, y);
2722 	OUT_VERTEX_F(.5);
2723 }
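
/* Fills use the most compact vertex layout (ve_id 1, two floats per
 * vertex): a position plus a single constant texcoord of .5, which samples
 * the centre of the solid-colour source set up by
 * gen4_channel_init_solid().
 */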
2724 
2725 static bool
2726 gen4_render_fill_boxes(struct sna *sna,
2727 		       CARD8 op,
2728 		       PictFormat format,
2729 		       const xRenderColor *color,
2730 		       const DrawableRec *dst, struct kgem_bo *dst_bo,
2731 		       const BoxRec *box, int n)
2732 {
2733 	struct sna_composite_op tmp;
2734 	uint32_t pixel;
2735 
2736 	if (op >= ARRAY_SIZE(gen4_blend_op)) {
2737 		DBG(("%s: fallback due to unhandled blend op: %d\n",
2738 		     __FUNCTION__, op));
2739 		return false;
2740 	}
2741 
2742 	if (op <= PictOpSrc) {
2743 		uint8_t alu = GXinvalid;
2744 
2745 		pixel = 0;
2746 		if (op == PictOpClear)
2747 			alu = GXclear;
2748 		else if (sna_get_pixel_from_rgba(&pixel,
2749 						 color->red,
2750 						 color->green,
2751 						 color->blue,
2752 						 color->alpha,
2753 						 format))
2754 			alu = GXcopy;
2755 
2756 		if (alu != GXinvalid &&
2757 		    sna_blt_fill_boxes(sna, alu,
2758 				       dst_bo, dst->bitsPerPixel,
2759 				       pixel, box, n))
2760 			return true;
2761 
2762 		if (!gen4_check_dst_format(format))
2763 			return false;
2764 
2765 		if (too_large(dst->width, dst->height))
2766 			return sna_tiling_fill_boxes(sna, op, format, color,
2767 						     dst, dst_bo, box, n);
2768 	}
2769 
2770 	if (op == PictOpClear) {
2771 		pixel = 0;
2772 		op = PictOpSrc;
2773 	} else if (!sna_get_pixel_from_rgba(&pixel,
2774 					    color->red,
2775 					    color->green,
2776 					    color->blue,
2777 					    color->alpha,
2778 					    PICT_a8r8g8b8))
2779 		return false;
2780 
2781 	DBG(("%s(%08x x %d)\n", __FUNCTION__, pixel, n));
2782 
2783 	memset(&tmp, 0, sizeof(tmp));
2784 
2785 	tmp.op = op;
2786 
2787 	tmp.dst.pixmap = (PixmapPtr)dst;
2788 	tmp.dst.width  = dst->width;
2789 	tmp.dst.height = dst->height;
2790 	tmp.dst.format = format;
2791 	tmp.dst.bo = dst_bo;
2792 
2793 	sna_render_composite_redirect_init(&tmp);
2794 	if (too_large(dst->width, dst->height)) {
2795 		BoxRec extents;
2796 
2797 		boxes_extents(box, n, &extents);
2798 		if (!sna_render_composite_redirect(sna, &tmp,
2799 						   extents.x1, extents.y1,
2800 						   extents.x2 - extents.x1,
2801 						   extents.y2 - extents.y1,
2802 						   n > 1))
2803 			return sna_tiling_fill_boxes(sna, op, format, color,
2804 						     dst, dst_bo, box, n);
2805 	}
2806 
2807 	gen4_channel_init_solid(sna, &tmp.src, pixel);
2808 
2809 	tmp.is_affine = true;
2810 	tmp.floats_per_vertex = 2;
2811 	tmp.floats_per_rect = 6;
2812 	tmp.u.gen4.wm_kernel = WM_KERNEL;
2813 	tmp.u.gen4.ve_id = 1;
2814 
2815 	if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
2816 		kgem_submit(&sna->kgem);
2817 		if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
2818 			kgem_bo_destroy(&sna->kgem, tmp.src.bo);
2819 			return false;
2820 		}
2821 	}
2822 
2823 	gen4_align_vertex(sna, &tmp);
2824 	gen4_bind_surfaces(sna, &tmp);
2825 
2826 	do {
2827 		gen4_render_fill_rectangle(sna, &tmp,
2828 					   box->x1, box->y1,
2829 					   box->x2 - box->x1,
2830 					   box->y2 - box->y1);
2831 		box++;
2832 	} while (--n);
2833 
2834 	gen4_vertex_flush(sna);
2835 	kgem_bo_destroy(&sna->kgem, tmp.src.bo);
2836 	sna_render_composite_redirect_done(sna, &tmp);
2837 	return true;
2838 }
2839 
2840 static void
2841 gen4_render_fill_op_blt(struct sna *sna, const struct sna_fill_op *op,
2842 			int16_t x, int16_t y, int16_t w, int16_t h)
2843 {
2844 	gen4_render_fill_rectangle(sna, &op->base, x, y, w, h);
2845 }
2846 
2847 fastcall static void
2848 gen4_render_fill_op_box(struct sna *sna,
2849 			const struct sna_fill_op *op,
2850 			const BoxRec *box)
2851 {
2852 	gen4_render_fill_rectangle(sna, &op->base,
2853 				   box->x1, box->y1,
2854 				   box->x2-box->x1, box->y2-box->y1);
2855 }
2856 
2857 fastcall static void
2858 gen4_render_fill_op_boxes(struct sna *sna,
2859 			  const struct sna_fill_op *op,
2860 			  const BoxRec *box,
2861 			  int nbox)
2862 {
2863 	do {
2864 		gen4_render_fill_rectangle(sna, &op->base,
2865 					   box->x1, box->y1,
2866 					   box->x2-box->x1, box->y2-box->y1);
2867 		box++;
2868 	} while (--nbox);
2869 }
2870 
2871 static void
2872 gen4_render_fill_op_done(struct sna *sna, const struct sna_fill_op *op)
2873 {
2874 	if (sna->render.vertex_offset)
2875 		gen4_vertex_flush(sna);
2876 	kgem_bo_destroy(&sna->kgem, op->base.src.bo);
2877 }
2878 
2879 static bool
2880 gen4_render_fill(struct sna *sna, uint8_t alu,
2881 		 PixmapPtr dst, struct kgem_bo *dst_bo,
2882 		 uint32_t color, unsigned flags,
2883 		 struct sna_fill_op *op)
2884 {
2885 	if (sna_blt_fill(sna, alu,
2886 			 dst_bo, dst->drawable.bitsPerPixel,
2887 			 color,
2888 			 op))
2889 		return true;
2890 
2891 	if (!(alu == GXcopy || alu == GXclear) ||
2892 	    too_large(dst->drawable.width, dst->drawable.height))
2893 		return sna_blt_fill(sna, alu,
2894 				    dst_bo, dst->drawable.bitsPerPixel,
2895 				    color,
2896 				    op);
2897 
2898 	if (alu == GXclear)
2899 		color = 0;
2900 
2901 	op->base.op = color == 0 ? PictOpClear : PictOpSrc;
2902 
2903 	op->base.dst.pixmap = dst;
2904 	op->base.dst.width  = dst->drawable.width;
2905 	op->base.dst.height = dst->drawable.height;
2906 	op->base.dst.format = sna_format_for_depth(dst->drawable.depth);
2907 	op->base.dst.bo = dst_bo;
2908 	op->base.dst.x = op->base.dst.y = 0;
2909 
2910 	op->base.need_magic_ca_pass = 0;
2911 	op->base.has_component_alpha = 0;
2912 
2913 	gen4_channel_init_solid(sna, &op->base.src,
2914 				sna_rgba_for_color(color,
2915 						   dst->drawable.depth));
2916 	op->base.mask.bo = NULL;
2917 
2918 	op->base.is_affine = true;
2919 	op->base.floats_per_vertex = 2;
2920 	op->base.floats_per_rect = 6;
2921 	op->base.u.gen4.wm_kernel = WM_KERNEL;
2922 	op->base.u.gen4.ve_id = 1;
2923 
2924 	if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
2925 		kgem_submit(&sna->kgem);
2926 		if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
2927 			kgem_bo_destroy(&sna->kgem, op->base.src.bo);
2928 			return false;
2929 		}
2930 	}
2931 
2932 	gen4_align_vertex(sna, &op->base);
2933 	gen4_bind_surfaces(sna, &op->base);
2934 
2935 	op->blt   = gen4_render_fill_op_blt;
2936 	op->box   = gen4_render_fill_op_box;
2937 	op->boxes = gen4_render_fill_op_boxes;
2938 	op->points = NULL;
2939 	op->done  = gen4_render_fill_op_done;
2940 	return true;
2941 }
2942 
2943 static bool
2944 gen4_render_fill_one_try_blt(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
2945 			     uint32_t color,
2946 			     int16_t x1, int16_t y1, int16_t x2, int16_t y2,
2947 			     uint8_t alu)
2948 {
2949 	BoxRec box;
2950 
2951 	box.x1 = x1;
2952 	box.y1 = y1;
2953 	box.x2 = x2;
2954 	box.y2 = y2;
2955 
2956 	return sna_blt_fill_boxes(sna, alu,
2957 				  bo, dst->drawable.bitsPerPixel,
2958 				  color, &box, 1);
2959 }
2960 
2961 static bool
2962 gen4_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
2963 		     uint32_t color,
2964 		     int16_t x1, int16_t y1,
2965 		     int16_t x2, int16_t y2,
2966 		     uint8_t alu)
2967 {
2968 	struct sna_composite_op tmp;
2969 
2970 	DBG(("%s: color=%08x\n", __FUNCTION__, color));
2971 
2972 	if (gen4_render_fill_one_try_blt(sna, dst, bo, color,
2973 					 x1, y1, x2, y2, alu))
2974 		return true;
2975 
2976 	/* Must use the BLT if we can't RENDER... */
2977 	if (!(alu == GXcopy || alu == GXclear) ||
2978 	    too_large(dst->drawable.width, dst->drawable.height))
2979 		return false;
2980 
2981 	if (alu == GXclear)
2982 		color = 0;
2983 
2984 	tmp.op = color == 0 ? PictOpClear : PictOpSrc;
2985 
2986 	tmp.dst.pixmap = dst;
2987 	tmp.dst.width  = dst->drawable.width;
2988 	tmp.dst.height = dst->drawable.height;
2989 	tmp.dst.format = sna_format_for_depth(dst->drawable.depth);
2990 	tmp.dst.bo = bo;
2991 	tmp.dst.x = tmp.dst.y = 0;
2992 
2993 	gen4_channel_init_solid(sna, &tmp.src,
2994 				sna_rgba_for_color(color,
2995 						   dst->drawable.depth));
2996 	tmp.mask.bo = NULL;
2997 	tmp.mask.filter = SAMPLER_FILTER_NEAREST;
2998 	tmp.mask.repeat = SAMPLER_EXTEND_NONE;
2999 
3000 	tmp.is_affine = true;
3001 	tmp.floats_per_vertex = 2;
3002 	tmp.floats_per_rect = 6;
3003 	tmp.has_component_alpha = false;
3004 	tmp.need_magic_ca_pass = false;
3005 
3006 	tmp.u.gen4.wm_kernel = WM_KERNEL;
3007 	tmp.u.gen4.ve_id = 1;
3008 
3009 	if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
3010 		kgem_submit(&sna->kgem);
3011 		if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
3012 			kgem_bo_destroy(&sna->kgem, tmp.src.bo);
3013 			return false;
3014 		}
3015 	}
3016 
3017 	gen4_align_vertex(sna, &tmp);
3018 	gen4_bind_surfaces(sna, &tmp);
3019 
3020 	gen4_render_fill_rectangle(sna, &tmp, x1, y1, x2 - x1, y2 - y1);
3021 
3022 	gen4_vertex_flush(sna);
3023 	kgem_bo_destroy(&sna->kgem, tmp.src.bo);
3024 
3025 	return true;
3026 }
3027 
3028 static void gen4_render_reset(struct sna *sna)
3029 {
3030 	sna->render_state.gen4.needs_invariant = true;
3031 	sna->render_state.gen4.needs_urb = true;
3032 	sna->render_state.gen4.ve_id = -1;
3033 	sna->render_state.gen4.last_primitive = -1;
3034 	sna->render_state.gen4.last_pipelined_pointers = -1;
3035 
3036 	sna->render_state.gen4.drawrect_offset = -1;
3037 	sna->render_state.gen4.drawrect_limit = -1;
3038 	sna->render_state.gen4.surface_table = 0;
3039 
3040 	if (sna->render.vbo && !kgem_bo_can_map(&sna->kgem, sna->render.vbo)) {
3041 		DBG(("%s: discarding unmappable vbo\n", __FUNCTION__));
3042 		discard_vbo(sna);
3043 	}
3044 
3045 	sna->render.vertex_offset = 0;
3046 	sna->render.nvertex_reloc = 0;
3047 	sna->render.vb_id = 0;
3048 }
3049 
3050 static void gen4_render_fini(struct sna *sna)
3051 {
3052 	kgem_bo_destroy(&sna->kgem, sna->render_state.gen4.general_bo);
3053 }
3054 
3055 static uint32_t gen4_create_vs_unit_state(struct sna_static_stream *stream)
3056 {
3057 	struct gen4_vs_unit_state *vs = sna_static_stream_map(stream, sizeof(*vs), 32);
3058 
3059 	/* Set up the vertex shader to be disabled (passthrough) */
3060 	vs->thread4.nr_urb_entries = URB_VS_ENTRIES;
3061 	vs->thread4.urb_entry_allocation_size = URB_VS_ENTRY_SIZE - 1;
3062 	vs->vs6.vs_enable = 0;
3063 	vs->vs6.vert_cache_disable = 1;
3064 
3065 	return sna_static_stream_offsetof(stream, vs);
3066 }
3067 
3068 static uint32_t gen4_create_sf_state(struct sna_static_stream *stream,
3069 				     uint32_t kernel)
3070 {
3071 	struct gen4_sf_unit_state *sf;
3072 
3073 	sf = sna_static_stream_map(stream, sizeof(*sf), 32);
3074 
3075 	sf->thread0.grf_reg_count = GEN4_GRF_BLOCKS(SF_KERNEL_NUM_GRF);
3076 	sf->thread0.kernel_start_pointer = kernel >> 6;
3077 	sf->thread3.const_urb_entry_read_length = 0;	/* no const URBs */
3078 	sf->thread3.const_urb_entry_read_offset = 0;	/* no const URBs */
3079 	sf->thread3.urb_entry_read_length = 1;	/* 1 URB per vertex */
3080 	/* don't smash vertex header, read start from dw8 */
3081 	sf->thread3.urb_entry_read_offset = 1;
3082 	sf->thread3.dispatch_grf_start_reg = 3;
3083 	sf->thread4.max_threads = GEN4_MAX_SF_THREADS - 1;
3084 	sf->thread4.urb_entry_allocation_size = URB_SF_ENTRY_SIZE - 1;
3085 	sf->thread4.nr_urb_entries = URB_SF_ENTRIES;
3086 	sf->sf5.viewport_transform = false;	/* skip viewport */
3087 	sf->sf6.cull_mode = GEN4_CULLMODE_NONE;
3088 	sf->sf6.scissor = 0;
3089 	sf->sf7.trifan_pv = 2;
3090 	sf->sf6.dest_org_vbias = 0x8;
3091 	sf->sf6.dest_org_hbias = 0x8;
3092 
3093 	return sna_static_stream_offsetof(stream, sf);
3094 }
3095 
3096 static uint32_t gen4_create_sampler_state(struct sna_static_stream *stream,
3097 					  sampler_filter_t src_filter,
3098 					  sampler_extend_t src_extend,
3099 					  sampler_filter_t mask_filter,
3100 					  sampler_extend_t mask_extend)
3101 {
3102 	struct gen4_sampler_state *sampler_state;
3103 
3104 	sampler_state = sna_static_stream_map(stream,
3105 					      sizeof(struct gen4_sampler_state) * 2,
3106 					      32);
3107 	sampler_state_init(&sampler_state[0], src_filter, src_extend);
3108 	sampler_state_init(&sampler_state[1], mask_filter, mask_extend);
3109 
3110 	return sna_static_stream_offsetof(stream, sampler_state);
3111 }
3112 
3113 static void gen4_init_wm_state(struct gen4_wm_unit_state *wm,
3114 			       int gen,
3115 			       bool has_mask,
3116 			       uint32_t kernel,
3117 			       uint32_t sampler)
3118 {
3119 	assert((kernel & 63) == 0);
3120 	wm->thread0.kernel_start_pointer = kernel >> 6;
3121 	wm->thread0.grf_reg_count = GEN4_GRF_BLOCKS(PS_KERNEL_NUM_GRF);
3122 
3123 	wm->thread1.single_program_flow = 0;
3124 
3125 	wm->thread3.const_urb_entry_read_length = 0;
3126 	wm->thread3.const_urb_entry_read_offset = 0;
3127 
3128 	wm->thread3.urb_entry_read_offset = 0;
3129 	wm->thread3.dispatch_grf_start_reg = 3;
3130 
3131 	assert((sampler & 31) == 0);
3132 	wm->wm4.sampler_state_pointer = sampler >> 5;
3133 	wm->wm4.sampler_count = 1;
3134 
3135 	wm->wm5.max_threads = gen >= 045 ? G4X_MAX_WM_THREADS - 1 : GEN4_MAX_WM_THREADS - 1;
3136 	wm->wm5.transposed_urb_read = 0;
3137 	wm->wm5.thread_dispatch_enable = 1;
3138 	/* just use 16-pixel dispatch (4 subspans), don't need to change kernel
3139 	 * start point
3140 	 */
3141 	wm->wm5.enable_16_pix = 1;
3142 	wm->wm5.enable_8_pix = 0;
3143 	wm->wm5.early_depth_test = 1;
3144 
3145 	/* Each pair of attributes (src/mask coords) is two URB entries */
3146 	if (has_mask) {
3147 		wm->thread1.binding_table_entry_count = 3;
3148 		wm->thread3.urb_entry_read_length = 4;
3149 	} else {
3150 		wm->thread1.binding_table_entry_count = 2;
3151 		wm->thread3.urb_entry_read_length = 2;
3152 	}
3153 }
3154 
3155 static uint32_t gen4_create_cc_unit_state(struct sna_static_stream *stream)
3156 {
3157 	uint8_t *ptr, *base;
3158 	int i, j;
3159 
3160 	base = ptr =
3161 		sna_static_stream_map(stream,
3162 				      GEN4_BLENDFACTOR_COUNT*GEN4_BLENDFACTOR_COUNT*64,
3163 				      64);
3164 
3165 	for (i = 0; i < GEN4_BLENDFACTOR_COUNT; i++) {
3166 		for (j = 0; j < GEN4_BLENDFACTOR_COUNT; j++) {
3167 			struct gen4_cc_unit_state *state =
3168 				(struct gen4_cc_unit_state *)ptr;
3169 
3170 			state->cc3.blend_enable =
3171 				!(j == GEN4_BLENDFACTOR_ZERO && i == GEN4_BLENDFACTOR_ONE);
3172 
3173 			state->cc5.logicop_func = 0xc;	/* COPY */
3174 			state->cc5.ia_blend_function = GEN4_BLENDFUNCTION_ADD;
3175 
3176 			/* Fill in alpha blend factors same as color, for the future. */
3177 			state->cc5.ia_src_blend_factor = i;
3178 			state->cc5.ia_dest_blend_factor = j;
3179 
3180 			state->cc6.blend_function = GEN4_BLENDFUNCTION_ADD;
3181 			state->cc6.clamp_post_alpha_blend = 1;
3182 			state->cc6.clamp_pre_alpha_blend = 1;
3183 			state->cc6.src_blend_factor = i;
3184 			state->cc6.dest_blend_factor = j;
3185 
3186 			ptr += 64;
3187 		}
3188 	}
3189 
3190 	return sna_static_stream_offsetof(stream, base);
3191 }
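
/* The cc table is a dense grid of 64-byte unit states indexed by the
 * (src, dst) blend-factor pair, so a lookup is plain arithmetic
 * (illustrative; the actual lookup lives in the state-emission code):
 *
 *	offset = cc_base +
 *		 64 * (src_factor * GEN4_BLENDFACTOR_COUNT + dst_factor);
 */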
3192 
3193 static bool gen4_render_setup(struct sna *sna)
3194 {
3195 	struct gen4_render_state *state = &sna->render_state.gen4;
3196 	struct sna_static_stream general;
3197 	struct gen4_wm_unit_state_padded *wm_state;
3198 	uint32_t sf, wm[KERNEL_COUNT];
3199 	int i, j, k, l, m;
3200 
3201 	sna_static_stream_init(&general);
3202 
3203 	/* Zero pad the start. If you see an offset of 0x0 in the batchbuffer
3204 	 * dumps, you know it points to zero.
3205 	 */
3206 	null_create(&general);
3207 
3208 	sf = sna_static_stream_compile_sf(sna, &general, brw_sf_kernel__mask);
3209 	for (m = 0; m < KERNEL_COUNT; m++) {
3210 		if (wm_kernels[m].size) {
3211 			wm[m] = sna_static_stream_add(&general,
3212 						      wm_kernels[m].data,
3213 						      wm_kernels[m].size,
3214 						      64);
3215 		} else {
3216 			wm[m] = sna_static_stream_compile_wm(sna, &general,
3217 							     wm_kernels[m].data,
3218 							     16);
3219 		}
3220 	}
3221 
3222 	state->vs = gen4_create_vs_unit_state(&general);
3223 	state->sf = gen4_create_sf_state(&general, sf);
3224 
3225 	wm_state = sna_static_stream_map(&general,
3226 					  sizeof(*wm_state) * KERNEL_COUNT *
3227 					  FILTER_COUNT * EXTEND_COUNT *
3228 					  FILTER_COUNT * EXTEND_COUNT,
3229 					  64);
3230 	state->wm = sna_static_stream_offsetof(&general, wm_state);
3231 	for (i = 0; i < FILTER_COUNT; i++) {
3232 		for (j = 0; j < EXTEND_COUNT; j++) {
3233 			for (k = 0; k < FILTER_COUNT; k++) {
3234 				for (l = 0; l < EXTEND_COUNT; l++) {
3235 					uint32_t sampler_state;
3236 
3237 					sampler_state =
3238 						gen4_create_sampler_state(&general,
3239 									  i, j,
3240 									  k, l);
3241 
3242 					for (m = 0; m < KERNEL_COUNT; m++) {
3243 						gen4_init_wm_state(&wm_state->state,
3244 								   sna->kgem.gen,
3245 								   wm_kernels[m].has_mask,
3246 								   wm[m], sampler_state);
3247 						wm_state++;
3248 					}
3249 				}
3250 			}
3251 		}
3252 	}
3253 
3254 	state->cc = gen4_create_cc_unit_state(&general);
3255 
3256 	state->general_bo = sna_static_stream_fini(sna, &general);
3257 	return state->general_bo != NULL;
3258 }
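
/* The wm table built above is indexed to match the loop nesting, so the
 * unit state for a given sampler/kernel combination sits at a computed
 * offset (illustrative; the emission code performs the equivalent):
 *
 *	state->wm + sizeof(struct gen4_wm_unit_state_padded) *
 *		((((src_filter * EXTEND_COUNT + src_extend) *
 *		    FILTER_COUNT + mask_filter) * EXTEND_COUNT +
 *		   mask_extend) * KERNEL_COUNT + kernel);
 */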
3259 
3260 const char *gen4_render_init(struct sna *sna, const char *backend)
3261 {
3262 	if (!gen4_render_setup(sna))
3263 		return backend;
3264 
3265 	sna->kgem.retire = gen4_render_retire;
3266 	sna->kgem.expire = gen4_render_expire;
3267 
3268 #if !NO_COMPOSITE
3269 	sna->render.composite = gen4_render_composite;
3270 	sna->render.prefer_gpu |= PREFER_GPU_RENDER;
3271 #endif
3272 #if !NO_COMPOSITE_SPANS
3273 	sna->render.check_composite_spans = gen4_check_composite_spans;
3274 	sna->render.composite_spans = gen4_render_composite_spans;
3275 	if (0)
3276 		sna->render.prefer_gpu |= PREFER_GPU_SPANS;
3277 #endif
3278 
3279 #if !NO_VIDEO
3280 	sna->render.video = gen4_render_video;
3281 #endif
3282 
3283 #if !NO_COPY_BOXES
3284 	sna->render.copy_boxes = gen4_render_copy_boxes;
3285 #endif
3286 #if !NO_COPY
3287 	sna->render.copy = gen4_render_copy;
3288 #endif
3289 
3290 #if !NO_FILL_BOXES
3291 	sna->render.fill_boxes = gen4_render_fill_boxes;
3292 #endif
3293 #if !NO_FILL
3294 	sna->render.fill = gen4_render_fill;
3295 #endif
3296 #if !NO_FILL_ONE
3297 	sna->render.fill_one = gen4_render_fill_one;
3298 #endif
3299 
3300 	sna->render.flush = gen4_render_flush;
3301 	sna->render.reset = gen4_render_reset;
3302 	sna->render.fini = gen4_render_fini;
3303 
3304 	sna->render.max_3d_size = GEN4_MAX_3D_SIZE;
3305 	sna->render.max_3d_pitch = 1 << 18;
3306 	return sna->kgem.gen >= 045 ? "Eaglelake (gen4.5)" : "Broadwater (gen4)";
3307 }
3308