/*
 * Based on code from intel_uxa.c and i830_xaa.c
 * Copyright 1998-1999 Precision Insight, Inc., Cedar Park, Texas.
 * Copyright (c) 2005 Jesse Barnes <jbarnes@virtuousgeek.org>
 * Copyright (c) 2009-2011 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Chris Wilson <chris@chris-wilson.co.uk>
 *
 */

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include "sna.h"
#include "sna_render.h"
#include "sna_render_inline.h"
#include "sna_reg.h"
#include "rop.h"

#define NO_BLT_COMPOSITE 0
#define NO_BLT_COPY 0
#define NO_BLT_COPY_BOXES 0
#define NO_BLT_FILL 0
#define NO_BLT_FILL_BOXES 0

#ifndef PICT_TYPE_BGRA
#define PICT_TYPE_BGRA 8
#endif

static const uint8_t copy_ROP[] = {
	ROP_0,                  /* GXclear */
	ROP_DSa,                /* GXand */
	ROP_SDna,               /* GXandReverse */
	ROP_S,                  /* GXcopy */
	ROP_DSna,               /* GXandInverted */
	ROP_D,                  /* GXnoop */
	ROP_DSx,                /* GXxor */
	ROP_DSo,                /* GXor */
	ROP_DSon,               /* GXnor */
	ROP_DSxn,               /* GXequiv */
	ROP_Dn,                 /* GXinvert */
	ROP_SDno,               /* GXorReverse */
	ROP_Sn,                 /* GXcopyInverted */
	ROP_DSno,               /* GXorInverted */
	ROP_DSan,               /* GXnand */
	ROP_1                   /* GXset */
};

static const uint8_t fill_ROP[] = {
	ROP_0,
	ROP_DPa,
	ROP_PDna,
	ROP_P,
	ROP_DPna,
	ROP_D,
	ROP_DPx,
	ROP_DPo,
	ROP_DPon,
	ROP_PDxn,
	ROP_Dn,
	ROP_PDno,
	ROP_Pn,
	ROP_DPno,
	ROP_DPan,
	ROP_1
};
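
/*
 * Both tables are indexed by the X11 raster operation (GXclear == 0x0
 * through GXset == 0xf, in the same order as the entries above):
 * copy_ROP maps the alu to a source/destination BLT ROP byte, fill_ROP
 * to its pattern/destination counterpart. A minimal sketch of how a
 * lookup is folded into the BR13 dword, assuming `alu' holds a valid
 * GX* value (this simply mirrors the uses in the code below):
 *
 *	br13 |= fill_ROP[alu] << 16;	(e.g. GXcopy selects ROP_P)
 *	br13 |= copy_ROP[alu] << 16;	(e.g. GXcopy selects ROP_S)
 */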

static void sig_done(struct sna *sna, const struct sna_composite_op *op)
{
	sigtrap_put();
}

static void nop_done(struct sna *sna, const struct sna_composite_op *op)
{
	assert(sna->kgem.nbatch <= KGEM_BATCH_SIZE(&sna->kgem));
	if (sna->kgem.nexec > 1 && __kgem_ring_empty(&sna->kgem)) {
		DBG(("%s: flushing BLT operation on empty ring\n", __FUNCTION__));
		_kgem_submit(&sna->kgem);
	}
	(void)op;
}

static void gen6_blt_copy_done(struct sna *sna, const struct sna_composite_op *op)
{
	struct kgem *kgem = &sna->kgem;

	assert(kgem->nbatch <= KGEM_BATCH_SIZE(kgem));
	if (kgem->nexec > 1 && __kgem_ring_empty(kgem)) {
		DBG(("%s: flushing BLT operation on empty ring\n", __FUNCTION__));
		_kgem_submit(kgem);
		return;
	}

	if (kgem_check_batch(kgem, 3)) {
		uint32_t *b = kgem->batch + kgem->nbatch;
		assert(sna->kgem.mode == KGEM_BLT);
		b[0] = XY_SETUP_CLIP;
		b[1] = b[2] = 0;
		kgem->nbatch += 3;
		assert(kgem->nbatch < kgem->surface);
	}
	assert(sna->kgem.nbatch <= KGEM_BATCH_SIZE(&sna->kgem));
	(void)op;
}

static bool sna_blt_fill_init(struct sna *sna,
			      struct sna_blt_state *blt,
			      struct kgem_bo *bo,
			      int bpp,
			      uint8_t alu,
			      uint32_t pixel)
{
	struct kgem *kgem = &sna->kgem;

	assert(kgem_bo_can_blt(kgem, bo));
	blt->bo[0] = bo;

	blt->br13 = bo->pitch;
	blt->cmd = XY_SCANLINE_BLT;
	if (kgem->gen >= 040 && bo->tiling) {
		blt->cmd |= BLT_DST_TILED;
		blt->br13 >>= 2;
	}
	assert(blt->br13 <= MAXSHORT);

	if (alu == GXclear)
		pixel = 0;
	else if (alu == GXcopy) {
		if (pixel == 0)
			alu = GXclear;
		else if (pixel == -1)
			alu = GXset;
	}

	blt->br13 |= 1<<31 | (fill_ROP[alu] << 16);
	blt->br13 |= sna_br13_color_depth(bpp);

	blt->pixel = pixel;
	blt->bpp = bpp;
	blt->alu = alu;

	kgem_set_mode(kgem, KGEM_BLT, bo);
	if (!kgem_check_batch(kgem, 14) ||
	    !kgem_check_bo_fenced(kgem, bo)) {
		kgem_submit(kgem);
		if (!kgem_check_bo_fenced(kgem, bo))
			return false;
		_kgem_set_mode(kgem, KGEM_BLT);
	}

	if (sna->blt_state.fill_bo != bo->unique_id ||
	    sna->blt_state.fill_pixel != pixel ||
	    sna->blt_state.fill_alu != alu)
	{
		uint32_t *b;

		if (!kgem_check_batch(kgem, 24) ||
		    !kgem_check_reloc(kgem, 1)) {
			_kgem_submit(kgem);
			if (!kgem_check_bo_fenced(kgem, bo))
				return false;
			_kgem_set_mode(kgem, KGEM_BLT);
		}
		kgem_bcs_set_tiling(kgem, NULL, bo);

		assert(sna->kgem.mode == KGEM_BLT);
		b = kgem->batch + kgem->nbatch;
		if (sna->kgem.gen >= 0100) {
			b[0] = XY_SETUP_MONO_PATTERN_SL_BLT | 8;
			if (bpp == 32)
				b[0] |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
			if (bo->tiling)
				b[0] |= BLT_DST_TILED;
			b[1] = blt->br13;
			b[2] = 0;
			b[3] = 0;
			*(uint64_t *)(b+4) =
				kgem_add_reloc64(kgem, kgem->nbatch + 4, bo,
						 I915_GEM_DOMAIN_RENDER << 16 |
						 I915_GEM_DOMAIN_RENDER |
						 KGEM_RELOC_FENCED,
						 0);
			b[6] = pixel;
			b[7] = pixel;
			b[8] = 0;
			b[9] = 0;
			kgem->nbatch += 10;
		} else {
			b[0] = XY_SETUP_MONO_PATTERN_SL_BLT | 7;
			if (bpp == 32)
				b[0] |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
			if (bo->tiling && kgem->gen >= 040)
				b[0] |= BLT_DST_TILED;
			b[1] = blt->br13;
			b[2] = 0;
			b[3] = 0;
			b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, bo,
					      I915_GEM_DOMAIN_RENDER << 16 |
					      I915_GEM_DOMAIN_RENDER |
					      KGEM_RELOC_FENCED,
					      0);
			b[5] = pixel;
			b[6] = pixel;
			b[7] = 0;
			b[8] = 0;
			kgem->nbatch += 9;
		}
		assert(kgem->nbatch < kgem->surface);

		sna->blt_state.fill_bo = bo->unique_id;
		sna->blt_state.fill_pixel = pixel;
		sna->blt_state.fill_alu = alu;
	}

	assert(sna->kgem.mode == KGEM_BLT);
	return true;
}

noinline static void __sna_blt_fill_begin(struct sna *sna,
					  const struct sna_blt_state *blt)
{
	struct kgem *kgem = &sna->kgem;
	uint32_t *b;

	kgem_bcs_set_tiling(&sna->kgem, NULL, blt->bo[0]);

	assert(kgem->mode == KGEM_BLT);
	b = kgem->batch + kgem->nbatch;
	if (sna->kgem.gen >= 0100) {
		b[0] = XY_SETUP_MONO_PATTERN_SL_BLT | 8;
		if (blt->bpp == 32)
			b[0] |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
		if (blt->bo[0]->tiling)
			b[0] |= BLT_DST_TILED;
		b[1] = blt->br13;
		b[2] = 0;
		b[3] = 0;
		*(uint64_t *)(b+4) =
			kgem_add_reloc64(kgem, kgem->nbatch + 4, blt->bo[0],
					 I915_GEM_DOMAIN_RENDER << 16 |
					 I915_GEM_DOMAIN_RENDER |
					 KGEM_RELOC_FENCED,
					 0);
		b[6] = blt->pixel;
		b[7] = blt->pixel;
		b[8] = 0;
		b[9] = 0;
		kgem->nbatch += 10;
	} else {
		b[0] = XY_SETUP_MONO_PATTERN_SL_BLT | 7;
		if (blt->bpp == 32)
			b[0] |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
		if (blt->bo[0]->tiling && kgem->gen >= 040)
			b[0] |= BLT_DST_TILED;
		b[1] = blt->br13;
		b[2] = 0;
		b[3] = 0;
		b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, blt->bo[0],
				      I915_GEM_DOMAIN_RENDER << 16 |
				      I915_GEM_DOMAIN_RENDER |
				      KGEM_RELOC_FENCED,
				      0);
		b[5] = blt->pixel;
		b[6] = blt->pixel;
		b[7] = 0;
		b[8] = 0;
		kgem->nbatch += 9;
	}
}

inline static void sna_blt_fill_begin(struct sna *sna,
				      const struct sna_blt_state *blt)
{
	struct kgem *kgem = &sna->kgem;

	if (kgem->nreloc) {
		_kgem_submit(kgem);
		_kgem_set_mode(kgem, KGEM_BLT);
		kgem_bcs_set_tiling(kgem, NULL, blt->bo[0]);
		assert(kgem->nbatch == 0);
	}

	__sna_blt_fill_begin(sna, blt);
}

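/*
 * Once sna_blt_fill_init() has emitted the XY_SETUP_MONO_PATTERN_SL_BLT
 * that latches the pitch, ROP and fill colour, each rectangle costs only
 * the three dwords written below: the XY_SCANLINE_BLT opcode plus the
 * two packed (y << 16 | x) corners. If the batch fills up mid-run,
 * sna_blt_fill_begin() re-emits the setup into a fresh batch first.
 */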
inline static void sna_blt_fill_one(struct sna *sna,
				    const struct sna_blt_state *blt,
				    int16_t x, int16_t y,
				    int16_t width, int16_t height)
{
	struct kgem *kgem = &sna->kgem;
	uint32_t *b;

	DBG(("%s: (%d, %d) x (%d, %d): %08x\n",
	     __FUNCTION__, x, y, width, height, blt->pixel));

	assert(x >= 0);
	assert(y >= 0);
	assert((y+height) * blt->bo[0]->pitch <= kgem_bo_size(blt->bo[0]));

	if (!kgem_check_batch(kgem, 3))
		sna_blt_fill_begin(sna, blt);

	assert(sna->kgem.mode == KGEM_BLT);
	b = kgem->batch + kgem->nbatch;
	kgem->nbatch += 3;
	assert(kgem->nbatch < kgem->surface);

	b[0] = blt->cmd;
	b[1] = y << 16 | x;
	b[2] = b[1] + (height << 16 | width);
}

static bool sna_blt_copy_init(struct sna *sna,
			      struct sna_blt_state *blt,
			      struct kgem_bo *src,
			      struct kgem_bo *dst,
			      int bpp,
			      uint8_t alu)
{
	struct kgem *kgem = &sna->kgem;

	assert(kgem_bo_can_blt(kgem, src));
	assert(kgem_bo_can_blt(kgem, dst));

	blt->bo[0] = src;
	blt->bo[1] = dst;

	blt->cmd = XY_SRC_COPY_BLT_CMD | (kgem->gen >= 0100 ? 8 : 6);
	if (bpp == 32)
		blt->cmd |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;

	blt->pitch[0] = src->pitch;
	if (kgem->gen >= 040 && src->tiling) {
		blt->cmd |= BLT_SRC_TILED;
		blt->pitch[0] >>= 2;
	}
	assert(blt->pitch[0] <= MAXSHORT);

	blt->pitch[1] = dst->pitch;
	if (kgem->gen >= 040 && dst->tiling) {
		blt->cmd |= BLT_DST_TILED;
		blt->pitch[1] >>= 2;
	}
	assert(blt->pitch[1] <= MAXSHORT);

	blt->overwrites = alu == GXcopy || alu == GXclear || alu == GXset;
	blt->br13 = (copy_ROP[alu] << 16) | blt->pitch[1];
	blt->br13 |= sna_br13_color_depth(bpp);

	kgem_set_mode(kgem, KGEM_BLT, dst);
	if (!kgem_check_many_bo_fenced(kgem, src, dst, NULL)) {
		kgem_submit(kgem);
		if (!kgem_check_many_bo_fenced(kgem, src, dst, NULL))
			return false;
		_kgem_set_mode(kgem, KGEM_BLT);
	}
	kgem_bcs_set_tiling(&sna->kgem, src, dst);

	sna->blt_state.fill_bo = 0;
	return true;
}

static bool sna_blt_alpha_fixup_init(struct sna *sna,
				     struct sna_blt_state *blt,
				     struct kgem_bo *src,
				     struct kgem_bo *dst,
				     int bpp, uint32_t alpha)
{
	struct kgem *kgem = &sna->kgem;

	DBG(("%s: dst handle=%d, src handle=%d, bpp=%d, fixup=%08x\n",
	     __FUNCTION__, dst->handle, src->handle, bpp, alpha));
	assert(kgem_bo_can_blt(kgem, src));
	assert(kgem_bo_can_blt(kgem, dst));

	blt->bo[0] = src;
	blt->bo[1] = dst;

	blt->cmd = XY_FULL_MONO_PATTERN_BLT | (kgem->gen >= 0100 ? 12 : 10);
	blt->pitch[0] = src->pitch;
	if (kgem->gen >= 040 && src->tiling) {
		blt->cmd |= BLT_SRC_TILED;
		blt->pitch[0] >>= 2;
	}
	assert(blt->pitch[0] <= MAXSHORT);

	blt->pitch[1] = dst->pitch;
	if (kgem->gen >= 040 && dst->tiling) {
		blt->cmd |= BLT_DST_TILED;
		blt->pitch[1] >>= 2;
	}
	assert(blt->pitch[1] <= MAXSHORT);

	blt->overwrites = 1;
	blt->br13 = (0xfc << 16) | blt->pitch[1];
	blt->br13 |= sna_br13_color_depth(bpp);
	if (bpp == 32)
		blt->cmd |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;

	blt->pixel = alpha;

	kgem_set_mode(kgem, KGEM_BLT, dst);
	if (!kgem_check_many_bo_fenced(kgem, src, dst, NULL)) {
		kgem_submit(kgem);
		if (!kgem_check_many_bo_fenced(kgem, src, dst, NULL))
			return false;
		_kgem_set_mode(kgem, KGEM_BLT);
	}
	kgem_bcs_set_tiling(&sna->kgem, src, dst);

	sna->blt_state.fill_bo = 0;
	return true;
}

static void sna_blt_alpha_fixup_one(struct sna *sna,
				    const struct sna_blt_state *blt,
				    int src_x, int src_y,
				    int width, int height,
				    int dst_x, int dst_y)
{
	struct kgem *kgem = &sna->kgem;
	uint32_t *b;

	DBG(("%s: (%d, %d) -> (%d, %d) x (%d, %d)\n",
	     __FUNCTION__, src_x, src_y, dst_x, dst_y, width, height));

	assert(src_x >= 0);
	assert(src_y >= 0);
	assert((src_y + height) * blt->bo[0]->pitch <= kgem_bo_size(blt->bo[0]));
	assert(dst_x >= 0);
	assert(dst_y >= 0);
	assert((dst_y + height) * blt->bo[1]->pitch <= kgem_bo_size(blt->bo[1]));
	assert(width > 0);
	assert(height > 0);

	if (!kgem_check_batch(kgem, 14) ||
	    !kgem_check_reloc(kgem, 2)) {
		_kgem_submit(kgem);
		_kgem_set_mode(kgem, KGEM_BLT);
		kgem_bcs_set_tiling(&sna->kgem, blt->bo[0], blt->bo[1]);
	}

	assert(sna->kgem.mode == KGEM_BLT);
	b = kgem->batch + kgem->nbatch;
	b[0] = blt->cmd;
	b[1] = blt->br13;
	b[2] = (dst_y << 16) | dst_x;
	b[3] = ((dst_y + height) << 16) | (dst_x + width);
	if (sna->kgem.gen >= 0100) {
		*(uint64_t *)(b+4) =
			kgem_add_reloc64(kgem, kgem->nbatch + 4, blt->bo[1],
					 I915_GEM_DOMAIN_RENDER << 16 |
					 I915_GEM_DOMAIN_RENDER |
					 KGEM_RELOC_FENCED,
					 0);
		b[6] = blt->pitch[0];
		b[7] = (src_y << 16) | src_x;
		*(uint64_t *)(b+8) =
			kgem_add_reloc64(kgem, kgem->nbatch + 8, blt->bo[0],
					 I915_GEM_DOMAIN_RENDER << 16 |
					 KGEM_RELOC_FENCED,
					 0);
		b[10] = blt->pixel;
		b[11] = blt->pixel;
		b[12] = 0;
		b[13] = 0;
		kgem->nbatch += 14;
	} else {
		b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, blt->bo[1],
				      I915_GEM_DOMAIN_RENDER << 16 |
				      I915_GEM_DOMAIN_RENDER |
				      KGEM_RELOC_FENCED,
				      0);
		b[5] = blt->pitch[0];
		b[6] = (src_y << 16) | src_x;
		b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, blt->bo[0],
				      I915_GEM_DOMAIN_RENDER << 16 |
				      KGEM_RELOC_FENCED,
				      0);
		b[8] = blt->pixel;
		b[9] = blt->pixel;
		b[10] = 0;
		b[11] = 0;
		kgem->nbatch += 12;
	}
	assert(kgem->nbatch < kgem->surface);
}

static void sna_blt_copy_one(struct sna *sna,
			     const struct sna_blt_state *blt,
			     int src_x, int src_y,
			     int width, int height,
			     int dst_x, int dst_y)
{
	struct kgem *kgem = &sna->kgem;
	uint32_t *b;

	DBG(("%s: (%d, %d) -> (%d, %d) x (%d, %d)\n",
	     __FUNCTION__, src_x, src_y, dst_x, dst_y, width, height));

	assert(src_x >= 0);
	assert(src_y >= 0);
	assert((src_y + height) * blt->bo[0]->pitch <= kgem_bo_size(blt->bo[0]));
	assert(dst_x >= 0);
	assert(dst_y >= 0);
	assert((dst_y + height) * blt->bo[1]->pitch <= kgem_bo_size(blt->bo[1]));
	assert(width > 0);
	assert(height > 0);

	/* Compare against a previous fill: if the last command in the
	 * batch was an XY_COLOR_BLT covering exactly this rectangle,
	 * rewrite it in place as the copy rather than appending a second
	 * blit over the same pixels.
	 */
	if (blt->overwrites &&
	    kgem->reloc[kgem->nreloc-1].target_handle == blt->bo[1]->target_handle) {
		if (sna->kgem.gen >= 0100) {
			if (kgem->nbatch >= 7 &&
			    kgem->batch[kgem->nbatch-7] == (XY_COLOR_BLT | (blt->cmd & (BLT_DST_TILED | BLT_WRITE_ALPHA | BLT_WRITE_RGB)) | 5) &&
			    kgem->batch[kgem->nbatch-5] == ((uint32_t)dst_y << 16 | (uint16_t)dst_x) &&
			    kgem->batch[kgem->nbatch-4] == ((uint32_t)(dst_y+height) << 16 | (uint16_t)(dst_x+width))) {
				DBG(("%s: replacing last fill\n", __FUNCTION__));
				if (kgem_check_batch(kgem, 3)) {
					assert(kgem->mode == KGEM_BLT);
					b = kgem->batch + kgem->nbatch - 7;
					b[0] = blt->cmd;
					b[1] = blt->br13;
					b[6] = (src_y << 16) | src_x;
					b[7] = blt->pitch[0];
					*(uint64_t *)(b+8) =
						kgem_add_reloc64(kgem, kgem->nbatch + 8 - 7, blt->bo[0],
								 I915_GEM_DOMAIN_RENDER << 16 |
								 KGEM_RELOC_FENCED,
								 0);
					kgem->nbatch += 3;
					assert(kgem->nbatch < kgem->surface);
					return;
				}
				kgem->nbatch -= 7;
				kgem->nreloc--;
			}
		} else {
			if (kgem->nbatch >= 6 &&
			    kgem->batch[kgem->nbatch-6] == (XY_COLOR_BLT | (blt->cmd & (BLT_DST_TILED | BLT_WRITE_ALPHA | BLT_WRITE_RGB)) | 4) &&
			    kgem->batch[kgem->nbatch-4] == ((uint32_t)dst_y << 16 | (uint16_t)dst_x) &&
			    kgem->batch[kgem->nbatch-3] == ((uint32_t)(dst_y+height) << 16 | (uint16_t)(dst_x+width))) {
				DBG(("%s: replacing last fill\n", __FUNCTION__));
				if (kgem_check_batch(kgem, 8-6)) {
					assert(kgem->mode == KGEM_BLT);
					b = kgem->batch + kgem->nbatch - 6;
					b[0] = blt->cmd;
					b[1] = blt->br13;
					b[5] = (src_y << 16) | src_x;
					b[6] = blt->pitch[0];
					b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7 - 6, blt->bo[0],
							      I915_GEM_DOMAIN_RENDER << 16 |
							      KGEM_RELOC_FENCED,
							      0);
					kgem->nbatch += 8 - 6;
					assert(kgem->nbatch < kgem->surface);
					return;
				}
				kgem->nbatch -= 6;
				kgem->nreloc--;
			}
		}
	}

	if (!kgem_check_batch(kgem, 10) ||
	    !kgem_check_reloc(kgem, 2)) {
		_kgem_submit(kgem);
		_kgem_set_mode(kgem, KGEM_BLT);
		kgem_bcs_set_tiling(&sna->kgem, blt->bo[0], blt->bo[1]);
	}

	assert(sna->kgem.mode == KGEM_BLT);
	b = kgem->batch + kgem->nbatch;
	b[0] = blt->cmd;
	b[1] = blt->br13;
	b[2] = (dst_y << 16) | dst_x;
	b[3] = ((dst_y + height) << 16) | (dst_x + width);
	if (kgem->gen >= 0100) {
		*(uint64_t *)(b+4) =
			kgem_add_reloc64(kgem, kgem->nbatch + 4, blt->bo[1],
					 I915_GEM_DOMAIN_RENDER << 16 |
					 I915_GEM_DOMAIN_RENDER |
					 KGEM_RELOC_FENCED,
					 0);
		b[6] = (src_y << 16) | src_x;
		b[7] = blt->pitch[0];
		*(uint64_t *)(b+8) =
			kgem_add_reloc64(kgem, kgem->nbatch + 8, blt->bo[0],
					 I915_GEM_DOMAIN_RENDER << 16 |
					 KGEM_RELOC_FENCED,
					 0);
		kgem->nbatch += 10;
	} else {
		b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, blt->bo[1],
				      I915_GEM_DOMAIN_RENDER << 16 |
				      I915_GEM_DOMAIN_RENDER |
				      KGEM_RELOC_FENCED,
				      0);
		b[5] = (src_y << 16) | src_x;
		b[6] = blt->pitch[0];
		b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, blt->bo[0],
				      I915_GEM_DOMAIN_RENDER << 16 |
				      KGEM_RELOC_FENCED,
				      0);
		kgem->nbatch += 8;
	}
	assert(kgem->nbatch < kgem->surface);
}

bool
sna_get_rgba_from_pixel(uint32_t pixel,
			uint16_t *red,
			uint16_t *green,
			uint16_t *blue,
			uint16_t *alpha,
			uint32_t format)
{
	int rbits, bbits, gbits, abits;
	int rshift, bshift, gshift, ashift;

	rbits = PICT_FORMAT_R(format);
	gbits = PICT_FORMAT_G(format);
	bbits = PICT_FORMAT_B(format);
	abits = PICT_FORMAT_A(format);

	if (PICT_FORMAT_TYPE(format) == PICT_TYPE_A) {
		rshift = gshift = bshift = ashift = 0;
	} else if (PICT_FORMAT_TYPE(format) == PICT_TYPE_ARGB) {
		bshift = 0;
		gshift = bbits;
		rshift = gshift + gbits;
		ashift = rshift + rbits;
	} else if (PICT_FORMAT_TYPE(format) == PICT_TYPE_ABGR) {
		rshift = 0;
		gshift = rbits;
		bshift = gshift + gbits;
		ashift = bshift + bbits;
	} else if (PICT_FORMAT_TYPE(format) == PICT_TYPE_BGRA) {
		ashift = 0;
		rshift = abits;
		if (abits == 0)
			rshift = PICT_FORMAT_BPP(format) - (rbits+gbits+bbits);
		gshift = rshift + rbits;
		bshift = gshift + gbits;
	} else {
		return false;
	}

	if (rbits) {
		*red = ((pixel >> rshift) & ((1 << rbits) - 1)) << (16 - rbits);
		while (rbits < 16) {
			*red |= *red >> rbits;
			rbits <<= 1;
		}
	} else
		*red = 0;

	if (gbits) {
		*green = ((pixel >> gshift) & ((1 << gbits) - 1)) << (16 - gbits);
		while (gbits < 16) {
			*green |= *green >> gbits;
			gbits <<= 1;
		}
	} else
		*green = 0;

	if (bbits) {
		*blue = ((pixel >> bshift) & ((1 << bbits) - 1)) << (16 - bbits);
		while (bbits < 16) {
			*blue |= *blue >> bbits;
			bbits <<= 1;
		}
	} else
		*blue = 0;

	if (abits) {
		*alpha = ((pixel >> ashift) & ((1 << abits) - 1)) << (16 - abits);
		while (abits < 16) {
			*alpha |= *alpha >> abits;
			abits <<= 1;
		}
	} else
		*alpha = 0xffff;

	return true;
}
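
/*
 * The replication loops above widen an n-bit channel to 16 bits by
 * repeatedly OR-ing in the value shifted right by its current width,
 * so a full-scale input maps to exactly 0xffff. A worked example
 * (assuming PICT_r5g6b5 and pixel 0xf800, i.e. pure red): the 5 red
 * bits expand as 0xf800 -> 0xffc0 -> 0xffff.
 */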

bool
_sna_get_pixel_from_rgba(uint32_t * pixel,
			uint16_t red,
			uint16_t green,
			uint16_t blue,
			uint16_t alpha,
			uint32_t format)
{
	int rbits, bbits, gbits, abits;
	int rshift, bshift, gshift, ashift;

	rbits = PICT_FORMAT_R(format);
	gbits = PICT_FORMAT_G(format);
	bbits = PICT_FORMAT_B(format);
	abits = PICT_FORMAT_A(format);
	if (abits == 0)
	    abits = PICT_FORMAT_BPP(format) - (rbits+gbits+bbits);

	if (PICT_FORMAT_TYPE(format) == PICT_TYPE_A) {
		*pixel = alpha >> (16 - abits);
		return true;
	}

	if (!PICT_FORMAT_COLOR(format))
		return false;

	if (PICT_FORMAT_TYPE(format) == PICT_TYPE_ARGB) {
		bshift = 0;
		gshift = bbits;
		rshift = gshift + gbits;
		ashift = rshift + rbits;
	} else if (PICT_FORMAT_TYPE(format) == PICT_TYPE_ABGR) {
		rshift = 0;
		gshift = rbits;
		bshift = gshift + gbits;
		ashift = bshift + bbits;
	} else if (PICT_FORMAT_TYPE(format) == PICT_TYPE_BGRA) {
		ashift = 0;
		rshift = abits;
		gshift = rshift + rbits;
		bshift = gshift + gbits;
	} else
		return false;

	*pixel = 0;
	*pixel |= (blue  >> (16 - bbits)) << bshift;
	*pixel |= (green >> (16 - gbits)) << gshift;
	*pixel |= (red   >> (16 - rbits)) << rshift;
	*pixel |= (alpha >> (16 - abits)) << ashift;

	return true;
}

uint32_t
sna_rgba_for_color(uint32_t color, int depth)
{
	return color_convert(color, sna_format_for_depth(depth), PICT_a8r8g8b8);
}

uint32_t
sna_rgba_to_color(uint32_t rgba, uint32_t format)
{
	return color_convert(rgba, PICT_a8r8g8b8, format);
}

static uint32_t
get_pixel(PicturePtr picture)
{
	PixmapPtr pixmap = get_drawable_pixmap(picture->pDrawable);

	DBG(("%s: %p\n", __FUNCTION__, pixmap));

	if (!sna_pixmap_move_to_cpu(pixmap, MOVE_READ))
		return 0;

	switch (pixmap->drawable.bitsPerPixel) {
	case 32: return *(uint32_t *)pixmap->devPrivate.ptr;
	case 16: return *(uint16_t *)pixmap->devPrivate.ptr;
	default: return *(uint8_t *)pixmap->devPrivate.ptr;
	}
}

static uint32_t
get_solid_color(PicturePtr picture, uint32_t format)
{
	if (picture->pSourcePict) {
		PictSolidFill *fill = (PictSolidFill *)picture->pSourcePict;
		return color_convert(fill->color, PICT_a8r8g8b8, format);
	} else
		return color_convert(get_pixel(picture), picture->format, format);
}

static bool
is_solid(PicturePtr picture)
{
	if (picture->pSourcePict) {
		if (picture->pSourcePict->type == SourcePictTypeSolidFill)
			return true;
	}

	if (picture->pDrawable) {
		if (picture->pDrawable->width  == 1 &&
		    picture->pDrawable->height == 1 &&
		    picture->repeat)
			return true;
	}

	return false;
}

bool
sna_picture_is_solid(PicturePtr picture, uint32_t *color)
{
	if (!is_solid(picture))
		return false;

	if (color)
		*color = get_solid_color(picture, PICT_a8r8g8b8);
	return true;
}

static bool
pixel_is_transparent(uint32_t pixel, uint32_t format)
{
	unsigned int abits;

	abits = PICT_FORMAT_A(format);
	if (!abits)
		return false;

	if (PICT_FORMAT_TYPE(format) == PICT_TYPE_A ||
	    PICT_FORMAT_TYPE(format) == PICT_TYPE_BGRA) {
		return (pixel & ((1 << abits) - 1)) == 0;
	} else if (PICT_FORMAT_TYPE(format) == PICT_TYPE_ARGB ||
		   PICT_FORMAT_TYPE(format) == PICT_TYPE_ABGR) {
		unsigned int ashift = PICT_FORMAT_BPP(format) - abits;
		return (pixel >> ashift) == 0;
	} else
		return false;
}

static bool
pixel_is_opaque(uint32_t pixel, uint32_t format)
{
	unsigned int abits;

	abits = PICT_FORMAT_A(format);
	if (!abits)
		return true;

	if (PICT_FORMAT_TYPE(format) == PICT_TYPE_A ||
	    PICT_FORMAT_TYPE(format) == PICT_TYPE_BGRA) {
		return (pixel & ((1 << abits) - 1)) == (unsigned)((1 << abits) - 1);
	} else if (PICT_FORMAT_TYPE(format) == PICT_TYPE_ARGB ||
		   PICT_FORMAT_TYPE(format) == PICT_TYPE_ABGR) {
		unsigned int ashift = PICT_FORMAT_BPP(format) - abits;
		return (pixel >> ashift) == (unsigned)((1 << abits) - 1);
	} else
		return false;
}

static bool
pixel_is_white(uint32_t pixel, uint32_t format)
{
	switch (PICT_FORMAT_TYPE(format)) {
	case PICT_TYPE_A:
	case PICT_TYPE_ARGB:
	case PICT_TYPE_ABGR:
	case PICT_TYPE_BGRA:
		return pixel == ((1U << PICT_FORMAT_BPP(format)) - 1);
	default:
		return false;
	}
}

static bool
is_opaque_solid(PicturePtr picture)
{
	if (picture->pSourcePict) {
		PictSolidFill *fill = (PictSolidFill *) picture->pSourcePict;
		return (fill->color >> 24) == 0xff;
	} else
		return pixel_is_opaque(get_pixel(picture), picture->format);
}

static bool
is_white(PicturePtr picture)
{
	if (picture->pSourcePict) {
		PictSolidFill *fill = (PictSolidFill *) picture->pSourcePict;
		return fill->color == 0xffffffff;
	} else
		return pixel_is_white(get_pixel(picture), picture->format);
}

static bool
is_transparent(PicturePtr picture)
{
	if (picture->pSourcePict) {
		PictSolidFill *fill = (PictSolidFill *) picture->pSourcePict;
		return fill->color == 0;
	} else
		return pixel_is_transparent(get_pixel(picture), picture->format);
}

bool
sna_composite_mask_is_opaque(PicturePtr mask)
{
	if (mask->componentAlpha && PICT_FORMAT_RGB(mask->format))
		return is_solid(mask) && is_white(mask);
	else if (!PICT_FORMAT_A(mask->format))
		return true;
	else if (mask->pSourcePict) {
		PictSolidFill *fill = (PictSolidFill *) mask->pSourcePict;
		return (fill->color >> 24) == 0xff;
	} else {
		struct sna_pixmap *priv;
		assert(mask->pDrawable);

		if (mask->pDrawable->width  == 1 &&
		    mask->pDrawable->height == 1 &&
		    mask->repeat)
			return pixel_is_opaque(get_pixel(mask), mask->format);

		if (mask->transform)
			return false;

		priv = sna_pixmap_from_drawable(mask->pDrawable);
		if (priv == NULL || !priv->clear)
			return false;

		return pixel_is_opaque(priv->clear_color, mask->format);
	}
}

fastcall
static void blt_composite_fill(struct sna *sna,
			       const struct sna_composite_op *op,
			       const struct sna_composite_rectangles *r)
{
	int x1, x2, y1, y2;

	x1 = r->dst.x + op->dst.x;
	y1 = r->dst.y + op->dst.y;
	x2 = x1 + r->width;
	y2 = y1 + r->height;

	if (x1 < 0)
		x1 = 0;
	if (y1 < 0)
		y1 = 0;

	if (x2 > op->dst.width)
		x2 = op->dst.width;
	if (y2 > op->dst.height)
		y2 = op->dst.height;

	if (x2 <= x1 || y2 <= y1)
		return;

	sna_blt_fill_one(sna, &op->u.blt, x1, y1, x2-x1, y2-y1);
}

fastcall
static void blt_composite_fill__cpu(struct sna *sna,
				    const struct sna_composite_op *op,
				    const struct sna_composite_rectangles *r)
{
	int x1, x2, y1, y2;

	x1 = r->dst.x + op->dst.x;
	y1 = r->dst.y + op->dst.y;
	x2 = x1 + r->width;
	y2 = y1 + r->height;

	if (x1 < 0)
		x1 = 0;
	if (y1 < 0)
		y1 = 0;

	if (x2 > op->dst.width)
		x2 = op->dst.width;
	if (y2 > op->dst.height)
		y2 = op->dst.height;

	if (x2 <= x1 || y2 <= y1)
		return;

	assert(op->dst.pixmap->devPrivate.ptr);
	assert(op->dst.pixmap->devKind);
	sigtrap_assert_active();
	pixman_fill(op->dst.pixmap->devPrivate.ptr,
		    op->dst.pixmap->devKind / sizeof(uint32_t),
		    op->dst.pixmap->drawable.bitsPerPixel,
		    x1, y1, x2-x1, y2-y1,
		    op->u.blt.pixel);
}

fastcall static void
blt_composite_fill_box_no_offset__cpu(struct sna *sna,
				      const struct sna_composite_op *op,
				      const BoxRec *box)
{
	assert(box->x1 >= 0);
	assert(box->y1 >= 0);
	assert(box->x2 <= op->dst.pixmap->drawable.width);
	assert(box->y2 <= op->dst.pixmap->drawable.height);

	assert(op->dst.pixmap->devPrivate.ptr);
	assert(op->dst.pixmap->devKind);
	sigtrap_assert_active();
	pixman_fill(op->dst.pixmap->devPrivate.ptr,
		    op->dst.pixmap->devKind / sizeof(uint32_t),
		    op->dst.pixmap->drawable.bitsPerPixel,
		    box->x1, box->y1, box->x2-box->x1, box->y2-box->y1,
		    op->u.blt.pixel);
}

static void
blt_composite_fill_boxes_no_offset__cpu(struct sna *sna,
					const struct sna_composite_op *op,
					const BoxRec *box, int n)
{
	do {
		assert(box->x1 >= 0);
		assert(box->y1 >= 0);
		assert(box->x2 <= op->dst.pixmap->drawable.width);
		assert(box->y2 <= op->dst.pixmap->drawable.height);

		assert(op->dst.pixmap->devPrivate.ptr);
		assert(op->dst.pixmap->devKind);
		sigtrap_assert_active();
		pixman_fill(op->dst.pixmap->devPrivate.ptr,
			    op->dst.pixmap->devKind / sizeof(uint32_t),
			    op->dst.pixmap->drawable.bitsPerPixel,
			    box->x1, box->y1, box->x2-box->x1, box->y2-box->y1,
			    op->u.blt.pixel);
		box++;
	} while (--n);
}

fastcall static void
blt_composite_fill_box__cpu(struct sna *sna,
			    const struct sna_composite_op *op,
			    const BoxRec *box)
{
	assert(box->x1 + op->dst.x >= 0);
	assert(box->y1 + op->dst.y >= 0);
	assert(box->x2 + op->dst.x <= op->dst.pixmap->drawable.width);
	assert(box->y2 + op->dst.y <= op->dst.pixmap->drawable.height);

	assert(op->dst.pixmap->devPrivate.ptr);
	assert(op->dst.pixmap->devKind);
	sigtrap_assert_active();
	pixman_fill(op->dst.pixmap->devPrivate.ptr,
		    op->dst.pixmap->devKind / sizeof(uint32_t),
		    op->dst.pixmap->drawable.bitsPerPixel,
		    box->x1 + op->dst.x, box->y1 + op->dst.y,
		    box->x2 - box->x1, box->y2 - box->y1,
		    op->u.blt.pixel);
}

static void
blt_composite_fill_boxes__cpu(struct sna *sna,
			      const struct sna_composite_op *op,
			      const BoxRec *box, int n)
{
	do {
		assert(box->x1 + op->dst.x >= 0);
		assert(box->y1 + op->dst.y >= 0);
		assert(box->x2 + op->dst.x <= op->dst.pixmap->drawable.width);
		assert(box->y2 + op->dst.y <= op->dst.pixmap->drawable.height);

		assert(op->dst.pixmap->devPrivate.ptr);
		assert(op->dst.pixmap->devKind);
		sigtrap_assert_active();
		pixman_fill(op->dst.pixmap->devPrivate.ptr,
			    op->dst.pixmap->devKind / sizeof(uint32_t),
			    op->dst.pixmap->drawable.bitsPerPixel,
			    box->x1 + op->dst.x, box->y1 + op->dst.y,
			    box->x2 - box->x1, box->y2 - box->y1,
			    op->u.blt.pixel);
		box++;
	} while (--n);
}

inline static void _sna_blt_fill_box(struct sna *sna,
				     const struct sna_blt_state *blt,
				     const BoxRec *box)
{
	struct kgem *kgem = &sna->kgem;
	uint32_t *b;

	DBG(("%s: (%d, %d), (%d, %d): %08x\n", __FUNCTION__,
	     box->x1, box->y1, box->x2, box->y2,
	     blt->pixel));

	assert(box->x1 >= 0);
	assert(box->y1 >= 0);
	assert(box->y2 * blt->bo[0]->pitch <= kgem_bo_size(blt->bo[0]));

	if (!kgem_check_batch(kgem, 3))
		sna_blt_fill_begin(sna, blt);

	assert(sna->kgem.mode == KGEM_BLT);
	b = kgem->batch + kgem->nbatch;
	kgem->nbatch += 3;
	assert(kgem->nbatch < kgem->surface);

	b[0] = blt->cmd;
	*(uint64_t *)(b+1) = *(const uint64_t *)box;
}

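/*
 * _sna_blt_fill_box() above and _sna_blt_fill_boxes() below rely on the
 * layout of BoxRec: its four int16 fields (x1, y1, x2, y2) occupy
 * exactly the two dwords that XY_SCANLINE_BLT expects, so a whole
 * rectangle is copied into the batch with a single 64-bit store. The
 * emitter below additionally unrolls by 8/4/2/1 to keep loop overhead
 * off the hot path.
 */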
inline static void _sna_blt_fill_boxes(struct sna *sna,
				       const struct sna_blt_state *blt,
				       const BoxRec *box,
				       int nbox)
{
	struct kgem *kgem = &sna->kgem;
	uint32_t cmd = blt->cmd;

	DBG(("%s: %08x x %d\n", __FUNCTION__, blt->pixel, nbox));

	if (!kgem_check_batch(kgem, 3))
		sna_blt_fill_begin(sna, blt);

	do {
		uint32_t *b = kgem->batch + kgem->nbatch;
		int nbox_this_time, rem;

		assert(sna->kgem.mode == KGEM_BLT);
		nbox_this_time = nbox;
		rem = kgem_batch_space(kgem);
		if (3*nbox_this_time > rem)
			nbox_this_time = rem / 3;
		DBG(("%s: emitting %d boxes out of %d (batch space %d)\n",
		     __FUNCTION__, nbox_this_time, nbox, rem));
		assert(nbox_this_time > 0);
		nbox -= nbox_this_time;

		kgem->nbatch += 3 * nbox_this_time;
		assert(kgem->nbatch < kgem->surface);
		while (nbox_this_time >= 8) {
			b[0] = cmd; *(uint64_t *)(b+1) = *(const uint64_t *)box++;
			b[3] = cmd; *(uint64_t *)(b+4) = *(const uint64_t *)box++;
			b[6] = cmd; *(uint64_t *)(b+7) = *(const uint64_t *)box++;
			b[9] = cmd; *(uint64_t *)(b+10) = *(const uint64_t *)box++;
			b[12] = cmd; *(uint64_t *)(b+13) = *(const uint64_t *)box++;
			b[15] = cmd; *(uint64_t *)(b+16) = *(const uint64_t *)box++;
			b[18] = cmd; *(uint64_t *)(b+19) = *(const uint64_t *)box++;
			b[21] = cmd; *(uint64_t *)(b+22) = *(const uint64_t *)box++;
			b += 24;
			nbox_this_time -= 8;
		}
		if (nbox_this_time & 4) {
			b[0] = cmd; *(uint64_t *)(b+1) = *(const uint64_t *)box++;
			b[3] = cmd; *(uint64_t *)(b+4) = *(const uint64_t *)box++;
			b[6] = cmd; *(uint64_t *)(b+7) = *(const uint64_t *)box++;
			b[9] = cmd; *(uint64_t *)(b+10) = *(const uint64_t *)box++;
			b += 12;
		}
		if (nbox_this_time & 2) {
			b[0] = cmd; *(uint64_t *)(b+1) = *(const uint64_t *)box++;
			b[3] = cmd; *(uint64_t *)(b+4) = *(const uint64_t *)box++;
			b += 6;
		}
		if (nbox_this_time & 1) {
			b[0] = cmd; *(uint64_t *)(b+1) = *(const uint64_t *)box++;
		}

		if (!nbox)
			return;

		sna_blt_fill_begin(sna, blt);
	} while (1);
}

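/*
 * When a solid fill has just covered the whole destination, record the
 * pixmap as "clear" with that colour: all damage is moved to the GPU bo
 * and later paths (e.g. sna_composite_mask_is_opaque() above) can test
 * priv->clear_color instead of reading the pixels back.
 */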
static inline void _sna_blt_maybe_clear(const struct sna_composite_op *op, const BoxRec *box)
{
	if (box->x2 - box->x1 >= op->dst.width &&
	    box->y2 - box->y1 >= op->dst.height) {
		struct sna_pixmap *priv = sna_pixmap(op->dst.pixmap);
		if (op->dst.bo == priv->gpu_bo) {
			sna_damage_all(&priv->gpu_damage, op->dst.pixmap);
			sna_damage_destroy(&priv->cpu_damage);
			priv->clear = true;
			priv->clear_color = op->u.blt.pixel;
			DBG(("%s: pixmap=%ld marking clear [%08x]\n",
			     __FUNCTION__,
			     op->dst.pixmap->drawable.serialNumber,
			     op->u.blt.pixel));
			((struct sna_composite_op *)op)->damage = NULL;
		}
	}
}

fastcall static void blt_composite_fill_box_no_offset(struct sna *sna,
						      const struct sna_composite_op *op,
						      const BoxRec *box)
{
	_sna_blt_fill_box(sna, &op->u.blt, box);
	_sna_blt_maybe_clear(op, box);
}

static void blt_composite_fill_boxes_no_offset(struct sna *sna,
					       const struct sna_composite_op *op,
					       const BoxRec *box, int n)
{
	_sna_blt_fill_boxes(sna, &op->u.blt, box, n);
}

static void blt_composite_fill_boxes_no_offset__thread(struct sna *sna,
						       const struct sna_composite_op *op,
						       const BoxRec *box, int nbox)
{
	struct kgem *kgem = &sna->kgem;
	const struct sna_blt_state *blt = &op->u.blt;
	uint32_t cmd = blt->cmd;

	DBG(("%s: %08x x %d\n", __FUNCTION__, blt->pixel, nbox));

	sna_vertex_lock(&sna->render);
	assert(kgem->mode == KGEM_BLT);
	if (!kgem_check_batch(kgem, 3)) {
		sna_vertex_wait__locked(&sna->render);
		sna_blt_fill_begin(sna, blt);
	}

	do {
		uint32_t *b = kgem->batch + kgem->nbatch;
		int nbox_this_time, rem;

		assert(sna->kgem.mode == KGEM_BLT);
		nbox_this_time = nbox;
		rem = kgem_batch_space(kgem);
		if (3*nbox_this_time > rem)
			nbox_this_time = rem / 3;
		DBG(("%s: emitting %d boxes out of %d (batch space %d)\n",
		     __FUNCTION__, nbox_this_time, nbox, rem));
		assert(nbox_this_time > 0);
		nbox -= nbox_this_time;

		kgem->nbatch += 3 * nbox_this_time;
		assert(kgem->nbatch < kgem->surface);
		sna_vertex_acquire__locked(&sna->render);
		sna_vertex_unlock(&sna->render);

		while (nbox_this_time >= 8) {
			b[0] = cmd; *(uint64_t *)(b+1) = *(const uint64_t *)box++;
			b[3] = cmd; *(uint64_t *)(b+4) = *(const uint64_t *)box++;
			b[6] = cmd; *(uint64_t *)(b+7) = *(const uint64_t *)box++;
			b[9] = cmd; *(uint64_t *)(b+10) = *(const uint64_t *)box++;
			b[12] = cmd; *(uint64_t *)(b+13) = *(const uint64_t *)box++;
			b[15] = cmd; *(uint64_t *)(b+16) = *(const uint64_t *)box++;
			b[18] = cmd; *(uint64_t *)(b+19) = *(const uint64_t *)box++;
			b[21] = cmd; *(uint64_t *)(b+22) = *(const uint64_t *)box++;
			b += 24;
			nbox_this_time -= 8;
		}
		if (nbox_this_time & 4) {
			b[0] = cmd; *(uint64_t *)(b+1) = *(const uint64_t *)box++;
			b[3] = cmd; *(uint64_t *)(b+4) = *(const uint64_t *)box++;
			b[6] = cmd; *(uint64_t *)(b+7) = *(const uint64_t *)box++;
			b[9] = cmd; *(uint64_t *)(b+10) = *(const uint64_t *)box++;
			b += 12;
		}
		if (nbox_this_time & 2) {
			b[0] = cmd; *(uint64_t *)(b+1) = *(const uint64_t *)box++;
			b[3] = cmd; *(uint64_t *)(b+4) = *(const uint64_t *)box++;
			b += 6;
		}
		if (nbox_this_time & 1) {
			b[0] = cmd; *(uint64_t *)(b+1) = *(const uint64_t *)box++;
		}

		sna_vertex_lock(&sna->render);
		sna_vertex_release__locked(&sna->render);
		if (!nbox)
			break;

		sna_vertex_wait__locked(&sna->render);
		sna_blt_fill_begin(sna, blt);
	} while (1);
	sna_vertex_unlock(&sna->render);
}

fastcall static void blt_composite_fill_box(struct sna *sna,
					    const struct sna_composite_op *op,
					    const BoxRec *box)
{
	sna_blt_fill_one(sna, &op->u.blt,
			 box->x1 + op->dst.x,
			 box->y1 + op->dst.y,
			 box->x2 - box->x1,
			 box->y2 - box->y1);
	_sna_blt_maybe_clear(op, box);
}

static void blt_composite_fill_boxes(struct sna *sna,
				     const struct sna_composite_op *op,
				     const BoxRec *box, int n)
{
	do {
		sna_blt_fill_one(sna, &op->u.blt,
				 box->x1 + op->dst.x, box->y1 + op->dst.y,
				 box->x2 - box->x1, box->y2 - box->y1);
		box++;
	} while (--n);
}

static inline uint64_t add4(const BoxRec *b, int16_t x, int16_t y)
{
	union {
		uint64_t v;
		int16_t i[4];
	} vi;
	vi.v = *(uint64_t *)b;
	vi.i[0] += x;
	vi.i[1] += y;
	vi.i[2] += x;
	vi.i[3] += y;
	return vi.v;
}
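
/*
 * add4() translates a whole BoxRec in one go: on the little-endian
 * layout assumed here, the 64-bit load picks up {x1, y1, x2, y2} as
 * four int16 lanes and the destination offset is added to each pair.
 * E.g. a box (1, 2)-(5, 6) with dx=10, dy=20 becomes (11, 22)-(15, 26),
 * ready to be stored straight into the batch as two dwords.
 */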

static void blt_composite_fill_boxes__thread(struct sna *sna,
					     const struct sna_composite_op *op,
					     const BoxRec *box, int nbox)
{
	struct kgem *kgem = &sna->kgem;
	const struct sna_blt_state *blt = &op->u.blt;
	uint32_t cmd = blt->cmd;
	int16_t dx = op->dst.x;
	int16_t dy = op->dst.y;

	DBG(("%s: %08x x %d\n", __FUNCTION__, blt->pixel, nbox));

	sna_vertex_lock(&sna->render);
	assert(kgem->mode == KGEM_BLT);
	if (!kgem_check_batch(kgem, 3)) {
		sna_vertex_wait__locked(&sna->render);
		sna_blt_fill_begin(sna, blt);
	}

	do {
		uint32_t *b = kgem->batch + kgem->nbatch;
		int nbox_this_time, rem;

		assert(sna->kgem.mode == KGEM_BLT);
		nbox_this_time = nbox;
		rem = kgem_batch_space(kgem);
		if (3*nbox_this_time > rem)
			nbox_this_time = rem / 3;
		DBG(("%s: emitting %d boxes out of %d (batch space %d)\n",
		     __FUNCTION__, nbox_this_time, nbox, rem));
		assert(nbox_this_time > 0);
		nbox -= nbox_this_time;

		kgem->nbatch += 3 * nbox_this_time;
		assert(kgem->nbatch < kgem->surface);
		sna_vertex_acquire__locked(&sna->render);
		sna_vertex_unlock(&sna->render);

		while (nbox_this_time >= 8) {
			b[0] = cmd; *(uint64_t *)(b+1) = add4(box++, dx, dy);
			b[3] = cmd; *(uint64_t *)(b+4) = add4(box++, dx, dy);
			b[6] = cmd; *(uint64_t *)(b+7) = add4(box++, dx, dy);
			b[9] = cmd; *(uint64_t *)(b+10) = add4(box++, dx, dy);
			b[12] = cmd; *(uint64_t *)(b+13) = add4(box++, dx, dy);
			b[15] = cmd; *(uint64_t *)(b+16) = add4(box++, dx, dy);
			b[18] = cmd; *(uint64_t *)(b+19) = add4(box++, dx, dy);
			b[21] = cmd; *(uint64_t *)(b+22) = add4(box++, dx, dy);
			b += 24;
			nbox_this_time -= 8;
		}
		if (nbox_this_time & 4) {
			b[0] = cmd; *(uint64_t *)(b+1) = add4(box++, dx, dy);
			b[3] = cmd; *(uint64_t *)(b+4) = add4(box++, dx, dy);
			b[6] = cmd; *(uint64_t *)(b+7) = add4(box++, dx, dy);
			b[9] = cmd; *(uint64_t *)(b+10) = add4(box++, dx, dy);
			b += 12;
		}
		if (nbox_this_time & 2) {
			b[0] = cmd; *(uint64_t *)(b+1) = add4(box++, dx, dy);
			b[3] = cmd; *(uint64_t *)(b+4) = add4(box++, dx, dy);
			b += 6;
		}
		if (nbox_this_time & 1) {
			b[0] = cmd; *(uint64_t *)(b+1) = add4(box++, dx, dy);
		}

		sna_vertex_lock(&sna->render);
		sna_vertex_release__locked(&sna->render);
		if (!nbox)
			break;

		sna_vertex_wait__locked(&sna->render);
		sna_blt_fill_begin(sna, blt);
	} while (1);
	sna_vertex_unlock(&sna->render);
}

fastcall
static void blt_composite_nop(struct sna *sna,
			       const struct sna_composite_op *op,
			       const struct sna_composite_rectangles *r)
{
}

fastcall static void blt_composite_nop_box(struct sna *sna,
					   const struct sna_composite_op *op,
					   const BoxRec *box)
{
}

static void blt_composite_nop_boxes(struct sna *sna,
				    const struct sna_composite_op *op,
				    const BoxRec *box, int n)
{
}

static bool
begin_blt(struct sna *sna,
	  struct sna_composite_op *op)
{
	assert(sna->kgem.mode == KGEM_BLT);
	if (!kgem_check_bo_fenced(&sna->kgem, op->dst.bo)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo_fenced(&sna->kgem, op->dst.bo))
			return false;

		_kgem_set_mode(&sna->kgem, KGEM_BLT);
		kgem_bcs_set_tiling(&sna->kgem, NULL, op->dst.bo);
	}

	return true;
}

static bool
prepare_blt_nop(struct sna *sna,
		struct sna_composite_op *op)
{
	DBG(("%s\n", __FUNCTION__));

	op->blt   = blt_composite_nop;
	op->box   = blt_composite_nop_box;
	op->boxes = blt_composite_nop_boxes;
	op->done  = nop_done;
	return true;
}

static bool
prepare_blt_clear(struct sna *sna,
		  struct sna_composite_op *op)
{
	DBG(("%s\n", __FUNCTION__));

	if (op->dst.bo == NULL) {
		op->u.blt.pixel = 0;
		op->blt   = blt_composite_fill__cpu;
		if (op->dst.x|op->dst.y) {
			op->box   = blt_composite_fill_box__cpu;
			op->boxes = blt_composite_fill_boxes__cpu;
			op->thread_boxes = blt_composite_fill_boxes__cpu;
		} else {
			op->box   = blt_composite_fill_box_no_offset__cpu;
			op->boxes = blt_composite_fill_boxes_no_offset__cpu;
			op->thread_boxes = blt_composite_fill_boxes_no_offset__cpu;
		}
		op->done = sig_done;
		return sigtrap_get() == 0;
	}

	op->blt = blt_composite_fill;
	if (op->dst.x|op->dst.y) {
		op->box   = blt_composite_fill_box;
		op->boxes = blt_composite_fill_boxes;
		op->thread_boxes = blt_composite_fill_boxes__thread;
	} else {
		op->box   = blt_composite_fill_box_no_offset;
		op->boxes = blt_composite_fill_boxes_no_offset;
		op->thread_boxes = blt_composite_fill_boxes_no_offset__thread;
	}
	op->done = nop_done;

	if (!sna_blt_fill_init(sna, &op->u.blt,
			       op->dst.bo,
			       op->dst.pixmap->drawable.bitsPerPixel,
			       GXclear, 0))
		return false;

	return begin_blt(sna, op);
}

static bool
prepare_blt_fill(struct sna *sna,
		 struct sna_composite_op *op,
		 uint32_t pixel)
{
	DBG(("%s\n", __FUNCTION__));

	if (op->dst.bo == NULL) {
		op->u.blt.pixel = pixel;
		op->blt = blt_composite_fill__cpu;
		if (op->dst.x|op->dst.y) {
			op->box   = blt_composite_fill_box__cpu;
			op->boxes = blt_composite_fill_boxes__cpu;
			op->thread_boxes = blt_composite_fill_boxes__cpu;
		} else {
			op->box   = blt_composite_fill_box_no_offset__cpu;
			op->boxes = blt_composite_fill_boxes_no_offset__cpu;
			op->thread_boxes = blt_composite_fill_boxes_no_offset__cpu;
		}
		op->done = sig_done;
		return sigtrap_get() == 0;
	}

	op->blt = blt_composite_fill;
	if (op->dst.x|op->dst.y) {
		op->box   = blt_composite_fill_box;
		op->boxes = blt_composite_fill_boxes;
		op->thread_boxes = blt_composite_fill_boxes__thread;
	} else {
		op->box   = blt_composite_fill_box_no_offset;
		op->boxes = blt_composite_fill_boxes_no_offset;
		op->thread_boxes = blt_composite_fill_boxes_no_offset__thread;
	}
	op->done = nop_done;

	if (!sna_blt_fill_init(sna, &op->u.blt, op->dst.bo,
			       op->dst.pixmap->drawable.bitsPerPixel,
			       GXcopy, pixel))
		return false;

	return begin_blt(sna, op);
}

fastcall static void
blt_composite_copy(struct sna *sna,
		   const struct sna_composite_op *op,
		   const struct sna_composite_rectangles *r)
{
	int x1, x2, y1, y2;
	int src_x, src_y;

	DBG(("%s: src=(%d, %d), dst=(%d, %d), size=(%d, %d)\n",
	     __FUNCTION__,
	     r->src.x, r->src.y,
	     r->dst.x, r->dst.y,
	     r->width, r->height));

	/* XXX higher layer should have clipped? */

	x1 = r->dst.x + op->dst.x;
	y1 = r->dst.y + op->dst.y;
	x2 = x1 + r->width;
	y2 = y1 + r->height;

	src_x = r->src.x - x1 + op->u.blt.sx;
	src_y = r->src.y - y1 + op->u.blt.sy;

	/* clip against dst */
	if (x1 < 0)
		x1 = 0;
	if (y1 < 0)
		y1 = 0;

	if (x2 > op->dst.width)
		x2 = op->dst.width;

	if (y2 > op->dst.height)
		y2 = op->dst.height;

	DBG(("%s: box=(%d, %d), (%d, %d)\n", __FUNCTION__, x1, y1, x2, y2));

	if (x2 <= x1 || y2 <= y1)
		return;

	sna_blt_copy_one(sna, &op->u.blt,
			 x1 + src_x, y1 + src_y,
			 x2 - x1, y2 - y1,
			 x1, y1);
}

fastcall static void blt_composite_copy_box(struct sna *sna,
					    const struct sna_composite_op *op,
					    const BoxRec *box)
{
	DBG(("%s: box (%d, %d), (%d, %d)\n",
	     __FUNCTION__, box->x1, box->y1, box->x2, box->y2));
	sna_blt_copy_one(sna, &op->u.blt,
			 box->x1 + op->u.blt.sx,
			 box->y1 + op->u.blt.sy,
			 box->x2 - box->x1,
			 box->y2 - box->y1,
			 box->x1 + op->dst.x,
			 box->y1 + op->dst.y);
}

static void blt_composite_copy_boxes(struct sna *sna,
				     const struct sna_composite_op *op,
				     const BoxRec *box, int nbox)
{
	DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));
	do {
		DBG(("%s: box (%d, %d), (%d, %d)\n",
		     __FUNCTION__, box->x1, box->y1, box->x2, box->y2));
		sna_blt_copy_one(sna, &op->u.blt,
				 box->x1 + op->u.blt.sx, box->y1 + op->u.blt.sy,
				 box->x2 - box->x1, box->y2 - box->y1,
				 box->x1 + op->dst.x, box->y1 + op->dst.y);
		box++;
	} while(--nbox);
}

static inline uint32_t add2(uint32_t v, int16_t x, int16_t y)
{
	x += v & 0xffff;
	y += v >> 16;
	return (uint16_t)y << 16 | x;
}
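
/*
 * add2() applies the same translation to a single packed coordinate
 * dword (y << 16 | x), as used for the copy source origin below:
 * e.g. add2(0x00200010, 3, 4) yields 0x00240013.
 */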
1630 
static void blt_composite_copy_boxes__thread(struct sna *sna,
					     const struct sna_composite_op *op,
					     const BoxRec *box, int nbox)
{
	struct kgem *kgem = &sna->kgem;
	int dst_dx = op->dst.x;
	int dst_dy = op->dst.y;
	int src_dx = op->src.offset[0];
	int src_dy = op->src.offset[1];
	uint32_t cmd = op->u.blt.cmd;
	uint32_t br13 = op->u.blt.br13;
	struct kgem_bo *src_bo = op->u.blt.bo[0];
	struct kgem_bo *dst_bo = op->u.blt.bo[1];
	int src_pitch = op->u.blt.pitch[0];

	DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));

	sna_vertex_lock(&sna->render);

	if ((dst_dx | dst_dy) == 0) {
		uint64_t hdr = (uint64_t)br13 << 32 | cmd;
		do {
			int nbox_this_time, rem;

			nbox_this_time = nbox;
			rem = kgem_batch_space(kgem);
			if (8*nbox_this_time > rem)
				nbox_this_time = rem / 8;
			if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc)
				nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc)/2;
			DBG(("%s: emitting %d boxes out of %d (batch space %d)\n",
			     __FUNCTION__, nbox_this_time, nbox, rem));
			assert(nbox_this_time > 0);
			nbox -= nbox_this_time;

			assert(sna->kgem.mode == KGEM_BLT);
			do {
				uint32_t *b = kgem->batch + kgem->nbatch;

				DBG(("  %s: box=(%d, %d)x(%d, %d)\n",
				     __FUNCTION__,
				     box->x1, box->y1,
				     box->x2 - box->x1, box->y2 - box->y1));

				assert(box->x1 + src_dx >= 0);
				assert(box->y1 + src_dy >= 0);
				assert(box->x1 + src_dx <= INT16_MAX);
				assert(box->y1 + src_dy <= INT16_MAX);

				assert(box->x1 >= 0);
				assert(box->y1 >= 0);

				*(uint64_t *)&b[0] = hdr;
				*(uint64_t *)&b[2] = *(const uint64_t *)box;
				b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, dst_bo,
						      I915_GEM_DOMAIN_RENDER << 16 |
						      I915_GEM_DOMAIN_RENDER |
						      KGEM_RELOC_FENCED,
						      0);
				b[5] = add2(b[2], src_dx, src_dy);
				b[6] = src_pitch;
				b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, src_bo,
						      I915_GEM_DOMAIN_RENDER << 16 |
						      KGEM_RELOC_FENCED,
						      0);
				kgem->nbatch += 8;
				assert(kgem->nbatch < kgem->surface);
				box++;
			} while (--nbox_this_time);

			if (!nbox)
				break;

			_kgem_submit(kgem);
			_kgem_set_mode(kgem, KGEM_BLT);
			kgem_bcs_set_tiling(&sna->kgem, src_bo, dst_bo);
		} while (1);
	} else {
		do {
			int nbox_this_time, rem;

			nbox_this_time = nbox;
			rem = kgem_batch_space(kgem);
			if (8*nbox_this_time > rem)
				nbox_this_time = rem / 8;
			if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc)
				nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc)/2;
			DBG(("%s: emitting %d boxes out of %d (batch space %d)\n",
			     __FUNCTION__, nbox_this_time, nbox, rem));
			assert(nbox_this_time > 0);
			nbox -= nbox_this_time;

			assert(sna->kgem.mode == KGEM_BLT);
			do {
				uint32_t *b = kgem->batch + kgem->nbatch;

				DBG(("  %s: box=(%d, %d)x(%d, %d)\n",
				     __FUNCTION__,
				     box->x1, box->y1,
				     box->x2 - box->x1, box->y2 - box->y1));

				assert(box->x1 + src_dx >= 0);
				assert(box->y1 + src_dy >= 0);

				assert(box->x1 + dst_dx >= 0);
				assert(box->y1 + dst_dy >= 0);

				b[0] = cmd;
				b[1] = br13;
				b[2] = ((box->y1 + dst_dy) << 16) | (box->x1 + dst_dx);
				b[3] = ((box->y2 + dst_dy) << 16) | (box->x2 + dst_dx);
				b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, dst_bo,
						      I915_GEM_DOMAIN_RENDER << 16 |
						      I915_GEM_DOMAIN_RENDER |
						      KGEM_RELOC_FENCED,
						      0);
				b[5] = ((box->y1 + src_dy) << 16) | (box->x1 + src_dx);
				b[6] = src_pitch;
				b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, src_bo,
						      I915_GEM_DOMAIN_RENDER << 16 |
						      KGEM_RELOC_FENCED,
						      0);
				kgem->nbatch += 8;
				assert(kgem->nbatch < kgem->surface);
				box++;
			} while (--nbox_this_time);

			if (!nbox)
				break;

			_kgem_submit(kgem);
			_kgem_set_mode(kgem, KGEM_BLT);
			kgem_bcs_set_tiling(&sna->kgem, src_bo, dst_bo);
		} while (1);
	}
	sna_vertex_unlock(&sna->render);
}

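/* The same emitter for gen8+ command streams, where each relocation
 * occupies two dwords: the packet grows to 10 dwords, with b[4..5] and
 * b[8..9] holding the 64-bit destination and source relocations written
 * by kgem_add_reloc64().
 */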
static void blt_composite_copy_boxes__thread64(struct sna *sna,
					       const struct sna_composite_op *op,
					       const BoxRec *box, int nbox)
{
	struct kgem *kgem = &sna->kgem;
	int dst_dx = op->dst.x;
	int dst_dy = op->dst.y;
	int src_dx = op->src.offset[0];
	int src_dy = op->src.offset[1];
	uint32_t cmd = op->u.blt.cmd;
	uint32_t br13 = op->u.blt.br13;
	struct kgem_bo *src_bo = op->u.blt.bo[0];
	struct kgem_bo *dst_bo = op->u.blt.bo[1];
	int src_pitch = op->u.blt.pitch[0];

	DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));

	sna_vertex_lock(&sna->render);

	if ((dst_dx | dst_dy) == 0) {
		uint64_t hdr = (uint64_t)br13 << 32 | cmd;
		do {
			int nbox_this_time, rem;

			nbox_this_time = nbox;
			rem = kgem_batch_space(kgem);
			if (10*nbox_this_time > rem)
				nbox_this_time = rem / 10;
			if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc)
				nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc)/2;
			DBG(("%s: emitting %d boxes out of %d (batch space %d)\n",
			     __FUNCTION__, nbox_this_time, nbox, rem));
			assert(nbox_this_time > 0);
			nbox -= nbox_this_time;

			assert(kgem->mode == KGEM_BLT);
			do {
				uint32_t *b = kgem->batch + kgem->nbatch;

				DBG(("  %s: box=(%d, %d)x(%d, %d)\n",
				     __FUNCTION__,
				     box->x1, box->y1,
				     box->x2 - box->x1, box->y2 - box->y1));

				assert(box->x1 + src_dx >= 0);
				assert(box->y1 + src_dy >= 0);
				assert(box->x1 + src_dx <= INT16_MAX);
				assert(box->y1 + src_dy <= INT16_MAX);

				assert(box->x1 >= 0);
				assert(box->y1 >= 0);

				*(uint64_t *)&b[0] = hdr;
				*(uint64_t *)&b[2] = *(const uint64_t *)box;
				*(uint64_t *)(b+4) =
					kgem_add_reloc64(kgem, kgem->nbatch + 4, dst_bo,
							 I915_GEM_DOMAIN_RENDER << 16 |
							 I915_GEM_DOMAIN_RENDER |
							 KGEM_RELOC_FENCED,
							 0);
				b[6] = add2(b[2], src_dx, src_dy);
				b[7] = src_pitch;
				*(uint64_t *)(b+8) =
					kgem_add_reloc64(kgem, kgem->nbatch + 8, src_bo,
							 I915_GEM_DOMAIN_RENDER << 16 |
							 KGEM_RELOC_FENCED,
							 0);
				kgem->nbatch += 10;
				assert(kgem->nbatch < kgem->surface);
				box++;
			} while (--nbox_this_time);

			if (!nbox)
				break;

			_kgem_submit(kgem);
			_kgem_set_mode(kgem, KGEM_BLT);
			kgem_bcs_set_tiling(&sna->kgem, src_bo, dst_bo);
		} while (1);
	} else {
		do {
			int nbox_this_time, rem;

			nbox_this_time = nbox;
			rem = kgem_batch_space(kgem);
			if (10*nbox_this_time > rem)
				nbox_this_time = rem / 10;
			if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc)
				nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc)/2;
			DBG(("%s: emitting %d boxes out of %d (batch space %d)\n",
			     __FUNCTION__, nbox_this_time, nbox, rem));
			assert(nbox_this_time > 0);
			nbox -= nbox_this_time;

			assert(kgem->mode == KGEM_BLT);
			do {
				uint32_t *b = kgem->batch + kgem->nbatch;

				DBG(("  %s: box=(%d, %d)x(%d, %d)\n",
				     __FUNCTION__,
				     box->x1, box->y1,
				     box->x2 - box->x1, box->y2 - box->y1));

				assert(box->x1 + src_dx >= 0);
				assert(box->y1 + src_dy >= 0);

				assert(box->x1 + dst_dx >= 0);
				assert(box->y1 + dst_dy >= 0);

				b[0] = cmd;
				b[1] = br13;
				b[2] = ((box->y1 + dst_dy) << 16) | (box->x1 + dst_dx);
				b[3] = ((box->y2 + dst_dy) << 16) | (box->x2 + dst_dx);
				*(uint64_t *)(b+4) =
					kgem_add_reloc64(kgem, kgem->nbatch + 4, dst_bo,
							 I915_GEM_DOMAIN_RENDER << 16 |
							 I915_GEM_DOMAIN_RENDER |
							 KGEM_RELOC_FENCED,
							 0);
				b[6] = ((box->y1 + src_dy) << 16) | (box->x1 + src_dx);
				b[7] = src_pitch;
				*(uint64_t *)(b+8) =
					kgem_add_reloc64(kgem, kgem->nbatch + 8, src_bo,
							 I915_GEM_DOMAIN_RENDER << 16 |
							 KGEM_RELOC_FENCED,
							 0);
				kgem->nbatch += 10;
				assert(kgem->nbatch < kgem->surface);
				box++;
			} while (--nbox_this_time);

			if (!nbox)
				break;

			_kgem_submit(kgem);
			_kgem_set_mode(kgem, KGEM_BLT);
			kgem_bcs_set_tiling(&sna->kgem, src_bo, dst_bo);
		} while (1);
	}
	sna_vertex_unlock(&sna->render);
}

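/* The *_with_alpha variants handle a source that lacks the destination's
 * alpha channel (e.g. x8r8g8b8 copied to a8r8g8b8): the copy is routed
 * through sna_blt_alpha_fixup_one(), which effectively ORs the
 * caller-computed alpha_fixup pixel into every copied pixel so the
 * destination alpha reads back as opaque.
 */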
fastcall static void
blt_composite_copy_with_alpha(struct sna *sna,
			      const struct sna_composite_op *op,
			      const struct sna_composite_rectangles *r)
{
	int x1, x2, y1, y2;
	int src_x, src_y;

	DBG(("%s: src=(%d, %d), dst=(%d, %d), size=(%d, %d)\n",
	     __FUNCTION__,
	     r->src.x, r->src.y,
	     r->dst.x, r->dst.y,
	     r->width, r->height));

	/* XXX higher layer should have clipped? */

	x1 = r->dst.x + op->dst.x;
	y1 = r->dst.y + op->dst.y;
	x2 = x1 + r->width;
	y2 = y1 + r->height;

	src_x = r->src.x - x1 + op->u.blt.sx;
	src_y = r->src.y - y1 + op->u.blt.sy;

	/* clip against dst */
	if (x1 < 0)
		x1 = 0;
	if (y1 < 0)
		y1 = 0;

	if (x2 > op->dst.width)
		x2 = op->dst.width;

	if (y2 > op->dst.height)
		y2 = op->dst.height;

	DBG(("%s: box=(%d, %d), (%d, %d)\n", __FUNCTION__, x1, y1, x2, y2));

	if (x2 <= x1 || y2 <= y1)
		return;

	sna_blt_alpha_fixup_one(sna, &op->u.blt,
				x1 + src_x, y1 + src_y,
				x2 - x1, y2 - y1,
				x1, y1);
}

fastcall static void
blt_composite_copy_box_with_alpha(struct sna *sna,
				  const struct sna_composite_op *op,
				  const BoxRec *box)
{
	DBG(("%s: box (%d, %d), (%d, %d)\n",
	     __FUNCTION__, box->x1, box->y1, box->x2, box->y2));
	sna_blt_alpha_fixup_one(sna, &op->u.blt,
				box->x1 + op->u.blt.sx,
				box->y1 + op->u.blt.sy,
				box->x2 - box->x1,
				box->y2 - box->y1,
				box->x1 + op->dst.x,
				box->y1 + op->dst.y);
}

static void
blt_composite_copy_boxes_with_alpha(struct sna *sna,
				    const struct sna_composite_op *op,
				    const BoxRec *box, int nbox)
{
	DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));
	do {
		DBG(("%s: box (%d, %d), (%d, %d)\n",
		     __FUNCTION__, box->x1, box->y1, box->x2, box->y2));
		sna_blt_alpha_fixup_one(sna, &op->u.blt,
					box->x1 + op->u.blt.sx, box->y1 + op->u.blt.sy,
					box->x2 - box->x1, box->y2 - box->y1,
					box->x1 + op->dst.x, box->y1 + op->dst.y);
		box++;
	} while (--nbox);
}

static bool
prepare_blt_copy(struct sna *sna,
		 struct sna_composite_op *op,
		 struct kgem_bo *bo,
		 uint32_t alpha_fixup)
{
	PixmapPtr src = op->u.blt.src_pixmap;

	assert(op->dst.bo);
	assert(kgem_bo_can_blt(&sna->kgem, op->dst.bo));
	assert(kgem_bo_can_blt(&sna->kgem, bo));

	kgem_set_mode(&sna->kgem, KGEM_BLT, op->dst.bo);
	if (!kgem_check_many_bo_fenced(&sna->kgem, op->dst.bo, bo, NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_many_bo_fenced(&sna->kgem,
					       op->dst.bo, bo, NULL)) {
			DBG(("%s: fallback -- no room in aperture\n", __FUNCTION__));
			return sna_tiling_blt_composite(sna, op, bo,
							src->drawable.bitsPerPixel,
							alpha_fixup);
		}
		_kgem_set_mode(&sna->kgem, KGEM_BLT);
	}
	kgem_bcs_set_tiling(&sna->kgem, bo, op->dst.bo);

	DBG(("%s\n", __FUNCTION__));

	if (sna->kgem.gen >= 060 && op->dst.bo == bo)
		op->done = gen6_blt_copy_done;
	else
		op->done = nop_done;

	if (alpha_fixup) {
		op->blt   = blt_composite_copy_with_alpha;
		op->box   = blt_composite_copy_box_with_alpha;
		op->boxes = blt_composite_copy_boxes_with_alpha;

		if (!sna_blt_alpha_fixup_init(sna, &op->u.blt, bo, op->dst.bo,
					      src->drawable.bitsPerPixel,
					      alpha_fixup))
			return false;
	} else {
		op->blt   = blt_composite_copy;
		op->box   = blt_composite_copy_box;
		op->boxes = blt_composite_copy_boxes;
		if (sna->kgem.gen >= 0100)
			op->thread_boxes = blt_composite_copy_boxes__thread64;
		else
			op->thread_boxes = blt_composite_copy_boxes__thread;

		if (!sna_blt_copy_init(sna, &op->u.blt, bo, op->dst.bo,
				       src->drawable.bitsPerPixel,
				       GXcopy))
			return false;
	}

	return true;
}

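/* The __cpu put variants are used when the destination stays in system
 * memory: both pixmaps are addressed through their mapped pointers and
 * copied with memcpy_blt()/memcpy_xor(), executed under the sigtrap guard
 * installed by prepare_blt_put().
 */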
fastcall static void
blt_put_composite__cpu(struct sna *sna,
		       const struct sna_composite_op *op,
		       const struct sna_composite_rectangles *r)
{
	PixmapPtr dst = op->dst.pixmap;
	PixmapPtr src = op->u.blt.src_pixmap;
	assert(src->devPrivate.ptr);
	assert(src->devKind);
	assert(dst->devPrivate.ptr);
	assert(dst->devKind);
	memcpy_blt(src->devPrivate.ptr, dst->devPrivate.ptr,
		   src->drawable.bitsPerPixel, src->devKind, dst->devKind,
		   r->src.x + op->u.blt.sx, r->src.y + op->u.blt.sy,
		   r->dst.x + op->dst.x, r->dst.y + op->dst.y,
		   r->width, r->height);
}

fastcall static void
blt_put_composite_box__cpu(struct sna *sna,
			   const struct sna_composite_op *op,
			   const BoxRec *box)
{
	PixmapPtr dst = op->dst.pixmap;
	PixmapPtr src = op->u.blt.src_pixmap;
	assert(src->devPrivate.ptr);
	assert(src->devKind);
	assert(dst->devPrivate.ptr);
	assert(dst->devKind);
	memcpy_blt(src->devPrivate.ptr, dst->devPrivate.ptr,
		   src->drawable.bitsPerPixel, src->devKind, dst->devKind,
		   box->x1 + op->u.blt.sx, box->y1 + op->u.blt.sy,
		   box->x1 + op->dst.x, box->y1 + op->dst.y,
		   box->x2 - box->x1, box->y2 - box->y1);
}

static void
blt_put_composite_boxes__cpu(struct sna *sna,
			     const struct sna_composite_op *op,
			     const BoxRec *box, int n)
{
	PixmapPtr dst = op->dst.pixmap;
	PixmapPtr src = op->u.blt.src_pixmap;
	assert(src->devPrivate.ptr);
	assert(src->devKind);
	assert(dst->devPrivate.ptr);
	assert(dst->devKind);
	do {
		memcpy_blt(src->devPrivate.ptr, dst->devPrivate.ptr,
			   src->drawable.bitsPerPixel, src->devKind, dst->devKind,
			   box->x1 + op->u.blt.sx, box->y1 + op->u.blt.sy,
			   box->x1 + op->dst.x, box->y1 + op->dst.y,
			   box->x2 - box->x1, box->y2 - box->y1);
		box++;
	} while (--n);
}

fastcall static void
blt_put_composite_with_alpha__cpu(struct sna *sna,
				  const struct sna_composite_op *op,
				  const struct sna_composite_rectangles *r)
{
	PixmapPtr dst = op->dst.pixmap;
	PixmapPtr src = op->u.blt.src_pixmap;
	assert(src->devPrivate.ptr);
	assert(src->devKind);
	assert(dst->devPrivate.ptr);
	assert(dst->devKind);
	memcpy_xor(src->devPrivate.ptr, dst->devPrivate.ptr,
		   src->drawable.bitsPerPixel, src->devKind, dst->devKind,
		   r->src.x + op->u.blt.sx, r->src.y + op->u.blt.sy,
		   r->dst.x + op->dst.x, r->dst.y + op->dst.y,
		   r->width, r->height,
		   0xffffffff, op->u.blt.pixel);
}

fastcall static void
blt_put_composite_box_with_alpha__cpu(struct sna *sna,
				      const struct sna_composite_op *op,
				      const BoxRec *box)
{
	PixmapPtr dst = op->dst.pixmap;
	PixmapPtr src = op->u.blt.src_pixmap;
	assert(src->devPrivate.ptr);
	assert(src->devKind);
	assert(dst->devPrivate.ptr);
	assert(dst->devKind);
	memcpy_xor(src->devPrivate.ptr, dst->devPrivate.ptr,
		   src->drawable.bitsPerPixel, src->devKind, dst->devKind,
		   box->x1 + op->u.blt.sx, box->y1 + op->u.blt.sy,
		   box->x1 + op->dst.x, box->y1 + op->dst.y,
		   box->x2 - box->x1, box->y2 - box->y1,
		   0xffffffff, op->u.blt.pixel);
}

static void
blt_put_composite_boxes_with_alpha__cpu(struct sna *sna,
					const struct sna_composite_op *op,
					const BoxRec *box, int n)
{
	PixmapPtr dst = op->dst.pixmap;
	PixmapPtr src = op->u.blt.src_pixmap;
	assert(src->devPrivate.ptr);
	assert(src->devKind);
	assert(dst->devPrivate.ptr);
	assert(dst->devKind);
	do {
		memcpy_xor(src->devPrivate.ptr, dst->devPrivate.ptr,
			   src->drawable.bitsPerPixel, src->devKind, dst->devKind,
			   box->x1 + op->u.blt.sx, box->y1 + op->u.blt.sy,
			   box->x1 + op->dst.x, box->y1 + op->dst.y,
			   box->x2 - box->x1, box->y2 - box->y1,
			   0xffffffff, op->u.blt.pixel);
		box++;
	} while (--n);
}

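/* The GPU put variants upload from the source pixmap into the destination
 * bo: a write that covers the entire (unpinned) destination is performed
 * wholesale via sna_replace(), anything partial goes through
 * sna_write_boxes(); the *_with_alpha forms use the __xor flavours to
 * apply the alpha fixup during the upload.
 */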
fastcall static void
blt_put_composite(struct sna *sna,
		  const struct sna_composite_op *op,
		  const struct sna_composite_rectangles *r)
{
	PixmapPtr dst = op->dst.pixmap;
	PixmapPtr src = op->u.blt.src_pixmap;
	struct sna_pixmap *dst_priv = sna_pixmap(dst);
	int pitch = src->devKind;
	char *data = src->devPrivate.ptr;
	int bpp = src->drawable.bitsPerPixel;

	int16_t dst_x = r->dst.x + op->dst.x;
	int16_t dst_y = r->dst.y + op->dst.y;
	int16_t src_x = r->src.x + op->u.blt.sx;
	int16_t src_y = r->src.y + op->u.blt.sy;

	if (!dst_priv->pinned &&
	    dst_x <= 0 && dst_y <= 0 &&
	    dst_x + r->width >= op->dst.width &&
	    dst_y + r->height >= op->dst.height) {
		data += (src_x - dst_x) * bpp / 8;
		data += (src_y - dst_y) * pitch;

		assert(op->dst.bo == dst_priv->gpu_bo);
		sna_replace(sna, op->dst.pixmap, data, pitch);
	} else {
		BoxRec box;
		bool ok;

		box.x1 = dst_x;
		box.y1 = dst_y;
		box.x2 = dst_x + r->width;
		box.y2 = dst_y + r->height;

		ok = sna_write_boxes(sna, dst,
				     dst_priv->gpu_bo, 0, 0,
				     data, pitch, src_x, src_y,
				     &box, 1);
		assert(ok);
		(void)ok;
	}
}

fastcall static void blt_put_composite_box(struct sna *sna,
					   const struct sna_composite_op *op,
					   const BoxRec *box)
{
	PixmapPtr src = op->u.blt.src_pixmap;
	struct sna_pixmap *dst_priv = sna_pixmap(op->dst.pixmap);

	DBG(("%s: src=(%d, %d), dst=(%d, %d)\n", __FUNCTION__,
	     op->u.blt.sx, op->u.blt.sy,
	     op->dst.x, op->dst.y));

	assert(src->devPrivate.ptr);
	assert(src->devKind);
	if (!dst_priv->pinned &&
	    box->x2 - box->x1 == op->dst.width &&
	    box->y2 - box->y1 == op->dst.height) {
		int pitch = src->devKind;
		int bpp = src->drawable.bitsPerPixel / 8;
		char *data = src->devPrivate.ptr;

		data += (box->y1 + op->u.blt.sy) * pitch;
		data += (box->x1 + op->u.blt.sx) * bpp;

		assert(op->dst.bo == dst_priv->gpu_bo);
		sna_replace(sna, op->dst.pixmap, data, pitch);
	} else {
		bool ok;

		ok = sna_write_boxes(sna, op->dst.pixmap,
				     op->dst.bo, op->dst.x, op->dst.y,
				     src->devPrivate.ptr,
				     src->devKind,
				     op->u.blt.sx, op->u.blt.sy,
				     box, 1);
		assert(ok);
		(void)ok;
	}
}

static void blt_put_composite_boxes(struct sna *sna,
				    const struct sna_composite_op *op,
				    const BoxRec *box, int n)
{
	PixmapPtr src = op->u.blt.src_pixmap;
	struct sna_pixmap *dst_priv = sna_pixmap(op->dst.pixmap);

	DBG(("%s: src=(%d, %d), dst=(%d, %d), [(%d, %d), (%d, %d) x %d]\n", __FUNCTION__,
	     op->u.blt.sx, op->u.blt.sy,
	     op->dst.x, op->dst.y,
	     box->x1, box->y1, box->x2, box->y2, n));

	assert(src->devPrivate.ptr);
	assert(src->devKind);
	if (n == 1 && !dst_priv->pinned &&
	    box->x2 - box->x1 == op->dst.width &&
	    box->y2 - box->y1 == op->dst.height) {
		int pitch = src->devKind;
		int bpp = src->drawable.bitsPerPixel / 8;
		char *data = src->devPrivate.ptr;

		data += (box->y1 + op->u.blt.sy) * pitch;
		data += (box->x1 + op->u.blt.sx) * bpp;

		assert(op->dst.bo == dst_priv->gpu_bo);
		sna_replace(sna, op->dst.pixmap, data, pitch);
	} else {
		bool ok;

		ok = sna_write_boxes(sna, op->dst.pixmap,
				     op->dst.bo, op->dst.x, op->dst.y,
				     src->devPrivate.ptr,
				     src->devKind,
				     op->u.blt.sx, op->u.blt.sy,
				     box, n);
		assert(ok);
		(void)ok;
	}
}

fastcall static void
blt_put_composite_with_alpha(struct sna *sna,
			     const struct sna_composite_op *op,
			     const struct sna_composite_rectangles *r)
{
	PixmapPtr dst = op->dst.pixmap;
	PixmapPtr src = op->u.blt.src_pixmap;
	struct sna_pixmap *dst_priv = sna_pixmap(dst);
	int pitch = src->devKind;
	char *data = src->devPrivate.ptr;

	int16_t dst_x = r->dst.x + op->dst.x;
	int16_t dst_y = r->dst.y + op->dst.y;
	int16_t src_x = r->src.x + op->u.blt.sx;
	int16_t src_y = r->src.y + op->u.blt.sy;

	assert(src->devPrivate.ptr);
	assert(src->devKind);

	if (!dst_priv->pinned &&
	    dst_x <= 0 && dst_y <= 0 &&
	    dst_x + r->width >= op->dst.width &&
	    dst_y + r->height >= op->dst.height) {
		int bpp = dst->drawable.bitsPerPixel / 8;

		data += (src_x - dst_x) * bpp;
		data += (src_y - dst_y) * pitch;

		assert(op->dst.bo == dst_priv->gpu_bo);
		sna_replace__xor(sna, op->dst.pixmap, data, pitch,
				 0xffffffff, op->u.blt.pixel);
	} else {
		BoxRec box;

		box.x1 = dst_x;
		box.y1 = dst_y;
		box.x2 = dst_x + r->width;
		box.y2 = dst_y + r->height;

		sna_write_boxes__xor(sna, dst,
				     dst_priv->gpu_bo, 0, 0,
				     data, pitch, src_x, src_y,
				     &box, 1,
				     0xffffffff, op->u.blt.pixel);
	}
}

fastcall static void
blt_put_composite_box_with_alpha(struct sna *sna,
				 const struct sna_composite_op *op,
				 const BoxRec *box)
{
	PixmapPtr src = op->u.blt.src_pixmap;
	struct sna_pixmap *dst_priv = sna_pixmap(op->dst.pixmap);

	DBG(("%s: src=(%d, %d), dst=(%d, %d)\n", __FUNCTION__,
	     op->u.blt.sx, op->u.blt.sy,
	     op->dst.x, op->dst.y));

	assert(src->devPrivate.ptr);
	assert(src->devKind);

	if (!dst_priv->pinned &&
	    box->x2 - box->x1 == op->dst.width &&
	    box->y2 - box->y1 == op->dst.height) {
		int pitch = src->devKind;
		int bpp = src->drawable.bitsPerPixel / 8;
		char *data = src->devPrivate.ptr;

		data += (box->y1 + op->u.blt.sy) * pitch;
		data += (box->x1 + op->u.blt.sx) * bpp;

		assert(op->dst.bo == dst_priv->gpu_bo);
		sna_replace__xor(sna, op->dst.pixmap, data, pitch,
				 0xffffffff, op->u.blt.pixel);
	} else {
		sna_write_boxes__xor(sna, op->dst.pixmap,
				     op->dst.bo, op->dst.x, op->dst.y,
				     src->devPrivate.ptr,
				     src->devKind,
				     op->u.blt.sx, op->u.blt.sy,
				     box, 1,
				     0xffffffff, op->u.blt.pixel);
	}
}

static void
blt_put_composite_boxes_with_alpha(struct sna *sna,
				   const struct sna_composite_op *op,
				   const BoxRec *box, int n)
{
	PixmapPtr src = op->u.blt.src_pixmap;
	struct sna_pixmap *dst_priv = sna_pixmap(op->dst.pixmap);

	DBG(("%s: src=(%d, %d), dst=(%d, %d), [(%d, %d), (%d, %d) x %d]\n", __FUNCTION__,
	     op->u.blt.sx, op->u.blt.sy,
	     op->dst.x, op->dst.y,
	     box->x1, box->y1, box->x2, box->y2, n));

	assert(src->devPrivate.ptr);
	assert(src->devKind);

	if (n == 1 && !dst_priv->pinned &&
	    box->x2 - box->x1 == op->dst.width &&
	    box->y2 - box->y1 == op->dst.height) {
		int pitch = src->devKind;
		int bpp = src->drawable.bitsPerPixel / 8;
		char *data = src->devPrivate.ptr;

		data += (box->y1 + op->u.blt.sy) * pitch;
		data += (box->x1 + op->u.blt.sx) * bpp;

		assert(dst_priv->gpu_bo == op->dst.bo);
		sna_replace__xor(sna, op->dst.pixmap, data, pitch,
				 0xffffffff, op->u.blt.pixel);
	} else {
		sna_write_boxes__xor(sna, op->dst.pixmap,
				     op->dst.bo, op->dst.x, op->dst.y,
				     src->devPrivate.ptr,
				     src->devKind,
				     op->u.blt.sx, op->u.blt.sy,
				     box, n,
				     0xffffffff, op->u.blt.pixel);
	}
}

static bool
prepare_blt_put(struct sna *sna,
		struct sna_composite_op *op,
		uint32_t alpha_fixup)
{
	DBG(("%s\n", __FUNCTION__));

	assert(!sna_pixmap(op->dst.pixmap)->clear);

	if (op->dst.bo) {
		assert(op->dst.bo == sna_pixmap(op->dst.pixmap)->gpu_bo);
		if (alpha_fixup) {
			op->u.blt.pixel = alpha_fixup;
			op->blt   = blt_put_composite_with_alpha;
			op->box   = blt_put_composite_box_with_alpha;
			op->boxes = blt_put_composite_boxes_with_alpha;
		} else {
			op->blt   = blt_put_composite;
			op->box   = blt_put_composite_box;
			op->boxes = blt_put_composite_boxes;
		}

		op->done = nop_done;
		return true;
	} else {
		if (alpha_fixup) {
			op->u.blt.pixel = alpha_fixup;
			op->blt   = blt_put_composite_with_alpha__cpu;
			op->box   = blt_put_composite_box_with_alpha__cpu;
			op->boxes = blt_put_composite_boxes_with_alpha__cpu;
		} else {
			op->blt   = blt_put_composite__cpu;
			op->box   = blt_put_composite_box__cpu;
			op->boxes = blt_put_composite_boxes__cpu;
		}

		op->done = sig_done;
		return sigtrap_get() == 0;
	}
}

static bool
is_clear(PixmapPtr pixmap)
{
	struct sna_pixmap *priv = sna_pixmap(pixmap);
	return priv && priv->clear;
}

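/* When the destination is known to be a solid clear colour, OVER and ADD
 * with a solid source can be evaluated once on the CPU and the whole
 * composite reduced to a plain fill. over() computes src OVER dst on
 * packed a8r8g8b8 using the usual pixman-style tricks: the (r,b) and (a,g)
 * channel pairs are processed in parallel within one dword, and the
 * per-channel division by 255 is rounded exactly via
 * t = x*a + 0x80; result = (t + (t >> 8)) >> 8.
 * For example, over(0x80ff0000, 0xff0000ff) yields 0xffff007f:
 * half-opaque red over opaque blue.
 */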
static inline uint32_t
over(uint32_t src, uint32_t dst)
{
	uint32_t a = ~src >> 24;

#define G_SHIFT 8
#define RB_MASK 0xff00ff
#define RB_ONE_HALF 0x800080
#define RB_MASK_PLUS_ONE 0x10000100

#define UN8_rb_MUL_UN8(x, a, t) do {				\
	t  = ((x) & RB_MASK) * (a);				\
	t += RB_ONE_HALF;					\
	x = (t + ((t >> G_SHIFT) & RB_MASK)) >> G_SHIFT;	\
	x &= RB_MASK;						\
} while (0)

#define UN8_rb_ADD_UN8_rb(x, y, t) do {				\
	t = ((x) + (y));					\
	t |= RB_MASK_PLUS_ONE - ((t >> G_SHIFT) & RB_MASK);	\
	x = (t & RB_MASK);					\
} while (0)

#define UN8x4_MUL_UN8_ADD_UN8x4(x, a, y) do {			\
	uint32_t r1__, r2__, r3__, t__;				\
	\
	r1__ = (x);						\
	r2__ = (y) & RB_MASK;					\
	UN8_rb_MUL_UN8(r1__, (a), t__);				\
	UN8_rb_ADD_UN8_rb(r1__, r2__, t__);			\
	\
	r2__ = (x) >> G_SHIFT;					\
	r3__ = ((y) >> G_SHIFT) & RB_MASK;			\
	UN8_rb_MUL_UN8(r2__, (a), t__);				\
	UN8_rb_ADD_UN8_rb(r2__, r3__, t__);			\
	\
	(x) = r1__ | (r2__ << G_SHIFT);				\
} while (0)

	UN8x4_MUL_UN8_ADD_UN8x4(dst, a, src);

	return dst;
}

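/* add() is the matching helper for PictOpAdd: a saturating per-channel
 * UN8x4 addition. UN8_rb_ADD_UN8_rb clamps by ORing 0xff into any channel
 * whose halfword sum carried past 8 bits, e.g.
 * add(0x80808080, 0xc0c0c0c0) == 0xffffffff.
 */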
static inline uint32_t
add(uint32_t src, uint32_t dst)
{
#define UN8x4_ADD_UN8x4(x, y) do {				\
	uint32_t r1__, r2__, r3__, t__;				\
	\
	r1__ = (x) & RB_MASK;					\
	r2__ = (y) & RB_MASK;					\
	UN8_rb_ADD_UN8_rb(r1__, r2__, t__);			\
	\
	r2__ = ((x) >> G_SHIFT) & RB_MASK;			\
	r3__ = ((y) >> G_SHIFT) & RB_MASK;			\
	UN8_rb_ADD_UN8_rb(r2__, r3__, t__);			\
	\
	x = r1__ | (r2__ << G_SHIFT);				\
} while (0)

	UN8x4_ADD_UN8x4(src, dst);
	return src;
}

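/* sna_blt_composite() is the front door for the blitter composite path: it
 * inspects the Render request and reduces it, where possible, to a no-op,
 * a solid fill, a GPU copy, or a CPU upload. Anything the blitter cannot
 * express (non-Src operators after simplification, transforms beyond an
 * integer translation, convolution filters, incompatible formats) returns
 * false so the caller can fall back to the 3D pipeline.
 */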
bool
sna_blt_composite(struct sna *sna,
		  uint32_t op,
		  PicturePtr src,
		  PicturePtr dst,
		  int16_t x, int16_t y,
		  int16_t dst_x, int16_t dst_y,
		  int16_t width, int16_t height,
		  unsigned flags,
		  struct sna_composite_op *tmp)
{
	PictFormat src_format = src->format;
	PixmapPtr src_pixmap;
	struct kgem_bo *bo;
	int16_t tx, ty;
	BoxRec dst_box, src_box;
	uint32_t alpha_fixup;
	uint32_t color, hint;
	bool was_clear;
	bool ret;

#if DEBUG_NO_BLT || NO_BLT_COMPOSITE
	return false;
#endif
	DBG(("%s (%d, %d), (%d, %d), %dx%d\n",
	     __FUNCTION__, x, y, dst_x, dst_y, width, height));

	switch (dst->pDrawable->bitsPerPixel) {
	case 8:
	case 16:
	case 32:
		break;
	default:
		DBG(("%s: unhandled bpp: %d\n", __FUNCTION__,
		     dst->pDrawable->bitsPerPixel));
		return false;
	}

	tmp->dst.pixmap = get_drawable_pixmap(dst->pDrawable);
	was_clear = is_clear(tmp->dst.pixmap);

	if (width | height) {
		dst_box.x1 = dst_x;
		dst_box.x2 = bound(dst_x, width);
		dst_box.y1 = dst_y;
		dst_box.y2 = bound(dst_y, height);
	} else
		sna_render_picture_extents(dst, &dst_box);

	tmp->dst.format = dst->format;
	tmp->dst.width = tmp->dst.pixmap->drawable.width;
	tmp->dst.height = tmp->dst.pixmap->drawable.height;
	get_drawable_deltas(dst->pDrawable, tmp->dst.pixmap,
			    &tmp->dst.x, &tmp->dst.y);

	if (op == PictOpClear) {
clear:
		if (was_clear && sna_pixmap(tmp->dst.pixmap)->clear_color == 0) {
			sna_pixmap(tmp->dst.pixmap)->clear = true;
nop:
			return prepare_blt_nop(sna, tmp);
		}

		hint = 0;
		if (can_render(sna)) {
			hint |= PREFER_GPU;
			if ((flags & COMPOSITE_PARTIAL) == 0) {
				hint |= IGNORE_DAMAGE;
				if (width  == tmp->dst.pixmap->drawable.width &&
				    height == tmp->dst.pixmap->drawable.height)
					hint |= REPLACES;
			}
		}
		tmp->dst.bo = sna_drawable_use_bo(dst->pDrawable, hint,
						  &dst_box, &tmp->damage);
		assert(!tmp->damage || !DAMAGE_IS_ALL(*tmp->damage));
		if (tmp->dst.bo) {
			if (!kgem_bo_can_blt(&sna->kgem, tmp->dst.bo)) {
				DBG(("%s: can not blit to dst, tiling? %d, pitch? %d\n",
				     __FUNCTION__, tmp->dst.bo->tiling, tmp->dst.bo->pitch));
				return false;
			}
			if (hint & REPLACES)
				kgem_bo_undo(&sna->kgem, tmp->dst.bo);
			if (flags & COMPOSITE_UPLOAD)
				return false;
		} else {
			RegionRec region;

			region.extents = dst_box;
			region.data = NULL;

			hint = MOVE_WRITE | MOVE_INPLACE_HINT;
			if (flags & COMPOSITE_PARTIAL)
				hint |= MOVE_READ;
			if (!sna_drawable_move_region_to_cpu(dst->pDrawable, &region, hint))
				return false;
		}

		return prepare_blt_clear(sna, tmp);
	}

	if (is_solid(src)) {
		if ((op == PictOpOver || op == PictOpAdd) && is_transparent(src)) {
			sna_pixmap(tmp->dst.pixmap)->clear = was_clear;
			return prepare_blt_nop(sna, tmp);
		}
		if (op == PictOpOver && is_opaque_solid(src))
			op = PictOpSrc;
		if (op == PictOpAdd &&
		    PICT_FORMAT_RGB(src->format) == PICT_FORMAT_RGB(dst->format) &&
		    is_white(src))
			op = PictOpSrc;
		if (was_clear && (op == PictOpAdd || op == PictOpOver)) {
			if (sna_pixmap(tmp->dst.pixmap)->clear_color == 0)
				op = PictOpSrc;
			if (op == PictOpOver) {
				unsigned dst_color = solid_color(dst->format, sna_pixmap(tmp->dst.pixmap)->clear_color);
				color = over(get_solid_color(src, PICT_a8r8g8b8),
					     dst_color);
				op = PictOpSrc;
				DBG(("%s: precomputing solid OVER (%08x, %08x) -> %08x\n",
				     __FUNCTION__, get_solid_color(src, PICT_a8r8g8b8),
				     solid_color(dst->format, sna_pixmap(tmp->dst.pixmap)->clear_color),
				     color));
				if (color == dst_color)
					goto nop;
				else
					goto fill;
			}
			if (op == PictOpAdd) {
				unsigned dst_color = solid_color(dst->format, sna_pixmap(tmp->dst.pixmap)->clear_color);
				color = add(get_solid_color(src, PICT_a8r8g8b8),
					    dst_color);
				op = PictOpSrc;
				DBG(("%s: precomputing solid ADD (%08x, %08x) -> %08x\n",
				     __FUNCTION__, get_solid_color(src, PICT_a8r8g8b8),
				     solid_color(dst->format, sna_pixmap(tmp->dst.pixmap)->clear_color),
				     color));
				if (color == dst_color)
					goto nop;
				else
					goto fill;
			}
		}
		if (op == PictOpOutReverse && is_opaque_solid(src))
			goto clear;

		if (op != PictOpSrc) {
			DBG(("%s: unsupported op [%d] for blitting\n",
			     __FUNCTION__, op));
			return false;
		}

		color = get_solid_color(src, tmp->dst.format);
fill:
		if (color == 0)
			goto clear;

		if (was_clear && sna_pixmap(tmp->dst.pixmap)->clear_color == color) {
			sna_pixmap(tmp->dst.pixmap)->clear = true;
			return prepare_blt_nop(sna, tmp);
		}

		hint = 0;
		if (can_render(sna)) {
			hint |= PREFER_GPU;
			if ((flags & COMPOSITE_PARTIAL) == 0) {
				hint |= IGNORE_DAMAGE;
				if (width  == tmp->dst.pixmap->drawable.width &&
				    height == tmp->dst.pixmap->drawable.height)
					hint |= REPLACES;
			}
		}
		tmp->dst.bo = sna_drawable_use_bo(dst->pDrawable, hint,
						  &dst_box, &tmp->damage);
		assert(!tmp->damage || !DAMAGE_IS_ALL(*tmp->damage));
		if (tmp->dst.bo) {
			if (!kgem_bo_can_blt(&sna->kgem, tmp->dst.bo)) {
				DBG(("%s: can not blit to dst, tiling? %d, pitch? %d\n",
				     __FUNCTION__, tmp->dst.bo->tiling, tmp->dst.bo->pitch));
				return false;
			}
			if (hint & REPLACES)
				kgem_bo_undo(&sna->kgem, tmp->dst.bo);
			if (flags & COMPOSITE_UPLOAD)
				return false;
		} else {
			RegionRec region;

			region.extents = dst_box;
			region.data = NULL;

			hint = MOVE_WRITE | MOVE_INPLACE_HINT;
			if (flags & COMPOSITE_PARTIAL)
				hint |= MOVE_READ;
			if (!sna_drawable_move_region_to_cpu(dst->pDrawable, &region, hint))
				return false;
		}

		return prepare_blt_fill(sna, tmp, color);
	}

	if (!src->pDrawable) {
		DBG(("%s: unsupported procedural source\n",
		     __FUNCTION__));
		return false;
	}

	if (src->filter == PictFilterConvolution) {
		DBG(("%s: convolution filters not handled\n",
		     __FUNCTION__));
		return false;
	}

	if (op == PictOpOver && PICT_FORMAT_A(src_format) == 0)
		op = PictOpSrc;

	if (op != PictOpSrc) {
		DBG(("%s: unsupported op [%d] for blitting\n",
		     __FUNCTION__, op));
		return false;
	}

	if (!sna_transform_is_imprecise_integer_translation(src->transform, src->filter,
							    dst->polyMode == PolyModePrecise,
							    &tx, &ty)) {
		DBG(("%s: source transform is not an integer translation\n",
		     __FUNCTION__));
		return false;
	}
	DBG(("%s: converting transform to integer translation? %d (%d, %d)\n",
	     __FUNCTION__, src->transform != NULL, tx, ty));
	x += tx;
	y += ty;

	if ((x >= src->pDrawable->width ||
	     y >= src->pDrawable->height ||
	     x + width  <= 0 ||
	     y + height <= 0) &&
	    (!src->repeat || src->repeatType == RepeatNone)) {
		DBG(("%s: source is outside of valid area, converting to clear\n",
		     __FUNCTION__));
		goto clear;
	}

	src_pixmap = get_drawable_pixmap(src->pDrawable);
	if (is_clear(src_pixmap)) {
		if (src->repeat ||
		    (x >= 0 && y >= 0 &&
		     x + width  <= src_pixmap->drawable.width &&
		     y + height <= src_pixmap->drawable.height)) {
			color = color_convert(sna_pixmap(src_pixmap)->clear_color,
					      src->format, tmp->dst.format);
			goto fill;
		}
	}

	alpha_fixup = 0;
	if (!(dst->format == src_format ||
	      dst->format == alphaless(src_format) ||
	      (alphaless(dst->format) == alphaless(src_format) &&
	       sna_get_pixel_from_rgba(&alpha_fixup,
				       0, 0, 0, 0xffff,
				       dst->format)))) {
		DBG(("%s: incompatible src/dst formats src=%08x, dst=%08x\n",
		     __FUNCTION__, (unsigned)src_format, dst->format));
		return false;
	}

	/* XXX tiling? fixup extend none? */
	if (x < 0 || y < 0 ||
	    x + width  > src->pDrawable->width ||
	    y + height > src->pDrawable->height) {
		DBG(("%s: source extends outside (%d, %d), (%d, %d) of valid drawable %dx%d, repeat=%d\n",
		     __FUNCTION__,
		     x, y, x+width, y+height, src->pDrawable->width, src->pDrawable->height, src->repeatType));
		if (src->repeat && src->repeatType == RepeatNormal) {
			x = x % src->pDrawable->width;
			y = y % src->pDrawable->height;
			if (x < 0)
				x += src->pDrawable->width;
			if (y < 0)
				y += src->pDrawable->height;
			if (x + width  > src->pDrawable->width ||
			    y + height > src->pDrawable->height)
				return false;
		} else
			return false;
	}

	get_drawable_deltas(src->pDrawable, src_pixmap, &tx, &ty);
	x += tx + src->pDrawable->x;
	y += ty + src->pDrawable->y;
	if (x < 0 || y < 0 ||
	    x + width  > src_pixmap->drawable.width ||
	    y + height > src_pixmap->drawable.height) {
		DBG(("%s: source extends outside (%d, %d), (%d, %d) of valid pixmap %dx%d\n",
		     __FUNCTION__,
		     x, y, x+width, y+height, src_pixmap->drawable.width, src_pixmap->drawable.height));
		return false;
	}

	tmp->u.blt.src_pixmap = src_pixmap;
	tmp->u.blt.sx = x - dst_x;
	tmp->u.blt.sy = y - dst_y;
	DBG(("%s: blt dst offset (%d, %d), source offset (%d, %d), with alpha fixup? %x\n",
	     __FUNCTION__,
	     tmp->dst.x, tmp->dst.y, tmp->u.blt.sx, tmp->u.blt.sy, alpha_fixup));

	src_box.x1 = x;
	src_box.y1 = y;
	src_box.x2 = x + width;
	src_box.y2 = y + height;
	bo = __sna_render_pixmap_bo(sna, src_pixmap, &src_box, true);
	if (bo && !kgem_bo_can_blt(&sna->kgem, bo)) {
		DBG(("%s: can not blit from src size=%dx%d, tiling? %d, pitch? %d\n",
		     __FUNCTION__,
		     src_pixmap->drawable.width  < sna->render.max_3d_size,
		     src_pixmap->drawable.height < sna->render.max_3d_size,
		     bo->tiling, bo->pitch));

		if (src_pixmap->drawable.width  <= sna->render.max_3d_size &&
		    src_pixmap->drawable.height <= sna->render.max_3d_size &&
		    bo->pitch <= sna->render.max_3d_pitch &&
		    (flags & (COMPOSITE_UPLOAD | COMPOSITE_FALLBACK)) == 0)
		{
			return false;
		}

		bo = NULL;
	}

	hint = 0;
	if (bo || can_render(sna)) {
		hint |= PREFER_GPU;
		if ((flags & COMPOSITE_PARTIAL) == 0) {
			hint |= IGNORE_DAMAGE;
			if (width  == tmp->dst.pixmap->drawable.width &&
			    height == tmp->dst.pixmap->drawable.height)
				hint |= REPLACES;
		}
		if (bo)
			hint |= FORCE_GPU;
	}
	tmp->dst.bo = sna_drawable_use_bo(dst->pDrawable, hint,
					  &dst_box, &tmp->damage);
	assert(!tmp->damage || !DAMAGE_IS_ALL(*tmp->damage));

	if (tmp->dst.bo && hint & REPLACES) {
		struct sna_pixmap *priv = sna_pixmap(tmp->dst.pixmap);
		kgem_bo_pair_undo(&sna->kgem, priv->gpu_bo, priv->cpu_bo);
	}

	if (tmp->dst.pixmap == src_pixmap)
		bo = __sna_render_pixmap_bo(sna, src_pixmap, &src_box, true);

	ret = false;
	if (bo) {
		if (!tmp->dst.bo) {
			DBG(("%s: fallback -- unaccelerated read back\n",
			     __FUNCTION__));
fallback:
			if (flags & COMPOSITE_FALLBACK || !kgem_bo_is_busy(bo))
				goto put;
		} else if (!kgem_bo_can_blt(&sna->kgem, bo)) {
			DBG(("%s: fallback -- cannot blit from source\n",
			     __FUNCTION__));
			goto fallback;
		} else if (bo->snoop && tmp->dst.bo->snoop) {
			DBG(("%s: fallback -- can not copy between snooped bo\n",
			     __FUNCTION__));
			goto put;
		} else if (!kgem_bo_can_blt(&sna->kgem, tmp->dst.bo)) {
			DBG(("%s: fallback -- unaccelerated upload\n",
			     __FUNCTION__));
			goto fallback;
		} else if ((flags & COMPOSITE_UPLOAD) == 0) {
			ret = prepare_blt_copy(sna, tmp, bo, alpha_fixup);
			if (!ret)
				goto fallback;
		}
	} else {
		RegionRec region;

put:
		if (tmp->dst.bo == sna_pixmap(tmp->dst.pixmap)->cpu_bo) {
			DBG(("%s: dropping upload into CPU bo\n", __FUNCTION__));
			tmp->dst.bo = NULL;
			tmp->damage = NULL;
		}

		if (tmp->dst.bo == NULL) {
			hint = MOVE_INPLACE_HINT | MOVE_WRITE;
			if (flags & COMPOSITE_PARTIAL)
				hint |= MOVE_READ;

			region.extents = dst_box;
			region.data = NULL;
			if (!sna_drawable_move_region_to_cpu(dst->pDrawable,
							     &region, hint))
				return false;

			assert(tmp->damage == NULL);
		}

		region.extents = src_box;
		region.data = NULL;
		if (!sna_drawable_move_region_to_cpu(&src_pixmap->drawable,
						     &region, MOVE_READ))
			return false;

		ret = prepare_blt_put(sna, tmp, alpha_fixup);
	}

	return ret;
}

static void convert_done(struct sna *sna, const struct sna_composite_op *op)
{
	struct kgem *kgem = &sna->kgem;

	assert(kgem->nbatch <= KGEM_BATCH_SIZE(kgem));
	if (kgem->nexec > 1 && __kgem_ring_empty(kgem)) {
		DBG(("%s: flushing BLT operation on empty ring\n", __FUNCTION__));
		_kgem_submit(kgem);
	}

	kgem_bo_destroy(kgem, op->src.bo);
	sna_render_composite_redirect_done(sna, op);
}

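/* For gen6+ self-copies the done handler additionally emits an empty
 * XY_SETUP_CLIP; as with gen6_blt_copy_done(), this appears to act as a
 * blitter flush between overlapping blits to the same bo (assumption based
 * on the matching workaround in the copy path).
 */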
static void gen6_convert_done(struct sna *sna, const struct sna_composite_op *op)
{
	struct kgem *kgem = &sna->kgem;

	if (kgem_check_batch(kgem, 3)) {
		uint32_t *b = kgem->batch + kgem->nbatch;
		assert(sna->kgem.mode == KGEM_BLT);
		b[0] = XY_SETUP_CLIP;
		b[1] = b[2] = 0;
		kgem->nbatch += 3;
		assert(kgem->nbatch < kgem->surface);
	}

	convert_done(sna, op);
}

bool
sna_blt_composite__convert(struct sna *sna,
			   int x, int y,
			   int width, int height,
			   struct sna_composite_op *tmp)
{
	uint32_t alpha_fixup;
	int sx, sy;
	uint8_t op;

#if DEBUG_NO_BLT || NO_BLT_COMPOSITE
	return false;
#endif

	DBG(("%s src=%d, dst=%d (redirect? %d)\n", __FUNCTION__,
	     tmp->src.bo->handle, tmp->dst.bo->handle,
	     tmp->redirect.real_bo ? tmp->redirect.real_bo->handle : 0));

	if (!kgem_bo_can_blt(&sna->kgem, tmp->dst.bo) ||
	    !kgem_bo_can_blt(&sna->kgem, tmp->src.bo)) {
		DBG(("%s: cannot blt from src or to dst\n", __FUNCTION__));
		return false;
	}

	if (tmp->src.transform) {
		DBG(("%s: transforms not handled by the BLT\n", __FUNCTION__));
		return false;
	}

	if (tmp->src.filter == PictFilterConvolution) {
		DBG(("%s: convolution filters not handled\n",
		     __FUNCTION__));
		return false;
	}

	op = tmp->op;
	if (op == PictOpOver && PICT_FORMAT_A(tmp->src.pict_format) == 0)
		op = PictOpSrc;
	if (op != PictOpSrc) {
		DBG(("%s: unsupported op [%d] for blitting\n",
		     __FUNCTION__, op));
		return false;
	}

	alpha_fixup = 0;
	if (!(tmp->dst.format == tmp->src.pict_format ||
	      tmp->dst.format == alphaless(tmp->src.pict_format) ||
	      (alphaless(tmp->dst.format) == alphaless(tmp->src.pict_format) &&
	       sna_get_pixel_from_rgba(&alpha_fixup,
				       0, 0, 0, 0xffff,
				       tmp->dst.format)))) {
		DBG(("%s: incompatible src/dst formats src=%08x, dst=%08x\n",
		     __FUNCTION__,
		     (unsigned)tmp->src.pict_format,
		     (unsigned)tmp->dst.format));
		return false;
	}

	sx = tmp->src.offset[0];
	sy = tmp->src.offset[1];

	x += sx;
	y += sy;
	if (x < 0 || y < 0 ||
	    x + width  > tmp->src.width ||
	    y + height > tmp->src.height) {
		DBG(("%s: source extends outside (%d, %d), (%d, %d) of valid drawable %dx%d\n",
		     __FUNCTION__,
		     x, y, x+width, y+height, tmp->src.width, tmp->src.height));
		if (tmp->src.repeat == RepeatNormal) {
			int xx = x % tmp->src.width;
			int yy = y % tmp->src.height;
			if (xx < 0)
				xx += tmp->src.width;
			if (yy < 0)
				yy += tmp->src.height;
			if (xx + width  > tmp->src.width ||
			    yy + height > tmp->src.height)
				return false;

			sx += xx - x;
			sy += yy - y;
		} else
			return false;
	}

	DBG(("%s: blt dst offset (%d, %d), source offset (%d, %d), with alpha fixup? %x\n",
	     __FUNCTION__,
	     tmp->dst.x, tmp->dst.y, sx, sy, alpha_fixup));

	tmp->u.blt.src_pixmap = NULL;
	tmp->u.blt.sx = sx;
	tmp->u.blt.sy = sy;

	kgem_set_mode(&sna->kgem, KGEM_BLT, tmp->dst.bo);
	if (!kgem_check_many_bo_fenced(&sna->kgem, tmp->dst.bo, tmp->src.bo, NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_many_bo_fenced(&sna->kgem,
					       tmp->dst.bo, tmp->src.bo, NULL)) {
			DBG(("%s: fallback -- no room in aperture\n", __FUNCTION__));
			return sna_tiling_blt_composite(sna, tmp, tmp->src.bo,
							PICT_FORMAT_BPP(tmp->src.pict_format),
							alpha_fixup);
		}
		_kgem_set_mode(&sna->kgem, KGEM_BLT);
	}
	kgem_bcs_set_tiling(&sna->kgem, tmp->src.bo, tmp->dst.bo);

	if (alpha_fixup) {
		tmp->blt   = blt_composite_copy_with_alpha;
		tmp->box   = blt_composite_copy_box_with_alpha;
		tmp->boxes = blt_composite_copy_boxes_with_alpha;

		if (!sna_blt_alpha_fixup_init(sna, &tmp->u.blt,
					      tmp->src.bo, tmp->dst.bo,
					      PICT_FORMAT_BPP(tmp->src.pict_format),
					      alpha_fixup))
			return false;
	} else {
		tmp->blt   = blt_composite_copy;
		tmp->box   = blt_composite_copy_box;
		tmp->boxes = blt_composite_copy_boxes;
		if (sna->kgem.gen >= 0100)
			tmp->thread_boxes = blt_composite_copy_boxes__thread64;
		else
			tmp->thread_boxes = blt_composite_copy_boxes__thread;

		if (!sna_blt_copy_init(sna, &tmp->u.blt,
				       tmp->src.bo, tmp->dst.bo,
				       PICT_FORMAT_BPP(tmp->src.pict_format),
				       GXcopy))
			return false;
	}

	tmp->done = convert_done;
	if (sna->kgem.gen >= 060 && tmp->src.bo == tmp->dst.bo)
		tmp->done = gen6_convert_done;

	return true;
}

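/* The fill ops cache the active fill setup in sna->blt_state: when the
 * target bo changes (tracked by its unique_id), __sna_blt_fill_begin()
 * re-emits the setup packet and the cached pixel/alu are refreshed, so
 * consecutive fills to the same bo share a single setup.
 */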
static void sna_blt_fill_op_blt(struct sna *sna,
				const struct sna_fill_op *op,
				int16_t x, int16_t y,
				int16_t width, int16_t height)
{
	if (sna->blt_state.fill_bo != op->base.u.blt.bo[0]->unique_id) {
		const struct sna_blt_state *blt = &op->base.u.blt;

		__sna_blt_fill_begin(sna, blt);

		sna->blt_state.fill_bo = blt->bo[0]->unique_id;
		sna->blt_state.fill_pixel = blt->pixel;
		sna->blt_state.fill_alu = blt->alu;
	}

	sna_blt_fill_one(sna, &op->base.u.blt, x, y, width, height);
}

fastcall static void sna_blt_fill_op_box(struct sna *sna,
					 const struct sna_fill_op *op,
					 const BoxRec *box)
{
	if (sna->blt_state.fill_bo != op->base.u.blt.bo[0]->unique_id) {
		const struct sna_blt_state *blt = &op->base.u.blt;

		__sna_blt_fill_begin(sna, blt);

		sna->blt_state.fill_bo = blt->bo[0]->unique_id;
		sna->blt_state.fill_pixel = blt->pixel;
		sna->blt_state.fill_alu = blt->alu;
	}

	_sna_blt_fill_box(sna, &op->base.u.blt, box);
}

fastcall static void sna_blt_fill_op_boxes(struct sna *sna,
					   const struct sna_fill_op *op,
					   const BoxRec *box,
					   int nbox)
{
	if (sna->blt_state.fill_bo != op->base.u.blt.bo[0]->unique_id) {
		const struct sna_blt_state *blt = &op->base.u.blt;

		__sna_blt_fill_begin(sna, blt);

		sna->blt_state.fill_bo = blt->bo[0]->unique_id;
		sna->blt_state.fill_pixel = blt->pixel;
		sna->blt_state.fill_alu = blt->alu;
	}

	_sna_blt_fill_boxes(sna, &op->base.u.blt, box, nbox);
}

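/* Point fills use XY_PIXEL_BLT, a 2-dword packet: the command dword plus a
 * packed y16:x16 coordinate. pt_add() fuses the pair into one uint64_t so
 * each point costs a single store into the batch.
 */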
static inline uint64_t pt_add(uint32_t cmd, const DDXPointRec *pt, int16_t dx, int16_t dy)
{
	union {
		DDXPointRec pt;
		uint32_t i;
	} u;

	u.pt.x = pt->x + dx;
	u.pt.y = pt->y + dy;

	return cmd | (uint64_t)u.i << 32;
}

fastcall static void sna_blt_fill_op_points(struct sna *sna,
					    const struct sna_fill_op *op,
					    int16_t dx, int16_t dy,
					    const DDXPointRec *p, int n)
{
	const struct sna_blt_state *blt = &op->base.u.blt;
	struct kgem *kgem = &sna->kgem;
	uint32_t cmd;

	DBG(("%s: %08x x %d\n", __FUNCTION__, blt->pixel, n));

	if (sna->blt_state.fill_bo != op->base.u.blt.bo[0]->unique_id) {
		__sna_blt_fill_begin(sna, blt);

		sna->blt_state.fill_bo = blt->bo[0]->unique_id;
		sna->blt_state.fill_pixel = blt->pixel;
		sna->blt_state.fill_alu = blt->alu;
	}

	if (!kgem_check_batch(kgem, 2))
		sna_blt_fill_begin(sna, blt);

	cmd = XY_PIXEL_BLT;
	if (kgem->gen >= 040 && op->base.u.blt.bo[0]->tiling)
		cmd |= BLT_DST_TILED;

	do {
		uint32_t *b = kgem->batch + kgem->nbatch;
		int n_this_time, rem;

		assert(sna->kgem.mode == KGEM_BLT);
		n_this_time = n;
		rem = kgem_batch_space(kgem);
		if (2*n_this_time > rem)
			n_this_time = rem / 2;
		assert(n_this_time);
		n -= n_this_time;

		kgem->nbatch += 2 * n_this_time;
		assert(kgem->nbatch < kgem->surface);

		if ((dx|dy) == 0) {
			do {
				*(uint64_t *)b = pt_add(cmd, p++, 0, 0);
				b += 2;
			} while (--n_this_time);
		} else {
			do {
				*(uint64_t *)b = pt_add(cmd, p++, dx, dy);
				b += 2;
			} while (--n_this_time);
		}

		if (!n)
			return;

		sna_blt_fill_begin(sna, blt);
	} while (1);
}

bool sna_blt_fill(struct sna *sna, uint8_t alu,
		  struct kgem_bo *bo, int bpp,
		  uint32_t pixel,
		  struct sna_fill_op *fill)
{
#if DEBUG_NO_BLT || NO_BLT_FILL
	return false;
#endif

	DBG(("%s(alu=%d, pixel=%x, bpp=%d)\n", __FUNCTION__, alu, pixel, bpp));

	if (!kgem_bo_can_blt(&sna->kgem, bo)) {
		DBG(("%s: rejected due to incompatible Y-tiling\n",
		     __FUNCTION__));
		return false;
	}

	if (!sna_blt_fill_init(sna, &fill->base.u.blt,
			       bo, bpp, alu, pixel))
		return false;

	assert(sna->kgem.mode == KGEM_BLT);
	fill->blt   = sna_blt_fill_op_blt;
	fill->box   = sna_blt_fill_op_box;
	fill->boxes = sna_blt_fill_op_boxes;
	fill->points = sna_blt_fill_op_points;
	fill->done  =
		(void (*)(struct sna *, const struct sna_fill_op *))nop_done;
	return true;
}

static void sna_blt_copy_op_blt(struct sna *sna,
				const struct sna_copy_op *op,
				int16_t src_x, int16_t src_y,
				int16_t width, int16_t height,
				int16_t dst_x, int16_t dst_y)
{
	sna_blt_copy_one(sna, &op->base.u.blt,
			 src_x, src_y,
			 width, height,
			 dst_x, dst_y);
}

bool sna_blt_copy(struct sna *sna, uint8_t alu,
		  struct kgem_bo *src,
		  struct kgem_bo *dst,
		  int bpp,
		  struct sna_copy_op *op)
{
#if DEBUG_NO_BLT || NO_BLT_COPY
	return false;
#endif

	if (!kgem_bo_can_blt(&sna->kgem, src))
		return false;

	if (!kgem_bo_can_blt(&sna->kgem, dst))
		return false;

	if (!sna_blt_copy_init(sna, &op->base.u.blt,
			       src, dst,
			       bpp, alu))
		return false;

	op->blt  = sna_blt_copy_op_blt;
	if (sna->kgem.gen >= 060 && src == dst)
		op->done = (void (*)(struct sna *, const struct sna_copy_op *))
			    gen6_blt_copy_done;
	else
		op->done = (void (*)(struct sna *, const struct sna_copy_op *))
			    nop_done;
	return true;
}

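/* sna_blt_fill_box() includes a small peephole: if this solid fill
 * completely overwrites whatever the immediately preceding packet in the
 * batch wrote (same box, same bo, and an alu that ignores the
 * destination), the previous XY_COLOR_BLT or XY_SRC_COPY_BLT is rewritten
 * in place instead of appending another blt.
 */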
static bool sna_blt_fill_box(struct sna *sna, uint8_t alu,
			     struct kgem_bo *bo, int bpp,
			     uint32_t color,
			     const BoxRec *box)
{
	struct kgem *kgem = &sna->kgem;
	uint32_t br13, cmd, *b;
	bool overwrites;

	assert(kgem_bo_can_blt(kgem, bo));

	DBG(("%s: box=((%d, %d), (%d, %d))\n", __FUNCTION__,
	     box->x1, box->y1, box->x2, box->y2));

	assert(box->x1 >= 0);
	assert(box->y1 >= 0);

	cmd = XY_COLOR_BLT | (kgem->gen >= 0100 ? 5 : 4);
	br13 = bo->pitch;
	if (kgem->gen >= 040 && bo->tiling) {
		cmd |= BLT_DST_TILED;
		br13 >>= 2;
	}
	assert(br13 <= MAXSHORT);

	br13 |= fill_ROP[alu] << 16;
	br13 |= sna_br13_color_depth(bpp);
	if (bpp == 32)
		cmd |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;

	/* All too frequently one blt completely overwrites the previous */
	overwrites = alu == GXcopy || alu == GXclear || alu == GXset;
	if (overwrites) {
		if (sna->kgem.gen >= 0100) {
			if (kgem->nbatch >= 7 &&
			    kgem->batch[kgem->nbatch-7] == cmd &&
			    *(uint64_t *)&kgem->batch[kgem->nbatch-5] == *(const uint64_t *)box &&
			    kgem->reloc[kgem->nreloc-1].target_handle == bo->target_handle) {
				DBG(("%s: replacing last fill\n", __FUNCTION__));
				kgem->batch[kgem->nbatch-6] = br13;
				kgem->batch[kgem->nbatch-1] = color;
				return true;
			}
			if (kgem->nbatch >= 10 &&
			    (kgem->batch[kgem->nbatch-10] & 0xffc00000) == XY_SRC_COPY_BLT_CMD &&
			    *(uint64_t *)&kgem->batch[kgem->nbatch-8] == *(const uint64_t *)box &&
			    kgem->reloc[kgem->nreloc-2].target_handle == bo->target_handle) {
				DBG(("%s: replacing last copy\n", __FUNCTION__));
				kgem->batch[kgem->nbatch-10] = cmd;
				kgem->batch[kgem->nbatch-8] = br13;
				kgem->batch[kgem->nbatch-4] = color;
				/* Keep the src bo as part of the execlist, just remove
				 * its relocation entry.
				 */
				kgem->nreloc--;
				kgem->nbatch -= 3;
				return true;
			}
		} else {
			if (kgem->nbatch >= 6 &&
			    kgem->batch[kgem->nbatch-6] == cmd &&
			    *(uint64_t *)&kgem->batch[kgem->nbatch-4] == *(const uint64_t *)box &&
			    kgem->reloc[kgem->nreloc-1].target_handle == bo->target_handle) {
				DBG(("%s: replacing last fill\n", __FUNCTION__));
				kgem->batch[kgem->nbatch-5] = br13;
				kgem->batch[kgem->nbatch-1] = color;
				return true;
			}
			if (kgem->nbatch >= 8 &&
			    (kgem->batch[kgem->nbatch-8] & 0xffc00000) == XY_SRC_COPY_BLT_CMD &&
			    *(uint64_t *)&kgem->batch[kgem->nbatch-6] == *(const uint64_t *)box &&
			    kgem->reloc[kgem->nreloc-2].target_handle == bo->target_handle) {
				DBG(("%s: replacing last copy\n", __FUNCTION__));
				kgem->batch[kgem->nbatch-8] = cmd;
				kgem->batch[kgem->nbatch-7] = br13;
				kgem->batch[kgem->nbatch-3] = color;
				/* Keep the src bo as part of the execlist, just remove
				 * its relocation entry.
				 */
				kgem->nreloc--;
				kgem->nbatch -= 2;
				return true;
			}
		}
	}

	/* If we are already emitting scanline fills with this colour,
	 * keep doing so.
	 */
	if (sna->blt_state.fill_bo == bo->unique_id &&
	    sna->blt_state.fill_pixel == color &&
	    (sna->blt_state.fill_alu == alu ||
	     sna->blt_state.fill_alu == ~alu)) {
		DBG(("%s: matching last fill, converting to scanlines\n",
		     __FUNCTION__));
		return false;
	}

	kgem_set_mode(kgem, KGEM_BLT, bo);
	if (!kgem_check_batch(kgem, 7) ||
	    !kgem_check_reloc(kgem, 1) ||
	    !kgem_check_bo_fenced(kgem, bo)) {
		kgem_submit(kgem);
		if (!kgem_check_bo_fenced(&sna->kgem, bo))
			return false;

		_kgem_set_mode(kgem, KGEM_BLT);
	}
	kgem_bcs_set_tiling(&sna->kgem, NULL, bo);

	assert(kgem_check_batch(kgem, 6));
	assert(kgem_check_reloc(kgem, 1));

	assert(sna->kgem.mode == KGEM_BLT);
	b = kgem->batch + kgem->nbatch;
	b[0] = cmd;
	b[1] = br13;
	*(uint64_t *)(b+2) = *(const uint64_t *)box;
	if (kgem->gen >= 0100) {
		*(uint64_t *)(b+4) =
			kgem_add_reloc64(kgem, kgem->nbatch + 4, bo,
					 I915_GEM_DOMAIN_RENDER << 16 |
					 I915_GEM_DOMAIN_RENDER |
					 KGEM_RELOC_FENCED,
					 0);
		b[6] = color;
		kgem->nbatch += 7;
	} else {
		b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, bo,
				      I915_GEM_DOMAIN_RENDER << 16 |
				      I915_GEM_DOMAIN_RENDER |
				      KGEM_RELOC_FENCED,
				      0);
		b[5] = color;
		kgem->nbatch += 6;
	}
	assert(kgem->nbatch < kgem->surface);

	sna->blt_state.fill_bo = bo->unique_id;
	sna->blt_state.fill_pixel = color;
	sna->blt_state.fill_alu = ~alu;
	return true;
}

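/*
 * sna_blt_fill_boxes(): fill a list of boxes. A single box is handed to
 * sna_blt_fill_box() above; otherwise the colour, ROP and destination
 * are programmed once via XY_SETUP_MONO_PATTERN_SL_BLT and each box is
 * then emitted as a 3-dword XY_SCANLINE_BLT, re-issuing the setup
 * whenever the batch has to be submitted mid-run.
 */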
bool sna_blt_fill_boxes(struct sna *sna, uint8_t alu,
			struct kgem_bo *bo, int bpp,
			uint32_t pixel,
			const BoxRec *box, int nbox)
{
	struct kgem *kgem = &sna->kgem;
	uint32_t br13, cmd;

#if DEBUG_NO_BLT || NO_BLT_FILL_BOXES
	return false;
#endif

	DBG(("%s (%d, %08x, %d) x %d\n",
	     __FUNCTION__, bpp, pixel, alu, nbox));

	if (!kgem_bo_can_blt(kgem, bo)) {
		DBG(("%s: fallback -- cannot blt to dst\n", __FUNCTION__));
		return false;
	}

	if (alu == GXclear)
		pixel = 0;
	else if (alu == GXcopy) {
		if (pixel == 0)
			alu = GXclear;
		else if (pixel == -1)
			alu = GXset;
	}

	if (nbox == 1 && sna_blt_fill_box(sna, alu, bo, bpp, pixel, box))
		return true;

	br13 = bo->pitch;
	cmd = XY_SCANLINE_BLT;
	if (kgem->gen >= 040 && bo->tiling) {
		cmd |= 1 << 11;
		br13 >>= 2;
	}
	assert(br13 <= MAXSHORT);

	br13 |= 1<<31 | fill_ROP[alu] << 16;
	br13 |= sna_br13_color_depth(bpp);

	kgem_set_mode(kgem, KGEM_BLT, bo);
	if (!kgem_check_batch(kgem, 14) ||
	    !kgem_check_bo_fenced(kgem, bo)) {
		kgem_submit(kgem);
		if (!kgem_check_bo_fenced(&sna->kgem, bo))
			return false;
		_kgem_set_mode(kgem, KGEM_BLT);
	}

	if (sna->blt_state.fill_bo != bo->unique_id ||
	    sna->blt_state.fill_pixel != pixel ||
	    sna->blt_state.fill_alu != alu) {
		uint32_t *b;

		if (!kgem_check_batch(kgem, 24) ||
		    !kgem_check_reloc(kgem, 1)) {
			_kgem_submit(kgem);
			if (!kgem_check_bo_fenced(&sna->kgem, bo))
				return false;
			_kgem_set_mode(kgem, KGEM_BLT);
		}

		kgem_bcs_set_tiling(&sna->kgem, NULL, bo);

		assert(sna->kgem.mode == KGEM_BLT);
		b = kgem->batch + kgem->nbatch;
		if (kgem->gen >= 0100) {
			b[0] = XY_SETUP_MONO_PATTERN_SL_BLT | 8;
			if (bpp == 32)
				b[0] |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
			if (bo->tiling)
				b[0] |= BLT_DST_TILED;
			b[1] = br13;
			b[2] = 0;
			b[3] = 0;
			*(uint64_t *)(b+4) =
				kgem_add_reloc64(kgem, kgem->nbatch + 4, bo,
						 I915_GEM_DOMAIN_RENDER << 16 |
						 I915_GEM_DOMAIN_RENDER |
						 KGEM_RELOC_FENCED,
						 0);
			b[6] = pixel;
			b[7] = pixel;
			b[8] = 0;
			b[9] = 0;
			kgem->nbatch += 10;
		} else {
			b[0] = XY_SETUP_MONO_PATTERN_SL_BLT | 7;
			if (bpp == 32)
				b[0] |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
			if (bo->tiling && kgem->gen >= 040)
				b[0] |= BLT_DST_TILED;
			b[1] = br13;
			b[2] = 0;
			b[3] = 0;
			b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, bo,
					      I915_GEM_DOMAIN_RENDER << 16 |
					      I915_GEM_DOMAIN_RENDER |
					      KGEM_RELOC_FENCED,
					      0);
			b[5] = pixel;
			b[6] = pixel;
			b[7] = 0;
			b[8] = 0;
			kgem->nbatch += 9;
		}
		assert(kgem->nbatch < kgem->surface);

		sna->blt_state.fill_bo = bo->unique_id;
		sna->blt_state.fill_pixel = pixel;
		sna->blt_state.fill_alu = alu;
	}

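	/*
	 * Emit in chunks: each XY_SCANLINE_BLT costs 3 dwords, so at most
	 * batch_space/3 boxes fit before the batch must be flushed and the
	 * setup re-emitted for the remainder.
	 */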
	do {
		int nbox_this_time, rem;

		nbox_this_time = nbox;
		rem = kgem_batch_space(kgem);
		if (3*nbox_this_time > rem)
			nbox_this_time = rem / 3;
		DBG(("%s: emitting %d boxes out of %d (batch space %d)\n",
		     __FUNCTION__, nbox_this_time, nbox, rem));
		assert(nbox_this_time > 0);
		nbox -= nbox_this_time;

		assert(sna->kgem.mode == KGEM_BLT);
		do {
			uint32_t *b;

			DBG(("%s: (%d, %d), (%d, %d): %08x\n",
			     __FUNCTION__,
			     box->x1, box->y1,
			     box->x2, box->y2,
			     pixel));

			assert(box->x1 >= 0);
			assert(box->y1 >= 0);
			assert(box->y2 * bo->pitch <= kgem_bo_size(bo));

			b = kgem->batch + kgem->nbatch;
			kgem->nbatch += 3;
			assert(kgem->nbatch < kgem->surface);
			b[0] = cmd;
			*(uint64_t *)(b+1) = *(const uint64_t *)box;
			box++;
		} while (--nbox_this_time);

		if (nbox) {
			uint32_t *b;

			_kgem_submit(kgem);
			_kgem_set_mode(kgem, KGEM_BLT);
			kgem_bcs_set_tiling(&sna->kgem, NULL, bo);

			assert(sna->kgem.mode == KGEM_BLT);
			b = kgem->batch + kgem->nbatch;
			if (kgem->gen >= 0100) {
				b[0] = XY_SETUP_MONO_PATTERN_SL_BLT | 8;
				if (bpp == 32)
					b[0] |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
				if (bo->tiling)
					b[0] |= BLT_DST_TILED;
				b[1] = br13;
				b[2] = 0;
				b[3] = 0;
				*(uint64_t *)(b+4) =
					kgem_add_reloc64(kgem, kgem->nbatch + 4, bo,
							 I915_GEM_DOMAIN_RENDER << 16 |
							 I915_GEM_DOMAIN_RENDER |
							 KGEM_RELOC_FENCED,
							 0);
				b[6] = pixel;
				b[7] = pixel;
				b[8] = 0;
				b[9] = 0;
				kgem->nbatch += 10;
			} else {
				b[0] = XY_SETUP_MONO_PATTERN_SL_BLT | 7;
				if (bpp == 32)
					b[0] |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
				if (bo->tiling && kgem->gen >= 040)
					b[0] |= BLT_DST_TILED;
				b[1] = br13;
				b[2] = 0;
				b[3] = 0;
				b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, bo,
						      I915_GEM_DOMAIN_RENDER << 16 |
						      I915_GEM_DOMAIN_RENDER |
						      KGEM_RELOC_FENCED,
						      0);
				b[5] = pixel;
				b[6] = pixel;
				b[7] = 0;
				b[8] = 0;
				kgem->nbatch += 9;
			}
			assert(kgem->nbatch < kgem->surface);
			assert(kgem_check_batch(kgem, 3));
		}
	} while (nbox);

	if (kgem->nexec > 1 && __kgem_ring_empty(kgem)) {
		DBG(("%s: flushing BLT operation on empty ring\n", __FUNCTION__));
		_kgem_submit(kgem);
	}

	return true;
}

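/*
 * sna_blt_copy_boxes(): stream one XY_SRC_COPY_BLT per box. The
 * (dst_dx | dst_dy) == 0 fast path stores the command header and each
 * box as fused 64-bit writes; otherwise every coordinate pair is
 * translated before packing. If src and dst cannot both be fenced into
 * the aperture, the operation falls back to sna_tiling_blt_copy_boxes().
 */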
bool sna_blt_copy_boxes(struct sna *sna, uint8_t alu,
			struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
			struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
			int bpp, const BoxRec *box, int nbox)
{
	struct kgem *kgem = &sna->kgem;
	unsigned src_pitch, br13, cmd;

#if DEBUG_NO_BLT || NO_BLT_COPY_BOXES
	return false;
#endif

	DBG(("%s src=(%d, %d) -> (%d, %d) x %d, tiling=(%d, %d), pitch=(%d, %d)\n",
	     __FUNCTION__, src_dx, src_dy, dst_dx, dst_dy, nbox,
	     src_bo->tiling, dst_bo->tiling,
	     src_bo->pitch, dst_bo->pitch));
	assert(nbox);

	if (wedged(sna) || !kgem_bo_can_blt(kgem, src_bo) || !kgem_bo_can_blt(kgem, dst_bo)) {
		DBG(("%s: cannot blt to src? %d or dst? %d\n",
		     __FUNCTION__,
		     kgem_bo_can_blt(kgem, src_bo),
		     kgem_bo_can_blt(kgem, dst_bo)));
		return false;
	}

	cmd = XY_SRC_COPY_BLT_CMD;
	if (bpp == 32)
		cmd |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;

	src_pitch = src_bo->pitch;
	if (kgem->gen >= 040 && src_bo->tiling) {
		cmd |= BLT_SRC_TILED;
		src_pitch >>= 2;
	}
	assert(src_pitch <= MAXSHORT);

	br13 = dst_bo->pitch;
	if (kgem->gen >= 040 && dst_bo->tiling) {
		cmd |= BLT_DST_TILED;
		br13 >>= 2;
	}
	assert(br13 <= MAXSHORT);

	br13 |= copy_ROP[alu] << 16;
	br13 |= sna_br13_color_depth(bpp);

	/* Compare the first box against a previous fill: if this copy
	 * overwrites the same destination box, that fill is dead and can
	 * be dropped from the batch.
	 */
	if ((alu == GXcopy || alu == GXclear || alu == GXset) &&
	    kgem->reloc[kgem->nreloc-1].target_handle == dst_bo->target_handle) {
		if (kgem->gen >= 0100) {
			if (kgem->nbatch >= 7 &&
			    kgem->batch[kgem->nbatch-7] == (XY_COLOR_BLT | (cmd & (BLT_DST_TILED | BLT_WRITE_ALPHA | BLT_WRITE_RGB)) | 5) &&
			    kgem->batch[kgem->nbatch-5] == ((uint32_t)(box->y1 + dst_dy) << 16 | (uint16_t)(box->x1 + dst_dx)) &&
			    kgem->batch[kgem->nbatch-4] == ((uint32_t)(box->y2 + dst_dy) << 16 | (uint16_t)(box->x2 + dst_dx))) {
				DBG(("%s: deleting last fill\n", __FUNCTION__));
				kgem->nbatch -= 7;
				kgem->nreloc--;
			}
		} else {
			if (kgem->nbatch >= 6 &&
			    kgem->batch[kgem->nbatch-6] == (XY_COLOR_BLT | (cmd & (BLT_DST_TILED | BLT_WRITE_ALPHA | BLT_WRITE_RGB)) | 4) &&
			    kgem->batch[kgem->nbatch-4] == ((uint32_t)(box->y1 + dst_dy) << 16 | (uint16_t)(box->x1 + dst_dx)) &&
			    kgem->batch[kgem->nbatch-3] == ((uint32_t)(box->y2 + dst_dy) << 16 | (uint16_t)(box->x2 + dst_dx))) {
				DBG(("%s: deleting last fill\n", __FUNCTION__));
				kgem->nbatch -= 6;
				kgem->nreloc--;
			}
		}
	}

	kgem_set_mode(kgem, KGEM_BLT, dst_bo);
	if (!kgem_check_batch(kgem, 10) ||
	    !kgem_check_reloc(kgem, 2) ||
	    !kgem_check_many_bo_fenced(kgem, dst_bo, src_bo, NULL)) {
		kgem_submit(kgem);
		if (!kgem_check_many_bo_fenced(kgem, dst_bo, src_bo, NULL)) {
			DBG(("%s: not enough room in aperture, fallback to tiling copy\n", __FUNCTION__));
			return sna_tiling_blt_copy_boxes(sna, alu,
							 src_bo, src_dx, src_dy,
							 dst_bo, dst_dx, dst_dy,
							 bpp, box, nbox);
		}
		_kgem_set_mode(kgem, KGEM_BLT);
	}
	kgem_bcs_set_tiling(&sna->kgem, src_bo, dst_bo);

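	/*
	 * Fast path: with no destination offset the box can be stored into
	 * the batch verbatim, and the command dword plus br13 collapse into
	 * a single 64-bit header; only the source coordinates need the
	 * (src_dx, src_dy) translation, applied by add2() to the packed
	 * y16|x16 pair.
	 */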
	if ((dst_dx | dst_dy) == 0) {
		if (kgem->gen >= 0100) {
			uint64_t hdr = (uint64_t)br13 << 32 | cmd | 8;
			do {
				int nbox_this_time, rem;

				nbox_this_time = nbox;
				rem = kgem_batch_space(kgem);
				if (10*nbox_this_time > rem)
					nbox_this_time = rem / 10;
				if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc)
					nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc)/2;
				DBG(("%s: emitting %d boxes out of %d (batch space %d)\n",
				     __FUNCTION__, nbox_this_time, nbox, rem));
				assert(nbox_this_time > 0);
				nbox -= nbox_this_time;

				assert(sna->kgem.mode == KGEM_BLT);
				do {
					uint32_t *b = kgem->batch + kgem->nbatch;

					DBG(("  %s: box=(%d, %d)x(%d, %d)\n",
					     __FUNCTION__,
					     box->x1, box->y1,
					     box->x2 - box->x1, box->y2 - box->y1));

					assert(box->x1 + src_dx >= 0);
					assert(box->y1 + src_dy >= 0);
					assert(box->x1 + src_dx <= INT16_MAX);
					assert(box->y1 + src_dy <= INT16_MAX);

					assert(box->x1 >= 0);
					assert(box->y1 >= 0);

					*(uint64_t *)&b[0] = hdr;
					*(uint64_t *)&b[2] = *(const uint64_t *)box;
					*(uint64_t *)(b+4) =
						kgem_add_reloc64(kgem, kgem->nbatch + 4, dst_bo,
								 I915_GEM_DOMAIN_RENDER << 16 |
								 I915_GEM_DOMAIN_RENDER |
								 KGEM_RELOC_FENCED,
								 0);
					b[6] = add2(b[2], src_dx, src_dy);
					b[7] = src_pitch;
					*(uint64_t *)(b+8) =
						kgem_add_reloc64(kgem, kgem->nbatch + 8, src_bo,
								 I915_GEM_DOMAIN_RENDER << 16 |
								 KGEM_RELOC_FENCED,
								 0);
					kgem->nbatch += 10;
					assert(kgem->nbatch < kgem->surface);
					box++;
				} while (--nbox_this_time);

				if (!nbox)
					break;

				_kgem_submit(kgem);
				_kgem_set_mode(kgem, KGEM_BLT);
				kgem_bcs_set_tiling(&sna->kgem, src_bo, dst_bo);
			} while (1);
		} else {
			uint64_t hdr = (uint64_t)br13 << 32 | cmd | 6;
			do {
				int nbox_this_time, rem;

				nbox_this_time = nbox;
				rem = kgem_batch_space(kgem);
				if (8*nbox_this_time > rem)
					nbox_this_time = rem / 8;
				if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc)
					nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc)/2;
				DBG(("%s: emitting %d boxes out of %d (batch space %d)\n",
				     __FUNCTION__, nbox_this_time, nbox, rem));
				assert(nbox_this_time > 0);
				nbox -= nbox_this_time;

				assert(sna->kgem.mode == KGEM_BLT);
				do {
					uint32_t *b = kgem->batch + kgem->nbatch;

					DBG(("  %s: box=(%d, %d)x(%d, %d)\n",
					     __FUNCTION__,
					     box->x1, box->y1,
					     box->x2 - box->x1, box->y2 - box->y1));

					assert(box->x1 + src_dx >= 0);
					assert(box->y1 + src_dy >= 0);
					assert(box->x1 + src_dx <= INT16_MAX);
					assert(box->y1 + src_dy <= INT16_MAX);

					assert(box->x1 >= 0);
					assert(box->y1 >= 0);

					*(uint64_t *)&b[0] = hdr;
					*(uint64_t *)&b[2] = *(const uint64_t *)box;
					b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, dst_bo,
							      I915_GEM_DOMAIN_RENDER << 16 |
							      I915_GEM_DOMAIN_RENDER |
							      KGEM_RELOC_FENCED,
							      0);
					b[5] = add2(b[2], src_dx, src_dy);
					b[6] = src_pitch;
					b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, src_bo,
							      I915_GEM_DOMAIN_RENDER << 16 |
							      KGEM_RELOC_FENCED,
							      0);
					kgem->nbatch += 8;
					assert(kgem->nbatch < kgem->surface);
					box++;
				} while (--nbox_this_time);

				if (!nbox)
					break;

				_kgem_submit(kgem);
				_kgem_set_mode(kgem, KGEM_BLT);
				kgem_bcs_set_tiling(&sna->kgem, src_bo, dst_bo);
			} while (1);
		}
	} else {
		if (kgem->gen >= 0100) {
			cmd |= 8;
			do {
				int nbox_this_time, rem;

				nbox_this_time = nbox;
				rem = kgem_batch_space(kgem);
				if (10*nbox_this_time > rem)
					nbox_this_time = rem / 10;
				if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc)
					nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc)/2;
				DBG(("%s: emitting %d boxes out of %d (batch space %d)\n",
				     __FUNCTION__, nbox_this_time, nbox, rem));
				assert(nbox_this_time > 0);
				nbox -= nbox_this_time;

				assert(sna->kgem.mode == KGEM_BLT);
				do {
					uint32_t *b = kgem->batch + kgem->nbatch;

					DBG(("  %s: box=(%d, %d)x(%d, %d)\n",
					     __FUNCTION__,
					     box->x1, box->y1,
					     box->x2 - box->x1, box->y2 - box->y1));

					assert(box->x1 + src_dx >= 0);
					assert(box->y1 + src_dy >= 0);

					assert(box->x1 + dst_dx >= 0);
					assert(box->y1 + dst_dy >= 0);

					b[0] = cmd;
					b[1] = br13;
					b[2] = ((box->y1 + dst_dy) << 16) | (box->x1 + dst_dx);
					b[3] = ((box->y2 + dst_dy) << 16) | (box->x2 + dst_dx);
					*(uint64_t *)(b+4) =
						kgem_add_reloc64(kgem, kgem->nbatch + 4, dst_bo,
								 I915_GEM_DOMAIN_RENDER << 16 |
								 I915_GEM_DOMAIN_RENDER |
								 KGEM_RELOC_FENCED,
								 0);
					b[6] = ((box->y1 + src_dy) << 16) | (box->x1 + src_dx);
					b[7] = src_pitch;
					*(uint64_t *)(b+8) =
						kgem_add_reloc64(kgem, kgem->nbatch + 8, src_bo,
								 I915_GEM_DOMAIN_RENDER << 16 |
								 KGEM_RELOC_FENCED,
								 0);
					kgem->nbatch += 10;
					assert(kgem->nbatch < kgem->surface);
					box++;
				} while (--nbox_this_time);

				if (!nbox)
					break;

				_kgem_submit(kgem);
				_kgem_set_mode(kgem, KGEM_BLT);
				kgem_bcs_set_tiling(&sna->kgem, src_bo, dst_bo);
			} while (1);
		} else {
			cmd |= 6;
			do {
				int nbox_this_time, rem;

				nbox_this_time = nbox;
				rem = kgem_batch_space(kgem);
				if (8*nbox_this_time > rem)
					nbox_this_time = rem / 8;
				if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc)
					nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc)/2;
				DBG(("%s: emitting %d boxes out of %d (batch space %d)\n",
				     __FUNCTION__, nbox_this_time, nbox, rem));
				assert(nbox_this_time > 0);
				nbox -= nbox_this_time;

				assert(sna->kgem.mode == KGEM_BLT);
				do {
					uint32_t *b = kgem->batch + kgem->nbatch;

					DBG(("  %s: box=(%d, %d)x(%d, %d)\n",
					     __FUNCTION__,
					     box->x1, box->y1,
					     box->x2 - box->x1, box->y2 - box->y1));

					assert(box->x1 + src_dx >= 0);
					assert(box->y1 + src_dy >= 0);

					assert(box->x1 + dst_dx >= 0);
					assert(box->y1 + dst_dy >= 0);

					b[0] = cmd;
					b[1] = br13;
					b[2] = ((box->y1 + dst_dy) << 16) | (box->x1 + dst_dx);
					b[3] = ((box->y2 + dst_dy) << 16) | (box->x2 + dst_dx);
					b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, dst_bo,
							      I915_GEM_DOMAIN_RENDER << 16 |
							      I915_GEM_DOMAIN_RENDER |
							      KGEM_RELOC_FENCED,
							      0);
					b[5] = ((box->y1 + src_dy) << 16) | (box->x1 + src_dx);
					b[6] = src_pitch;
					b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, src_bo,
							      I915_GEM_DOMAIN_RENDER << 16 |
							      KGEM_RELOC_FENCED,
							      0);
					kgem->nbatch += 8;
					assert(kgem->nbatch < kgem->surface);
					box++;
				} while (--nbox_this_time);

				if (!nbox)
					break;

				_kgem_submit(kgem);
				_kgem_set_mode(kgem, KGEM_BLT);
				kgem_bcs_set_tiling(&sna->kgem, src_bo, dst_bo);
			} while (1);
		}
	}

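	/* Flush now if the GPU has drained the ring; otherwise, for gen6+
	 * self-copies, append an XY_SETUP_CLIP with a null clip as a cheap
	 * separator between potentially overlapping blits on the same bo.
	 */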
	if (kgem->nexec > 1 && __kgem_ring_empty(kgem)) {
		DBG(("%s: flushing BLT operation on empty ring\n", __FUNCTION__));
		_kgem_submit(kgem);
	} else if (kgem->gen >= 060 && src_bo == dst_bo && kgem_check_batch(kgem, 3)) {
		uint32_t *b = kgem->batch + kgem->nbatch;
		assert(sna->kgem.mode == KGEM_BLT);
		b[0] = XY_SETUP_CLIP;
		b[1] = b[2] = 0;
		kgem->nbatch += 3;
		assert(kgem->nbatch < kgem->surface);
	}

	sna->blt_state.fill_bo = 0;
	return true;
}

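/*
 * sna_blt_copy_boxes__with_alpha(): copy boxes through
 * XY_FULL_MONO_PATTERN_BLT, loading alpha_fixup into the pattern colour
 * dwords; combined with the selected raster operation, this evidently
 * lets the blitter supply the constant alpha that the source format
 * lacks.
 */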
bool sna_blt_copy_boxes__with_alpha(struct sna *sna, uint8_t alu,
				    struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
				    struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
				    int bpp, int alpha_fixup,
				    const BoxRec *box, int nbox)
{
	struct kgem *kgem = &sna->kgem;
	unsigned src_pitch, br13, cmd;

#if DEBUG_NO_BLT || NO_BLT_COPY_BOXES
	return false;
#endif

	DBG(("%s src=(%d, %d) -> (%d, %d) x %d, tiling=(%d, %d), pitch=(%d, %d)\n",
	     __FUNCTION__, src_dx, src_dy, dst_dx, dst_dy, nbox,
	     src_bo->tiling, dst_bo->tiling,
	     src_bo->pitch, dst_bo->pitch));

	if (wedged(sna) || !kgem_bo_can_blt(kgem, src_bo) || !kgem_bo_can_blt(kgem, dst_bo)) {
		DBG(("%s: cannot blt to src? %d or dst? %d\n",
		     __FUNCTION__,
		     kgem_bo_can_blt(kgem, src_bo),
		     kgem_bo_can_blt(kgem, dst_bo)));
		return false;
	}

	cmd = XY_FULL_MONO_PATTERN_BLT | (kgem->gen >= 0100 ? 12 : 10);
	src_pitch = src_bo->pitch;
	if (kgem->gen >= 040 && src_bo->tiling) {
		cmd |= BLT_SRC_TILED;
		src_pitch >>= 2;
	}
	assert(src_pitch <= MAXSHORT);

	br13 = dst_bo->pitch;
	if (kgem->gen >= 040 && dst_bo->tiling) {
		cmd |= BLT_DST_TILED;
		br13 >>= 2;
	}
	assert(br13 <= MAXSHORT);

	br13 |= copy_ROP[alu] << 16;
	br13 |= sna_br13_color_depth(bpp);

	kgem_set_mode(kgem, KGEM_BLT, dst_bo);
	if (!kgem_check_many_bo_fenced(kgem, dst_bo, src_bo, NULL)) {
		DBG(("%s: cannot fit src+dst into aperture\n", __FUNCTION__));
		return false;
	}

	/* Compare first box against a previous fill */
	if ((alu == GXcopy || alu == GXclear || alu == GXset) &&
	    kgem->reloc[kgem->nreloc-1].target_handle == dst_bo->target_handle) {
		if (kgem->gen >= 0100) {
			if (kgem->nbatch >= 7 &&
			    kgem->batch[kgem->nbatch-7] == (XY_COLOR_BLT | (cmd & (BLT_WRITE_ALPHA | BLT_WRITE_RGB)) | 5) &&
			    kgem->batch[kgem->nbatch-5] == ((uint32_t)(box->y1 + dst_dy) << 16 | (uint16_t)(box->x1 + dst_dx)) &&
			    kgem->batch[kgem->nbatch-4] == ((uint32_t)(box->y2 + dst_dy) << 16 | (uint16_t)(box->x2 + dst_dx))) {
				DBG(("%s: deleting last fill\n", __FUNCTION__));
				kgem->nbatch -= 7;
				kgem->nreloc--;
			}
		} else {
			if (kgem->nbatch >= 6 &&
			    kgem->batch[kgem->nbatch-6] == (XY_COLOR_BLT | (cmd & (BLT_WRITE_ALPHA | BLT_WRITE_RGB)) | 4) &&
			    kgem->batch[kgem->nbatch-4] == ((uint32_t)(box->y1 + dst_dy) << 16 | (uint16_t)(box->x1 + dst_dx)) &&
			    kgem->batch[kgem->nbatch-3] == ((uint32_t)(box->y2 + dst_dy) << 16 | (uint16_t)(box->x2 + dst_dx))) {
				DBG(("%s: deleting last fill\n", __FUNCTION__));
				kgem->nbatch -= 6;
				kgem->nreloc--;
			}
		}
	}

	while (nbox--) {
		uint32_t *b;

		if (!kgem_check_batch(kgem, 14) ||
		    !kgem_check_reloc(kgem, 2)) {
			_kgem_submit(kgem);
			_kgem_set_mode(kgem, KGEM_BLT);
			kgem_bcs_set_tiling(&sna->kgem, src_bo, dst_bo);
		}

		assert(sna->kgem.mode == KGEM_BLT);
		b = kgem->batch + kgem->nbatch;
		b[0] = cmd;
		b[1] = br13;
		b[2] = (box->y1 + dst_dy) << 16 | (box->x1 + dst_dx);
		b[3] = (box->y2 + dst_dy) << 16 | (box->x2 + dst_dx);
		if (sna->kgem.gen >= 0100) {
			*(uint64_t *)(b+4) =
				kgem_add_reloc64(kgem, kgem->nbatch + 4, dst_bo,
						 I915_GEM_DOMAIN_RENDER << 16 |
						 I915_GEM_DOMAIN_RENDER |
						 KGEM_RELOC_FENCED,
						 0);
			b[6] = src_pitch;
			b[7] = (box->y1 + src_dy) << 16 | (box->x1 + src_dx);
			*(uint64_t *)(b+8) =
				kgem_add_reloc64(kgem, kgem->nbatch + 8, src_bo,
						 I915_GEM_DOMAIN_RENDER << 16 |
						 KGEM_RELOC_FENCED,
						 0);
			b[10] = alpha_fixup;
			b[11] = alpha_fixup;
			b[12] = 0;
			b[13] = 0;
			kgem->nbatch += 14;
		} else {
			b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, dst_bo,
					      I915_GEM_DOMAIN_RENDER << 16 |
					      I915_GEM_DOMAIN_RENDER |
					      KGEM_RELOC_FENCED,
					      0);
			b[5] = src_pitch;
			b[6] = (box->y1 + src_dy) << 16 | (box->x1 + src_dx);
			b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, src_bo,
					      I915_GEM_DOMAIN_RENDER << 16 |
					      KGEM_RELOC_FENCED,
					      0);
			b[8] = alpha_fixup;
			b[9] = alpha_fixup;
			b[10] = 0;
			b[11] = 0;
			kgem->nbatch += 12;
		}
		assert(kgem->nbatch < kgem->surface);
		box++;
	}

	if (kgem->nexec > 1 && __kgem_ring_empty(kgem)) {
		DBG(("%s: flushing BLT operation on empty ring\n", __FUNCTION__));
		_kgem_submit(kgem);
	}

	sna->blt_state.fill_bo = 0;
	return true;
}

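/*
 * box_extents(): compute the bounding box of n boxes; used by the
 * fallback below to size a temporary bo when an in-place detiling of
 * the source is not possible.
 */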
static void box_extents(const BoxRec *box, int n, BoxRec *extents)
{
	*extents = *box;
	while (--n) {
		box++;
		if (box->x1 < extents->x1)
			extents->x1 = box->x1;
		if (box->y1 < extents->y1)
			extents->y1 = box->y1;

		if (box->x2 > extents->x2)
			extents->x2 = box->x2;
		if (box->y2 > extents->y2)
			extents->y2 = box->y2;
	}
}

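/*
 * sna_blt_copy_boxes_fallback(): make both surfaces blittable before
 * deferring to sna_blt_copy_boxes(). Y-tiled bo that the blitter cannot
 * address are switched to X-tiling in place where possible; for a
 * self-copy that cannot be retiled, the source boxes are first staged
 * through a temporary X-tiled bo covering their extents.
 */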
bool sna_blt_copy_boxes_fallback(struct sna *sna, uint8_t alu,
				 const DrawableRec *src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
				 const DrawableRec *dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
				 const BoxRec *box, int nbox)
{
	struct kgem_bo *free_bo = NULL;
	bool ret;

	DBG(("%s: alu=%d, n=%d\n", __FUNCTION__, alu, nbox));

	if (!sna_blt_compare_depth(src, dst)) {
		DBG(("%s: mismatching depths %d -> %d\n",
		     __FUNCTION__, src->depth, dst->depth));
		return false;
	}

	if (src_bo == dst_bo) {
		DBG(("%s: dst == src\n", __FUNCTION__));

		if (src_bo->tiling == I915_TILING_Y &&
		    !sna->kgem.can_blt_y &&
		    kgem_bo_blt_pitch_is_ok(&sna->kgem, src_bo)) {
			struct kgem_bo *bo;

			DBG(("%s: src is Y-tiled\n", __FUNCTION__));

			if (src->type != DRAWABLE_PIXMAP)
				return false;

			assert(sna_pixmap((PixmapPtr)src)->gpu_bo == src_bo);
			bo = sna_pixmap_change_tiling((PixmapPtr)src, I915_TILING_X);
			if (bo == NULL) {
				BoxRec extents;

				DBG(("%s: y-tiling conversion failed\n",
				     __FUNCTION__));

				box_extents(box, nbox, &extents);
				free_bo = kgem_create_2d(&sna->kgem,
							 extents.x2 - extents.x1,
							 extents.y2 - extents.y1,
							 src->bitsPerPixel,
							 I915_TILING_X, 0);
				if (free_bo == NULL) {
					DBG(("%s: fallback -- temp allocation failed\n",
					     __FUNCTION__));
					return false;
				}

				if (!sna_blt_copy_boxes(sna, GXcopy,
							src_bo, src_dx, src_dy,
							free_bo, -extents.x1, -extents.y1,
							src->bitsPerPixel,
							box, nbox)) {
					DBG(("%s: fallback -- temp copy failed\n",
					     __FUNCTION__));
					kgem_bo_destroy(&sna->kgem, free_bo);
					return false;
				}

				src_dx = -extents.x1;
				src_dy = -extents.y1;
				src_bo = free_bo;
			} else
				dst_bo = src_bo = bo;
		}
	} else {
		if (src_bo->tiling == I915_TILING_Y &&
		    !sna->kgem.can_blt_y &&
		    kgem_bo_blt_pitch_is_ok(&sna->kgem, src_bo)) {
			DBG(("%s: src is y-tiled\n", __FUNCTION__));
			if (src->type != DRAWABLE_PIXMAP)
				return false;
			assert(sna_pixmap((PixmapPtr)src)->gpu_bo == src_bo);
			src_bo = sna_pixmap_change_tiling((PixmapPtr)src, I915_TILING_X);
			if (src_bo == NULL) {
				DBG(("%s: fallback -- src y-tiling conversion failed\n",
				     __FUNCTION__));
				return false;
			}
		}

		if (dst_bo->tiling == I915_TILING_Y &&
		    !sna->kgem.can_blt_y &&
		    kgem_bo_blt_pitch_is_ok(&sna->kgem, dst_bo)) {
			DBG(("%s: dst is y-tiled\n", __FUNCTION__));
			if (dst->type != DRAWABLE_PIXMAP)
				return false;
			assert(sna_pixmap((PixmapPtr)dst)->gpu_bo == dst_bo);
			dst_bo = sna_pixmap_change_tiling((PixmapPtr)dst, I915_TILING_X);
			if (dst_bo == NULL) {
				DBG(("%s: fallback -- dst y-tiling conversion failed\n",
				     __FUNCTION__));
				return false;
			}
		}
	}

	ret = sna_blt_copy_boxes(sna, alu,
				 src_bo, src_dx, src_dy,
				 dst_bo, dst_dx, dst_dy,
				 dst->bitsPerPixel,
				 box, nbox);

	if (free_bo)
		kgem_bo_destroy(&sna->kgem, free_bo);

	return ret;
}