1 /*
2  * Copyright (c) 2011 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  *
23  * Authors:
24  *    Chris Wilson <chris@chris-wilson.co.uk>
25  *
26  */
27 
28 #ifdef HAVE_CONFIG_H
29 #include "config.h"
30 #endif
31 
32 #include "sna.h"
33 #include "sna_render.h"
34 #include "sna_reg.h"
35 
36 #include <sys/mman.h>
37 
38 #define PITCH(x, y) ALIGN((x)*(y), 4)
39 
40 #define FORCE_INPLACE 0 /* 1 upload directly, -1 force indirect */
41 
42 /* XXX Need to avoid using GTT fenced access for I915_TILING_Y on 855GM */
43 
upload_too_large(struct sna * sna,int width,int height)44 static inline bool upload_too_large(struct sna *sna, int width, int height)
45 {
46 	return width * height * 4 > sna->kgem.max_upload_tile_size;
47 }
48 
must_tile(struct sna * sna,int width,int height)49 static inline bool must_tile(struct sna *sna, int width, int height)
50 {
51 	return (width  > sna->render.max_3d_size ||
52 		height > sna->render.max_3d_size ||
53 		upload_too_large(sna, width, height));
54 }
55 
/* Decide whether a GPU->CPU read can be performed directly through a CPU
 * mapping of the bo (no blit, no staging buffer).  Returns true when the
 * inplace read is both possible and expected to be cheap. */
static bool download_inplace__cpu(struct kgem *kgem,
				  PixmapPtr p, struct kgem_bo *bo,
				  const BoxRec *box, int nbox)
{
	BoxRec extents;

	switch (bo->tiling) {
	case I915_TILING_X:
		/* X-tiling is only readable if we have a software detiler. */
		if (!kgem->memcpy_from_tiled_x)
			return false;
		/* fall through */
	case I915_TILING_NONE:
		break;
	default:
		/* Y-tiling (or anything unknown) cannot be read inplace. */
		return false;
	}

	if (!kgem_bo_can_map__cpu(kgem, bo, false))
		return false;

	/* With LLC the CPU mapping is cache-coherent, so always cheap. */
	if (kgem->has_llc)
		return true;

	/* Without LLC, reading through the uncached mapping is slow; only
	 * worth it when we read (nearly) the whole object anyway.
	 * NOTE(review): extents.y2 is taken from each successive box
	 * unconditionally — presumably the boxes are sorted by y; confirm
	 * against the callers. */
	extents = *box;
	while (--nbox) {
		++box;
		if (box->x1 < extents.x1)
			extents.x1 = box->x1;
		if (box->x2 > extents.x2)
			extents.x2 = box->x2;
		extents.y2 = box->y2;
	}

	if (extents.x2 - extents.x1 == p->drawable.width &&
	    extents.y2 - extents.y1 == p->drawable.height)
		return true;

	/* Tiny objects fit in a single page: cheap regardless of coverage. */
	return __kgem_bo_size(bo) <= PAGE_SIZE;
}
94 
95 static bool
read_boxes_inplace__cpu(struct kgem * kgem,PixmapPtr pixmap,struct kgem_bo * bo,const BoxRec * box,int n)96 read_boxes_inplace__cpu(struct kgem *kgem,
97 			PixmapPtr pixmap, struct kgem_bo *bo,
98 			const BoxRec *box, int n)
99 {
100 	int bpp = pixmap->drawable.bitsPerPixel;
101 	void *src, *dst = pixmap->devPrivate.ptr;
102 	int src_pitch = bo->pitch;
103 	int dst_pitch = pixmap->devKind;
104 
105 	if (!download_inplace__cpu(kgem, dst, bo, box, n))
106 		return false;
107 
108 	if (bo->tiling == I915_TILING_Y)
109 		return false;
110 
111 	assert(kgem_bo_can_map__cpu(kgem, bo, false));
112 
113 	src = kgem_bo_map__cpu(kgem, bo);
114 	if (src == NULL)
115 		return false;
116 
117 	kgem_bo_sync__cpu_full(kgem, bo, 0);
118 
119 	if (sigtrap_get())
120 		return false;
121 
122 	DBG(("%s x %d\n", __FUNCTION__, n));
123 
124 	if (bo->tiling == I915_TILING_X) {
125 		do {
126 			memcpy_from_tiled_x(kgem, src, dst, bpp, src_pitch, dst_pitch,
127 					    box->x1, box->y1,
128 					    box->x1, box->y1,
129 					    box->x2 - box->x1, box->y2 - box->y1);
130 			box++;
131 		} while (--n);
132 	} else {
133 		do {
134 			memcpy_blt(src, dst, bpp, src_pitch, dst_pitch,
135 				   box->x1, box->y1,
136 				   box->x1, box->y1,
137 				   box->x2 - box->x1, box->y2 - box->y1);
138 			box++;
139 		} while (--n);
140 	}
141 
142 	sigtrap_put();
143 	return true;
144 }
145 
/* Read boxes from the bo into the pixmap through a direct mapping: try
 * the coherent CPU path first, then fall back to a GTT mapping.  Silently
 * does nothing if the bo cannot be mapped at all.
 *
 * (Also removes a verbatim-duplicated group of bounds assertions that
 * appeared twice in the loop body.) */
static void read_boxes_inplace(struct kgem *kgem,
			       PixmapPtr pixmap, struct kgem_bo *bo,
			       const BoxRec *box, int n)
{
	int bpp = pixmap->drawable.bitsPerPixel;
	void *src, *dst = pixmap->devPrivate.ptr;
	int src_pitch = bo->pitch;
	int dst_pitch = pixmap->devKind;

	if (read_boxes_inplace__cpu(kgem, pixmap, bo, box, n))
		return;

	DBG(("%s x %d, tiling=%d\n", __FUNCTION__, n, bo->tiling));

	if (!kgem_bo_can_map(kgem, bo))
		return;

	/* Flush any pending GPU commands touching the bo before mapping. */
	kgem_bo_submit(kgem, bo);

	src = kgem_bo_map(kgem, bo);
	if (src == NULL)
		return;

	/* Trap faults while reading through the (GTT) mapping. */
	if (sigtrap_get())
		return;

	assert(src != dst);
	do {
		DBG(("%s: copying box (%d, %d), (%d, %d)\n",
		     __FUNCTION__, box->x1, box->y1, box->x2, box->y2));

		assert(box->x2 > box->x1);
		assert(box->y2 > box->y1);

		/* Each box must lie within the destination pixmap. */
		assert(box->x1 >= 0);
		assert(box->y1 >= 0);
		assert(box->x2 <= pixmap->drawable.width);
		assert(box->y2 <= pixmap->drawable.height);

		memcpy_blt(src, dst, bpp,
			   src_pitch, dst_pitch,
			   box->x1, box->y1,
			   box->x1, box->y1,
			   box->x2 - box->x1, box->y2 - box->y1);
		box++;
	} while (--n);

	sigtrap_put();
}
200 
download_inplace(struct kgem * kgem,PixmapPtr p,struct kgem_bo * bo,const BoxRec * box,int nbox)201 static bool download_inplace(struct kgem *kgem,
202 			     PixmapPtr p, struct kgem_bo *bo,
203 			     const BoxRec *box, int nbox)
204 {
205 	bool cpu;
206 
207 	if (unlikely(kgem->wedged))
208 		return true;
209 
210 	cpu = download_inplace__cpu(kgem, p, bo, box, nbox);
211 	if (!cpu && !kgem_bo_can_map(kgem, bo))
212 		return false;
213 
214 	if (FORCE_INPLACE)
215 		return FORCE_INPLACE > 0;
216 
217 	if (cpu)
218 		return true;
219 
220 	if (kgem->can_blt_cpu && kgem->max_cpu_size)
221 		return false;
222 
223 	return !__kgem_bo_is_busy(kgem, bo);
224 }
225 
/* Download the given boxes from src_bo into the system-memory pixmap dst.
 *
 * Strategy, in order of preference:
 *  1. read directly through a mapping (download_inplace/read_boxes_inplace);
 *  2. copy on the GPU into a linear staging buffer — tiled into chunks when
 *     the extents exceed the hardware limits — then memcpy back;
 *  3. on the BLT ring, emit one XY_SRC_COPY per box into a densely packed
 *     staging buffer and memcpy each run back afterwards.
 */
void sna_read_boxes(struct sna *sna, PixmapPtr dst, struct kgem_bo *src_bo,
		    const BoxRec *box, int nbox)
{
	struct kgem *kgem = &sna->kgem;
	struct kgem_bo *dst_bo;
	BoxRec extents;
	const BoxRec *tmp_box;
	int tmp_nbox;
	void *ptr;
	int src_pitch, cpp, offset;
	int n, cmd, br13;
	bool can_blt;

	DBG(("%s x %d, src=(handle=%d), dst=(size=(%d, %d)\n",
	     __FUNCTION__, nbox, src_bo->handle,
	     dst->drawable.width, dst->drawable.height));

#ifndef NDEBUG
	/* Debug builds: verify every box lies within the source bo. */
	for (n = 0; n < nbox; n++) {
		if (box[n].x1 < 0 || box[n].y1 < 0 ||
		    box[n].x2 * dst->drawable.bitsPerPixel/8 > src_bo->pitch ||
		    box[n].y2 * src_bo->pitch > kgem_bo_size(src_bo))
		{
			FatalError("source out-of-bounds box[%d]=(%d, %d), (%d, %d), pitch=%d, size=%d\n", n,
				   box[n].x1, box[n].y1,
				   box[n].x2, box[n].y2,
				   src_bo->pitch, kgem_bo_size(src_bo));
		}
	}
#endif

	/* XXX The gpu is faster to perform detiling in bulk, but takes
	 * longer to setup and retrieve the results, with an additional
	 * copy. The long term solution is to use snoopable bo and avoid
	 * this path.
	 */

	if (download_inplace(kgem, dst, src_bo, box, nbox)) {
fallback:
		read_boxes_inplace(kgem, dst, src_bo, box, nbox);
		return;
	}

	/* The BLT engine limits a span to (MAXSHORT-ish) bytes; track
	 * whether every box fits while accumulating the overall extents. */
	can_blt = kgem_bo_can_blt(kgem, src_bo) &&
		(box[0].x2 - box[0].x1) * dst->drawable.bitsPerPixel < 8 * (MAXSHORT - 4);
	extents = box[0];
	for (n = 1; n < nbox; n++) {
		if (box[n].x1 < extents.x1)
			extents.x1 = box[n].x1;
		if (box[n].x2 > extents.x2)
			extents.x2 = box[n].x2;

		if (can_blt)
			can_blt = (box[n].x2 - box[n].x1) * dst->drawable.bitsPerPixel < 8 * (MAXSHORT - 4);

		if (box[n].y1 < extents.y1)
			extents.y1 = box[n].y1;
		if (box[n].y2 > extents.y2)
			extents.y2 = box[n].y2;
	}
	/* Neither engine can handle it: read through the mapping. */
	if (!can_blt && sna->render.max_3d_size == 0)
		goto fallback;

	if (kgem_bo_can_map(kgem, src_bo)) {
		/* Is it worth detiling? */
		if ((extents.y2 - extents.y1 - 1) * src_bo->pitch < 4096)
			goto fallback;
	}

	/* Try to avoid switching rings... */
	if (!can_blt || kgem->ring == KGEM_RENDER ||
	    upload_too_large(sna, extents.x2 - extents.x1, extents.y2 - extents.y1)) {
		DrawableRec tmp;

		tmp.width  = extents.x2 - extents.x1;
		tmp.height = extents.y2 - extents.y1;
		tmp.depth  = dst->drawable.depth;
		tmp.bitsPerPixel = dst->drawable.bitsPerPixel;

		assert(tmp.width);
		assert(tmp.height);

		if (must_tile(sna, tmp.width, tmp.height)) {
			BoxRec tile, stack[64], *clipped, *c;
			int step;

			/* NOTE(review): n equals nbox here (left over from
			 * the extents loop above) — confirm intentional. */
			if (n > ARRAY_SIZE(stack)) {
				clipped = malloc(sizeof(BoxRec) * n);
				if (clipped == NULL)
					goto fallback;
			} else
				clipped = stack;

			/* Choose the largest square tile the 3D pipeline,
			 * the BLT span limit and the upload size all allow. */
			step = MIN(sna->render.max_3d_size,
				   8*(MAXSHORT&~63) / dst->drawable.bitsPerPixel);
			while (step * step * 4 > sna->kgem.max_upload_tile_size)
				step /= 2;

			DBG(("%s: tiling download, using %dx%d tiles\n",
			     __FUNCTION__, step, step));
			assert(step);

			for (tile.y1 = extents.y1; tile.y1 < extents.y2; tile.y1 = tile.y2) {
				int y2 = tile.y1 + step;
				if (y2 > extents.y2)
					y2 = extents.y2;
				tile.y2 = y2;

				for (tile.x1 = extents.x1; tile.x1 < extents.x2; tile.x1 = tile.x2) {
					int x2 = tile.x1 + step;
					if (x2 > extents.x2)
						x2 = extents.x2;
					tile.x2 = x2;

					tmp.width  = tile.x2 - tile.x1;
					tmp.height = tile.y2 - tile.y1;

					/* Clip the boxes against this tile;
					 * skip the tile if nothing remains. */
					c = clipped;
					for (n = 0; n < nbox; n++) {
						*c = box[n];
						if (!box_intersect(c, &tile))
							continue;

						DBG(("%s: box(%d, %d), (%d, %d),, dst=(%d, %d)\n",
						     __FUNCTION__,
						     c->x1, c->y1,
						     c->x2, c->y2,
						     c->x1 - tile.x1,
						     c->y1 - tile.y1));
						c++;
					}
					if (c == clipped)
						continue;

					dst_bo = kgem_create_buffer_2d(kgem,
								       tmp.width,
								       tmp.height,
								       tmp.bitsPerPixel,
								       KGEM_BUFFER_LAST,
								       &ptr);
					if (!dst_bo) {
						if (clipped != stack)
							free(clipped);
						goto fallback;
					}

					/* GPU copy tile -> staging buffer. */
					if (!sna->render.copy_boxes(sna, GXcopy,
								    &dst->drawable, src_bo, 0, 0,
								    &tmp, dst_bo, -tile.x1, -tile.y1,
								    clipped, c-clipped, COPY_LAST)) {
						kgem_bo_destroy(&sna->kgem, dst_bo);
						if (clipped != stack)
							free(clipped);
						goto fallback;
					}

					kgem_bo_submit(&sna->kgem, dst_bo);
					kgem_buffer_read_sync(kgem, dst_bo);

					/* Copy the staged results into the
					 * pixmap, trapping mapping faults. */
					if (sigtrap_get() == 0) {
						while (c-- != clipped) {
							memcpy_blt(ptr, dst->devPrivate.ptr, tmp.bitsPerPixel,
								   dst_bo->pitch, dst->devKind,
								   c->x1 - tile.x1,
								   c->y1 - tile.y1,
								   c->x1, c->y1,
								   c->x2 - c->x1,
								   c->y2 - c->y1);
						}
						sigtrap_put();
					}

					kgem_bo_destroy(&sna->kgem, dst_bo);
				}
			}

			if (clipped != stack)
				free(clipped);
		} else {
			/* Single staging buffer covering the whole extents. */
			dst_bo = kgem_create_buffer_2d(kgem,
						       tmp.width,
						       tmp.height,
						       tmp.bitsPerPixel,
						       KGEM_BUFFER_LAST,
						       &ptr);
			if (!dst_bo)
				goto fallback;

			if (!sna->render.copy_boxes(sna, GXcopy,
						    &dst->drawable, src_bo, 0, 0,
						    &tmp, dst_bo, -extents.x1, -extents.y1,
						    box, nbox, COPY_LAST)) {
				kgem_bo_destroy(&sna->kgem, dst_bo);
				goto fallback;
			}

			kgem_bo_submit(&sna->kgem, dst_bo);
			kgem_buffer_read_sync(kgem, dst_bo);

			if (sigtrap_get() == 0) {
				for (n = 0; n < nbox; n++) {
					memcpy_blt(ptr, dst->devPrivate.ptr, tmp.bitsPerPixel,
						   dst_bo->pitch, dst->devKind,
						   box[n].x1 - extents.x1,
						   box[n].y1 - extents.y1,
						   box[n].x1, box[n].y1,
						   box[n].x2 - box[n].x1,
						   box[n].y2 - box[n].y1);
				}
				sigtrap_put();
			}

			kgem_bo_destroy(&sna->kgem, dst_bo);
		}
		return;
	}

	/* count the total number of bytes to be read and allocate a bo */
	cpp = dst->drawable.bitsPerPixel / 8;
	offset = 0;
	for (n = 0; n < nbox; n++) {
		int height = box[n].y2 - box[n].y1;
		int width = box[n].x2 - box[n].x1;
		offset += PITCH(width, cpp) * height;
	}

	DBG(("    read buffer size=%d\n", offset));

	dst_bo = kgem_create_buffer(kgem, offset, KGEM_BUFFER_LAST, &ptr);
	if (!dst_bo) {
		read_boxes_inplace(kgem, dst, src_bo, box, nbox);
		return;
	}

	cmd = XY_SRC_COPY_BLT_CMD;
	src_pitch = src_bo->pitch;
	if (kgem->gen >= 040 && src_bo->tiling) {
		/* Tiled surfaces on gen4+ program the pitch in dwords. */
		cmd |= BLT_SRC_TILED;
		src_pitch >>= 2;
	}

	br13 = 0xcc << 16;	/* copy ROP (GXcopy) */
	br13 |= sna_br13_color_depth(cpp * 8);
	if (cpp == 4)
		cmd |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;

	kgem_set_mode(kgem, KGEM_BLT, dst_bo);
	if (!kgem_check_batch(kgem, 10) ||
	    !kgem_check_reloc_and_exec(kgem, 2) ||
	    !kgem_check_many_bo_fenced(kgem, dst_bo, src_bo, NULL)) {
		kgem_submit(kgem);
		if (!kgem_check_many_bo_fenced(kgem, dst_bo, src_bo, NULL))
			goto fallback;
		_kgem_set_mode(kgem, KGEM_BLT);
	}
	kgem_bcs_set_tiling(&sna->kgem, src_bo, NULL);

	/* Emit one XY_SRC_COPY per box, packing the results contiguously
	 * at 'offset' into the staging buffer; split into multiple batches
	 * when the batch or relocation space runs out. */
	tmp_nbox = nbox;
	tmp_box = box;
	offset = 0;
	if (sna->kgem.gen >= 0100) {
		/* gen8+: 10-dword command with 64-bit relocations. */
		cmd |= 8;
		do {
			int nbox_this_time, rem;

			nbox_this_time = tmp_nbox;
			rem = kgem_batch_space(kgem);
			if (10*nbox_this_time > rem)
				nbox_this_time = rem / 10;
			if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc)
				nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc) / 2;
			assert(nbox_this_time);
			tmp_nbox -= nbox_this_time;

			assert(kgem->mode == KGEM_BLT);
			for (n = 0; n < nbox_this_time; n++) {
				int height = tmp_box[n].y2 - tmp_box[n].y1;
				int width = tmp_box[n].x2 - tmp_box[n].x1;
				int pitch = PITCH(width, cpp);
				uint32_t *b = kgem->batch + kgem->nbatch;

				DBG(("    blt offset %x: (%d, %d) x (%d, %d), pitch=%d\n",
				     offset,
				     tmp_box[n].x1, tmp_box[n].y1,
				     width, height, pitch));

				assert(tmp_box[n].x1 >= 0);
				assert(tmp_box[n].x2 * dst->drawable.bitsPerPixel/8 <= src_bo->pitch);
				assert(tmp_box[n].y1 >= 0);
				assert(tmp_box[n].y2 * src_bo->pitch <= kgem_bo_size(src_bo));

				b[0] = cmd;
				b[1] = br13 | pitch;
				b[2] = 0;
				b[3] = height << 16 | width;
				*(uint64_t *)(b+4) =
					kgem_add_reloc64(kgem, kgem->nbatch + 4, dst_bo,
							 I915_GEM_DOMAIN_RENDER << 16 |
							 I915_GEM_DOMAIN_RENDER |
							 KGEM_RELOC_FENCED,
							 offset);
				b[6] = tmp_box[n].y1 << 16 | tmp_box[n].x1;
				b[7] = src_pitch;
				*(uint64_t *)(b+8) =
					kgem_add_reloc64(kgem, kgem->nbatch + 8, src_bo,
							 I915_GEM_DOMAIN_RENDER << 16 |
							 KGEM_RELOC_FENCED,
							 0);
				kgem->nbatch += 10;

				offset += pitch * height;
			}

			_kgem_submit(kgem);
			if (!tmp_nbox)
				break;

			_kgem_set_mode(kgem, KGEM_BLT);
			kgem_bcs_set_tiling(&sna->kgem, src_bo, NULL);
			tmp_box += nbox_this_time;
		} while (1);
	} else {
		/* pre-gen8: 8-dword command with 32-bit relocations. */
		cmd |= 6;
		do {
			int nbox_this_time, rem;

			nbox_this_time = tmp_nbox;
			rem = kgem_batch_space(kgem);
			if (8*nbox_this_time > rem)
				nbox_this_time = rem / 8;
			if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc)
				nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc) / 2;
			assert(nbox_this_time);
			tmp_nbox -= nbox_this_time;

			assert(kgem->mode == KGEM_BLT);
			for (n = 0; n < nbox_this_time; n++) {
				int height = tmp_box[n].y2 - tmp_box[n].y1;
				int width = tmp_box[n].x2 - tmp_box[n].x1;
				int pitch = PITCH(width, cpp);
				uint32_t *b = kgem->batch + kgem->nbatch;

				DBG(("    blt offset %x: (%d, %d) x (%d, %d), pitch=%d\n",
				     offset,
				     tmp_box[n].x1, tmp_box[n].y1,
				     width, height, pitch));

				assert(tmp_box[n].x1 >= 0);
				assert(tmp_box[n].x2 * dst->drawable.bitsPerPixel/8 <= src_bo->pitch);
				assert(tmp_box[n].y1 >= 0);
				assert(tmp_box[n].y2 * src_bo->pitch <= kgem_bo_size(src_bo));

				b[0] = cmd;
				b[1] = br13 | pitch;
				b[2] = 0;
				b[3] = height << 16 | width;
				b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, dst_bo,
						      I915_GEM_DOMAIN_RENDER << 16 |
						      I915_GEM_DOMAIN_RENDER |
						      KGEM_RELOC_FENCED,
						      offset);
				b[5] = tmp_box[n].y1 << 16 | tmp_box[n].x1;
				b[6] = src_pitch;
				b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, src_bo,
						      I915_GEM_DOMAIN_RENDER << 16 |
						      KGEM_RELOC_FENCED,
						      0);
				kgem->nbatch += 8;

				offset += pitch * height;
			}

			_kgem_submit(kgem);
			if (!tmp_nbox)
				break;

			_kgem_set_mode(kgem, KGEM_BLT);
			kgem_bcs_set_tiling(&sna->kgem, src_bo, NULL);
			tmp_box += nbox_this_time;
		} while (1);
	}
	assert(offset == __kgem_buffer_size(dst_bo));

	/* Wait for the blits, then unpack the staging buffer box by box. */
	kgem_buffer_read_sync(kgem, dst_bo);

	if (sigtrap_get() == 0) {
		char *src = ptr;
		do {
			int height = box->y2 - box->y1;
			int width  = box->x2 - box->x1;
			int pitch = PITCH(width, cpp);

			DBG(("    copy offset %lx [%08x...%08x...%08x]: (%d, %d) x (%d, %d), src pitch=%d, dst pitch=%d, bpp=%d\n",
			     (long)((char *)src - (char *)ptr),
			     *(uint32_t*)src, *(uint32_t*)(src+pitch*height/2 + pitch/2 - 4), *(uint32_t*)(src+pitch*height - 4),
			     box->x1, box->y1,
			     width, height,
			     pitch, dst->devKind, cpp*8));

			assert(box->x1 >= 0);
			assert(box->x2 <= dst->drawable.width);
			assert(box->y1 >= 0);
			assert(box->y2 <= dst->drawable.height);

			memcpy_blt(src, dst->devPrivate.ptr, cpp*8,
				   pitch, dst->devKind,
				   0, 0,
				   box->x1, box->y1,
				   width, height);
			box++;

			src += pitch * height;
		} while (--nbox);
		assert(src - (char *)ptr == __kgem_buffer_size(dst_bo));
		sigtrap_put();
	}
	kgem_bo_destroy(kgem, dst_bo);
	/* Invalidate the cached fill bo: the BLT state has been clobbered. */
	sna->blt_state.fill_bo = 0;
}
645 
upload_inplace__tiled(struct kgem * kgem,struct kgem_bo * bo)646 static bool upload_inplace__tiled(struct kgem *kgem, struct kgem_bo *bo)
647 {
648 	DBG(("%s: tiling=%d\n", __FUNCTION__, bo->tiling));
649 	switch (bo->tiling) {
650 	case I915_TILING_Y:
651 		return false;
652 	case I915_TILING_X:
653 		if (!kgem->memcpy_to_tiled_x)
654 			return false;
655 	default:
656 		break;
657 	}
658 
659 	if (kgem->has_wc_mmap)
660 		return true;
661 
662 	return kgem_bo_can_map__cpu(kgem, bo, true);
663 }
664 
/* Write boxes from src into the bo through a direct CPU or WC mapping,
 * detiling X-tiled layouts in software.  Returns false if the bo cannot
 * be written this way (Y-tiled, or no mapping available). */
static bool
write_boxes_inplace__tiled(struct kgem *kgem,
                           const uint8_t *src, int stride, int bpp, int16_t src_dx, int16_t src_dy,
                           struct kgem_bo *bo, int16_t dst_dx, int16_t dst_dy,
                           const BoxRec *box, int n)
{
	uint8_t *dst;

	/* No software write path for Y-tiled layouts. */
	if (bo->tiling == I915_TILING_Y)
		return false;

	assert(kgem->has_wc_mmap || kgem_bo_can_map__cpu(kgem, bo, true));

	/* Prefer a cacheable CPU mapping; otherwise fall back to
	 * write-combining.  Each branch performs the matching sync. */
	if (kgem_bo_can_map__cpu(kgem, bo, true)) {
		dst = kgem_bo_map__cpu(kgem, bo);
		if (dst == NULL)
			return false;

		kgem_bo_sync__cpu(kgem, bo);
	} else {
		dst = kgem_bo_map__wc(kgem, bo);
		if (dst == NULL)
			return false;

		kgem_bo_sync__gtt(kgem, bo);
	}

	/* Trap SIGBUS/SIGSEGV while writing through the mapping. */
	if (sigtrap_get())
		return false;

	if (bo->tiling) {
		/* X-tiled destination: use the software tiling copier. */
		do {
			memcpy_to_tiled_x(kgem, src, dst, bpp, stride, bo->pitch,
					  box->x1 + src_dx, box->y1 + src_dy,
					  box->x1 + dst_dx, box->y1 + dst_dy,
					  box->x2 - box->x1, box->y2 - box->y1);
			box++;
		} while (--n);
	} else {
		/* Linear destination: plain rectangular copies. */
		do {
			memcpy_blt(src, dst, bpp, stride, bo->pitch,
				   box->x1 + src_dx, box->y1 + src_dy,
				   box->x1 + dst_dx, box->y1 + dst_dy,
				   box->x2 - box->x1, box->y2 - box->y1);
			box++;
		} while (--n);
	}

	sigtrap_put();
	return true;
}
716 
/* Write boxes from src into the bo through a direct mapping, trying the
 * tiled/CPU path first and then a plain (GTT) mapping.  Returns false if
 * no mapping is possible; the caller must then stage the upload. */
static bool write_boxes_inplace(struct kgem *kgem,
				const void *src, int stride, int bpp, int16_t src_dx, int16_t src_dy,
				struct kgem_bo *bo, int16_t dst_dx, int16_t dst_dy,
				const BoxRec *box, int n)
{
	void *dst;

	DBG(("%s x %d, handle=%d, tiling=%d\n",
	     __FUNCTION__, n, bo->handle, bo->tiling));

	if (upload_inplace__tiled(kgem, bo) &&
	    write_boxes_inplace__tiled(kgem, src, stride, bpp, src_dx, src_dy,
				       bo, dst_dx, dst_dy, box, n))
		return true;

	if (!kgem_bo_can_map(kgem, bo))
		return false;

	/* Flush pending GPU commands touching the bo before mapping. */
	kgem_bo_submit(kgem, bo);

	dst = kgem_bo_map(kgem, bo);
	if (dst == NULL)
		return false;

	assert(dst != src);

	/* Trap faults while writing through the mapping. */
	if (sigtrap_get())
		return false;

	do {
		DBG(("%s: (%d, %d) -> (%d, %d) x (%d, %d) [bpp=%d, src_pitch=%d, dst_pitch=%d]\n", __FUNCTION__,
		     box->x1 + src_dx, box->y1 + src_dy,
		     box->x1 + dst_dx, box->y1 + dst_dy,
		     box->x2 - box->x1, box->y2 - box->y1,
		     bpp, stride, bo->pitch));

		assert(box->x2 > box->x1);
		assert(box->y2 > box->y1);

		/* Translated destination box must lie within the bo. */
		assert(box->x1 + dst_dx >= 0);
		assert((box->x2 + dst_dx)*bpp <= 8*bo->pitch);
		assert(box->y1 + dst_dy >= 0);
		assert((box->y2 + dst_dy)*bo->pitch <= kgem_bo_size(bo));

		/* Translated source box must lie within the src pixels. */
		assert(box->x1 + src_dx >= 0);
		assert((box->x2 + src_dx)*bpp <= 8*stride);
		assert(box->y1 + src_dy >= 0);

		memcpy_blt(src, dst, bpp,
			   stride, bo->pitch,
			   box->x1 + src_dx, box->y1 + src_dy,
			   box->x1 + dst_dx, box->y1 + dst_dy,
			   box->x2 - box->x1, box->y2 - box->y1);
		box++;
	} while (--n);

	sigtrap_put();
	return true;
}
776 
/* Heuristic: should this upload be written directly into the bo rather
 * than staged through a blit? */
static bool __upload_inplace(struct kgem *kgem,
			     struct kgem_bo *bo,
			     const BoxRec *box,
			     int n, int bpp)
{
	unsigned int bytes;

	if (FORCE_INPLACE)
		return FORCE_INPLACE > 0;

	/* NOTE(review): bo->exec presumably means the bo is already on an
	 * execbuffer list, so an inplace write would stall — confirm. */
	if (bo->exec)
		return false;

	/* NOTE(review): bo->flush presumably marks a shared/scanout bo
	 * that must be kept coherent — confirm against kgem. */
	if (bo->flush)
		return true;

	if (kgem_bo_can_map__cpu(kgem, bo, true))
		return true;

	/* If we are writing through the GTT, check first if we might be
	 * able to amalgamate a series of small writes into a single
	 * operation.
	 */
	bytes = 0;
	while (n--) {
		/* Despite the name, this accumulates area in pixels; the
		 * *bpp below scales it by bits per pixel. */
		bytes += (box->x2 - box->x1) * (box->y2 - box->y1);
		box++;
	}
	if (__kgem_bo_is_busy(kgem, bo))
		return bytes * bpp >> 12 >= kgem->half_cpu_cache_pages;
	else
		return bytes * bpp >> 12;	/* nonzero: large enough */
}
810 
upload_inplace(struct kgem * kgem,struct kgem_bo * bo,const BoxRec * box,int n,int bpp)811 static bool upload_inplace(struct kgem *kgem,
812 			   struct kgem_bo *bo,
813 			   const BoxRec *box,
814 			   int n, int bpp)
815 {
816 	if (unlikely(kgem->wedged))
817 		return true;
818 
819 	if (!kgem_bo_can_map(kgem, bo) && !upload_inplace__tiled(kgem, bo))
820 		return false;
821 
822 	return __upload_inplace(kgem, bo, box, n,bpp);
823 }
824 
sna_write_boxes(struct sna * sna,PixmapPtr dst,struct kgem_bo * const dst_bo,int16_t const dst_dx,int16_t const dst_dy,const void * const src,int const stride,int16_t const src_dx,int16_t const src_dy,const BoxRec * box,int nbox)825 bool sna_write_boxes(struct sna *sna, PixmapPtr dst,
826 		     struct kgem_bo * const dst_bo, int16_t const dst_dx, int16_t const dst_dy,
827 		     const void * const src, int const stride, int16_t const src_dx, int16_t const src_dy,
828 		     const BoxRec *box, int nbox)
829 {
830 	struct kgem *kgem = &sna->kgem;
831 	struct kgem_bo *src_bo;
832 	BoxRec extents;
833 	void *ptr;
834 	int offset;
835 	int n, cmd, br13;
836 	bool can_blt;
837 
838 	DBG(("%s x %d, src stride=%d,  src dx=(%d, %d)\n", __FUNCTION__, nbox, stride, src_dx, src_dy));
839 
840 	if (upload_inplace(kgem, dst_bo, box, nbox, dst->drawable.bitsPerPixel) &&
841 	    write_boxes_inplace(kgem,
842 				src, stride, dst->drawable.bitsPerPixel, src_dx, src_dy,
843 				dst_bo, dst_dx, dst_dy,
844 				box, nbox))
845 		return true;
846 
847 	if (wedged(sna))
848 		return false;
849 
850 	can_blt = kgem_bo_can_blt(kgem, dst_bo) &&
851 		(box[0].x2 - box[0].x1) * dst->drawable.bitsPerPixel < 8 * (MAXSHORT - 4);
852 	extents = box[0];
853 	for (n = 1; n < nbox; n++) {
854 		if (box[n].x1 < extents.x1)
855 			extents.x1 = box[n].x1;
856 		if (box[n].x2 > extents.x2)
857 			extents.x2 = box[n].x2;
858 
859 		if (can_blt)
860 			can_blt = (box[n].x2 - box[n].x1) * dst->drawable.bitsPerPixel < 8 * (MAXSHORT - 4);
861 
862 		if (box[n].y1 < extents.y1)
863 			extents.y1 = box[n].y1;
864 		if (box[n].y2 > extents.y2)
865 			extents.y2 = box[n].y2;
866 	}
867 	if (!can_blt && sna->render.max_3d_size == 0)
868 		goto fallback;
869 
870 	/* Try to avoid switching rings... */
871 	if (!can_blt || kgem->ring == KGEM_RENDER ||
872 	    upload_too_large(sna, extents.x2 - extents.x1, extents.y2 - extents.y1)) {
873 		DrawableRec tmp;
874 
875 		tmp.width  = extents.x2 - extents.x1;
876 		tmp.height = extents.y2 - extents.y1;
877 		tmp.depth  = dst->drawable.depth;
878 		tmp.bitsPerPixel = dst->drawable.bitsPerPixel;
879 
880 		assert(tmp.width);
881 		assert(tmp.height);
882 
883 		DBG(("%s: upload (%d, %d)x(%d, %d), max %dx%d\n",
884 		     __FUNCTION__,
885 		     extents.x1, extents.y1,
886 		     tmp.width, tmp.height,
887 		     sna->render.max_3d_size, sna->render.max_3d_size));
888 		if (must_tile(sna, tmp.width, tmp.height)) {
889 			BoxRec tile, stack[64], *clipped;
890 			int cpp, step;
891 
892 tile:
893 			cpp = dst->drawable.bitsPerPixel / 8;
894 			step = MIN(sna->render.max_3d_size,
895 				   (MAXSHORT&~63) / cpp);
896 			while (step * step * cpp > sna->kgem.max_upload_tile_size)
897 				step /= 2;
898 
899 			if (step * cpp > 4096)
900 				step = 4096 / cpp;
901 			assert(step);
902 
903 			DBG(("%s: tiling upload, using %dx%d tiles\n",
904 			     __FUNCTION__, step, step));
905 
906 			if (n > ARRAY_SIZE(stack)) {
907 				clipped = malloc(sizeof(BoxRec) * n);
908 				if (clipped == NULL)
909 					goto fallback;
910 			} else
911 				clipped = stack;
912 
913 			for (tile.y1 = extents.y1; tile.y1 < extents.y2; tile.y1 = tile.y2) {
914 				int y2 = tile.y1 + step;
915 				if (y2 > extents.y2)
916 					y2 = extents.y2;
917 				tile.y2 = y2;
918 
919 				for (tile.x1 = extents.x1; tile.x1 < extents.x2; tile.x1 = tile.x2) {
920 					int x2 = tile.x1 + step;
921 					if (x2 > extents.x2)
922 						x2 = extents.x2;
923 					tile.x2 = x2;
924 
925 					tmp.width  = tile.x2 - tile.x1;
926 					tmp.height = tile.y2 - tile.y1;
927 
928 					src_bo = kgem_create_buffer_2d(kgem,
929 								       tmp.width,
930 								       tmp.height,
931 								       tmp.bitsPerPixel,
932 								       KGEM_BUFFER_WRITE_INPLACE,
933 								       &ptr);
934 					if (!src_bo) {
935 						if (clipped != stack)
936 							free(clipped);
937 						goto fallback;
938 					}
939 
940 					if (sigtrap_get() == 0) {
941 						BoxRec *c = clipped;
942 						for (n = 0; n < nbox; n++) {
943 							*c = box[n];
944 							if (!box_intersect(c, &tile))
945 								continue;
946 
947 							DBG(("%s: box(%d, %d), (%d, %d), src=(%d, %d), dst=(%d, %d)\n",
948 							     __FUNCTION__,
949 							     c->x1, c->y1,
950 							     c->x2, c->y2,
951 							     src_dx, src_dy,
952 							     c->x1 - tile.x1,
953 							     c->y1 - tile.y1));
954 							memcpy_blt(src, ptr, tmp.bitsPerPixel,
955 								   stride, src_bo->pitch,
956 								   c->x1 + src_dx,
957 								   c->y1 + src_dy,
958 								   c->x1 - tile.x1,
959 								   c->y1 - tile.y1,
960 								   c->x2 - c->x1,
961 								   c->y2 - c->y1);
962 							c++;
963 						}
964 
965 						if (c != clipped)
966 							n = sna->render.copy_boxes(sna, GXcopy,
967 										   &tmp, src_bo, -tile.x1, -tile.y1,
968 										   &dst->drawable, dst_bo, dst_dx, dst_dy,
969 										   clipped, c - clipped, 0);
970 						else
971 							n = 1;
972 						sigtrap_put();
973 					} else
974 						n = 0;
975 
976 					kgem_bo_destroy(&sna->kgem, src_bo);
977 
978 					if (!n) {
979 						if (clipped != stack)
980 							free(clipped);
981 						goto fallback;
982 					}
983 				}
984 			}
985 
986 			if (clipped != stack)
987 				free(clipped);
988 		} else {
989 			src_bo = kgem_create_buffer_2d(kgem,
990 						       tmp.width,
991 						       tmp.height,
992 						       tmp.bitsPerPixel,
993 						       KGEM_BUFFER_WRITE_INPLACE,
994 						       &ptr);
995 			if (!src_bo)
996 				goto fallback;
997 
998 			if (sigtrap_get() == 0) {
999 				for (n = 0; n < nbox; n++) {
1000 					DBG(("%s: box(%d, %d), (%d, %d), src=(%d, %d), dst=(%d, %d)\n",
1001 					     __FUNCTION__,
1002 					     box[n].x1, box[n].y1,
1003 					     box[n].x2, box[n].y2,
1004 					     src_dx, src_dy,
1005 					     box[n].x1 - extents.x1,
1006 					     box[n].y1 - extents.y1));
1007 					memcpy_blt(src, ptr, tmp.bitsPerPixel,
1008 						   stride, src_bo->pitch,
1009 						   box[n].x1 + src_dx,
1010 						   box[n].y1 + src_dy,
1011 						   box[n].x1 - extents.x1,
1012 						   box[n].y1 - extents.y1,
1013 						   box[n].x2 - box[n].x1,
1014 						   box[n].y2 - box[n].y1);
1015 				}
1016 
1017 				n = sna->render.copy_boxes(sna, GXcopy,
1018 							   &tmp, src_bo, -extents.x1, -extents.y1,
1019 							   &dst->drawable, dst_bo, dst_dx, dst_dy,
1020 							   box, nbox, 0);
1021 				sigtrap_put();
1022 			} else
1023 				n = 0;
1024 
1025 			kgem_bo_destroy(&sna->kgem, src_bo);
1026 
1027 			if (!n)
1028 				goto tile;
1029 		}
1030 
1031 		return true;
1032 	}
1033 
1034 	cmd = XY_SRC_COPY_BLT_CMD;
1035 	br13 = dst_bo->pitch;
1036 	if (kgem->gen >= 040 && dst_bo->tiling) {
1037 		cmd |= BLT_DST_TILED;
1038 		br13 >>= 2;
1039 	}
1040 	br13 |= 0xcc << 16;
1041 	br13 |= sna_br13_color_depth(dst->drawable.bitsPerPixel);
1042 	if (dst->drawable.bitsPerPixel == 32)
1043 		cmd |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
1044 
1045 	kgem_set_mode(kgem, KGEM_BLT, dst_bo);
1046 	if (!kgem_check_batch(kgem, 10) ||
1047 	    !kgem_check_reloc_and_exec(kgem, 2) ||
1048 	    !kgem_check_bo_fenced(kgem, dst_bo)) {
1049 		kgem_submit(kgem);
1050 		if (!kgem_check_bo_fenced(kgem, dst_bo))
1051 			goto fallback;
1052 		_kgem_set_mode(kgem, KGEM_BLT);
1053 	}
1054 	kgem_bcs_set_tiling(&sna->kgem, NULL, dst_bo);
1055 
1056 	if (kgem->gen >= 0100) {
1057 		cmd |= 8;
1058 		do {
1059 			int nbox_this_time, rem;
1060 
1061 			nbox_this_time = nbox;
1062 			rem = kgem_batch_space(kgem);
1063 			if (10*nbox_this_time > rem)
1064 				nbox_this_time = rem / 10;
1065 			if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc)
1066 				nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc) / 2;
1067 			assert(nbox_this_time);
1068 			nbox -= nbox_this_time;
1069 
1070 			/* Count the total number of bytes to be read and allocate a
1071 			 * single buffer large enough. Or if it is very small, combine
1072 			 * with other allocations. */
1073 			offset = 0;
1074 			for (n = 0; n < nbox_this_time; n++) {
1075 				int height = box[n].y2 - box[n].y1;
1076 				int width = box[n].x2 - box[n].x1;
1077 				offset += PITCH(width, dst->drawable.bitsPerPixel >> 3) * height;
1078 			}
1079 
1080 			src_bo = kgem_create_buffer(kgem, offset,
1081 						    KGEM_BUFFER_WRITE_INPLACE | (nbox ? KGEM_BUFFER_LAST : 0),
1082 						    &ptr);
1083 			if (!src_bo)
1084 				break;
1085 
1086 			if (sigtrap_get() == 0) {
1087 				offset = 0;
1088 				do {
1089 					int height = box->y2 - box->y1;
1090 					int width = box->x2 - box->x1;
1091 					int pitch = PITCH(width, dst->drawable.bitsPerPixel >> 3);
1092 					uint32_t *b;
1093 
1094 					DBG(("  %s: box src=(%d, %d), dst=(%d, %d) size=(%d, %d), dst offset=%d, dst pitch=%d\n",
1095 					     __FUNCTION__,
1096 					     box->x1 + src_dx, box->y1 + src_dy,
1097 					     box->x1 + dst_dx, box->y1 + dst_dy,
1098 					     width, height,
1099 					     offset, pitch));
1100 
1101 					assert(box->x1 + src_dx >= 0);
1102 					assert((box->x2 + src_dx)*dst->drawable.bitsPerPixel <= 8*stride);
1103 					assert(box->y1 + src_dy >= 0);
1104 
1105 					assert(box->x1 + dst_dx >= 0);
1106 					assert(box->y1 + dst_dy >= 0);
1107 
1108 					memcpy_blt(src, (char *)ptr + offset,
1109 						   dst->drawable.bitsPerPixel,
1110 						   stride, pitch,
1111 						   box->x1 + src_dx, box->y1 + src_dy,
1112 						   0, 0,
1113 						   width, height);
1114 
1115 					assert(kgem->mode == KGEM_BLT);
1116 					b = kgem->batch + kgem->nbatch;
1117 					b[0] = cmd;
1118 					b[1] = br13;
1119 					b[2] = (box->y1 + dst_dy) << 16 | (box->x1 + dst_dx);
1120 					b[3] = (box->y2 + dst_dy) << 16 | (box->x2 + dst_dx);
1121 					*(uint64_t *)(b+4) =
1122 						kgem_add_reloc64(kgem, kgem->nbatch + 4, dst_bo,
1123 								 I915_GEM_DOMAIN_RENDER << 16 |
1124 								 I915_GEM_DOMAIN_RENDER |
1125 								 KGEM_RELOC_FENCED,
1126 								 0);
1127 					b[6] = 0;
1128 					b[7] = pitch;
1129 					*(uint64_t *)(b+8) =
1130 						kgem_add_reloc64(kgem, kgem->nbatch + 8, src_bo,
1131 								 I915_GEM_DOMAIN_RENDER << 16 |
1132 								 KGEM_RELOC_FENCED,
1133 								 offset);
1134 					kgem->nbatch += 10;
1135 
1136 					box++;
1137 					offset += pitch * height;
1138 				} while (--nbox_this_time);
1139 				assert(offset == __kgem_buffer_size(src_bo));
1140 				sigtrap_put();
1141 			}
1142 
1143 			if (nbox) {
1144 				_kgem_submit(kgem);
1145 				_kgem_set_mode(kgem, KGEM_BLT);
1146 				kgem_bcs_set_tiling(&sna->kgem, NULL, dst_bo);
1147 			}
1148 
1149 			kgem_bo_destroy(kgem, src_bo);
1150 		} while (nbox);
1151 	} else {
1152 		cmd |= 6;
1153 		do {
1154 			int nbox_this_time, rem;
1155 
1156 			nbox_this_time = nbox;
1157 			rem = kgem_batch_space(kgem);
1158 			if (8*nbox_this_time > rem)
1159 				nbox_this_time = rem / 8;
1160 			if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc)
1161 				nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc) / 2;
1162 			assert(nbox_this_time);
1163 			nbox -= nbox_this_time;
1164 
1165 			/* Count the total number of bytes to be read and allocate a
1166 			 * single buffer large enough. Or if it is very small, combine
1167 			 * with other allocations. */
1168 			offset = 0;
1169 			for (n = 0; n < nbox_this_time; n++) {
1170 				int height = box[n].y2 - box[n].y1;
1171 				int width = box[n].x2 - box[n].x1;
1172 				offset += PITCH(width, dst->drawable.bitsPerPixel >> 3) * height;
1173 			}
1174 
1175 			src_bo = kgem_create_buffer(kgem, offset,
1176 						    KGEM_BUFFER_WRITE_INPLACE | (nbox ? KGEM_BUFFER_LAST : 0),
1177 						    &ptr);
1178 			if (!src_bo)
1179 				break;
1180 
1181 			if (sigtrap_get()) {
1182 				kgem_bo_destroy(kgem, src_bo);
1183 				goto fallback;
1184 			}
1185 
1186 			offset = 0;
1187 			do {
1188 				int height = box->y2 - box->y1;
1189 				int width = box->x2 - box->x1;
1190 				int pitch = PITCH(width, dst->drawable.bitsPerPixel >> 3);
1191 				uint32_t *b;
1192 
1193 				DBG(("  %s: box src=(%d, %d), dst=(%d, %d) size=(%d, %d), dst offset=%d, dst pitch=%d\n",
1194 				     __FUNCTION__,
1195 				     box->x1 + src_dx, box->y1 + src_dy,
1196 				     box->x1 + dst_dx, box->y1 + dst_dy,
1197 				     width, height,
1198 				     offset, pitch));
1199 
1200 				assert(box->x1 + src_dx >= 0);
1201 				assert((box->x2 + src_dx)*dst->drawable.bitsPerPixel <= 8*stride);
1202 				assert(box->y1 + src_dy >= 0);
1203 
1204 				assert(box->x1 + dst_dx >= 0);
1205 				assert(box->y1 + dst_dy >= 0);
1206 
1207 				memcpy_blt(src, (char *)ptr + offset,
1208 					   dst->drawable.bitsPerPixel,
1209 					   stride, pitch,
1210 					   box->x1 + src_dx, box->y1 + src_dy,
1211 					   0, 0,
1212 					   width, height);
1213 
1214 				assert(kgem->mode == KGEM_BLT);
1215 				b = kgem->batch + kgem->nbatch;
1216 				b[0] = cmd;
1217 				b[1] = br13;
1218 				b[2] = (box->y1 + dst_dy) << 16 | (box->x1 + dst_dx);
1219 				b[3] = (box->y2 + dst_dy) << 16 | (box->x2 + dst_dx);
1220 				b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, dst_bo,
1221 						      I915_GEM_DOMAIN_RENDER << 16 |
1222 						      I915_GEM_DOMAIN_RENDER |
1223 						      KGEM_RELOC_FENCED,
1224 						      0);
1225 				b[5] = 0;
1226 				b[6] = pitch;
1227 				b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, src_bo,
1228 						      I915_GEM_DOMAIN_RENDER << 16 |
1229 						      KGEM_RELOC_FENCED,
1230 						      offset);
1231 				kgem->nbatch += 8;
1232 
1233 				box++;
1234 				offset += pitch * height;
1235 			} while (--nbox_this_time);
1236 			assert(offset == __kgem_buffer_size(src_bo));
1237 			sigtrap_put();
1238 
1239 			if (nbox) {
1240 				_kgem_submit(kgem);
1241 				_kgem_set_mode(kgem, KGEM_BLT);
1242 				kgem_bcs_set_tiling(&sna->kgem, NULL, dst_bo);
1243 			}
1244 
1245 			kgem_bo_destroy(kgem, src_bo);
1246 		} while (nbox);
1247 	}
1248 
1249 	sna->blt_state.fill_bo = 0;
1250 	return true;
1251 
1252 fallback:
1253 	return write_boxes_inplace(kgem,
1254 				   src, stride, dst->drawable.bitsPerPixel, src_dx, src_dy,
1255 				   dst_bo, dst_dx, dst_dy,
1256 				   box, nbox);
1257 }
1258 
1259 static bool
write_boxes_inplace__xor(struct kgem * kgem,const void * src,int stride,int bpp,int16_t src_dx,int16_t src_dy,struct kgem_bo * bo,int16_t dst_dx,int16_t dst_dy,const BoxRec * box,int n,uint32_t and,uint32_t or)1260 write_boxes_inplace__xor(struct kgem *kgem,
1261 			 const void *src, int stride, int bpp, int16_t src_dx, int16_t src_dy,
1262 			 struct kgem_bo *bo, int16_t dst_dx, int16_t dst_dy,
1263 			 const BoxRec *box, int n,
1264 			 uint32_t and, uint32_t or)
1265 {
1266 	void *dst;
1267 
1268 	DBG(("%s x %d, tiling=%d\n", __FUNCTION__, n, bo->tiling));
1269 
1270 	if (!kgem_bo_can_map(kgem, bo))
1271 		return false;
1272 
1273 	kgem_bo_submit(kgem, bo);
1274 
1275 	dst = kgem_bo_map(kgem, bo);
1276 	if (dst == NULL)
1277 		return false;
1278 
1279 	if (sigtrap_get())
1280 		return false;
1281 
1282 	do {
1283 		DBG(("%s: (%d, %d) -> (%d, %d) x (%d, %d) [bpp=%d, src_pitch=%d, dst_pitch=%d]\n", __FUNCTION__,
1284 		     box->x1 + src_dx, box->y1 + src_dy,
1285 		     box->x1 + dst_dx, box->y1 + dst_dy,
1286 		     box->x2 - box->x1, box->y2 - box->y1,
1287 		     bpp, stride, bo->pitch));
1288 
1289 		assert(box->x2 > box->x1);
1290 		assert(box->y2 > box->y1);
1291 
1292 		assert(box->x1 + dst_dx >= 0);
1293 		assert((box->x2 + dst_dx)*bpp <= 8*bo->pitch);
1294 		assert(box->y1 + dst_dy >= 0);
1295 		assert((box->y2 + dst_dy)*bo->pitch <= kgem_bo_size(bo));
1296 
1297 		assert(box->x1 + src_dx >= 0);
1298 		assert((box->x2 + src_dx)*bpp <= 8*stride);
1299 		assert(box->y1 + src_dy >= 0);
1300 
1301 		memcpy_xor(src, dst, bpp,
1302 			   stride, bo->pitch,
1303 			   box->x1 + src_dx, box->y1 + src_dy,
1304 			   box->x1 + dst_dx, box->y1 + dst_dy,
1305 			   box->x2 - box->x1, box->y2 - box->y1,
1306 			   and, or);
1307 		box++;
1308 	} while (--n);
1309 
1310 	sigtrap_put();
1311 	return true;
1312 }
1313 
upload_inplace__xor(struct kgem * kgem,struct kgem_bo * bo,const BoxRec * box,int n,int bpp)1314 static bool upload_inplace__xor(struct kgem *kgem,
1315 				struct kgem_bo *bo,
1316 				const BoxRec *box,
1317 				int n, int bpp)
1318 {
1319 	if (unlikely(kgem->wedged))
1320 		return true;
1321 
1322 	if (!kgem_bo_can_map(kgem, bo))
1323 		return false;
1324 
1325 	return __upload_inplace(kgem, bo, box, n, bpp);
1326 }
1327 
/* Upload the given boxes from the CPU pointer 'src' (pitch 'stride')
 * into 'dst_bo', applying the 'and'/'or' pixel masks during the copy
 * (see memcpy_xor).  Strategy, in order of preference:
 *   1. write directly through a mapping (upload_inplace__xor +
 *      write_boxes_inplace__xor);
 *   2. stage through a temporary buffer and use the render engine's
 *      copy_boxes, tiling the upload if it exceeds the 3D limits;
 *   3. stage through per-batch buffers and emit XY_SRC_COPY_BLT;
 *   4. fall back to the inplace CPU write.
 * Returns true on success, false if the GPU is wedged and the inplace
 * write also failed.
 */
bool sna_write_boxes__xor(struct sna *sna, PixmapPtr dst,
			  struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
			  const void *src, int stride, int16_t src_dx, int16_t src_dy,
			  const BoxRec *box, int nbox,
			  uint32_t and, uint32_t or)
{
	struct kgem *kgem = &sna->kgem;
	struct kgem_bo *src_bo;
	BoxRec extents;
	bool can_blt;
	void *ptr;
	int offset;
	int n, cmd, br13;

	DBG(("%s x %d\n", __FUNCTION__, nbox));

	/* Preferred path: write straight into a mapping of dst_bo. */
	if (upload_inplace__xor(kgem, dst_bo, box, nbox, dst->drawable.bitsPerPixel) &&
	    write_boxes_inplace__xor(kgem,
				     src, stride, dst->drawable.bitsPerPixel, src_dx, src_dy,
				     dst_bo, dst_dx, dst_dy,
				     box, nbox,
				     and, or))
		return true;

	if (wedged(sna))
		return false;

	/* Accumulate the bounding extents of all boxes, and check that
	 * every box's row fits under the blitter's per-row byte limit. */
	can_blt = kgem_bo_can_blt(kgem, dst_bo) &&
		(box[0].x2 - box[0].x1) * dst->drawable.bitsPerPixel < 8 * (MAXSHORT - 4);
	extents = box[0];
	for (n = 1; n < nbox; n++) {
		if (box[n].x1 < extents.x1)
			extents.x1 = box[n].x1;
		if (box[n].x2 > extents.x2)
			extents.x2 = box[n].x2;

		if (can_blt)
			can_blt = (box[n].x2 - box[n].x1) * dst->drawable.bitsPerPixel < 8 * (MAXSHORT - 4);

		if (box[n].y1 < extents.y1)
			extents.y1 = box[n].y1;
		if (box[n].y2 > extents.y2)
			extents.y2 = box[n].y2;
	}

	/* Try to avoid switching rings... */
	if (!can_blt || kgem->ring == KGEM_RENDER ||
	    upload_too_large(sna, extents.x2 - extents.x1, extents.y2 - extents.y1)) {
		DrawableRec tmp;

		/* Describe a proxy drawable covering just the extents. */
		tmp.width  = extents.x2 - extents.x1;
		tmp.height = extents.y2 - extents.y1;
		tmp.depth  = dst->drawable.depth;
		tmp.bitsPerPixel = dst->drawable.bitsPerPixel;

		assert(tmp.width);
		assert(tmp.height);

		DBG(("%s: upload (%d, %d)x(%d, %d), max %dx%d\n",
		     __FUNCTION__,
		     extents.x1, extents.y1,
		     tmp.width, tmp.height,
		     sna->render.max_3d_size, sna->render.max_3d_size));
		if (must_tile(sna, tmp.width, tmp.height)) {
			BoxRec tile, stack[64], *clipped;
			int step;

tile:
			/* Pick a square tile size respecting both the 3D
			 * surface limits and the max upload buffer size. */
			step = MIN(sna->render.max_3d_size - 4096 / dst->drawable.bitsPerPixel,
				   8*(MAXSHORT&~63) / dst->drawable.bitsPerPixel);
			while (step * step * 4 > sna->kgem.max_upload_tile_size)
				step /= 2;

			DBG(("%s: tiling upload, using %dx%d tiles\n",
			     __FUNCTION__, step, step));
			assert(step);

			/* Scratch space for the per-tile clipped boxes;
			 * heap-allocate only when the stack array is too small. */
			if (n > ARRAY_SIZE(stack)) {
				clipped = malloc(sizeof(BoxRec) * n);
				if (clipped == NULL)
					goto fallback;
			} else
				clipped = stack;

			/* Walk the extents tile by tile, row-major. */
			for (tile.y1 = extents.y1; tile.y1 < extents.y2; tile.y1 = tile.y2) {
				int y2 = tile.y1 + step;
				if (y2 > extents.y2)
					y2 = extents.y2;
				tile.y2 = y2;

				for (tile.x1 = extents.x1; tile.x1 < extents.x2; tile.x1 = tile.x2) {
					int x2 = tile.x1 + step;
					if (x2 > extents.x2)
						x2 = extents.x2;
					tile.x2 = x2;

					tmp.width  = tile.x2 - tile.x1;
					tmp.height = tile.y2 - tile.y1;

					src_bo = kgem_create_buffer_2d(kgem,
								       tmp.width,
								       tmp.height,
								       tmp.bitsPerPixel,
								       KGEM_BUFFER_WRITE_INPLACE,
								       &ptr);
					if (!src_bo) {
						if (clipped != stack)
							free(clipped);
						goto fallback;
					}

					if (sigtrap_get() == 0) {
						BoxRec *c = clipped;
						/* Clip each box to this tile and copy the
						 * intersection into the staging buffer. */
						for (n = 0; n < nbox; n++) {
							*c = box[n];
							if (!box_intersect(c, &tile))
								continue;

							DBG(("%s: box(%d, %d), (%d, %d), src=(%d, %d), dst=(%d, %d)\n",
							     __FUNCTION__,
							     c->x1, c->y1,
							     c->x2, c->y2,
							     src_dx, src_dy,
							     c->x1 - tile.x1,
							     c->y1 - tile.y1));
							memcpy_xor(src, ptr, tmp.bitsPerPixel,
								   stride, src_bo->pitch,
								   c->x1 + src_dx,
								   c->y1 + src_dy,
								   c->x1 - tile.x1,
								   c->y1 - tile.y1,
								   c->x2 - c->x1,
								   c->y2 - c->y1,
								   and, or);
							c++;
						}

						/* Copy the staged tile to the destination;
						 * an empty intersection counts as success. */
						if (c != clipped)
							n = sna->render.copy_boxes(sna, GXcopy,
										   &tmp, src_bo, -tile.x1, -tile.y1,
										   &dst->drawable, dst_bo, dst_dx, dst_dy,
										   clipped, c - clipped, 0);
						else
							n = 1;

						sigtrap_put();
					} else
						n = 0;

					kgem_bo_destroy(&sna->kgem, src_bo);

					if (!n) {
						if (clipped != stack)
							free(clipped);
						goto fallback;
					}
				}
			}

			if (clipped != stack)
				free(clipped);
		} else {
			/* Small enough: stage the whole extents in a single
			 * buffer and copy all boxes in one pass. */
			src_bo = kgem_create_buffer_2d(kgem,
						       tmp.width,
						       tmp.height,
						       tmp.bitsPerPixel,
						       KGEM_BUFFER_WRITE_INPLACE,
						       &ptr);
			if (!src_bo)
				goto fallback;

			if (sigtrap_get() == 0) {
				for (n = 0; n < nbox; n++) {
					DBG(("%s: box(%d, %d), (%d, %d), src=(%d, %d), dst=(%d, %d)\n",
					     __FUNCTION__,
					     box[n].x1, box[n].y1,
					     box[n].x2, box[n].y2,
					     src_dx, src_dy,
					     box[n].x1 - extents.x1,
					     box[n].y1 - extents.y1));
					memcpy_xor(src, ptr, tmp.bitsPerPixel,
						   stride, src_bo->pitch,
						   box[n].x1 + src_dx,
						   box[n].y1 + src_dy,
						   box[n].x1 - extents.x1,
						   box[n].y1 - extents.y1,
						   box[n].x2 - box[n].x1,
						   box[n].y2 - box[n].y1,
						   and, or);
				}

				n = sna->render.copy_boxes(sna, GXcopy,
							   &tmp, src_bo, -extents.x1, -extents.y1,
							   &dst->drawable, dst_bo, dst_dx, dst_dy,
							   box, nbox, 0);
				sigtrap_put();
			} else
				n = 0;

			kgem_bo_destroy(&sna->kgem, src_bo);

			/* On failure, retry with the tiled upload path. */
			if (!n)
				goto tile;
		}

		return true;
	}

	/* Blitter path: stage each chunk of boxes through a buffer and
	 * emit one XY_SRC_COPY_BLT per box. */
	cmd = XY_SRC_COPY_BLT_CMD;
	br13 = dst_bo->pitch;
	if (kgem->gen >= 040 && dst_bo->tiling) {
		/* Tiled destinations take the pitch in dwords on gen4+. */
		cmd |= BLT_DST_TILED;
		br13 >>= 2;
	}
	br13 |= 0xcc << 16;
	br13 |= sna_br13_color_depth(dst->drawable.bitsPerPixel);
	if (dst->drawable.bitsPerPixel == 32)
		cmd |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;

	kgem_set_mode(kgem, KGEM_BLT, dst_bo);
	if (!kgem_check_batch(kgem, 10) ||
	    !kgem_check_reloc_and_exec(kgem, 2) ||
	    !kgem_check_bo_fenced(kgem, dst_bo)) {
		kgem_submit(kgem);
		if (!kgem_check_bo_fenced(kgem, dst_bo))
			goto fallback;
		_kgem_set_mode(kgem, KGEM_BLT);
	}
	kgem_bcs_set_tiling(&sna->kgem, NULL, dst_bo);

	if (sna->kgem.gen >= 0100) {
		/* gen8+: 64-bit relocations, 10 dwords per blt. */
		cmd |= 8;
		do {
			int nbox_this_time, rem;

			/* Limit this pass to what fits in the batch and
			 * relocation tables. */
			nbox_this_time = nbox;
			rem = kgem_batch_space(kgem);
			if (10*nbox_this_time > rem)
				nbox_this_time = rem / 10;
			if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc)
				nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc) / 2;
			assert(nbox_this_time);
			nbox -= nbox_this_time;

			/* Count the total number of bytes to be read and allocate a
			 * single buffer large enough. Or if it is very small, combine
			 * with other allocations. */
			offset = 0;
			for (n = 0; n < nbox_this_time; n++) {
				int height = box[n].y2 - box[n].y1;
				int width = box[n].x2 - box[n].x1;
				offset += PITCH(width, dst->drawable.bitsPerPixel >> 3) * height;
			}

			src_bo = kgem_create_buffer(kgem, offset,
						    KGEM_BUFFER_WRITE_INPLACE | (nbox ? KGEM_BUFFER_LAST : 0),
						    &ptr);
			if (!src_bo)
				goto fallback;

			if (sigtrap_get()) {
				kgem_bo_destroy(kgem, src_bo);
				goto fallback;
			}

			/* Pack each box at its own offset in the staging
			 * buffer and emit the matching blt. */
			offset = 0;
			do {
				int height = box->y2 - box->y1;
				int width = box->x2 - box->x1;
				int pitch = PITCH(width, dst->drawable.bitsPerPixel >> 3);
				uint32_t *b;

				DBG(("  %s: box src=(%d, %d), dst=(%d, %d) size=(%d, %d), dst offset=%d, dst pitch=%d\n",
				     __FUNCTION__,
				     box->x1 + src_dx, box->y1 + src_dy,
				     box->x1 + dst_dx, box->y1 + dst_dy,
				     width, height,
				     offset, pitch));

				assert(box->x1 + src_dx >= 0);
				assert((box->x2 + src_dx)*dst->drawable.bitsPerPixel <= 8*stride);
				assert(box->y1 + src_dy >= 0);

				assert(box->x1 + dst_dx >= 0);
				assert(box->y1 + dst_dy >= 0);

				memcpy_xor(src, (char *)ptr + offset,
					   dst->drawable.bitsPerPixel,
					   stride, pitch,
					   box->x1 + src_dx, box->y1 + src_dy,
					   0, 0,
					   width, height,
					   and, or);

				assert(kgem->mode == KGEM_BLT);
				b = kgem->batch + kgem->nbatch;
				b[0] = cmd;
				b[1] = br13;
				b[2] = (box->y1 + dst_dy) << 16 | (box->x1 + dst_dx);
				b[3] = (box->y2 + dst_dy) << 16 | (box->x2 + dst_dx);
				*(uint64_t *)(b+4) =
					kgem_add_reloc64(kgem, kgem->nbatch + 4, dst_bo,
							 I915_GEM_DOMAIN_RENDER << 16 |
							 I915_GEM_DOMAIN_RENDER |
							 KGEM_RELOC_FENCED,
							 0);
				b[6] = 0;
				b[7] = pitch;
				*(uint64_t *)(b+8) =
					kgem_add_reloc64(kgem, kgem->nbatch + 8, src_bo,
							 I915_GEM_DOMAIN_RENDER << 16 |
							 KGEM_RELOC_FENCED,
							 offset);
				kgem->nbatch += 10;

				box++;
				offset += pitch * height;
			} while (--nbox_this_time);
			assert(offset == __kgem_buffer_size(src_bo));
			sigtrap_put();

			/* More boxes remain: flush this batch and start anew. */
			if (nbox) {
				_kgem_submit(kgem);
				_kgem_set_mode(kgem, KGEM_BLT);
				kgem_bcs_set_tiling(&sna->kgem, NULL, dst_bo);
			}

			kgem_bo_destroy(kgem, src_bo);
		} while (nbox);
	} else {
		/* pre-gen8: 32-bit relocations, 8 dwords per blt. */
		cmd |= 6;
		do {
			int nbox_this_time, rem;

			nbox_this_time = nbox;
			rem = kgem_batch_space(kgem);
			if (8*nbox_this_time > rem)
				nbox_this_time = rem / 8;
			if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc)
				nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc) / 2;
			assert(nbox_this_time);
			nbox -= nbox_this_time;

			/* Count the total number of bytes to be read and allocate a
			 * single buffer large enough. Or if it is very small, combine
			 * with other allocations. */
			offset = 0;
			for (n = 0; n < nbox_this_time; n++) {
				int height = box[n].y2 - box[n].y1;
				int width = box[n].x2 - box[n].x1;
				offset += PITCH(width, dst->drawable.bitsPerPixel >> 3) * height;
			}

			src_bo = kgem_create_buffer(kgem, offset,
						    KGEM_BUFFER_WRITE_INPLACE | (nbox ? KGEM_BUFFER_LAST : 0),
						    &ptr);
			if (!src_bo)
				goto fallback;

			if (sigtrap_get()) {
				kgem_bo_destroy(kgem, src_bo);
				goto fallback;
			}

			offset = 0;
			do {
				int height = box->y2 - box->y1;
				int width = box->x2 - box->x1;
				int pitch = PITCH(width, dst->drawable.bitsPerPixel >> 3);
				uint32_t *b;

				DBG(("  %s: box src=(%d, %d), dst=(%d, %d) size=(%d, %d), dst offset=%d, dst pitch=%d\n",
				     __FUNCTION__,
				     box->x1 + src_dx, box->y1 + src_dy,
				     box->x1 + dst_dx, box->y1 + dst_dy,
				     width, height,
				     offset, pitch));

				assert(box->x1 + src_dx >= 0);
				assert((box->x2 + src_dx)*dst->drawable.bitsPerPixel <= 8*stride);
				assert(box->y1 + src_dy >= 0);

				assert(box->x1 + dst_dx >= 0);
				assert(box->y1 + dst_dy >= 0);

				memcpy_xor(src, (char *)ptr + offset,
					   dst->drawable.bitsPerPixel,
					   stride, pitch,
					   box->x1 + src_dx, box->y1 + src_dy,
					   0, 0,
					   width, height,
					   and, or);

				assert(kgem->mode == KGEM_BLT);
				b = kgem->batch + kgem->nbatch;
				b[0] = cmd;
				b[1] = br13;
				b[2] = (box->y1 + dst_dy) << 16 | (box->x1 + dst_dx);
				b[3] = (box->y2 + dst_dy) << 16 | (box->x2 + dst_dx);
				b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, dst_bo,
						      I915_GEM_DOMAIN_RENDER << 16 |
						      I915_GEM_DOMAIN_RENDER |
						      KGEM_RELOC_FENCED,
						      0);
				b[5] = 0;
				b[6] = pitch;
				b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, src_bo,
						      I915_GEM_DOMAIN_RENDER << 16 |
						      KGEM_RELOC_FENCED,
						      offset);
				kgem->nbatch += 8;

				box++;
				offset += pitch * height;
			} while (--nbox_this_time);
			assert(offset == __kgem_buffer_size(src_bo));
			sigtrap_put();

			if (nbox) {
				_kgem_submit(kgem);
				_kgem_set_mode(kgem, KGEM_BLT);
				kgem_bcs_set_tiling(&sna->kgem, NULL, dst_bo);
			}

			kgem_bo_destroy(kgem, src_bo);
		} while (nbox);
	}

	/* The blt state is now unknown to the fill cache. */
	sna->blt_state.fill_bo = 0;
	return true;

fallback:
	return write_boxes_inplace__xor(kgem,
					src, stride, dst->drawable.bitsPerPixel, src_dx, src_dy,
					dst_bo, dst_dx, dst_dy,
					box, nbox,
					and, or);
}
1766 
1767 static bool
indirect_replace(struct sna * sna,PixmapPtr pixmap,struct kgem_bo * bo,const void * src,int stride)1768 indirect_replace(struct sna *sna,
1769 		 PixmapPtr pixmap,
1770 		 struct kgem_bo *bo,
1771 		 const void *src, int stride)
1772 {
1773 	struct kgem *kgem = &sna->kgem;
1774 	struct kgem_bo *src_bo;
1775 	BoxRec box;
1776 	void *ptr;
1777 	bool ret;
1778 
1779 	DBG(("%s: size=%d vs %d\n",
1780 	     __FUNCTION__,
1781 	     stride * pixmap->drawable.height >> 12,
1782 	     kgem->half_cpu_cache_pages));
1783 
1784 	if (stride * pixmap->drawable.height >> 12 > kgem->half_cpu_cache_pages)
1785 		return false;
1786 
1787 	if (!kgem_bo_can_blt(kgem, bo) &&
1788 	    must_tile(sna, pixmap->drawable.width, pixmap->drawable.height))
1789 		return false;
1790 
1791 	src_bo = kgem_create_buffer_2d(kgem,
1792 				       pixmap->drawable.width,
1793 				       pixmap->drawable.height,
1794 				       pixmap->drawable.bitsPerPixel,
1795 				       KGEM_BUFFER_WRITE_INPLACE,
1796 				       &ptr);
1797 	if (!src_bo)
1798 		return false;
1799 
1800 	ret = false;
1801 	if (sigtrap_get() == 0) {
1802 		memcpy_blt(src, ptr, pixmap->drawable.bitsPerPixel,
1803 			   stride, src_bo->pitch,
1804 			   0, 0,
1805 			   0, 0,
1806 			   pixmap->drawable.width,
1807 			   pixmap->drawable.height);
1808 
1809 		box.x1 = box.y1 = 0;
1810 		box.x2 = pixmap->drawable.width;
1811 		box.y2 = pixmap->drawable.height;
1812 
1813 		ret = sna->render.copy_boxes(sna, GXcopy,
1814 					     &pixmap->drawable, src_bo, 0, 0,
1815 					     &pixmap->drawable, bo, 0, 0,
1816 					     &box, 1, 0);
1817 		sigtrap_put();
1818 	}
1819 
1820 	kgem_bo_destroy(kgem, src_bo);
1821 
1822 	return ret;
1823 }
1824 
/* Replace the entire contents of the pixmap's GPU bo with the pixels
 * from 'src' (pitch 'stride').  If the current bo is busy, first try an
 * indirect staged replacement, then try switching to a freshly
 * allocated bo to avoid stalling.  On success the pixmap's gpu_bo may
 * have been swapped for the replacement.  Returns false only if the
 * final sna_write_boxes() fallback fails.
 */
bool sna_replace(struct sna *sna, PixmapPtr pixmap,
		 const void *src, int stride)
{
	struct sna_pixmap *priv = sna_pixmap(pixmap);
	struct kgem_bo *bo = priv->gpu_bo;
	void *dst;

	assert(bo);
	DBG(("%s(handle=%d, %dx%d, bpp=%d, tiling=%d) busy?=%d\n",
	     __FUNCTION__, bo->handle,
	     pixmap->drawable.width,
	     pixmap->drawable.height,
	     pixmap->drawable.bitsPerPixel,
	     bo->tiling,
	     __kgem_bo_is_busy(&sna->kgem, bo)));

	assert(!priv->pinned);

	/* Every pixel is about to be rewritten, so any pending operation
	 * on the bo can be dropped. */
	kgem_bo_undo(&sna->kgem, bo);

	if (__kgem_bo_is_busy(&sna->kgem, bo)) {
		struct kgem_bo *new_bo;

		/* Avoid a stall: stage through a temporary buffer... */
		if (indirect_replace(sna, pixmap, bo, src, stride))
			return true;

		/* ...or write into a brand-new idle bo instead. */
		new_bo = kgem_create_2d(&sna->kgem,
					pixmap->drawable.width,
					pixmap->drawable.height,
					pixmap->drawable.bitsPerPixel,
					bo->tiling,
					CREATE_GTT_MAP | CREATE_INACTIVE);
		if (new_bo)
			bo = new_bo;
	}

	/* Fast path: a linear bo with matching pitch can take one single
	 * linear write (kgem_bo_write). */
	if (bo->tiling == I915_TILING_NONE && bo->pitch == stride &&
	    kgem_bo_write(&sna->kgem, bo, src,
			  (pixmap->drawable.height-1)*stride + pixmap->drawable.width*pixmap->drawable.bitsPerPixel/8))
			goto done;

	/* Next: write through a CPU detiling path if available. */
	if (upload_inplace__tiled(&sna->kgem, bo)) {
		BoxRec box;

		box.x1 = box.y1 = 0;
		box.x2 = pixmap->drawable.width;
		box.y2 = pixmap->drawable.height;

		if (write_boxes_inplace__tiled(&sna->kgem, src,
					       stride, pixmap->drawable.bitsPerPixel, 0, 0,
					       bo, 0, 0, &box, 1))
			goto done;
	}

	/* Next: copy through a direct mapping, fault-guarded. */
	if (kgem_bo_can_map(&sna->kgem, bo) &&
	    (dst = kgem_bo_map(&sna->kgem, bo)) != NULL &&
	    sigtrap_get() == 0) {
		memcpy_blt(src, dst, pixmap->drawable.bitsPerPixel,
			   stride, bo->pitch,
			   0, 0,
			   0, 0,
			   pixmap->drawable.width,
			   pixmap->drawable.height);
		sigtrap_put();
	} else {
		BoxRec box;

		/* Mapping failed: release any replacement bo and upload
		 * into the original via the generic write path. */
		if (bo != priv->gpu_bo) {
			kgem_bo_destroy(&sna->kgem, bo);
			bo = priv->gpu_bo;
		}

		box.x1 = box.y1 = 0;
		box.x2 = pixmap->drawable.width;
		box.y2 = pixmap->drawable.height;

		if (!sna_write_boxes(sna, pixmap,
				     bo, 0, 0,
				     src, stride, 0, 0,
				     &box, 1))
			return false;
	}

done:
	/* Install the replacement bo as the pixmap's GPU bo and release
	 * the old one. */
	if (bo != priv->gpu_bo) {
		sna_pixmap_unmap(pixmap, priv);
		kgem_bo_destroy(&sna->kgem, priv->gpu_bo);
		priv->gpu_bo = bo;
	}

	return true;
}
1917 
/* As sna_replace(), but applies the 'and'/'or' pixel masks while
 * copying (via memcpy_xor / sna_write_boxes__xor).  If the current bo
 * is unmappable or busy, tries switching to a freshly allocated bo to
 * avoid stalling; on success the pixmap's gpu_bo may have been swapped
 * for the replacement.  Returns false only if the final
 * sna_write_boxes__xor() fallback fails.
 */
bool
sna_replace__xor(struct sna *sna, PixmapPtr pixmap,
		 const void *src, int stride,
		 uint32_t and, uint32_t or)
{
	struct sna_pixmap *priv = sna_pixmap(pixmap);
	struct kgem_bo *bo = priv->gpu_bo;
	void *dst;

	DBG(("%s(handle=%d, %dx%d, bpp=%d, tiling=%d)\n",
	     __FUNCTION__, bo->handle,
	     pixmap->drawable.width,
	     pixmap->drawable.height,
	     pixmap->drawable.bitsPerPixel,
	     bo->tiling));

	assert(!priv->pinned);

	/* Every pixel is about to be rewritten, so any pending operation
	 * on the bo can be dropped. */
	kgem_bo_undo(&sna->kgem, bo);

	if (!kgem_bo_can_map(&sna->kgem, bo) ||
	    __kgem_bo_is_busy(&sna->kgem, bo)) {
		struct kgem_bo *new_bo;

		/* Avoid a stall or an unmappable target by writing into a
		 * brand-new idle bo instead. */
		new_bo = kgem_create_2d(&sna->kgem,
					pixmap->drawable.width,
					pixmap->drawable.height,
					pixmap->drawable.bitsPerPixel,
					bo->tiling,
					CREATE_GTT_MAP | CREATE_INACTIVE);
		if (new_bo)
			bo = new_bo;
	}

	/* Preferred path: masked copy through a direct mapping,
	 * fault-guarded. */
	if (kgem_bo_can_map(&sna->kgem, bo) &&
	    (dst = kgem_bo_map(&sna->kgem, bo)) != NULL &&
	    sigtrap_get() == 0) {
		memcpy_xor(src, dst, pixmap->drawable.bitsPerPixel,
			   stride, bo->pitch,
			   0, 0,
			   0, 0,
			   pixmap->drawable.width,
			   pixmap->drawable.height,
			   and, or);
		sigtrap_put();
	} else {
		BoxRec box;

		/* Mapping failed: release any replacement bo and upload
		 * into the original via the generic masked write path. */
		if (bo != priv->gpu_bo) {
			kgem_bo_destroy(&sna->kgem, bo);
			bo = priv->gpu_bo;
		}

		box.x1 = box.y1 = 0;
		box.x2 = pixmap->drawable.width;
		box.y2 = pixmap->drawable.height;

		if (!sna_write_boxes__xor(sna, pixmap,
					  bo, 0, 0,
					  src, stride, 0, 0,
					  &box, 1,
					  and, or))
			return false;
	}

	/* Install the replacement bo as the pixmap's GPU bo and release
	 * the old one. */
	if (bo != priv->gpu_bo) {
		sna_pixmap_unmap(pixmap, priv);
		kgem_bo_destroy(&sna->kgem, priv->gpu_bo);
		priv->gpu_bo = bo;
	}

	return true;
}
1991