1 /*
2 * Copyright (c) 2011 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Authors:
24 * Chris Wilson <chris@chris-wilson.co.uk>
25 *
26 */
27
28 #ifdef HAVE_CONFIG_H
29 #include "config.h"
30 #endif
31
32 #include "sna.h"
33 #include "sna_render.h"
34 #include "sna_reg.h"
35
36 #include <sys/mman.h>
37
38 #define PITCH(x, y) ALIGN((x)*(y), 4)
39
40 #define FORCE_INPLACE 0 /* 1 upload directly, -1 force indirect */
41
42 /* XXX Need to avoid using GTT fenced access for I915_TILING_Y on 855GM */
43
upload_too_large(struct sna * sna,int width,int height)44 static inline bool upload_too_large(struct sna *sna, int width, int height)
45 {
46 return width * height * 4 > sna->kgem.max_upload_tile_size;
47 }
48
must_tile(struct sna * sna,int width,int height)49 static inline bool must_tile(struct sna *sna, int width, int height)
50 {
51 return (width > sna->render.max_3d_size ||
52 height > sna->render.max_3d_size ||
53 upload_too_large(sna, width, height));
54 }
55
/* Can we read these boxes back through a direct CPU mapping of @bo?
 * Only linear or X-tiled (when a detiling memcpy is available) buffers
 * qualify.  On LLC machines CPU reads are cached and always worthwhile;
 * otherwise only a whole-pixmap read or a single-page bo is accepted,
 * presumably because uncached reads of anything larger are too slow.
 */
static bool download_inplace__cpu(struct kgem *kgem,
				  PixmapPtr p, struct kgem_bo *bo,
				  const BoxRec *box, int nbox)
{
	BoxRec extents;

	switch (bo->tiling) {
	case I915_TILING_X:
		if (!kgem->memcpy_from_tiled_x)
			return false;
		/* fall through: X-tiling is readable via the detiling copy */
	case I915_TILING_NONE:
		break;
	default:
		return false;
	}

	if (!kgem_bo_can_map__cpu(kgem, bo, false))
		return false;

	if (kgem->has_llc)
		return true;

	/* Accumulate the bounding extents of all the boxes.
	 * NOTE(review): y1 is never lowered and y2 is overwritten
	 * unconditionally by each successive box -- this presumes the boxes
	 * arrive sorted in increasing y order (as region rects are);
	 * confirm with callers.
	 */
	extents = *box;
	while (--nbox) {
		++box;
		if (box->x1 < extents.x1)
			extents.x1 = box->x1;
		if (box->x2 > extents.x2)
			extents.x2 = box->x2;
		extents.y2 = box->y2;
	}

	/* Reading back the entire pixmap is deemed worthwhile... */
	if (extents.x2 - extents.x1 == p->drawable.width &&
	    extents.y2 - extents.y1 == p->drawable.height)
		return true;

	/* ...as is any bo small enough to fit in a single page. */
	return __kgem_bo_size(bo) <= PAGE_SIZE;
}
94
95 static bool
read_boxes_inplace__cpu(struct kgem * kgem,PixmapPtr pixmap,struct kgem_bo * bo,const BoxRec * box,int n)96 read_boxes_inplace__cpu(struct kgem *kgem,
97 PixmapPtr pixmap, struct kgem_bo *bo,
98 const BoxRec *box, int n)
99 {
100 int bpp = pixmap->drawable.bitsPerPixel;
101 void *src, *dst = pixmap->devPrivate.ptr;
102 int src_pitch = bo->pitch;
103 int dst_pitch = pixmap->devKind;
104
105 if (!download_inplace__cpu(kgem, dst, bo, box, n))
106 return false;
107
108 if (bo->tiling == I915_TILING_Y)
109 return false;
110
111 assert(kgem_bo_can_map__cpu(kgem, bo, false));
112
113 src = kgem_bo_map__cpu(kgem, bo);
114 if (src == NULL)
115 return false;
116
117 kgem_bo_sync__cpu_full(kgem, bo, 0);
118
119 if (sigtrap_get())
120 return false;
121
122 DBG(("%s x %d\n", __FUNCTION__, n));
123
124 if (bo->tiling == I915_TILING_X) {
125 do {
126 memcpy_from_tiled_x(kgem, src, dst, bpp, src_pitch, dst_pitch,
127 box->x1, box->y1,
128 box->x1, box->y1,
129 box->x2 - box->x1, box->y2 - box->y1);
130 box++;
131 } while (--n);
132 } else {
133 do {
134 memcpy_blt(src, dst, bpp, src_pitch, dst_pitch,
135 box->x1, box->y1,
136 box->x1, box->y1,
137 box->x2 - box->x1, box->y2 - box->y1);
138 box++;
139 } while (--n);
140 }
141
142 sigtrap_put();
143 return true;
144 }
145
/* Fallback read path: map the bo (CPU if possible, else GTT) and copy
 * each box directly into the pixmap's system-memory pixels.  Silently
 * gives up (returns without copying) if the bo cannot be mapped.
 */
static void read_boxes_inplace(struct kgem *kgem,
			       PixmapPtr pixmap, struct kgem_bo *bo,
			       const BoxRec *box, int n)
{
	int bpp = pixmap->drawable.bitsPerPixel;
	void *src, *dst = pixmap->devPrivate.ptr;
	int src_pitch = bo->pitch;
	int dst_pitch = pixmap->devKind;

	/* Prefer the cached CPU-mapping read. */
	if (read_boxes_inplace__cpu(kgem, pixmap, bo, box, n))
		return;

	DBG(("%s x %d, tiling=%d\n", __FUNCTION__, n, bo->tiling));

	if (!kgem_bo_can_map(kgem, bo))
		return;

	kgem_bo_submit(kgem, bo);

	src = kgem_bo_map(kgem, bo);
	if (src == NULL)
		return;

	if (sigtrap_get())
		return;

	assert(src != dst);
	do {
		DBG(("%s: copying box (%d, %d), (%d, %d)\n",
		     __FUNCTION__, box->x1, box->y1, box->x2, box->y2));

		assert(box->x2 > box->x1);
		assert(box->y2 > box->y1);

		/* Bounds checks (previously duplicated verbatim; one copy
		 * suffices). */
		assert(box->x1 >= 0);
		assert(box->y1 >= 0);
		assert(box->x2 <= pixmap->drawable.width);
		assert(box->y2 <= pixmap->drawable.height);

		memcpy_blt(src, dst, bpp,
			   src_pitch, dst_pitch,
			   box->x1, box->y1,
			   box->x1, box->y1,
			   box->x2 - box->x1, box->y2 - box->y1);
		box++;
	} while (--n);

	sigtrap_put();
}
200
download_inplace(struct kgem * kgem,PixmapPtr p,struct kgem_bo * bo,const BoxRec * box,int nbox)201 static bool download_inplace(struct kgem *kgem,
202 PixmapPtr p, struct kgem_bo *bo,
203 const BoxRec *box, int nbox)
204 {
205 bool cpu;
206
207 if (unlikely(kgem->wedged))
208 return true;
209
210 cpu = download_inplace__cpu(kgem, p, bo, box, nbox);
211 if (!cpu && !kgem_bo_can_map(kgem, bo))
212 return false;
213
214 if (FORCE_INPLACE)
215 return FORCE_INPLACE > 0;
216
217 if (cpu)
218 return true;
219
220 if (kgem->can_blt_cpu && kgem->max_cpu_size)
221 return false;
222
223 return !__kgem_bo_is_busy(kgem, bo);
224 }
225
/* Copy the pixels covered by @box[0..nbox) from @src_bo into the
 * system-memory pixels of @dst.  Three strategies, tried in order:
 *  1. read directly through a CPU/GTT mapping (download_inplace),
 *  2. copy into a linear staging buffer with the render/blt copy_boxes
 *     hook -- tiled into steps when the extents exceed the 3D surface
 *     or upload-size limits -- then memcpy out,
 *  3. emit one XY_SRC_COPY_BLT per box into a single tightly packed
 *     staging buffer, then memcpy each packed box out.
 */
void sna_read_boxes(struct sna *sna, PixmapPtr dst, struct kgem_bo *src_bo,
		    const BoxRec *box, int nbox)
{
	struct kgem *kgem = &sna->kgem;
	struct kgem_bo *dst_bo;
	BoxRec extents;
	const BoxRec *tmp_box;
	int tmp_nbox;
	void *ptr;
	int src_pitch, cpp, offset;
	int n, cmd, br13;
	bool can_blt;

	DBG(("%s x %d, src=(handle=%d), dst=(size=(%d, %d)\n",
	     __FUNCTION__, nbox, src_bo->handle,
	     dst->drawable.width, dst->drawable.height));

	/* Debug builds: every box must lie within the source bo. */
#ifndef NDEBUG
	for (n = 0; n < nbox; n++) {
		if (box[n].x1 < 0 || box[n].y1 < 0 ||
		    box[n].x2 * dst->drawable.bitsPerPixel/8 > src_bo->pitch ||
		    box[n].y2 * src_bo->pitch > kgem_bo_size(src_bo))
		{
			FatalError("source out-of-bounds box[%d]=(%d, %d), (%d, %d), pitch=%d, size=%d\n", n,
				   box[n].x1, box[n].y1,
				   box[n].x2, box[n].y2,
				   src_bo->pitch, kgem_bo_size(src_bo));
		}
	}
#endif

	/* XXX The gpu is faster to perform detiling in bulk, but takes
	 * longer to setup and retrieve the results, with an additional
	 * copy. The long term solution is to use snoopable bo and avoid
	 * this path.
	 */

	if (download_inplace(kgem, dst, src_bo, box, nbox)) {
fallback:
		read_boxes_inplace(kgem, dst, src_bo, box, nbox);
		return;
	}

	/* The blitter path needs every box's row length (in bits) to fit
	 * within the BLT command's signed 16-bit fields. */
	can_blt = kgem_bo_can_blt(kgem, src_bo) &&
		(box[0].x2 - box[0].x1) * dst->drawable.bitsPerPixel < 8 * (MAXSHORT - 4);
	extents = box[0];
	for (n = 1; n < nbox; n++) {
		if (box[n].x1 < extents.x1)
			extents.x1 = box[n].x1;
		if (box[n].x2 > extents.x2)
			extents.x2 = box[n].x2;

		if (can_blt)
			can_blt = (box[n].x2 - box[n].x1) * dst->drawable.bitsPerPixel < 8 * (MAXSHORT - 4);

		if (box[n].y1 < extents.y1)
			extents.y1 = box[n].y1;
		if (box[n].y2 > extents.y2)
			extents.y2 = box[n].y2;
	}
	/* Neither blitter nor 3D engine usable: read in place. */
	if (!can_blt && sna->render.max_3d_size == 0)
		goto fallback;

	if (kgem_bo_can_map(kgem, src_bo)) {
		/* Is it worth detiling? */
		if ((extents.y2 - extents.y1 - 1) * src_bo->pitch < 4096)
			goto fallback;
	}

	/* Try to avoid switching rings... */
	if (!can_blt || kgem->ring == KGEM_RENDER ||
	    upload_too_large(sna, extents.x2 - extents.x1, extents.y2 - extents.y1)) {
		DrawableRec tmp;

		tmp.width  = extents.x2 - extents.x1;
		tmp.height = extents.y2 - extents.y1;
		tmp.depth  = dst->drawable.depth;
		tmp.bitsPerPixel = dst->drawable.bitsPerPixel;

		assert(tmp.width);
		assert(tmp.height);

		if (must_tile(sna, tmp.width, tmp.height)) {
			BoxRec tile, stack[64], *clipped, *c;
			int step;

			/* n == nbox here (from the extents loop above);
			 * spill the clip list to the heap for many boxes. */
			if (n > ARRAY_SIZE(stack)) {
				clipped = malloc(sizeof(BoxRec) * n);
				if (clipped == NULL)
					goto fallback;
			} else
				clipped = stack;

			/* Shrink the square tile step until a tile fits
			 * within both the 3D limits and the upload budget
			 * (x4 assumes 32bpp worst case). */
			step = MIN(sna->render.max_3d_size,
				   8*(MAXSHORT&~63) / dst->drawable.bitsPerPixel);
			while (step * step * 4 > sna->kgem.max_upload_tile_size)
				step /= 2;

			DBG(("%s: tiling download, using %dx%d tiles\n",
			     __FUNCTION__, step, step));
			assert(step);

			for (tile.y1 = extents.y1; tile.y1 < extents.y2; tile.y1 = tile.y2) {
				int y2 = tile.y1 + step;
				if (y2 > extents.y2)
					y2 = extents.y2;
				tile.y2 = y2;

				for (tile.x1 = extents.x1; tile.x1 < extents.x2; tile.x1 = tile.x2) {
					int x2 = tile.x1 + step;
					if (x2 > extents.x2)
						x2 = extents.x2;
					tile.x2 = x2;

					tmp.width  = tile.x2 - tile.x1;
					tmp.height = tile.y2 - tile.y1;

					/* Collect the boxes intersecting this tile. */
					c = clipped;
					for (n = 0; n < nbox; n++) {
						*c = box[n];
						if (!box_intersect(c, &tile))
							continue;

						DBG(("%s: box(%d, %d), (%d, %d),, dst=(%d, %d)\n",
						     __FUNCTION__,
						     c->x1, c->y1,
						     c->x2, c->y2,
						     c->x1 - tile.x1,
						     c->y1 - tile.y1));
						c++;
					}
					if (c == clipped)
						continue;

					dst_bo = kgem_create_buffer_2d(kgem,
								       tmp.width,
								       tmp.height,
								       tmp.bitsPerPixel,
								       KGEM_BUFFER_LAST,
								       &ptr);
					if (!dst_bo) {
						if (clipped != stack)
							free(clipped);
						goto fallback;
					}

					if (!sna->render.copy_boxes(sna, GXcopy,
								    &dst->drawable, src_bo, 0, 0,
								    &tmp, dst_bo, -tile.x1, -tile.y1,
								    clipped, c-clipped, COPY_LAST)) {
						kgem_bo_destroy(&sna->kgem, dst_bo);
						if (clipped != stack)
							free(clipped);
						goto fallback;
					}

					kgem_bo_submit(&sna->kgem, dst_bo);
					kgem_buffer_read_sync(kgem, dst_bo);

					/* Copy this tile's boxes out of the staging buffer. */
					if (sigtrap_get() == 0) {
						while (c-- != clipped) {
							memcpy_blt(ptr, dst->devPrivate.ptr, tmp.bitsPerPixel,
								   dst_bo->pitch, dst->devKind,
								   c->x1 - tile.x1,
								   c->y1 - tile.y1,
								   c->x1, c->y1,
								   c->x2 - c->x1,
								   c->y2 - c->y1);
						}
						sigtrap_put();
					}

					kgem_bo_destroy(&sna->kgem, dst_bo);
				}
			}

			if (clipped != stack)
				free(clipped);
		} else {
			/* Single staging buffer covering the whole extents. */
			dst_bo = kgem_create_buffer_2d(kgem,
						       tmp.width,
						       tmp.height,
						       tmp.bitsPerPixel,
						       KGEM_BUFFER_LAST,
						       &ptr);
			if (!dst_bo)
				goto fallback;

			if (!sna->render.copy_boxes(sna, GXcopy,
						    &dst->drawable, src_bo, 0, 0,
						    &tmp, dst_bo, -extents.x1, -extents.y1,
						    box, nbox, COPY_LAST)) {
				kgem_bo_destroy(&sna->kgem, dst_bo);
				goto fallback;
			}

			kgem_bo_submit(&sna->kgem, dst_bo);
			kgem_buffer_read_sync(kgem, dst_bo);

			if (sigtrap_get() == 0) {
				for (n = 0; n < nbox; n++) {
					memcpy_blt(ptr, dst->devPrivate.ptr, tmp.bitsPerPixel,
						   dst_bo->pitch, dst->devKind,
						   box[n].x1 - extents.x1,
						   box[n].y1 - extents.y1,
						   box[n].x1, box[n].y1,
						   box[n].x2 - box[n].x1,
						   box[n].y2 - box[n].y1);
				}
				sigtrap_put();
			}

			kgem_bo_destroy(&sna->kgem, dst_bo);
		}
		return;
	}

	/* count the total number of bytes to be read and allocate a bo */
	cpp = dst->drawable.bitsPerPixel / 8;
	offset = 0;
	for (n = 0; n < nbox; n++) {
		int height = box[n].y2 - box[n].y1;
		int width = box[n].x2 - box[n].x1;
		offset += PITCH(width, cpp) * height;
	}

	DBG((" read buffer size=%d\n", offset));

	dst_bo = kgem_create_buffer(kgem, offset, KGEM_BUFFER_LAST, &ptr);
	if (!dst_bo) {
		read_boxes_inplace(kgem, dst, src_bo, box, nbox);
		return;
	}

	cmd = XY_SRC_COPY_BLT_CMD;
	src_pitch = src_bo->pitch;
	if (kgem->gen >= 040 && src_bo->tiling) {
		/* gen4+ expresses a tiled pitch in dwords. */
		cmd |= BLT_SRC_TILED;
		src_pitch >>= 2;
	}

	br13 = 0xcc << 16;	/* ROP: GXcopy */
	br13 |= sna_br13_color_depth(cpp * 8);
	if (cpp == 4)
		cmd |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;

	/* Ensure space for at least one full blt packet before starting. */
	kgem_set_mode(kgem, KGEM_BLT, dst_bo);
	if (!kgem_check_batch(kgem, 10) ||
	    !kgem_check_reloc_and_exec(kgem, 2) ||
	    !kgem_check_many_bo_fenced(kgem, dst_bo, src_bo, NULL)) {
		kgem_submit(kgem);
		if (!kgem_check_many_bo_fenced(kgem, dst_bo, src_bo, NULL))
			goto fallback;
		_kgem_set_mode(kgem, KGEM_BLT);
	}
	kgem_bcs_set_tiling(&sna->kgem, src_bo, NULL);

	tmp_nbox = nbox;
	tmp_box = box;
	offset = 0;
	if (sna->kgem.gen >= 0100) {
		/* gen8+: 10-dword packets with 64-bit relocations. */
		cmd |= 8;
		do {
			int nbox_this_time, rem;

			/* Emit as many boxes as batch and reloc space allow. */
			nbox_this_time = tmp_nbox;
			rem = kgem_batch_space(kgem);
			if (10*nbox_this_time > rem)
				nbox_this_time = rem / 10;
			if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc)
				nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc) / 2;
			assert(nbox_this_time);
			tmp_nbox -= nbox_this_time;

			assert(kgem->mode == KGEM_BLT);
			for (n = 0; n < nbox_this_time; n++) {
				int height = tmp_box[n].y2 - tmp_box[n].y1;
				int width = tmp_box[n].x2 - tmp_box[n].x1;
				int pitch = PITCH(width, cpp);
				uint32_t *b = kgem->batch + kgem->nbatch;

				DBG(("   blt offset %x: (%d, %d) x (%d, %d), pitch=%d\n",
				     offset,
				     tmp_box[n].x1, tmp_box[n].y1,
				     width, height, pitch));

				assert(tmp_box[n].x1 >= 0);
				assert(tmp_box[n].x2 * dst->drawable.bitsPerPixel/8 <= src_bo->pitch);
				assert(tmp_box[n].y1 >= 0);
				assert(tmp_box[n].y2 * src_bo->pitch <= kgem_bo_size(src_bo));

				/* Destination is packed: each box lands at its
				 * running byte offset within the staging bo. */
				b[0] = cmd;
				b[1] = br13 | pitch;
				b[2] = 0;
				b[3] = height << 16 | width;
				*(uint64_t *)(b+4) =
					kgem_add_reloc64(kgem, kgem->nbatch + 4, dst_bo,
							 I915_GEM_DOMAIN_RENDER << 16 |
							 I915_GEM_DOMAIN_RENDER |
							 KGEM_RELOC_FENCED,
							 offset);
				b[6] = tmp_box[n].y1 << 16 | tmp_box[n].x1;
				b[7] = src_pitch;
				*(uint64_t *)(b+8) =
					kgem_add_reloc64(kgem, kgem->nbatch + 8, src_bo,
							 I915_GEM_DOMAIN_RENDER << 16 |
							 KGEM_RELOC_FENCED,
							 0);
				kgem->nbatch += 10;

				offset += pitch * height;
			}

			_kgem_submit(kgem);
			if (!tmp_nbox)
				break;

			_kgem_set_mode(kgem, KGEM_BLT);
			kgem_bcs_set_tiling(&sna->kgem, src_bo, NULL);
			tmp_box += nbox_this_time;
		} while (1);
	} else {
		/* pre-gen8: 8-dword packets with 32-bit relocations. */
		cmd |= 6;
		do {
			int nbox_this_time, rem;

			nbox_this_time = tmp_nbox;
			rem = kgem_batch_space(kgem);
			if (8*nbox_this_time > rem)
				nbox_this_time = rem / 8;
			if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc)
				nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc) / 2;
			assert(nbox_this_time);
			tmp_nbox -= nbox_this_time;

			assert(kgem->mode == KGEM_BLT);
			for (n = 0; n < nbox_this_time; n++) {
				int height = tmp_box[n].y2 - tmp_box[n].y1;
				int width = tmp_box[n].x2 - tmp_box[n].x1;
				int pitch = PITCH(width, cpp);
				uint32_t *b = kgem->batch + kgem->nbatch;

				DBG(("   blt offset %x: (%d, %d) x (%d, %d), pitch=%d\n",
				     offset,
				     tmp_box[n].x1, tmp_box[n].y1,
				     width, height, pitch));

				assert(tmp_box[n].x1 >= 0);
				assert(tmp_box[n].x2 * dst->drawable.bitsPerPixel/8 <= src_bo->pitch);
				assert(tmp_box[n].y1 >= 0);
				assert(tmp_box[n].y2 * src_bo->pitch <= kgem_bo_size(src_bo));

				b[0] = cmd;
				b[1] = br13 | pitch;
				b[2] = 0;
				b[3] = height << 16 | width;
				b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, dst_bo,
						      I915_GEM_DOMAIN_RENDER << 16 |
						      I915_GEM_DOMAIN_RENDER |
						      KGEM_RELOC_FENCED,
						      offset);
				b[5] = tmp_box[n].y1 << 16 | tmp_box[n].x1;
				b[6] = src_pitch;
				b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, src_bo,
						      I915_GEM_DOMAIN_RENDER << 16 |
						      KGEM_RELOC_FENCED,
						      0);
				kgem->nbatch += 8;

				offset += pitch * height;
			}

			_kgem_submit(kgem);
			if (!tmp_nbox)
				break;

			_kgem_set_mode(kgem, KGEM_BLT);
			kgem_bcs_set_tiling(&sna->kgem, src_bo, NULL);
			tmp_box += nbox_this_time;
		} while (1);
	}
	assert(offset == __kgem_buffer_size(dst_bo));

	kgem_buffer_read_sync(kgem, dst_bo);

	/* Unpack the staging buffer box by box, walking the same running
	 * offsets used when emitting the blits above. */
	if (sigtrap_get() == 0) {
		char *src = ptr;
		do {
			int height = box->y2 - box->y1;
			int width  = box->x2 - box->x1;
			int pitch = PITCH(width, cpp);

			DBG(("   copy offset %lx [%08x...%08x...%08x]: (%d, %d) x (%d, %d), src pitch=%d, dst pitch=%d, bpp=%d\n",
			     (long)((char *)src - (char *)ptr),
			     *(uint32_t*)src, *(uint32_t*)(src+pitch*height/2 + pitch/2 - 4), *(uint32_t*)(src+pitch*height - 4),
			     box->x1, box->y1,
			     width, height,
			     pitch, dst->devKind, cpp*8));

			assert(box->x1 >= 0);
			assert(box->x2 <= dst->drawable.width);
			assert(box->y1 >= 0);
			assert(box->y2 <= dst->drawable.height);

			memcpy_blt(src, dst->devPrivate.ptr, cpp*8,
				   pitch, dst->devKind,
				   0, 0,
				   box->x1, box->y1,
				   width, height);
			box++;

			src += pitch * height;
		} while (--nbox);
		assert(src - (char *)ptr == __kgem_buffer_size(dst_bo));
		sigtrap_put();
	}
	kgem_bo_destroy(kgem, dst_bo);
	/* NOTE(review): presumably invalidates cached BLT fill state after
	 * using the blitter -- confirm against sna_blt users. */
	sna->blt_state.fill_bo = 0;
}
645
upload_inplace__tiled(struct kgem * kgem,struct kgem_bo * bo)646 static bool upload_inplace__tiled(struct kgem *kgem, struct kgem_bo *bo)
647 {
648 DBG(("%s: tiling=%d\n", __FUNCTION__, bo->tiling));
649 switch (bo->tiling) {
650 case I915_TILING_Y:
651 return false;
652 case I915_TILING_X:
653 if (!kgem->memcpy_to_tiled_x)
654 return false;
655 default:
656 break;
657 }
658
659 if (kgem->has_wc_mmap)
660 return true;
661
662 return kgem_bo_can_map__cpu(kgem, bo, true);
663 }
664
/* Write boxes from system memory @src into @bo through a direct
 * mapping, swizzling into X-tiled layout with memcpy_to_tiled_x when
 * the bo is tiled.  Returns false if no suitable mapping is available
 * (or the bo is Y-tiled), leaving the caller to fall back.
 */
static bool
write_boxes_inplace__tiled(struct kgem *kgem,
			   const uint8_t *src, int stride, int bpp, int16_t src_dx, int16_t src_dy,
			   struct kgem_bo *bo, int16_t dst_dx, int16_t dst_dy,
			   const BoxRec *box, int n)
{
	uint8_t *dst;

	/* No software detiling path for Y-tiling. */
	if (bo->tiling == I915_TILING_Y)
		return false;

	assert(kgem->has_wc_mmap || kgem_bo_can_map__cpu(kgem, bo, true));

	/* Prefer a coherent CPU mapping; otherwise use write-combining.
	 * Note each branch pairs its map with the matching sync. */
	if (kgem_bo_can_map__cpu(kgem, bo, true)) {
		dst = kgem_bo_map__cpu(kgem, bo);
		if (dst == NULL)
			return false;

		kgem_bo_sync__cpu(kgem, bo);
	} else {
		dst = kgem_bo_map__wc(kgem, bo);
		if (dst == NULL)
			return false;

		kgem_bo_sync__gtt(kgem, bo);
	}

	/* Guard the copies with the fault trap; a nonzero return
	 * presumably means trapping is unavailable or a fault fired. */
	if (sigtrap_get())
		return false;

	if (bo->tiling) {
		/* X-tiled: swizzle each box as it is copied in. */
		do {
			memcpy_to_tiled_x(kgem, src, dst, bpp, stride, bo->pitch,
					  box->x1 + src_dx, box->y1 + src_dy,
					  box->x1 + dst_dx, box->y1 + dst_dy,
					  box->x2 - box->x1, box->y2 - box->y1);
			box++;
		} while (--n);
	} else {
		/* Linear: plain per-box blit copy. */
		do {
			memcpy_blt(src, dst, bpp, stride, bo->pitch,
				   box->x1 + src_dx, box->y1 + src_dy,
				   box->x1 + dst_dx, box->y1 + dst_dy,
				   box->x2 - box->x1, box->y2 - box->y1);
			box++;
		} while (--n);
	}

	sigtrap_put();
	return true;
}
716
write_boxes_inplace(struct kgem * kgem,const void * src,int stride,int bpp,int16_t src_dx,int16_t src_dy,struct kgem_bo * bo,int16_t dst_dx,int16_t dst_dy,const BoxRec * box,int n)717 static bool write_boxes_inplace(struct kgem *kgem,
718 const void *src, int stride, int bpp, int16_t src_dx, int16_t src_dy,
719 struct kgem_bo *bo, int16_t dst_dx, int16_t dst_dy,
720 const BoxRec *box, int n)
721 {
722 void *dst;
723
724 DBG(("%s x %d, handle=%d, tiling=%d\n",
725 __FUNCTION__, n, bo->handle, bo->tiling));
726
727 if (upload_inplace__tiled(kgem, bo) &&
728 write_boxes_inplace__tiled(kgem, src, stride, bpp, src_dx, src_dy,
729 bo, dst_dx, dst_dy, box, n))
730 return true;
731
732 if (!kgem_bo_can_map(kgem, bo))
733 return false;
734
735 kgem_bo_submit(kgem, bo);
736
737 dst = kgem_bo_map(kgem, bo);
738 if (dst == NULL)
739 return false;
740
741 assert(dst != src);
742
743 if (sigtrap_get())
744 return false;
745
746 do {
747 DBG(("%s: (%d, %d) -> (%d, %d) x (%d, %d) [bpp=%d, src_pitch=%d, dst_pitch=%d]\n", __FUNCTION__,
748 box->x1 + src_dx, box->y1 + src_dy,
749 box->x1 + dst_dx, box->y1 + dst_dy,
750 box->x2 - box->x1, box->y2 - box->y1,
751 bpp, stride, bo->pitch));
752
753 assert(box->x2 > box->x1);
754 assert(box->y2 > box->y1);
755
756 assert(box->x1 + dst_dx >= 0);
757 assert((box->x2 + dst_dx)*bpp <= 8*bo->pitch);
758 assert(box->y1 + dst_dy >= 0);
759 assert((box->y2 + dst_dy)*bo->pitch <= kgem_bo_size(bo));
760
761 assert(box->x1 + src_dx >= 0);
762 assert((box->x2 + src_dx)*bpp <= 8*stride);
763 assert(box->y1 + src_dy >= 0);
764
765 memcpy_blt(src, dst, bpp,
766 stride, bo->pitch,
767 box->x1 + src_dx, box->y1 + src_dy,
768 box->x1 + dst_dx, box->y1 + dst_dy,
769 box->x2 - box->x1, box->y2 - box->y1);
770 box++;
771 } while (--n);
772
773 sigtrap_put();
774 return true;
775 }
776
/* Heuristic: should these box writes go directly into @bo via a
 * mapping, rather than be staged through a proxy buffer and blitted?
 */
static bool __upload_inplace(struct kgem *kgem,
			     struct kgem_bo *bo,
			     const BoxRec *box,
			     int n, int bpp)
{
	unsigned int bytes;

	/* Compile-time override for debugging. */
	if (FORCE_INPLACE)
		return FORCE_INPLACE > 0;

	/* NOTE(review): bo->exec presumably means the bo is queued in the
	 * current batch, so an in-place write would stall -- confirm. */
	if (bo->exec)
		return false;

	if (bo->flush)
		return true;

	if (kgem_bo_can_map__cpu(kgem, bo, true))
		return true;

	/* If we are writing through the GTT, check first if we might be
	 * able to amalgamate a series of small writes into a single
	 * operation.
	 */
	bytes = 0;
	while (n--) {
		/* Accumulates pixel count; scaled by bpp below. */
		bytes += (box->x2 - box->x1) * (box->y2 - box->y1);
		box++;
	}
	/* Busy bo: only transfers at least half-the-CPU-cache large (in
	 * pages, hence the >>12) justify the stall.  Idle bo: the bare
	 * shifted product is returned as bool, i.e. true whenever the
	 * scaled total reaches at least one page (4096). */
	if (__kgem_bo_is_busy(kgem, bo))
		return bytes * bpp >> 12 >= kgem->half_cpu_cache_pages;
	else
		return bytes * bpp >> 12;
}
810
upload_inplace(struct kgem * kgem,struct kgem_bo * bo,const BoxRec * box,int n,int bpp)811 static bool upload_inplace(struct kgem *kgem,
812 struct kgem_bo *bo,
813 const BoxRec *box,
814 int n, int bpp)
815 {
816 if (unlikely(kgem->wedged))
817 return true;
818
819 if (!kgem_bo_can_map(kgem, bo) && !upload_inplace__tiled(kgem, bo))
820 return false;
821
822 return __upload_inplace(kgem, bo, box, n,bpp);
823 }
824
sna_write_boxes(struct sna * sna,PixmapPtr dst,struct kgem_bo * const dst_bo,int16_t const dst_dx,int16_t const dst_dy,const void * const src,int const stride,int16_t const src_dx,int16_t const src_dy,const BoxRec * box,int nbox)825 bool sna_write_boxes(struct sna *sna, PixmapPtr dst,
826 struct kgem_bo * const dst_bo, int16_t const dst_dx, int16_t const dst_dy,
827 const void * const src, int const stride, int16_t const src_dx, int16_t const src_dy,
828 const BoxRec *box, int nbox)
829 {
830 struct kgem *kgem = &sna->kgem;
831 struct kgem_bo *src_bo;
832 BoxRec extents;
833 void *ptr;
834 int offset;
835 int n, cmd, br13;
836 bool can_blt;
837
838 DBG(("%s x %d, src stride=%d, src dx=(%d, %d)\n", __FUNCTION__, nbox, stride, src_dx, src_dy));
839
840 if (upload_inplace(kgem, dst_bo, box, nbox, dst->drawable.bitsPerPixel) &&
841 write_boxes_inplace(kgem,
842 src, stride, dst->drawable.bitsPerPixel, src_dx, src_dy,
843 dst_bo, dst_dx, dst_dy,
844 box, nbox))
845 return true;
846
847 if (wedged(sna))
848 return false;
849
850 can_blt = kgem_bo_can_blt(kgem, dst_bo) &&
851 (box[0].x2 - box[0].x1) * dst->drawable.bitsPerPixel < 8 * (MAXSHORT - 4);
852 extents = box[0];
853 for (n = 1; n < nbox; n++) {
854 if (box[n].x1 < extents.x1)
855 extents.x1 = box[n].x1;
856 if (box[n].x2 > extents.x2)
857 extents.x2 = box[n].x2;
858
859 if (can_blt)
860 can_blt = (box[n].x2 - box[n].x1) * dst->drawable.bitsPerPixel < 8 * (MAXSHORT - 4);
861
862 if (box[n].y1 < extents.y1)
863 extents.y1 = box[n].y1;
864 if (box[n].y2 > extents.y2)
865 extents.y2 = box[n].y2;
866 }
867 if (!can_blt && sna->render.max_3d_size == 0)
868 goto fallback;
869
870 /* Try to avoid switching rings... */
871 if (!can_blt || kgem->ring == KGEM_RENDER ||
872 upload_too_large(sna, extents.x2 - extents.x1, extents.y2 - extents.y1)) {
873 DrawableRec tmp;
874
875 tmp.width = extents.x2 - extents.x1;
876 tmp.height = extents.y2 - extents.y1;
877 tmp.depth = dst->drawable.depth;
878 tmp.bitsPerPixel = dst->drawable.bitsPerPixel;
879
880 assert(tmp.width);
881 assert(tmp.height);
882
883 DBG(("%s: upload (%d, %d)x(%d, %d), max %dx%d\n",
884 __FUNCTION__,
885 extents.x1, extents.y1,
886 tmp.width, tmp.height,
887 sna->render.max_3d_size, sna->render.max_3d_size));
888 if (must_tile(sna, tmp.width, tmp.height)) {
889 BoxRec tile, stack[64], *clipped;
890 int cpp, step;
891
892 tile:
893 cpp = dst->drawable.bitsPerPixel / 8;
894 step = MIN(sna->render.max_3d_size,
895 (MAXSHORT&~63) / cpp);
896 while (step * step * cpp > sna->kgem.max_upload_tile_size)
897 step /= 2;
898
899 if (step * cpp > 4096)
900 step = 4096 / cpp;
901 assert(step);
902
903 DBG(("%s: tiling upload, using %dx%d tiles\n",
904 __FUNCTION__, step, step));
905
906 if (n > ARRAY_SIZE(stack)) {
907 clipped = malloc(sizeof(BoxRec) * n);
908 if (clipped == NULL)
909 goto fallback;
910 } else
911 clipped = stack;
912
913 for (tile.y1 = extents.y1; tile.y1 < extents.y2; tile.y1 = tile.y2) {
914 int y2 = tile.y1 + step;
915 if (y2 > extents.y2)
916 y2 = extents.y2;
917 tile.y2 = y2;
918
919 for (tile.x1 = extents.x1; tile.x1 < extents.x2; tile.x1 = tile.x2) {
920 int x2 = tile.x1 + step;
921 if (x2 > extents.x2)
922 x2 = extents.x2;
923 tile.x2 = x2;
924
925 tmp.width = tile.x2 - tile.x1;
926 tmp.height = tile.y2 - tile.y1;
927
928 src_bo = kgem_create_buffer_2d(kgem,
929 tmp.width,
930 tmp.height,
931 tmp.bitsPerPixel,
932 KGEM_BUFFER_WRITE_INPLACE,
933 &ptr);
934 if (!src_bo) {
935 if (clipped != stack)
936 free(clipped);
937 goto fallback;
938 }
939
940 if (sigtrap_get() == 0) {
941 BoxRec *c = clipped;
942 for (n = 0; n < nbox; n++) {
943 *c = box[n];
944 if (!box_intersect(c, &tile))
945 continue;
946
947 DBG(("%s: box(%d, %d), (%d, %d), src=(%d, %d), dst=(%d, %d)\n",
948 __FUNCTION__,
949 c->x1, c->y1,
950 c->x2, c->y2,
951 src_dx, src_dy,
952 c->x1 - tile.x1,
953 c->y1 - tile.y1));
954 memcpy_blt(src, ptr, tmp.bitsPerPixel,
955 stride, src_bo->pitch,
956 c->x1 + src_dx,
957 c->y1 + src_dy,
958 c->x1 - tile.x1,
959 c->y1 - tile.y1,
960 c->x2 - c->x1,
961 c->y2 - c->y1);
962 c++;
963 }
964
965 if (c != clipped)
966 n = sna->render.copy_boxes(sna, GXcopy,
967 &tmp, src_bo, -tile.x1, -tile.y1,
968 &dst->drawable, dst_bo, dst_dx, dst_dy,
969 clipped, c - clipped, 0);
970 else
971 n = 1;
972 sigtrap_put();
973 } else
974 n = 0;
975
976 kgem_bo_destroy(&sna->kgem, src_bo);
977
978 if (!n) {
979 if (clipped != stack)
980 free(clipped);
981 goto fallback;
982 }
983 }
984 }
985
986 if (clipped != stack)
987 free(clipped);
988 } else {
989 src_bo = kgem_create_buffer_2d(kgem,
990 tmp.width,
991 tmp.height,
992 tmp.bitsPerPixel,
993 KGEM_BUFFER_WRITE_INPLACE,
994 &ptr);
995 if (!src_bo)
996 goto fallback;
997
998 if (sigtrap_get() == 0) {
999 for (n = 0; n < nbox; n++) {
1000 DBG(("%s: box(%d, %d), (%d, %d), src=(%d, %d), dst=(%d, %d)\n",
1001 __FUNCTION__,
1002 box[n].x1, box[n].y1,
1003 box[n].x2, box[n].y2,
1004 src_dx, src_dy,
1005 box[n].x1 - extents.x1,
1006 box[n].y1 - extents.y1));
1007 memcpy_blt(src, ptr, tmp.bitsPerPixel,
1008 stride, src_bo->pitch,
1009 box[n].x1 + src_dx,
1010 box[n].y1 + src_dy,
1011 box[n].x1 - extents.x1,
1012 box[n].y1 - extents.y1,
1013 box[n].x2 - box[n].x1,
1014 box[n].y2 - box[n].y1);
1015 }
1016
1017 n = sna->render.copy_boxes(sna, GXcopy,
1018 &tmp, src_bo, -extents.x1, -extents.y1,
1019 &dst->drawable, dst_bo, dst_dx, dst_dy,
1020 box, nbox, 0);
1021 sigtrap_put();
1022 } else
1023 n = 0;
1024
1025 kgem_bo_destroy(&sna->kgem, src_bo);
1026
1027 if (!n)
1028 goto tile;
1029 }
1030
1031 return true;
1032 }
1033
1034 cmd = XY_SRC_COPY_BLT_CMD;
1035 br13 = dst_bo->pitch;
1036 if (kgem->gen >= 040 && dst_bo->tiling) {
1037 cmd |= BLT_DST_TILED;
1038 br13 >>= 2;
1039 }
1040 br13 |= 0xcc << 16;
1041 br13 |= sna_br13_color_depth(dst->drawable.bitsPerPixel);
1042 if (dst->drawable.bitsPerPixel == 32)
1043 cmd |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
1044
1045 kgem_set_mode(kgem, KGEM_BLT, dst_bo);
1046 if (!kgem_check_batch(kgem, 10) ||
1047 !kgem_check_reloc_and_exec(kgem, 2) ||
1048 !kgem_check_bo_fenced(kgem, dst_bo)) {
1049 kgem_submit(kgem);
1050 if (!kgem_check_bo_fenced(kgem, dst_bo))
1051 goto fallback;
1052 _kgem_set_mode(kgem, KGEM_BLT);
1053 }
1054 kgem_bcs_set_tiling(&sna->kgem, NULL, dst_bo);
1055
1056 if (kgem->gen >= 0100) {
1057 cmd |= 8;
1058 do {
1059 int nbox_this_time, rem;
1060
1061 nbox_this_time = nbox;
1062 rem = kgem_batch_space(kgem);
1063 if (10*nbox_this_time > rem)
1064 nbox_this_time = rem / 10;
1065 if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc)
1066 nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc) / 2;
1067 assert(nbox_this_time);
1068 nbox -= nbox_this_time;
1069
1070 /* Count the total number of bytes to be read and allocate a
1071 * single buffer large enough. Or if it is very small, combine
1072 * with other allocations. */
1073 offset = 0;
1074 for (n = 0; n < nbox_this_time; n++) {
1075 int height = box[n].y2 - box[n].y1;
1076 int width = box[n].x2 - box[n].x1;
1077 offset += PITCH(width, dst->drawable.bitsPerPixel >> 3) * height;
1078 }
1079
1080 src_bo = kgem_create_buffer(kgem, offset,
1081 KGEM_BUFFER_WRITE_INPLACE | (nbox ? KGEM_BUFFER_LAST : 0),
1082 &ptr);
1083 if (!src_bo)
1084 break;
1085
1086 if (sigtrap_get() == 0) {
1087 offset = 0;
1088 do {
1089 int height = box->y2 - box->y1;
1090 int width = box->x2 - box->x1;
1091 int pitch = PITCH(width, dst->drawable.bitsPerPixel >> 3);
1092 uint32_t *b;
1093
1094 DBG((" %s: box src=(%d, %d), dst=(%d, %d) size=(%d, %d), dst offset=%d, dst pitch=%d\n",
1095 __FUNCTION__,
1096 box->x1 + src_dx, box->y1 + src_dy,
1097 box->x1 + dst_dx, box->y1 + dst_dy,
1098 width, height,
1099 offset, pitch));
1100
1101 assert(box->x1 + src_dx >= 0);
1102 assert((box->x2 + src_dx)*dst->drawable.bitsPerPixel <= 8*stride);
1103 assert(box->y1 + src_dy >= 0);
1104
1105 assert(box->x1 + dst_dx >= 0);
1106 assert(box->y1 + dst_dy >= 0);
1107
1108 memcpy_blt(src, (char *)ptr + offset,
1109 dst->drawable.bitsPerPixel,
1110 stride, pitch,
1111 box->x1 + src_dx, box->y1 + src_dy,
1112 0, 0,
1113 width, height);
1114
1115 assert(kgem->mode == KGEM_BLT);
1116 b = kgem->batch + kgem->nbatch;
1117 b[0] = cmd;
1118 b[1] = br13;
1119 b[2] = (box->y1 + dst_dy) << 16 | (box->x1 + dst_dx);
1120 b[3] = (box->y2 + dst_dy) << 16 | (box->x2 + dst_dx);
1121 *(uint64_t *)(b+4) =
1122 kgem_add_reloc64(kgem, kgem->nbatch + 4, dst_bo,
1123 I915_GEM_DOMAIN_RENDER << 16 |
1124 I915_GEM_DOMAIN_RENDER |
1125 KGEM_RELOC_FENCED,
1126 0);
1127 b[6] = 0;
1128 b[7] = pitch;
1129 *(uint64_t *)(b+8) =
1130 kgem_add_reloc64(kgem, kgem->nbatch + 8, src_bo,
1131 I915_GEM_DOMAIN_RENDER << 16 |
1132 KGEM_RELOC_FENCED,
1133 offset);
1134 kgem->nbatch += 10;
1135
1136 box++;
1137 offset += pitch * height;
1138 } while (--nbox_this_time);
1139 assert(offset == __kgem_buffer_size(src_bo));
1140 sigtrap_put();
1141 }
1142
1143 if (nbox) {
1144 _kgem_submit(kgem);
1145 _kgem_set_mode(kgem, KGEM_BLT);
1146 kgem_bcs_set_tiling(&sna->kgem, NULL, dst_bo);
1147 }
1148
1149 kgem_bo_destroy(kgem, src_bo);
1150 } while (nbox);
1151 } else {
1152 cmd |= 6;
1153 do {
1154 int nbox_this_time, rem;
1155
1156 nbox_this_time = nbox;
1157 rem = kgem_batch_space(kgem);
1158 if (8*nbox_this_time > rem)
1159 nbox_this_time = rem / 8;
1160 if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc)
1161 nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc) / 2;
1162 assert(nbox_this_time);
1163 nbox -= nbox_this_time;
1164
1165 /* Count the total number of bytes to be read and allocate a
1166 * single buffer large enough. Or if it is very small, combine
1167 * with other allocations. */
1168 offset = 0;
1169 for (n = 0; n < nbox_this_time; n++) {
1170 int height = box[n].y2 - box[n].y1;
1171 int width = box[n].x2 - box[n].x1;
1172 offset += PITCH(width, dst->drawable.bitsPerPixel >> 3) * height;
1173 }
1174
1175 src_bo = kgem_create_buffer(kgem, offset,
1176 KGEM_BUFFER_WRITE_INPLACE | (nbox ? KGEM_BUFFER_LAST : 0),
1177 &ptr);
1178 if (!src_bo)
1179 break;
1180
1181 if (sigtrap_get()) {
1182 kgem_bo_destroy(kgem, src_bo);
1183 goto fallback;
1184 }
1185
1186 offset = 0;
1187 do {
1188 int height = box->y2 - box->y1;
1189 int width = box->x2 - box->x1;
1190 int pitch = PITCH(width, dst->drawable.bitsPerPixel >> 3);
1191 uint32_t *b;
1192
1193 DBG((" %s: box src=(%d, %d), dst=(%d, %d) size=(%d, %d), dst offset=%d, dst pitch=%d\n",
1194 __FUNCTION__,
1195 box->x1 + src_dx, box->y1 + src_dy,
1196 box->x1 + dst_dx, box->y1 + dst_dy,
1197 width, height,
1198 offset, pitch));
1199
1200 assert(box->x1 + src_dx >= 0);
1201 assert((box->x2 + src_dx)*dst->drawable.bitsPerPixel <= 8*stride);
1202 assert(box->y1 + src_dy >= 0);
1203
1204 assert(box->x1 + dst_dx >= 0);
1205 assert(box->y1 + dst_dy >= 0);
1206
1207 memcpy_blt(src, (char *)ptr + offset,
1208 dst->drawable.bitsPerPixel,
1209 stride, pitch,
1210 box->x1 + src_dx, box->y1 + src_dy,
1211 0, 0,
1212 width, height);
1213
1214 assert(kgem->mode == KGEM_BLT);
1215 b = kgem->batch + kgem->nbatch;
1216 b[0] = cmd;
1217 b[1] = br13;
1218 b[2] = (box->y1 + dst_dy) << 16 | (box->x1 + dst_dx);
1219 b[3] = (box->y2 + dst_dy) << 16 | (box->x2 + dst_dx);
1220 b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, dst_bo,
1221 I915_GEM_DOMAIN_RENDER << 16 |
1222 I915_GEM_DOMAIN_RENDER |
1223 KGEM_RELOC_FENCED,
1224 0);
1225 b[5] = 0;
1226 b[6] = pitch;
1227 b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, src_bo,
1228 I915_GEM_DOMAIN_RENDER << 16 |
1229 KGEM_RELOC_FENCED,
1230 offset);
1231 kgem->nbatch += 8;
1232
1233 box++;
1234 offset += pitch * height;
1235 } while (--nbox_this_time);
1236 assert(offset == __kgem_buffer_size(src_bo));
1237 sigtrap_put();
1238
1239 if (nbox) {
1240 _kgem_submit(kgem);
1241 _kgem_set_mode(kgem, KGEM_BLT);
1242 kgem_bcs_set_tiling(&sna->kgem, NULL, dst_bo);
1243 }
1244
1245 kgem_bo_destroy(kgem, src_bo);
1246 } while (nbox);
1247 }
1248
1249 sna->blt_state.fill_bo = 0;
1250 return true;
1251
1252 fallback:
1253 return write_boxes_inplace(kgem,
1254 src, stride, dst->drawable.bitsPerPixel, src_dx, src_dy,
1255 dst_bo, dst_dx, dst_dy,
1256 box, nbox);
1257 }
1258
1259 static bool
write_boxes_inplace__xor(struct kgem * kgem,const void * src,int stride,int bpp,int16_t src_dx,int16_t src_dy,struct kgem_bo * bo,int16_t dst_dx,int16_t dst_dy,const BoxRec * box,int n,uint32_t and,uint32_t or)1260 write_boxes_inplace__xor(struct kgem *kgem,
1261 const void *src, int stride, int bpp, int16_t src_dx, int16_t src_dy,
1262 struct kgem_bo *bo, int16_t dst_dx, int16_t dst_dy,
1263 const BoxRec *box, int n,
1264 uint32_t and, uint32_t or)
1265 {
1266 void *dst;
1267
1268 DBG(("%s x %d, tiling=%d\n", __FUNCTION__, n, bo->tiling));
1269
1270 if (!kgem_bo_can_map(kgem, bo))
1271 return false;
1272
1273 kgem_bo_submit(kgem, bo);
1274
1275 dst = kgem_bo_map(kgem, bo);
1276 if (dst == NULL)
1277 return false;
1278
1279 if (sigtrap_get())
1280 return false;
1281
1282 do {
1283 DBG(("%s: (%d, %d) -> (%d, %d) x (%d, %d) [bpp=%d, src_pitch=%d, dst_pitch=%d]\n", __FUNCTION__,
1284 box->x1 + src_dx, box->y1 + src_dy,
1285 box->x1 + dst_dx, box->y1 + dst_dy,
1286 box->x2 - box->x1, box->y2 - box->y1,
1287 bpp, stride, bo->pitch));
1288
1289 assert(box->x2 > box->x1);
1290 assert(box->y2 > box->y1);
1291
1292 assert(box->x1 + dst_dx >= 0);
1293 assert((box->x2 + dst_dx)*bpp <= 8*bo->pitch);
1294 assert(box->y1 + dst_dy >= 0);
1295 assert((box->y2 + dst_dy)*bo->pitch <= kgem_bo_size(bo));
1296
1297 assert(box->x1 + src_dx >= 0);
1298 assert((box->x2 + src_dx)*bpp <= 8*stride);
1299 assert(box->y1 + src_dy >= 0);
1300
1301 memcpy_xor(src, dst, bpp,
1302 stride, bo->pitch,
1303 box->x1 + src_dx, box->y1 + src_dy,
1304 box->x1 + dst_dx, box->y1 + dst_dy,
1305 box->x2 - box->x1, box->y2 - box->y1,
1306 and, or);
1307 box++;
1308 } while (--n);
1309
1310 sigtrap_put();
1311 return true;
1312 }
1313
upload_inplace__xor(struct kgem * kgem,struct kgem_bo * bo,const BoxRec * box,int n,int bpp)1314 static bool upload_inplace__xor(struct kgem *kgem,
1315 struct kgem_bo *bo,
1316 const BoxRec *box,
1317 int n, int bpp)
1318 {
1319 if (unlikely(kgem->wedged))
1320 return true;
1321
1322 if (!kgem_bo_can_map(kgem, bo))
1323 return false;
1324
1325 return __upload_inplace(kgem, bo, box, n, bpp);
1326 }
1327
/* Upload nbox boxes from the CPU pixel buffer 'src' (pitch 'stride') into
 * dst_bo, applying out = (in & and) | or to every pixel during the copy
 * (see memcpy_xor).  (src_dx, src_dy) and (dst_dx, dst_dy) translate the
 * boxes into the source and destination coordinate spaces respectively.
 *
 * Upload strategy, in order of preference:
 *  1. write directly through a CPU mapping of dst_bo
 *     (upload_inplace__xor + write_boxes_inplace__xor);
 *  2. stage through a temporary buffer and copy with the render engine,
 *     tiling the transfer when it exceeds the 3D pipeline limits;
 *  3. stage through temporary buffers and emit XY_SRC_COPY_BLT batches,
 *     chunked to fit the available batch/relocation space.
 * If everything fails, fall back to the direct inplace writer.
 */
bool sna_write_boxes__xor(struct sna *sna, PixmapPtr dst,
			  struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
			  const void *src, int stride, int16_t src_dx, int16_t src_dy,
			  const BoxRec *box, int nbox,
			  uint32_t and, uint32_t or)
{
	struct kgem *kgem = &sna->kgem;
	struct kgem_bo *src_bo;
	BoxRec extents;
	bool can_blt;
	void *ptr;
	int offset;
	int n, cmd, br13;

	DBG(("%s x %d\n", __FUNCTION__, nbox));

	/* Fast path: write straight into a mapping of the target. */
	if (upload_inplace__xor(kgem, dst_bo, box, nbox, dst->drawable.bitsPerPixel) &&
	    write_boxes_inplace__xor(kgem,
				     src, stride, dst->drawable.bitsPerPixel, src_dx, src_dy,
				     dst_bo, dst_dx, dst_dy,
				     box, nbox,
				     and, or))
		return true;

	if (wedged(sna))
		return false;

	/* The blitter limits a row to a signed 16-bit byte pitch; verify
	 * every box fits before committing to the BLT path, while also
	 * accumulating the bounding extents of all boxes. */
	can_blt = kgem_bo_can_blt(kgem, dst_bo) &&
		(box[0].x2 - box[0].x1) * dst->drawable.bitsPerPixel < 8 * (MAXSHORT - 4);
	extents = box[0];
	for (n = 1; n < nbox; n++) {
		if (box[n].x1 < extents.x1)
			extents.x1 = box[n].x1;
		if (box[n].x2 > extents.x2)
			extents.x2 = box[n].x2;

		if (can_blt)
			can_blt = (box[n].x2 - box[n].x1) * dst->drawable.bitsPerPixel < 8 * (MAXSHORT - 4);

		if (box[n].y1 < extents.y1)
			extents.y1 = box[n].y1;
		if (box[n].y2 > extents.y2)
			extents.y2 = box[n].y2;
	}

	/* Try to avoid switching rings... */
	if (!can_blt || kgem->ring == KGEM_RENDER ||
	    upload_too_large(sna, extents.x2 - extents.x1, extents.y2 - extents.y1)) {
		DrawableRec tmp;

		tmp.width = extents.x2 - extents.x1;
		tmp.height = extents.y2 - extents.y1;
		tmp.depth = dst->drawable.depth;
		tmp.bitsPerPixel = dst->drawable.bitsPerPixel;

		assert(tmp.width);
		assert(tmp.height);

		DBG(("%s: upload (%d, %d)x(%d, %d), max %dx%d\n",
		     __FUNCTION__,
		     extents.x1, extents.y1,
		     tmp.width, tmp.height,
		     sna->render.max_3d_size, sna->render.max_3d_size));
		if (must_tile(sna, tmp.width, tmp.height)) {
			BoxRec tile, stack[64], *clipped;
			int step;

tile:
			/* Pick a square tile edge that fits both the 3D
			 * surface limits and the maximum upload buffer. */
			step = MIN(sna->render.max_3d_size - 4096 / dst->drawable.bitsPerPixel,
				   8*(MAXSHORT&~63) / dst->drawable.bitsPerPixel);
			while (step * step * 4 > sna->kgem.max_upload_tile_size)
				step /= 2;

			DBG(("%s: tiling upload, using %dx%d tiles\n",
			     __FUNCTION__, step, step));
			assert(step);

			/* NOTE(review): when this point is reached via the
			 * "goto tile" retry below, n is 0 (the failed
			 * copy_boxes result) rather than nbox, so clipped[]
			 * stays on the 64-entry stack even when
			 * nbox > ARRAY_SIZE(stack) — verify whether this
			 * should test nbox instead. */
			if (n > ARRAY_SIZE(stack)) {
				clipped = malloc(sizeof(BoxRec) * n);
				if (clipped == NULL)
					goto fallback;
			} else
				clipped = stack;

			/* Walk the extents in step x step tiles. */
			for (tile.y1 = extents.y1; tile.y1 < extents.y2; tile.y1 = tile.y2) {
				int y2 = tile.y1 + step;
				if (y2 > extents.y2)
					y2 = extents.y2;
				tile.y2 = y2;

				for (tile.x1 = extents.x1; tile.x1 < extents.x2; tile.x1 = tile.x2) {
					int x2 = tile.x1 + step;
					if (x2 > extents.x2)
						x2 = extents.x2;
					tile.x2 = x2;

					tmp.width = tile.x2 - tile.x1;
					tmp.height = tile.y2 - tile.y1;

					src_bo = kgem_create_buffer_2d(kgem,
								       tmp.width,
								       tmp.height,
								       tmp.bitsPerPixel,
								       KGEM_BUFFER_WRITE_INPLACE,
								       &ptr);
					if (!src_bo) {
						if (clipped != stack)
							free(clipped);
						goto fallback;
					}

					if (sigtrap_get() == 0) {
						/* Clip each box to this tile
						 * and transform-copy it into
						 * the staging buffer. */
						BoxRec *c = clipped;
						for (n = 0; n < nbox; n++) {
							*c = box[n];
							if (!box_intersect(c, &tile))
								continue;

							DBG(("%s: box(%d, %d), (%d, %d), src=(%d, %d), dst=(%d, %d)\n",
							     __FUNCTION__,
							     c->x1, c->y1,
							     c->x2, c->y2,
							     src_dx, src_dy,
							     c->x1 - tile.x1,
							     c->y1 - tile.y1));
							memcpy_xor(src, ptr, tmp.bitsPerPixel,
								   stride, src_bo->pitch,
								   c->x1 + src_dx,
								   c->y1 + src_dy,
								   c->x1 - tile.x1,
								   c->y1 - tile.y1,
								   c->x2 - c->x1,
								   c->y2 - c->y1,
								   and, or);
							c++;
						}

						/* Copy the staged tile onto
						 * the target; an empty tile
						 * is treated as success. */
						if (c != clipped)
							n = sna->render.copy_boxes(sna, GXcopy,
										   &tmp, src_bo, -tile.x1, -tile.y1,
										   &dst->drawable, dst_bo, dst_dx, dst_dy,
										   clipped, c - clipped, 0);
						else
							n = 1;

						sigtrap_put();
					} else
						n = 0;

					kgem_bo_destroy(&sna->kgem, src_bo);

					if (!n) {
						if (clipped != stack)
							free(clipped);
						goto fallback;
					}
				}
			}

			if (clipped != stack)
				free(clipped);
		} else {
			/* The extents fit in a single staging surface. */
			src_bo = kgem_create_buffer_2d(kgem,
						       tmp.width,
						       tmp.height,
						       tmp.bitsPerPixel,
						       KGEM_BUFFER_WRITE_INPLACE,
						       &ptr);
			if (!src_bo)
				goto fallback;

			if (sigtrap_get() == 0) {
				/* Transform-copy every box into the staging
				 * surface, positioned relative to extents. */
				for (n = 0; n < nbox; n++) {
					DBG(("%s: box(%d, %d), (%d, %d), src=(%d, %d), dst=(%d, %d)\n",
					     __FUNCTION__,
					     box[n].x1, box[n].y1,
					     box[n].x2, box[n].y2,
					     src_dx, src_dy,
					     box[n].x1 - extents.x1,
					     box[n].y1 - extents.y1));
					memcpy_xor(src, ptr, tmp.bitsPerPixel,
						   stride, src_bo->pitch,
						   box[n].x1 + src_dx,
						   box[n].y1 + src_dy,
						   box[n].x1 - extents.x1,
						   box[n].y1 - extents.y1,
						   box[n].x2 - box[n].x1,
						   box[n].y2 - box[n].y1,
						   and, or);
				}

				n = sna->render.copy_boxes(sna, GXcopy,
							   &tmp, src_bo, -extents.x1, -extents.y1,
							   &dst->drawable, dst_bo, dst_dx, dst_dy,
							   box, nbox, 0);
				sigtrap_put();
			} else
				n = 0;

			kgem_bo_destroy(&sna->kgem, src_bo);

			/* On failure, retry with the tiled path. */
			if (!n)
				goto tile;
		}

		return true;
	}

	/* BLT path: emit one XY_SRC_COPY_BLT per box, reading from a
	 * transient buffer filled with the transformed pixels. */
	cmd = XY_SRC_COPY_BLT_CMD;
	br13 = dst_bo->pitch;
	if (kgem->gen >= 040 && dst_bo->tiling) {
		/* Tiled destinations take the pitch in dwords on gen4+. */
		cmd |= BLT_DST_TILED;
		br13 >>= 2;
	}
	br13 |= 0xcc << 16; /* ROP: GXcopy */
	br13 |= sna_br13_color_depth(dst->drawable.bitsPerPixel);
	if (dst->drawable.bitsPerPixel == 32)
		cmd |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;

	/* Make sure there is room for at least one blit before starting. */
	kgem_set_mode(kgem, KGEM_BLT, dst_bo);
	if (!kgem_check_batch(kgem, 10) ||
	    !kgem_check_reloc_and_exec(kgem, 2) ||
	    !kgem_check_bo_fenced(kgem, dst_bo)) {
		kgem_submit(kgem);
		if (!kgem_check_bo_fenced(kgem, dst_bo))
			goto fallback;
		_kgem_set_mode(kgem, KGEM_BLT);
	}
	kgem_bcs_set_tiling(&sna->kgem, NULL, dst_bo);

	if (sna->kgem.gen >= 0100) {
		/* gen8+: 64-bit relocations, 10 dwords per blit. */
		cmd |= 8;
		do {
			int nbox_this_time, rem;

			/* Chunk the boxes to the available batch and
			 * relocation space. */
			nbox_this_time = nbox;
			rem = kgem_batch_space(kgem);
			if (10*nbox_this_time > rem)
				nbox_this_time = rem / 10;
			if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc)
				nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc) / 2;
			assert(nbox_this_time);
			nbox -= nbox_this_time;

			/* Count the total number of bytes to be read and allocate a
			 * single buffer large enough. Or if it is very small, combine
			 * with other allocations. */
			offset = 0;
			for (n = 0; n < nbox_this_time; n++) {
				int height = box[n].y2 - box[n].y1;
				int width = box[n].x2 - box[n].x1;
				offset += PITCH(width, dst->drawable.bitsPerPixel >> 3) * height;
			}

			src_bo = kgem_create_buffer(kgem, offset,
						    KGEM_BUFFER_WRITE_INPLACE | (nbox ? KGEM_BUFFER_LAST : 0),
						    &ptr);
			if (!src_bo)
				goto fallback;

			if (sigtrap_get()) {
				kgem_bo_destroy(kgem, src_bo);
				goto fallback;
			}

			offset = 0;
			do {
				int height = box->y2 - box->y1;
				int width = box->x2 - box->x1;
				int pitch = PITCH(width, dst->drawable.bitsPerPixel >> 3);
				uint32_t *b;

				DBG((" %s: box src=(%d, %d), dst=(%d, %d) size=(%d, %d), dst offset=%d, dst pitch=%d\n",
				     __FUNCTION__,
				     box->x1 + src_dx, box->y1 + src_dy,
				     box->x1 + dst_dx, box->y1 + dst_dy,
				     width, height,
				     offset, pitch));

				assert(box->x1 + src_dx >= 0);
				assert((box->x2 + src_dx)*dst->drawable.bitsPerPixel <= 8*stride);
				assert(box->y1 + src_dy >= 0);

				assert(box->x1 + dst_dx >= 0);
				assert(box->y1 + dst_dy >= 0);

				/* Transform-copy the box into the staging
				 * buffer at 'offset', tightly pitched. */
				memcpy_xor(src, (char *)ptr + offset,
					   dst->drawable.bitsPerPixel,
					   stride, pitch,
					   box->x1 + src_dx, box->y1 + src_dy,
					   0, 0,
					   width, height,
					   and, or);

				/* Emit the blit: dst rect, dst reloc, then
				 * src pitch and src reloc at 'offset'. */
				assert(kgem->mode == KGEM_BLT);
				b = kgem->batch + kgem->nbatch;
				b[0] = cmd;
				b[1] = br13;
				b[2] = (box->y1 + dst_dy) << 16 | (box->x1 + dst_dx);
				b[3] = (box->y2 + dst_dy) << 16 | (box->x2 + dst_dx);
				*(uint64_t *)(b+4) =
					kgem_add_reloc64(kgem, kgem->nbatch + 4, dst_bo,
							 I915_GEM_DOMAIN_RENDER << 16 |
							 I915_GEM_DOMAIN_RENDER |
							 KGEM_RELOC_FENCED,
							 0);
				b[6] = 0;
				b[7] = pitch;
				*(uint64_t *)(b+8) =
					kgem_add_reloc64(kgem, kgem->nbatch + 8, src_bo,
							 I915_GEM_DOMAIN_RENDER << 16 |
							 KGEM_RELOC_FENCED,
							 offset);
				kgem->nbatch += 10;

				box++;
				offset += pitch * height;
			} while (--nbox_this_time);
			assert(offset == __kgem_buffer_size(src_bo));
			sigtrap_put();

			/* More boxes remain: flush so the next chunk starts
			 * on a fresh batch. */
			if (nbox) {
				_kgem_submit(kgem);
				_kgem_set_mode(kgem, KGEM_BLT);
				kgem_bcs_set_tiling(&sna->kgem, NULL, dst_bo);
			}

			kgem_bo_destroy(kgem, src_bo);
		} while (nbox);
	} else {
		/* pre-gen8: 32-bit relocations, 8 dwords per blit. */
		cmd |= 6;
		do {
			int nbox_this_time, rem;

			/* Chunk the boxes to the available batch and
			 * relocation space. */
			nbox_this_time = nbox;
			rem = kgem_batch_space(kgem);
			if (8*nbox_this_time > rem)
				nbox_this_time = rem / 8;
			if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc)
				nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc) / 2;
			assert(nbox_this_time);
			nbox -= nbox_this_time;

			/* Count the total number of bytes to be read and allocate a
			 * single buffer large enough. Or if it is very small, combine
			 * with other allocations. */
			offset = 0;
			for (n = 0; n < nbox_this_time; n++) {
				int height = box[n].y2 - box[n].y1;
				int width = box[n].x2 - box[n].x1;
				offset += PITCH(width, dst->drawable.bitsPerPixel >> 3) * height;
			}

			src_bo = kgem_create_buffer(kgem, offset,
						    KGEM_BUFFER_WRITE_INPLACE | (nbox ? KGEM_BUFFER_LAST : 0),
						    &ptr);
			if (!src_bo)
				goto fallback;

			if (sigtrap_get()) {
				kgem_bo_destroy(kgem, src_bo);
				goto fallback;
			}

			offset = 0;
			do {
				int height = box->y2 - box->y1;
				int width = box->x2 - box->x1;
				int pitch = PITCH(width, dst->drawable.bitsPerPixel >> 3);
				uint32_t *b;

				DBG((" %s: box src=(%d, %d), dst=(%d, %d) size=(%d, %d), dst offset=%d, dst pitch=%d\n",
				     __FUNCTION__,
				     box->x1 + src_dx, box->y1 + src_dy,
				     box->x1 + dst_dx, box->y1 + dst_dy,
				     width, height,
				     offset, pitch));

				assert(box->x1 + src_dx >= 0);
				assert((box->x2 + src_dx)*dst->drawable.bitsPerPixel <= 8*stride);
				assert(box->y1 + src_dy >= 0);

				assert(box->x1 + dst_dx >= 0);
				assert(box->y1 + dst_dy >= 0);

				/* Transform-copy the box into the staging
				 * buffer at 'offset', tightly pitched. */
				memcpy_xor(src, (char *)ptr + offset,
					   dst->drawable.bitsPerPixel,
					   stride, pitch,
					   box->x1 + src_dx, box->y1 + src_dy,
					   0, 0,
					   width, height,
					   and, or);

				/* Emit the blit: dst rect, dst reloc, then
				 * src pitch and src reloc at 'offset'. */
				assert(kgem->mode == KGEM_BLT);
				b = kgem->batch + kgem->nbatch;
				b[0] = cmd;
				b[1] = br13;
				b[2] = (box->y1 + dst_dy) << 16 | (box->x1 + dst_dx);
				b[3] = (box->y2 + dst_dy) << 16 | (box->x2 + dst_dx);
				b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, dst_bo,
						      I915_GEM_DOMAIN_RENDER << 16 |
						      I915_GEM_DOMAIN_RENDER |
						      KGEM_RELOC_FENCED,
						      0);
				b[5] = 0;
				b[6] = pitch;
				b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, src_bo,
						      I915_GEM_DOMAIN_RENDER << 16 |
						      KGEM_RELOC_FENCED,
						      offset);
				kgem->nbatch += 8;

				box++;
				offset += pitch * height;
			} while (--nbox_this_time);
			assert(offset == __kgem_buffer_size(src_bo));
			sigtrap_put();

			/* More boxes remain: flush so the next chunk starts
			 * on a fresh batch. */
			if (nbox) {
				_kgem_submit(kgem);
				_kgem_set_mode(kgem, KGEM_BLT);
				kgem_bcs_set_tiling(&sna->kgem, NULL, dst_bo);
			}

			kgem_bo_destroy(kgem, src_bo);
		} while (nbox);
	}

	/* We emitted raw blits; invalidate the cached fill state. */
	sna->blt_state.fill_bo = 0;
	return true;

fallback:
	return write_boxes_inplace__xor(kgem,
					src, stride, dst->drawable.bitsPerPixel, src_dx, src_dy,
					dst_bo, dst_dx, dst_dy,
					box, nbox,
					and, or);
}
1766
1767 static bool
indirect_replace(struct sna * sna,PixmapPtr pixmap,struct kgem_bo * bo,const void * src,int stride)1768 indirect_replace(struct sna *sna,
1769 PixmapPtr pixmap,
1770 struct kgem_bo *bo,
1771 const void *src, int stride)
1772 {
1773 struct kgem *kgem = &sna->kgem;
1774 struct kgem_bo *src_bo;
1775 BoxRec box;
1776 void *ptr;
1777 bool ret;
1778
1779 DBG(("%s: size=%d vs %d\n",
1780 __FUNCTION__,
1781 stride * pixmap->drawable.height >> 12,
1782 kgem->half_cpu_cache_pages));
1783
1784 if (stride * pixmap->drawable.height >> 12 > kgem->half_cpu_cache_pages)
1785 return false;
1786
1787 if (!kgem_bo_can_blt(kgem, bo) &&
1788 must_tile(sna, pixmap->drawable.width, pixmap->drawable.height))
1789 return false;
1790
1791 src_bo = kgem_create_buffer_2d(kgem,
1792 pixmap->drawable.width,
1793 pixmap->drawable.height,
1794 pixmap->drawable.bitsPerPixel,
1795 KGEM_BUFFER_WRITE_INPLACE,
1796 &ptr);
1797 if (!src_bo)
1798 return false;
1799
1800 ret = false;
1801 if (sigtrap_get() == 0) {
1802 memcpy_blt(src, ptr, pixmap->drawable.bitsPerPixel,
1803 stride, src_bo->pitch,
1804 0, 0,
1805 0, 0,
1806 pixmap->drawable.width,
1807 pixmap->drawable.height);
1808
1809 box.x1 = box.y1 = 0;
1810 box.x2 = pixmap->drawable.width;
1811 box.y2 = pixmap->drawable.height;
1812
1813 ret = sna->render.copy_boxes(sna, GXcopy,
1814 &pixmap->drawable, src_bo, 0, 0,
1815 &pixmap->drawable, bo, 0, 0,
1816 &box, 1, 0);
1817 sigtrap_put();
1818 }
1819
1820 kgem_bo_destroy(kgem, src_bo);
1821
1822 return ret;
1823 }
1824
/* Replace the entire contents of the pixmap's GPU bo with the pixels at
 * 'src' (pitch 'stride' bytes).  May swap priv->gpu_bo for a freshly
 * allocated bo to avoid stalling on a busy one.  Returns false only if
 * the final sna_write_boxes() fallback fails.
 */
bool sna_replace(struct sna *sna, PixmapPtr pixmap,
		 const void *src, int stride)
{
	struct sna_pixmap *priv = sna_pixmap(pixmap);
	struct kgem_bo *bo = priv->gpu_bo;
	void *dst;

	assert(bo);
	DBG(("%s(handle=%d, %dx%d, bpp=%d, tiling=%d) busy?=%d\n",
	     __FUNCTION__, bo->handle,
	     pixmap->drawable.width,
	     pixmap->drawable.height,
	     pixmap->drawable.bitsPerPixel,
	     bo->tiling,
	     __kgem_bo_is_busy(&sna->kgem, bo)));

	assert(!priv->pinned);

	kgem_bo_undo(&sna->kgem, bo);

	if (__kgem_bo_is_busy(&sna->kgem, bo)) {
		struct kgem_bo *new_bo;

		/* Avoid stalling on a busy bo: either blit through a
		 * staging buffer... */
		if (indirect_replace(sna, pixmap, bo, src, stride))
			return true;

		/* ...or write into a fresh bo and swap it in at 'done'.
		 * On allocation failure we accept the stall and keep the
		 * original bo. */
		new_bo = kgem_create_2d(&sna->kgem,
					pixmap->drawable.width,
					pixmap->drawable.height,
					pixmap->drawable.bitsPerPixel,
					bo->tiling,
					CREATE_GTT_MAP | CREATE_INACTIVE);
		if (new_bo)
			bo = new_bo;
	}

	/* Linear bo with matching pitch: write the whole buffer at once. */
	if (bo->tiling == I915_TILING_NONE && bo->pitch == stride &&
	    kgem_bo_write(&sna->kgem, bo, src,
			  (pixmap->drawable.height-1)*stride + pixmap->drawable.width*pixmap->drawable.bitsPerPixel/8))
		goto done;

	/* Next, try the tiled inplace upload path if it is available. */
	if (upload_inplace__tiled(&sna->kgem, bo)) {
		BoxRec box;

		box.x1 = box.y1 = 0;
		box.x2 = pixmap->drawable.width;
		box.y2 = pixmap->drawable.height;

		if (write_boxes_inplace__tiled(&sna->kgem, src,
					       stride, pixmap->drawable.bitsPerPixel, 0, 0,
					       bo, 0, 0, &box, 1))
			goto done;
	}

	/* Then try a straight copy through a mapping of the bo, guarded
	 * against faults on the mapping. */
	if (kgem_bo_can_map(&sna->kgem, bo) &&
	    (dst = kgem_bo_map(&sna->kgem, bo)) != NULL &&
	    sigtrap_get() == 0) {
		memcpy_blt(src, dst, pixmap->drawable.bitsPerPixel,
			   stride, bo->pitch,
			   0, 0,
			   0, 0,
			   pixmap->drawable.width,
			   pixmap->drawable.height);
		sigtrap_put();
	} else {
		BoxRec box;

		/* The replacement bo did not help; discard it and upload
		 * into the original with the general box writer. */
		if (bo != priv->gpu_bo) {
			kgem_bo_destroy(&sna->kgem, bo);
			bo = priv->gpu_bo;
		}

		box.x1 = box.y1 = 0;
		box.x2 = pixmap->drawable.width;
		box.y2 = pixmap->drawable.height;

		if (!sna_write_boxes(sna, pixmap,
				     bo, 0, 0,
				     src, stride, 0, 0,
				     &box, 1))
			return false;
	}

done:
	/* Publish the replacement bo on the pixmap, releasing the old. */
	if (bo != priv->gpu_bo) {
		sna_pixmap_unmap(pixmap, priv);
		kgem_bo_destroy(&sna->kgem, priv->gpu_bo);
		priv->gpu_bo = bo;
	}

	return true;
}
1917
/* As sna_replace(), but applies out = (in & and) | or to every pixel
 * during the upload (see memcpy_xor).  May swap priv->gpu_bo for a
 * fresh bo to avoid stalling; returns false only if the
 * sna_write_boxes__xor() fallback fails.
 */
bool
sna_replace__xor(struct sna *sna, PixmapPtr pixmap,
		 const void *src, int stride,
		 uint32_t and, uint32_t or)
{
	struct sna_pixmap *priv = sna_pixmap(pixmap);
	struct kgem_bo *bo = priv->gpu_bo;
	void *dst;

	DBG(("%s(handle=%d, %dx%d, bpp=%d, tiling=%d)\n",
	     __FUNCTION__, bo->handle,
	     pixmap->drawable.width,
	     pixmap->drawable.height,
	     pixmap->drawable.bitsPerPixel,
	     bo->tiling));

	assert(!priv->pinned);

	kgem_bo_undo(&sna->kgem, bo);

	/* If the bo cannot be mapped or is busy, try to substitute a
	 * fresh bo; on allocation failure keep the original. */
	if (!kgem_bo_can_map(&sna->kgem, bo) ||
	    __kgem_bo_is_busy(&sna->kgem, bo)) {
		struct kgem_bo *new_bo;

		new_bo = kgem_create_2d(&sna->kgem,
					pixmap->drawable.width,
					pixmap->drawable.height,
					pixmap->drawable.bitsPerPixel,
					bo->tiling,
					CREATE_GTT_MAP | CREATE_INACTIVE);
		if (new_bo)
			bo = new_bo;
	}

	/* Preferred path: transform-copy through a mapping of the bo,
	 * guarded against faults on the mapping. */
	if (kgem_bo_can_map(&sna->kgem, bo) &&
	    (dst = kgem_bo_map(&sna->kgem, bo)) != NULL &&
	    sigtrap_get() == 0) {
		memcpy_xor(src, dst, pixmap->drawable.bitsPerPixel,
			   stride, bo->pitch,
			   0, 0,
			   0, 0,
			   pixmap->drawable.width,
			   pixmap->drawable.height,
			   and, or);
		sigtrap_put();
	} else {
		BoxRec box;

		/* The replacement bo did not help; discard it and upload
		 * into the original with the general xor box writer. */
		if (bo != priv->gpu_bo) {
			kgem_bo_destroy(&sna->kgem, bo);
			bo = priv->gpu_bo;
		}

		box.x1 = box.y1 = 0;
		box.x2 = pixmap->drawable.width;
		box.y2 = pixmap->drawable.height;

		if (!sna_write_boxes__xor(sna, pixmap,
					  bo, 0, 0,
					  src, stride, 0, 0,
					  &box, 1,
					  and, or))
			return false;
	}

	/* Publish the replacement bo on the pixmap, releasing the old. */
	if (bo != priv->gpu_bo) {
		sna_pixmap_unmap(pixmap, priv);
		kgem_bo_destroy(&sna->kgem, priv->gpu_bo);
		priv->gpu_bo = bo;
	}

	return true;
}
1991