1 /*
2 * Based on code from intel_uxa.c and i830_xaa.c
3 * Copyright 1998-1999 Precision Insight, Inc., Cedar Park, Texas.
4 * Copyright (c) 2005 Jesse Barnes <jbarnes@virtuousgeek.org>
5 * Copyright (c) 2009-2011 Intel Corporation
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the next
15 * paragraph) shall be included in all copies or substantial portions of the
16 * Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
21 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24 * SOFTWARE.
25 *
26 * Authors:
27 * Chris Wilson <chris@chris-wilson.co.uk>
28 *
29 */
30
31 #ifdef HAVE_CONFIG_H
32 #include "config.h"
33 #endif
34
35 #include "sna.h"
36 #include "sna_render.h"
37 #include "sna_render_inline.h"
38 #include "sna_reg.h"
39 #include "rop.h"
40
41 #define NO_BLT_COMPOSITE 0
42 #define NO_BLT_COPY 0
43 #define NO_BLT_COPY_BOXES 0
44 #define NO_BLT_FILL 0
45 #define NO_BLT_FILL_BOXES 0
46
47 #ifndef PICT_TYPE_BGRA
48 #define PICT_TYPE_BGRA 8
49 #endif
50
/* BLT raster-operation codes for source copies, indexed by the X11 GX*
 * alu value (GXclear == 0 ... GXset == 15).  S is the blit source and
 * D the destination in the ROP mnemonics. */
static const uint8_t copy_ROP[] = {
	ROP_0,		/* GXclear */
	ROP_DSa,	/* GXand */
	ROP_SDna,	/* GXandReverse */
	ROP_S,		/* GXcopy */
	ROP_DSna,	/* GXandInverted */
	ROP_D,		/* GXnoop */
	ROP_DSx,	/* GXxor */
	ROP_DSo,	/* GXor */
	ROP_DSon,	/* GXnor */
	ROP_DSxn,	/* GXequiv */
	ROP_Dn,		/* GXinvert */
	ROP_SDno,	/* GXorReverse */
	ROP_Sn,		/* GXcopyInverted */
	ROP_DSno,	/* GXorInverted */
	ROP_DSan,	/* GXnand */
	ROP_1		/* GXset */
};
69
/* BLT raster-operation codes for solid fills, indexed by the X11 GX*
 * alu value.  Same table as copy_ROP but with the pattern (P) -- the
 * solid colour -- taking the place of the source. */
static const uint8_t fill_ROP[] = {
	ROP_0,		/* GXclear */
	ROP_DPa,	/* GXand */
	ROP_PDna,	/* GXandReverse */
	ROP_P,		/* GXcopy */
	ROP_DPna,	/* GXandInverted */
	ROP_D,		/* GXnoop */
	ROP_DPx,	/* GXxor */
	ROP_DPo,	/* GXor */
	ROP_DPon,	/* GXnor */
	ROP_PDxn,	/* GXequiv */
	ROP_Dn,		/* GXinvert */
	ROP_PDno,	/* GXorReverse */
	ROP_Pn,		/* GXcopyInverted */
	ROP_DPno,	/* GXorInverted */
	ROP_DPan,	/* GXnand */
	ROP_1		/* GXset */
};
88
/* Composite-op "done" callback for operations run under a sigtrap guard:
 * release the sigtrap reference taken when the op was set up. */
static void sig_done(struct sna *sna, const struct sna_composite_op *op)
{
	sigtrap_put();
}
93
/* Composite-op "done" callback needing no teardown.  As a latency
 * optimisation, submit the accumulated batch immediately if the GPU
 * ring drained while we were building it. */
static void nop_done(struct sna *sna, const struct sna_composite_op *op)
{
	assert(sna->kgem.nbatch <= KGEM_BATCH_SIZE(&sna->kgem));
	if (sna->kgem.nexec > 1 && __kgem_ring_empty(&sna->kgem)) {
		DBG(("%s: flushing BLT operation on empty ring\n", __FUNCTION__));
		_kgem_submit(&sna->kgem);
	}
	(void)op;
}
103
/* Gen6-specific "done" callback for BLT copies.  If the ring drained
 * while the batch was being built, submit at once.  Otherwise append an
 * XY_SETUP_CLIP packet with a zeroed clip rectangle when there is room
 * left in the batch (NOTE(review): presumably a gen6 workaround to
 * restore default clip state after the copy -- confirm against the
 * gen6 BLT documentation). */
static void gen6_blt_copy_done(struct sna *sna, const struct sna_composite_op *op)
{
	struct kgem *kgem = &sna->kgem;

	assert(kgem->nbatch <= KGEM_BATCH_SIZE(kgem));
	if (kgem->nexec > 1 && __kgem_ring_empty(kgem)) {
		DBG(("%s: flushing BLT operation on empty ring\n", __FUNCTION__));
		_kgem_submit(kgem);
		return;
	}

	if (kgem_check_batch(kgem, 3)) {
		uint32_t *b = kgem->batch + kgem->nbatch;
		assert(sna->kgem.mode == KGEM_BLT);
		b[0] = XY_SETUP_CLIP;
		b[1] = b[2] = 0;
		kgem->nbatch += 3;
		assert(kgem->nbatch < kgem->surface);
	}
	assert(sna->kgem.nbatch <= KGEM_BATCH_SIZE(&sna->kgem));
	(void)op;
}
126
sna_blt_fill_init(struct sna * sna,struct sna_blt_state * blt,struct kgem_bo * bo,int bpp,uint8_t alu,uint32_t pixel)127 static bool sna_blt_fill_init(struct sna *sna,
128 struct sna_blt_state *blt,
129 struct kgem_bo *bo,
130 int bpp,
131 uint8_t alu,
132 uint32_t pixel)
133 {
134 struct kgem *kgem = &sna->kgem;
135
136 assert(kgem_bo_can_blt (kgem, bo));
137 blt->bo[0] = bo;
138
139 blt->br13 = bo->pitch;
140 blt->cmd = XY_SCANLINE_BLT;
141 if (kgem->gen >= 040 && bo->tiling) {
142 blt->cmd |= BLT_DST_TILED;
143 blt->br13 >>= 2;
144 }
145 assert(blt->br13 <= MAXSHORT);
146
147 if (alu == GXclear)
148 pixel = 0;
149 else if (alu == GXcopy) {
150 if (pixel == 0)
151 alu = GXclear;
152 else if (pixel == -1)
153 alu = GXset;
154 }
155
156 blt->br13 |= 1<<31 | (fill_ROP[alu] << 16);
157 blt->br13 |= sna_br13_color_depth(bpp);
158
159 blt->pixel = pixel;
160 blt->bpp = bpp;
161 blt->alu = alu;
162
163 kgem_set_mode(kgem, KGEM_BLT, bo);
164 if (!kgem_check_batch(kgem, 14) ||
165 !kgem_check_bo_fenced(kgem, bo)) {
166 kgem_submit(kgem);
167 if (!kgem_check_bo_fenced(kgem, bo))
168 return false;
169 _kgem_set_mode(kgem, KGEM_BLT);
170 }
171
172 if (sna->blt_state.fill_bo != bo->unique_id ||
173 sna->blt_state.fill_pixel != pixel ||
174 sna->blt_state.fill_alu != alu)
175 {
176 uint32_t *b;
177
178 if (!kgem_check_batch(kgem, 24) ||
179 !kgem_check_reloc(kgem, 1)) {
180 _kgem_submit(kgem);
181 if (!kgem_check_bo_fenced(kgem, bo))
182 return false;
183 _kgem_set_mode(kgem, KGEM_BLT);
184 }
185 kgem_bcs_set_tiling(kgem, NULL, bo);
186
187 assert(sna->kgem.mode == KGEM_BLT);
188 b = kgem->batch + kgem->nbatch;
189 if (sna->kgem.gen >= 0100) {
190 b[0] = XY_SETUP_MONO_PATTERN_SL_BLT | 8;
191 if (bpp == 32)
192 b[0] |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
193 if (bo->tiling)
194 b[0] |= BLT_DST_TILED;
195 b[1] = blt->br13;
196 b[2] = 0;
197 b[3] = 0;
198 *(uint64_t *)(b+4) =
199 kgem_add_reloc64(kgem, kgem->nbatch + 4, bo,
200 I915_GEM_DOMAIN_RENDER << 16 |
201 I915_GEM_DOMAIN_RENDER |
202 KGEM_RELOC_FENCED,
203 0);
204 b[6] = pixel;
205 b[7] = pixel;
206 b[8] = 0;
207 b[9] = 0;
208 kgem->nbatch += 10;
209 } else {
210 b[0] = XY_SETUP_MONO_PATTERN_SL_BLT | 7;
211 if (bpp == 32)
212 b[0] |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
213 if (bo->tiling && kgem->gen >= 040)
214 b[0] |= BLT_DST_TILED;
215 b[1] = blt->br13;
216 b[2] = 0;
217 b[3] = 0;
218 b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, bo,
219 I915_GEM_DOMAIN_RENDER << 16 |
220 I915_GEM_DOMAIN_RENDER |
221 KGEM_RELOC_FENCED,
222 0);
223 b[5] = pixel;
224 b[6] = pixel;
225 b[7] = 0;
226 b[8] = 0;
227 kgem->nbatch += 9;
228 }
229 assert(kgem->nbatch < kgem->surface);
230
231 sna->blt_state.fill_bo = bo->unique_id;
232 sna->blt_state.fill_pixel = pixel;
233 sna->blt_state.fill_alu = alu;
234 }
235
236 assert(sna->kgem.mode == KGEM_BLT);
237 return true;
238 }
239
/* Re-emit the XY_SETUP_MONO_PATTERN_SL_BLT state packet for @blt at the
 * current batch position.  Used when a fill stream must continue in a
 * fresh batch; mirrors the setup emitted by sna_blt_fill_init(). */
noinline static void __sna_blt_fill_begin(struct sna *sna,
					  const struct sna_blt_state *blt)
{
	struct kgem *kgem = &sna->kgem;
	uint32_t *b;

	kgem_bcs_set_tiling(&sna->kgem, NULL, blt->bo[0]);

	assert(kgem->mode == KGEM_BLT);
	b = kgem->batch + kgem->nbatch;
	if (sna->kgem.gen >= 0100) {
		/* gen8+: 64-bit relocations, 10-dword setup */
		b[0] = XY_SETUP_MONO_PATTERN_SL_BLT | 8;
		if (blt->bpp == 32)
			b[0] |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
		if (blt->bo[0]->tiling)
			b[0] |= BLT_DST_TILED;
		b[1] = blt->br13;
		b[2] = 0;
		b[3] = 0;
		*(uint64_t *)(b+4) =
			kgem_add_reloc64(kgem, kgem->nbatch + 4, blt->bo[0],
					 I915_GEM_DOMAIN_RENDER << 16 |
					 I915_GEM_DOMAIN_RENDER |
					 KGEM_RELOC_FENCED,
					 0);
		b[6] = blt->pixel;
		b[7] = blt->pixel;
		b[8] = 0;
		b[9] = 0;
		kgem->nbatch += 10;
	} else {
		/* pre-gen8: 32-bit relocation, 9-dword setup */
		b[0] = XY_SETUP_MONO_PATTERN_SL_BLT | 7;
		if (blt->bpp == 32)
			b[0] |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
		if (blt->bo[0]->tiling && kgem->gen >= 040)
			b[0] |= BLT_DST_TILED;
		b[1] = blt->br13;
		b[2] = 0;
		b[3] = 0;
		b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, blt->bo[0],
				      I915_GEM_DOMAIN_RENDER << 16 |
				      I915_GEM_DOMAIN_RENDER |
				      KGEM_RELOC_FENCED,
				      0);
		b[5] = blt->pixel;
		b[6] = blt->pixel;
		b[7] = 0;
		b[8] = 0;
		kgem->nbatch += 9;
	}
}
291
/* Restart a fill stream: if the current batch already holds relocations,
 * flush it and switch the fresh batch to BLT mode before re-emitting the
 * fill setup state. */
inline static void sna_blt_fill_begin(struct sna *sna,
				      const struct sna_blt_state *blt)
{
	struct kgem *kgem = &sna->kgem;

	if (kgem->nreloc) {
		_kgem_submit(kgem);
		_kgem_set_mode(kgem, KGEM_BLT);
		kgem_bcs_set_tiling(kgem, NULL, blt->bo[0]);
		assert(kgem->nbatch == 0);
	}

	__sna_blt_fill_begin(sna, blt);
}
306
/* Emit one 3-dword fill rectangle using the state programmed by
 * sna_blt_fill_init(); restarts the batch (re-emitting setup) if the
 * three dwords do not fit. */
inline static void sna_blt_fill_one(struct sna *sna,
				    const struct sna_blt_state *blt,
				    int16_t x, int16_t y,
				    int16_t width, int16_t height)
{
	struct kgem *kgem = &sna->kgem;
	uint32_t *b;

	DBG(("%s: (%d, %d) x (%d, %d): %08x\n",
	     __FUNCTION__, x, y, width, height, blt->pixel));

	assert(x >= 0);
	assert(y >= 0);
	assert((y+height) * blt->bo[0]->pitch <= kgem_bo_size(blt->bo[0]));

	if (!kgem_check_batch(kgem, 3))
		sna_blt_fill_begin(sna, blt);

	assert(sna->kgem.mode == KGEM_BLT);
	b = kgem->batch + kgem->nbatch;
	kgem->nbatch += 3;
	assert(kgem->nbatch < kgem->surface);

	/* Rectangle packet: command, top-left, bottom-right (y16|x16) */
	b[0] = blt->cmd;
	b[1] = y << 16 | x;
	b[2] = b[1] + (height << 16 | width);
}
334
/* Prepare @blt for a stream of XY_SRC_COPY blits from @src to @dst.
 * Computes the command/BR13 words (tiling-adjusted pitches, copy ROP,
 * colour depth) and reserves both bos in a BLT-mode batch.  Returns
 * false if the pair cannot be fenced even in a fresh batch. */
static bool sna_blt_copy_init(struct sna *sna,
			      struct sna_blt_state *blt,
			      struct kgem_bo *src,
			      struct kgem_bo *dst,
			      int bpp,
			      uint8_t alu)
{
	struct kgem *kgem = &sna->kgem;

	assert(kgem_bo_can_blt(kgem, src));
	assert(kgem_bo_can_blt(kgem, dst));

	blt->bo[0] = src;
	blt->bo[1] = dst;

	/* gen8+ uses 64-bit relocations, hence the longer packet */
	blt->cmd = XY_SRC_COPY_BLT_CMD | (kgem->gen >= 0100 ? 8 : 6);
	if (bpp == 32)
		blt->cmd |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;

	blt->pitch[0] = src->pitch;
	if (kgem->gen >= 040 && src->tiling) {
		/* tiled pitches are specified in dwords from gen4 on */
		blt->cmd |= BLT_SRC_TILED;
		blt->pitch[0] >>= 2;
	}
	assert(blt->pitch[0] <= MAXSHORT);

	blt->pitch[1] = dst->pitch;
	if (kgem->gen >= 040 && dst->tiling) {
		blt->cmd |= BLT_DST_TILED;
		blt->pitch[1] >>= 2;
	}
	assert(blt->pitch[1] <= MAXSHORT);

	/* These alus ignore the destination contents entirely, enabling
	 * the replace-last-fill optimisation in sna_blt_copy_one(). */
	blt->overwrites = alu == GXcopy || alu == GXclear || alu == GXset;
	blt->br13 = (copy_ROP[alu] << 16) | blt->pitch[1];
	blt->br13 |= sna_br13_color_depth(bpp);

	kgem_set_mode(kgem, KGEM_BLT, dst);
	if (!kgem_check_many_bo_fenced(kgem, src, dst, NULL)) {
		kgem_submit(kgem);
		if (!kgem_check_many_bo_fenced(kgem, src, dst, NULL))
			return false;
		_kgem_set_mode(kgem, KGEM_BLT);
	}
	kgem_bcs_set_tiling(&sna->kgem, src, dst);

	/* invalidate the cached fill state */
	sna->blt_state.fill_bo = 0;
	return true;
}
384
/* Prepare @blt for copies that force the alpha channel to a constant:
 * uses XY_FULL_MONO_PATTERN_BLT with ROP 0xfc (S | P), where the solid
 * "pattern" is @alpha, ORing the replacement alpha bits into every
 * copied pixel.  Returns false if src/dst cannot be fenced together. */
static bool sna_blt_alpha_fixup_init(struct sna *sna,
				     struct sna_blt_state *blt,
				     struct kgem_bo *src,
				     struct kgem_bo *dst,
				     int bpp, uint32_t alpha)
{
	struct kgem *kgem = &sna->kgem;

	DBG(("%s: dst handle=%d, src handle=%d, bpp=%d, fixup=%08x\n",
	     __FUNCTION__, dst->handle, src->handle, bpp, alpha));
	assert(kgem_bo_can_blt(kgem, src));
	assert(kgem_bo_can_blt(kgem, dst));

	blt->bo[0] = src;
	blt->bo[1] = dst;

	/* gen8+ uses 64-bit relocations, hence the longer packet */
	blt->cmd = XY_FULL_MONO_PATTERN_BLT | (kgem->gen >= 0100 ? 12 : 10);
	blt->pitch[0] = src->pitch;
	if (kgem->gen >= 040 && src->tiling) {
		blt->cmd |= BLT_SRC_TILED;
		blt->pitch[0] >>= 2;
	}
	assert(blt->pitch[0] <= MAXSHORT);

	blt->pitch[1] = dst->pitch;
	if (kgem->gen >= 040 && dst->tiling) {
		blt->cmd |= BLT_DST_TILED;
		blt->pitch[1] >>= 2;
	}
	assert(blt->pitch[1] <= MAXSHORT);

	blt->overwrites = 1;
	blt->br13 = (0xfc << 16) | blt->pitch[1];
	blt->br13 |= sna_br13_color_depth(bpp);
	if (bpp == 32)
		blt->cmd |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;

	/* the constant pattern ORed into each pixel */
	blt->pixel = alpha;

	kgem_set_mode(kgem, KGEM_BLT, dst);
	if (!kgem_check_many_bo_fenced(kgem, src, dst, NULL)) {
		kgem_submit(kgem);
		if (!kgem_check_many_bo_fenced(kgem, src, dst, NULL))
			return false;
		_kgem_set_mode(kgem, KGEM_BLT);
	}
	kgem_bcs_set_tiling(&sna->kgem, src, dst);

	/* invalidate the cached fill state */
	sna->blt_state.fill_bo = 0;
	return true;
}
436
/* Emit one alpha-fixup blit rectangle (14 dwords on gen8+, 12 before)
 * using the state prepared by sna_blt_alpha_fixup_init().  Flushes and
 * restarts the batch if the packet or its two relocations do not fit. */
static void sna_blt_alpha_fixup_one(struct sna *sna,
				    const struct sna_blt_state *blt,
				    int src_x, int src_y,
				    int width, int height,
				    int dst_x, int dst_y)
{
	struct kgem *kgem = &sna->kgem;
	uint32_t *b;

	DBG(("%s: (%d, %d) -> (%d, %d) x (%d, %d)\n",
	     __FUNCTION__, src_x, src_y, dst_x, dst_y, width, height));

	assert(src_x >= 0);
	assert(src_y >= 0);
	assert((src_y + height) * blt->bo[0]->pitch <= kgem_bo_size(blt->bo[0]));
	assert(dst_x >= 0);
	assert(dst_y >= 0);
	assert((dst_y + height) * blt->bo[1]->pitch <= kgem_bo_size(blt->bo[1]));
	assert(width > 0);
	assert(height > 0);

	if (!kgem_check_batch(kgem, 14) ||
	    !kgem_check_reloc(kgem, 2)) {
		_kgem_submit(kgem);
		_kgem_set_mode(kgem, KGEM_BLT);
		kgem_bcs_set_tiling(&sna->kgem, blt->bo[0], blt->bo[1]);
	}

	assert(sna->kgem.mode == KGEM_BLT);
	b = kgem->batch + kgem->nbatch;
	b[0] = blt->cmd;
	b[1] = blt->br13;
	b[2] = (dst_y << 16) | dst_x;
	b[3] = ((dst_y + height) << 16) | (dst_x + width);
	if (sna->kgem.gen >= 0100) {
		/* dst is written (RENDER in both read and write domains),
		 * src is read-only (no write domain on the reloc) */
		*(uint64_t *)(b+4) =
			kgem_add_reloc64(kgem, kgem->nbatch + 4, blt->bo[1],
					 I915_GEM_DOMAIN_RENDER << 16 |
					 I915_GEM_DOMAIN_RENDER |
					 KGEM_RELOC_FENCED,
					 0);
		b[6] = blt->pitch[0];
		b[7] = (src_y << 16) | src_x;
		*(uint64_t *)(b+8) =
			kgem_add_reloc64(kgem, kgem->nbatch + 8, blt->bo[0],
					 I915_GEM_DOMAIN_RENDER << 16 |
					 KGEM_RELOC_FENCED,
					 0);
		b[10] = blt->pixel;
		b[11] = blt->pixel;
		b[12] = 0;
		b[13] = 0;
		kgem->nbatch += 14;
	} else {
		b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, blt->bo[1],
				      I915_GEM_DOMAIN_RENDER << 16 |
				      I915_GEM_DOMAIN_RENDER |
				      KGEM_RELOC_FENCED,
				      0);
		b[5] = blt->pitch[0];
		b[6] = (src_y << 16) | src_x;
		b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, blt->bo[0],
				      I915_GEM_DOMAIN_RENDER << 16 |
				      KGEM_RELOC_FENCED,
				      0);
		b[8] = blt->pixel;
		b[9] = blt->pixel;
		b[10] = 0;
		b[11] = 0;
		kgem->nbatch += 12;
	}
	assert(kgem->nbatch < kgem->surface);
}
510
/* Emit one XY_SRC_COPY blit rectangle.
 *
 * Optimisation: if this copy overwrites its destination (GXcopy et al.)
 * and the last packet in the batch was an XY_COLOR_BLT filling exactly
 * the same rectangle of the same bo, the fill is dead -- rewrite it in
 * place into this copy (extending the packet by the extra src dwords),
 * or drop it entirely if the batch is full.  Otherwise append a normal
 * 10-dword (gen8+) / 8-dword copy packet, flushing first if needed. */
static void sna_blt_copy_one(struct sna *sna,
			     const struct sna_blt_state *blt,
			     int src_x, int src_y,
			     int width, int height,
			     int dst_x, int dst_y)
{
	struct kgem *kgem = &sna->kgem;
	uint32_t *b;

	DBG(("%s: (%d, %d) -> (%d, %d) x (%d, %d)\n",
	     __FUNCTION__, src_x, src_y, dst_x, dst_y, width, height));

	assert(src_x >= 0);
	assert(src_y >= 0);
	assert((src_y + height) * blt->bo[0]->pitch <= kgem_bo_size(blt->bo[0]));
	assert(dst_x >= 0);
	assert(dst_y >= 0);
	assert((dst_y + height) * blt->bo[1]->pitch <= kgem_bo_size(blt->bo[1]));
	assert(width > 0);
	assert(height > 0);

	/* Compare against a previous fill */
	if (blt->overwrites &&
	    kgem->reloc[kgem->nreloc-1].target_handle == blt->bo[1]->target_handle) {
		if (sna->kgem.gen >= 0100) {
			/* gen8+: XY_COLOR_BLT is 7 dwords (64-bit reloc) */
			if (kgem->nbatch >= 7 &&
			    kgem->batch[kgem->nbatch-7] == (XY_COLOR_BLT | (blt->cmd & (BLT_DST_TILED | BLT_WRITE_ALPHA | BLT_WRITE_RGB)) | 5) &&
			    kgem->batch[kgem->nbatch-5] == ((uint32_t)dst_y << 16 | (uint16_t)dst_x) &&
			    kgem->batch[kgem->nbatch-4] == ((uint32_t)(dst_y+height) << 16 | (uint16_t)(dst_x+width))) {
				DBG(("%s: replacing last fill\n", __FUNCTION__));
				if (kgem_check_batch(kgem, 3)) {
					/* convert the fill in place: the dst
					 * reloc and rect dwords are reused */
					assert(kgem->mode == KGEM_BLT);
					b = kgem->batch + kgem->nbatch - 7;
					b[0] = blt->cmd;
					b[1] = blt->br13;
					b[6] = (src_y << 16) | src_x;
					b[7] = blt->pitch[0];
					*(uint64_t *)(b+8) =
						kgem_add_reloc64(kgem, kgem->nbatch + 8 - 7, blt->bo[0],
								 I915_GEM_DOMAIN_RENDER << 16 |
								 KGEM_RELOC_FENCED,
								 0);
					kgem->nbatch += 3;
					assert(kgem->nbatch < kgem->surface);
					return;
				}
				/* no room to extend: drop the dead fill and
				 * its relocation, then emit a fresh copy */
				kgem->nbatch -= 7;
				kgem->nreloc--;
			}
		} else {
			/* pre-gen8: XY_COLOR_BLT is 6 dwords */
			if (kgem->nbatch >= 6 &&
			    kgem->batch[kgem->nbatch-6] == (XY_COLOR_BLT | (blt->cmd & (BLT_DST_TILED | BLT_WRITE_ALPHA | BLT_WRITE_RGB)) | 4) &&
			    kgem->batch[kgem->nbatch-4] == ((uint32_t)dst_y << 16 | (uint16_t)dst_x) &&
			    kgem->batch[kgem->nbatch-3] == ((uint32_t)(dst_y+height) << 16 | (uint16_t)(dst_x+width))) {
				DBG(("%s: replacing last fill\n", __FUNCTION__));
				if (kgem_check_batch(kgem, 8-6)) {
					assert(kgem->mode == KGEM_BLT);
					b = kgem->batch + kgem->nbatch - 6;
					b[0] = blt->cmd;
					b[1] = blt->br13;
					b[5] = (src_y << 16) | src_x;
					b[6] = blt->pitch[0];
					b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7 - 6, blt->bo[0],
							      I915_GEM_DOMAIN_RENDER << 16 |
							      KGEM_RELOC_FENCED,
							      0);
					kgem->nbatch += 8 - 6;
					assert(kgem->nbatch < kgem->surface);
					return;
				}
				kgem->nbatch -= 6;
				kgem->nreloc--;
			}
		}
	}

	if (!kgem_check_batch(kgem, 10) ||
	    !kgem_check_reloc(kgem, 2)) {
		_kgem_submit(kgem);
		_kgem_set_mode(kgem, KGEM_BLT);
		kgem_bcs_set_tiling(&sna->kgem, blt->bo[0], blt->bo[1]);
	}

	assert(sna->kgem.mode == KGEM_BLT);
	b = kgem->batch + kgem->nbatch;
	b[0] = blt->cmd;
	b[1] = blt->br13;
	b[2] = (dst_y << 16) | dst_x;
	b[3] = ((dst_y + height) << 16) | (dst_x + width);
	if (kgem->gen >= 0100) {
		*(uint64_t *)(b+4) =
			kgem_add_reloc64(kgem, kgem->nbatch + 4, blt->bo[1],
					 I915_GEM_DOMAIN_RENDER << 16 |
					 I915_GEM_DOMAIN_RENDER |
					 KGEM_RELOC_FENCED,
					 0);
		b[6] = (src_y << 16) | src_x;
		b[7] = blt->pitch[0];
		*(uint64_t *)(b+8) =
			kgem_add_reloc64(kgem, kgem->nbatch + 8, blt->bo[0],
					 I915_GEM_DOMAIN_RENDER << 16 |
					 KGEM_RELOC_FENCED,
					 0);
		kgem->nbatch += 10;
	} else {
		b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, blt->bo[1],
				      I915_GEM_DOMAIN_RENDER << 16 |
				      I915_GEM_DOMAIN_RENDER |
				      KGEM_RELOC_FENCED,
				      0);
		b[5] = (src_y << 16) | src_x;
		b[6] = blt->pitch[0];
		b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, blt->bo[0],
				      I915_GEM_DOMAIN_RENDER << 16 |
				      KGEM_RELOC_FENCED,
				      0);
		kgem->nbatch += 8;
	}
	assert(kgem->nbatch < kgem->surface);
}
631
632 bool
sna_get_rgba_from_pixel(uint32_t pixel,uint16_t * red,uint16_t * green,uint16_t * blue,uint16_t * alpha,uint32_t format)633 sna_get_rgba_from_pixel(uint32_t pixel,
634 uint16_t *red,
635 uint16_t *green,
636 uint16_t *blue,
637 uint16_t *alpha,
638 uint32_t format)
639 {
640 int rbits, bbits, gbits, abits;
641 int rshift, bshift, gshift, ashift;
642
643 rbits = PICT_FORMAT_R(format);
644 gbits = PICT_FORMAT_G(format);
645 bbits = PICT_FORMAT_B(format);
646 abits = PICT_FORMAT_A(format);
647
648 if (PICT_FORMAT_TYPE(format) == PICT_TYPE_A) {
649 rshift = gshift = bshift = ashift = 0;
650 } else if (PICT_FORMAT_TYPE(format) == PICT_TYPE_ARGB) {
651 bshift = 0;
652 gshift = bbits;
653 rshift = gshift + gbits;
654 ashift = rshift + rbits;
655 } else if (PICT_FORMAT_TYPE(format) == PICT_TYPE_ABGR) {
656 rshift = 0;
657 gshift = rbits;
658 bshift = gshift + gbits;
659 ashift = bshift + bbits;
660 } else if (PICT_FORMAT_TYPE(format) == PICT_TYPE_BGRA) {
661 ashift = 0;
662 rshift = abits;
663 if (abits == 0)
664 rshift = PICT_FORMAT_BPP(format) - (rbits+gbits+bbits);
665 gshift = rshift + rbits;
666 bshift = gshift + gbits;
667 } else {
668 return false;
669 }
670
671 if (rbits) {
672 *red = ((pixel >> rshift) & ((1 << rbits) - 1)) << (16 - rbits);
673 while (rbits < 16) {
674 *red |= *red >> rbits;
675 rbits <<= 1;
676 }
677 } else
678 *red = 0;
679
680 if (gbits) {
681 *green = ((pixel >> gshift) & ((1 << gbits) - 1)) << (16 - gbits);
682 while (gbits < 16) {
683 *green |= *green >> gbits;
684 gbits <<= 1;
685 }
686 } else
687 *green = 0;
688
689 if (bbits) {
690 *blue = ((pixel >> bshift) & ((1 << bbits) - 1)) << (16 - bbits);
691 while (bbits < 16) {
692 *blue |= *blue >> bbits;
693 bbits <<= 1;
694 }
695 } else
696 *blue = 0;
697
698 if (abits) {
699 *alpha = ((pixel >> ashift) & ((1 << abits) - 1)) << (16 - abits);
700 while (abits < 16) {
701 *alpha |= *alpha >> abits;
702 abits <<= 1;
703 }
704 } else
705 *alpha = 0xffff;
706
707 return true;
708 }
709
/* Pack 16-bit-per-channel RGBA into a pixel of the Render picture
 * @format, truncating each channel to its top bits.  Returns false for
 * non-colour or unhandled format types. */
bool
_sna_get_pixel_from_rgba(uint32_t * pixel,
			 uint16_t red,
			 uint16_t green,
			 uint16_t blue,
			 uint16_t alpha,
			 uint32_t format)
{
	int rbits, bbits, gbits, abits;
	int rshift, bshift, gshift, ashift;

	rbits = PICT_FORMAT_R(format);
	gbits = PICT_FORMAT_G(format);
	bbits = PICT_FORMAT_B(format);
	abits = PICT_FORMAT_A(format);
	/* Treat missing alpha as occupying the format's unused high bits
	 * so the final shift below stays well defined. */
	if (abits == 0)
		abits = PICT_FORMAT_BPP(format) - (rbits+gbits+bbits);

	if (PICT_FORMAT_TYPE(format) == PICT_TYPE_A) {
		*pixel = alpha >> (16 - abits);
		return true;
	}

	if (!PICT_FORMAT_COLOR(format))
		return false;

	/* Derive per-channel shifts from the component ordering */
	if (PICT_FORMAT_TYPE(format) == PICT_TYPE_ARGB) {
		bshift = 0;
		gshift = bbits;
		rshift = gshift + gbits;
		ashift = rshift + rbits;
	} else if (PICT_FORMAT_TYPE(format) == PICT_TYPE_ABGR) {
		rshift = 0;
		gshift = rbits;
		bshift = gshift + gbits;
		ashift = bshift + bbits;
	} else if (PICT_FORMAT_TYPE(format) == PICT_TYPE_BGRA) {
		ashift = 0;
		rshift = abits;
		gshift = rshift + rbits;
		bshift = gshift + gbits;
	} else
		return false;

	*pixel = 0;
	*pixel |= (blue >> (16 - bbits)) << bshift;
	*pixel |= (green >> (16 - gbits)) << gshift;
	*pixel |= (red >> (16 - rbits)) << rshift;
	*pixel |= (alpha >> (16 - abits)) << ashift;

	return true;
}
762
/* Convert a pixel of the framebuffer format implied by @depth into
 * canonical a8r8g8b8. */
uint32_t
sna_rgba_for_color(uint32_t color, int depth)
{
	return color_convert(color, sna_format_for_depth(depth), PICT_a8r8g8b8);
}
768
/* Convert a canonical a8r8g8b8 value into the given picture format. */
uint32_t
sna_rgba_to_color(uint32_t rgba, uint32_t format)
{
	return color_convert(rgba, PICT_a8r8g8b8, format);
}
774
/* Read the first pixel of the pixmap backing @picture (used for 1x1
 * repeating sources).  Returns 0 if the pixmap cannot be migrated to
 * the CPU for reading. */
static uint32_t
get_pixel(PicturePtr picture)
{
	PixmapPtr pixmap = get_drawable_pixmap(picture->pDrawable);

	DBG(("%s: %p\n", __FUNCTION__, pixmap));

	if (!sna_pixmap_move_to_cpu(pixmap, MOVE_READ))
		return 0;

	switch (pixmap->drawable.bitsPerPixel) {
	case 32: return *(uint32_t *)pixmap->devPrivate.ptr;
	case 16: return *(uint16_t *)pixmap->devPrivate.ptr;
	default: return *(uint8_t *)pixmap->devPrivate.ptr;
	}
}
791
/* Return the colour of a known-solid picture converted to @format:
 * either the SolidFill source colour (defined as a8r8g8b8) or the
 * single pixel of a 1x1 repeating drawable. */
static uint32_t
get_solid_color(PicturePtr picture, uint32_t format)
{
	if (picture->pSourcePict) {
		PictSolidFill *fill = (PictSolidFill *)picture->pSourcePict;
		return color_convert(fill->color, PICT_a8r8g8b8, format);
	} else
		return color_convert(get_pixel(picture), picture->format, format);
}
801
802 static bool
is_solid(PicturePtr picture)803 is_solid(PicturePtr picture)
804 {
805 if (picture->pSourcePict) {
806 if (picture->pSourcePict->type == SourcePictTypeSolidFill)
807 return true;
808 }
809
810 if (picture->pDrawable) {
811 if (picture->pDrawable->width == 1 &&
812 picture->pDrawable->height == 1 &&
813 picture->repeat)
814 return true;
815 }
816
817 return false;
818 }
819
820 bool
sna_picture_is_solid(PicturePtr picture,uint32_t * color)821 sna_picture_is_solid(PicturePtr picture, uint32_t *color)
822 {
823 if (!is_solid(picture))
824 return false;
825
826 if (color)
827 *color = get_solid_color(picture, PICT_a8r8g8b8);
828 return true;
829 }
830
831 static bool
pixel_is_transparent(uint32_t pixel,uint32_t format)832 pixel_is_transparent(uint32_t pixel, uint32_t format)
833 {
834 unsigned int abits;
835
836 abits = PICT_FORMAT_A(format);
837 if (!abits)
838 return false;
839
840 if (PICT_FORMAT_TYPE(format) == PICT_TYPE_A ||
841 PICT_FORMAT_TYPE(format) == PICT_TYPE_BGRA) {
842 return (pixel & ((1 << abits) - 1)) == 0;
843 } else if (PICT_FORMAT_TYPE(format) == PICT_TYPE_ARGB ||
844 PICT_FORMAT_TYPE(format) == PICT_TYPE_ABGR) {
845 unsigned int ashift = PICT_FORMAT_BPP(format) - abits;
846 return (pixel >> ashift) == 0;
847 } else
848 return false;
849 }
850
851 static bool
pixel_is_opaque(uint32_t pixel,uint32_t format)852 pixel_is_opaque(uint32_t pixel, uint32_t format)
853 {
854 unsigned int abits;
855
856 abits = PICT_FORMAT_A(format);
857 if (!abits)
858 return true;
859
860 if (PICT_FORMAT_TYPE(format) == PICT_TYPE_A ||
861 PICT_FORMAT_TYPE(format) == PICT_TYPE_BGRA) {
862 return (pixel & ((1 << abits) - 1)) == (unsigned)((1 << abits) - 1);
863 } else if (PICT_FORMAT_TYPE(format) == PICT_TYPE_ARGB ||
864 PICT_FORMAT_TYPE(format) == PICT_TYPE_ABGR) {
865 unsigned int ashift = PICT_FORMAT_BPP(format) - abits;
866 return (pixel >> ashift) == (unsigned)((1 << abits) - 1);
867 } else
868 return false;
869 }
870
871 static bool
pixel_is_white(uint32_t pixel,uint32_t format)872 pixel_is_white(uint32_t pixel, uint32_t format)
873 {
874 switch (PICT_FORMAT_TYPE(format)) {
875 case PICT_TYPE_A:
876 case PICT_TYPE_ARGB:
877 case PICT_TYPE_ABGR:
878 case PICT_TYPE_BGRA:
879 return pixel == ((1U << PICT_FORMAT_BPP(format)) - 1);
880 default:
881 return false;
882 }
883 }
884
/* True if a (presumed solid) picture has maximum alpha: SolidFill
 * colours are a8r8g8b8, so test the top byte; otherwise inspect the
 * pixel of the backing drawable. */
static bool
is_opaque_solid(PicturePtr picture)
{
	if (picture->pSourcePict) {
		PictSolidFill *fill = (PictSolidFill *) picture->pSourcePict;
		return (fill->color >> 24) == 0xff;
	} else
		return pixel_is_opaque(get_pixel(picture), picture->format);
}
894
/* True if a (presumed solid) picture is opaque white. */
static bool
is_white(PicturePtr picture)
{
	if (picture->pSourcePict) {
		PictSolidFill *fill = (PictSolidFill *) picture->pSourcePict;
		return fill->color == 0xffffffff;
	} else
		return pixel_is_white(get_pixel(picture), picture->format);
}
904
/* True if a (presumed solid) picture is fully transparent. */
static bool
is_transparent(PicturePtr picture)
{
	if (picture->pSourcePict) {
		PictSolidFill *fill = (PictSolidFill *) picture->pSourcePict;
		return fill->color == 0;
	} else
		return pixel_is_transparent(get_pixel(picture), picture->format);
}
914
/* Decide whether a composite mask has no effect (is everywhere opaque),
 * allowing the caller to drop it.  Component-alpha masks must be solid
 * white; masks without an alpha channel are trivially opaque; otherwise
 * check solid sources, 1x1 repeats and cleared pixmaps for full alpha. */
bool
sna_composite_mask_is_opaque(PicturePtr mask)
{
	if (mask->componentAlpha && PICT_FORMAT_RGB(mask->format))
		return is_solid(mask) && is_white(mask);
	else if (!PICT_FORMAT_A(mask->format))
		return true;
	else if (mask->pSourcePict) {
		PictSolidFill *fill = (PictSolidFill *) mask->pSourcePict;
		return (fill->color >> 24) == 0xff;
	} else {
		struct sna_pixmap *priv;
		assert(mask->pDrawable);

		if (mask->pDrawable->width == 1 &&
		    mask->pDrawable->height == 1 &&
		    mask->repeat)
			return pixel_is_opaque(get_pixel(mask), mask->format);

		/* a transform could sample the border, so be conservative */
		if (mask->transform)
			return false;

		priv = sna_pixmap_from_drawable(mask->pDrawable);
		if (priv == NULL || !priv->clear)
			return false;

		return pixel_is_opaque(priv->clear_color, mask->format);
	}
}
944
945 fastcall
blt_composite_fill(struct sna * sna,const struct sna_composite_op * op,const struct sna_composite_rectangles * r)946 static void blt_composite_fill(struct sna *sna,
947 const struct sna_composite_op *op,
948 const struct sna_composite_rectangles *r)
949 {
950 int x1, x2, y1, y2;
951
952 x1 = r->dst.x + op->dst.x;
953 y1 = r->dst.y + op->dst.y;
954 x2 = x1 + r->width;
955 y2 = y1 + r->height;
956
957 if (x1 < 0)
958 x1 = 0;
959 if (y1 < 0)
960 y1 = 0;
961
962 if (x2 > op->dst.width)
963 x2 = op->dst.width;
964 if (y2 > op->dst.height)
965 y2 = op->dst.height;
966
967 if (x2 <= x1 || y2 <= y1)
968 return;
969
970 sna_blt_fill_one(sna, &op->u.blt, x1, y1, x2-x1, y2-y1);
971 }
972
/* CPU fallback for blt_composite_fill: same translate-and-clip logic,
 * but the fill is performed with pixman on the CPU mapping. */
fastcall
static void blt_composite_fill__cpu(struct sna *sna,
				    const struct sna_composite_op *op,
				    const struct sna_composite_rectangles *r)
{
	int x1, x2, y1, y2;

	x1 = r->dst.x + op->dst.x;
	y1 = r->dst.y + op->dst.y;
	x2 = x1 + r->width;
	y2 = y1 + r->height;

	if (x1 < 0)
		x1 = 0;
	if (y1 < 0)
		y1 = 0;

	if (x2 > op->dst.width)
		x2 = op->dst.width;
	if (y2 > op->dst.height)
		y2 = op->dst.height;

	if (x2 <= x1 || y2 <= y1)
		return;

	assert(op->dst.pixmap->devPrivate.ptr);
	assert(op->dst.pixmap->devKind);
	sigtrap_assert_active();
	/* pixman_fill takes the stride in uint32_t units, not bytes */
	pixman_fill(op->dst.pixmap->devPrivate.ptr,
		    op->dst.pixmap->devKind / sizeof(uint32_t),
		    op->dst.pixmap->drawable.bitsPerPixel,
		    x1, y1, x2-x1, y2-y1,
		    op->u.blt.pixel);
}
1007
/* CPU fill of a single box already in pixmap coordinates (no dst offset
 * applied); the asserts document that precondition. */
fastcall static void
blt_composite_fill_box_no_offset__cpu(struct sna *sna,
				      const struct sna_composite_op *op,
				      const BoxRec *box)
{
	assert(box->x1 >= 0);
	assert(box->y1 >= 0);
	assert(box->x2 <= op->dst.pixmap->drawable.width);
	assert(box->y2 <= op->dst.pixmap->drawable.height);

	assert(op->dst.pixmap->devPrivate.ptr);
	assert(op->dst.pixmap->devKind);
	sigtrap_assert_active();
	/* pixman_fill takes the stride in uint32_t units, not bytes */
	pixman_fill(op->dst.pixmap->devPrivate.ptr,
		    op->dst.pixmap->devKind / sizeof(uint32_t),
		    op->dst.pixmap->drawable.bitsPerPixel,
		    box->x1, box->y1, box->x2-box->x1, box->y2-box->y1,
		    op->u.blt.pixel);
}
1027
/* CPU fill of @n boxes already in pixmap coordinates.  Note the do/while:
 * callers guarantee n >= 1. */
static void
blt_composite_fill_boxes_no_offset__cpu(struct sna *sna,
					const struct sna_composite_op *op,
					const BoxRec *box, int n)
{
	do {
		assert(box->x1 >= 0);
		assert(box->y1 >= 0);
		assert(box->x2 <= op->dst.pixmap->drawable.width);
		assert(box->y2 <= op->dst.pixmap->drawable.height);

		assert(op->dst.pixmap->devPrivate.ptr);
		assert(op->dst.pixmap->devKind);
		sigtrap_assert_active();
		/* pixman_fill takes the stride in uint32_t units */
		pixman_fill(op->dst.pixmap->devPrivate.ptr,
			    op->dst.pixmap->devKind / sizeof(uint32_t),
			    op->dst.pixmap->drawable.bitsPerPixel,
			    box->x1, box->y1, box->x2-box->x1, box->y2-box->y1,
			    op->u.blt.pixel);
		box++;
	} while (--n);
}
1050
/* CPU fill of a single box given in composite coordinates; the dst
 * offset is applied here before filling. */
fastcall static void
blt_composite_fill_box__cpu(struct sna *sna,
			    const struct sna_composite_op *op,
			    const BoxRec *box)
{
	assert(box->x1 + op->dst.x >= 0);
	assert(box->y1 + op->dst.y >= 0);
	assert(box->x2 + op->dst.x <= op->dst.pixmap->drawable.width);
	assert(box->y2 + op->dst.y <= op->dst.pixmap->drawable.height);

	assert(op->dst.pixmap->devPrivate.ptr);
	assert(op->dst.pixmap->devKind);
	sigtrap_assert_active();
	/* pixman_fill takes the stride in uint32_t units, not bytes */
	pixman_fill(op->dst.pixmap->devPrivate.ptr,
		    op->dst.pixmap->devKind / sizeof(uint32_t),
		    op->dst.pixmap->drawable.bitsPerPixel,
		    box->x1 + op->dst.x, box->y1 + op->dst.y,
		    box->x2 - box->x1, box->y2 - box->y1,
		    op->u.blt.pixel);
}
1071
/* CPU fill of @n boxes in composite coordinates (dst offset applied per
 * box).  Note the do/while: callers guarantee n >= 1. */
static void
blt_composite_fill_boxes__cpu(struct sna *sna,
			      const struct sna_composite_op *op,
			      const BoxRec *box, int n)
{
	do {
		assert(box->x1 + op->dst.x >= 0);
		assert(box->y1 + op->dst.y >= 0);
		assert(box->x2 + op->dst.x <= op->dst.pixmap->drawable.width);
		assert(box->y2 + op->dst.y <= op->dst.pixmap->drawable.height);

		assert(op->dst.pixmap->devPrivate.ptr);
		assert(op->dst.pixmap->devKind);
		sigtrap_assert_active();
		/* pixman_fill takes the stride in uint32_t units */
		pixman_fill(op->dst.pixmap->devPrivate.ptr,
			    op->dst.pixmap->devKind / sizeof(uint32_t),
			    op->dst.pixmap->drawable.bitsPerPixel,
			    box->x1 + op->dst.x, box->y1 + op->dst.y,
			    box->x2 - box->x1, box->y2 - box->y1,
			    op->u.blt.pixel);
		box++;
	} while (--n);
}
1095
_sna_blt_fill_box(struct sna * sna,const struct sna_blt_state * blt,const BoxRec * box)1096 inline static void _sna_blt_fill_box(struct sna *sna,
1097 const struct sna_blt_state *blt,
1098 const BoxRec *box)
1099 {
1100 struct kgem *kgem = &sna->kgem;
1101 uint32_t *b;
1102
1103 DBG(("%s: (%d, %d), (%d, %d): %08x\n", __FUNCTION__,
1104 box->x1, box->y1, box->x2, box->y2,
1105 blt->pixel));
1106
1107 assert(box->x1 >= 0);
1108 assert(box->y1 >= 0);
1109 assert(box->y2 * blt->bo[0]->pitch <= kgem_bo_size(blt->bo[0]));
1110
1111 if (!kgem_check_batch(kgem, 3))
1112 sna_blt_fill_begin(sna, blt);
1113
1114 assert(sna->kgem.mode == KGEM_BLT);
1115 b = kgem->batch + kgem->nbatch;
1116 kgem->nbatch += 3;
1117 assert(kgem->nbatch < kgem->surface);
1118
1119 b[0] = blt->cmd;
1120 *(uint64_t *)(b+1) = *(const uint64_t *)box;
1121 }
1122
_sna_blt_fill_boxes(struct sna * sna,const struct sna_blt_state * blt,const BoxRec * box,int nbox)1123 inline static void _sna_blt_fill_boxes(struct sna *sna,
1124 const struct sna_blt_state *blt,
1125 const BoxRec *box,
1126 int nbox)
1127 {
1128 struct kgem *kgem = &sna->kgem;
1129 uint32_t cmd = blt->cmd;
1130
1131 DBG(("%s: %08x x %d\n", __FUNCTION__, blt->pixel, nbox));
1132
1133 if (!kgem_check_batch(kgem, 3))
1134 sna_blt_fill_begin(sna, blt);
1135
1136 do {
1137 uint32_t *b = kgem->batch + kgem->nbatch;
1138 int nbox_this_time, rem;
1139
1140 assert(sna->kgem.mode == KGEM_BLT);
1141 nbox_this_time = nbox;
1142 rem = kgem_batch_space(kgem);
1143 if (3*nbox_this_time > rem)
1144 nbox_this_time = rem / 3;
1145 DBG(("%s: emitting %d boxes out of %d (batch space %d)\n",
1146 __FUNCTION__, nbox_this_time, nbox, rem));
1147 assert(nbox_this_time > 0);
1148 nbox -= nbox_this_time;
1149
1150 kgem->nbatch += 3 * nbox_this_time;
1151 assert(kgem->nbatch < kgem->surface);
1152 while (nbox_this_time >= 8) {
1153 b[0] = cmd; *(uint64_t *)(b+1) = *(const uint64_t *)box++;
1154 b[3] = cmd; *(uint64_t *)(b+4) = *(const uint64_t *)box++;
1155 b[6] = cmd; *(uint64_t *)(b+7) = *(const uint64_t *)box++;
1156 b[9] = cmd; *(uint64_t *)(b+10) = *(const uint64_t *)box++;
1157 b[12] = cmd; *(uint64_t *)(b+13) = *(const uint64_t *)box++;
1158 b[15] = cmd; *(uint64_t *)(b+16) = *(const uint64_t *)box++;
1159 b[18] = cmd; *(uint64_t *)(b+19) = *(const uint64_t *)box++;
1160 b[21] = cmd; *(uint64_t *)(b+22) = *(const uint64_t *)box++;
1161 b += 24;
1162 nbox_this_time -= 8;
1163 }
1164 if (nbox_this_time & 4) {
1165 b[0] = cmd; *(uint64_t *)(b+1) = *(const uint64_t *)box++;
1166 b[3] = cmd; *(uint64_t *)(b+4) = *(const uint64_t *)box++;
1167 b[6] = cmd; *(uint64_t *)(b+7) = *(const uint64_t *)box++;
1168 b[9] = cmd; *(uint64_t *)(b+10) = *(const uint64_t *)box++;
1169 b += 12;
1170 }
1171 if (nbox_this_time & 2) {
1172 b[0] = cmd; *(uint64_t *)(b+1) = *(const uint64_t *)box++;
1173 b[3] = cmd; *(uint64_t *)(b+4) = *(const uint64_t *)box++;
1174 b += 6;
1175 }
1176 if (nbox_this_time & 1) {
1177 b[0] = cmd; *(uint64_t *)(b+1) = *(const uint64_t *)box++;
1178 }
1179
1180 if (!nbox)
1181 return;
1182
1183 sna_blt_fill_begin(sna, blt);
1184 } while (1);
1185 }
1186
_sna_blt_maybe_clear(const struct sna_composite_op * op,const BoxRec * box)1187 static inline void _sna_blt_maybe_clear(const struct sna_composite_op *op, const BoxRec *box)
1188 {
1189 if (box->x2 - box->x1 >= op->dst.width &&
1190 box->y2 - box->y1 >= op->dst.height) {
1191 struct sna_pixmap *priv = sna_pixmap(op->dst.pixmap);
1192 if (op->dst.bo == priv->gpu_bo) {
1193 sna_damage_all(&priv->gpu_damage, op->dst.pixmap);
1194 sna_damage_destroy(&priv->cpu_damage);
1195 priv->clear = true;
1196 priv->clear_color = op->u.blt.pixel;
1197 DBG(("%s: pixmap=%ld marking clear [%08x]\n",
1198 __FUNCTION__,
1199 op->dst.pixmap->drawable.serialNumber,
1200 op->u.blt.pixel));
1201 ((struct sna_composite_op *)op)->damage = NULL;
1202 }
1203 }
1204 }
1205
blt_composite_fill_box_no_offset(struct sna * sna,const struct sna_composite_op * op,const BoxRec * box)1206 fastcall static void blt_composite_fill_box_no_offset(struct sna *sna,
1207 const struct sna_composite_op *op,
1208 const BoxRec *box)
1209 {
1210 _sna_blt_fill_box(sna, &op->u.blt, box);
1211 _sna_blt_maybe_clear(op, box);
1212 }
1213
blt_composite_fill_boxes_no_offset(struct sna * sna,const struct sna_composite_op * op,const BoxRec * box,int n)1214 static void blt_composite_fill_boxes_no_offset(struct sna *sna,
1215 const struct sna_composite_op *op,
1216 const BoxRec *box, int n)
1217 {
1218 _sna_blt_fill_boxes(sna, &op->u.blt, box, n);
1219 }
1220
blt_composite_fill_boxes_no_offset__thread(struct sna * sna,const struct sna_composite_op * op,const BoxRec * box,int nbox)1221 static void blt_composite_fill_boxes_no_offset__thread(struct sna *sna,
1222 const struct sna_composite_op *op,
1223 const BoxRec *box, int nbox)
1224 {
1225 struct kgem *kgem = &sna->kgem;
1226 const struct sna_blt_state *blt = &op->u.blt;
1227 uint32_t cmd = blt->cmd;
1228
1229 DBG(("%s: %08x x %d\n", __FUNCTION__, blt->pixel, nbox));
1230
1231 sna_vertex_lock(&sna->render);
1232 assert(kgem->mode == KGEM_BLT);
1233 if (!kgem_check_batch(kgem, 3)) {
1234 sna_vertex_wait__locked(&sna->render);
1235 sna_blt_fill_begin(sna, blt);
1236 }
1237
1238 do {
1239 uint32_t *b = kgem->batch + kgem->nbatch;
1240 int nbox_this_time, rem;
1241
1242 assert(sna->kgem.mode == KGEM_BLT);
1243 nbox_this_time = nbox;
1244 rem = kgem_batch_space(kgem);
1245 if (3*nbox_this_time > rem)
1246 nbox_this_time = rem / 3;
1247 DBG(("%s: emitting %d boxes out of %d (batch space %d)\n",
1248 __FUNCTION__, nbox_this_time, nbox, rem));
1249 assert(nbox_this_time > 0);
1250 nbox -= nbox_this_time;
1251
1252 kgem->nbatch += 3 * nbox_this_time;
1253 assert(kgem->nbatch < kgem->surface);
1254 sna_vertex_acquire__locked(&sna->render);
1255 sna_vertex_unlock(&sna->render);
1256
1257 while (nbox_this_time >= 8) {
1258 b[0] = cmd; *(uint64_t *)(b+1) = *(const uint64_t *)box++;
1259 b[3] = cmd; *(uint64_t *)(b+4) = *(const uint64_t *)box++;
1260 b[6] = cmd; *(uint64_t *)(b+7) = *(const uint64_t *)box++;
1261 b[9] = cmd; *(uint64_t *)(b+10) = *(const uint64_t *)box++;
1262 b[12] = cmd; *(uint64_t *)(b+13) = *(const uint64_t *)box++;
1263 b[15] = cmd; *(uint64_t *)(b+16) = *(const uint64_t *)box++;
1264 b[18] = cmd; *(uint64_t *)(b+19) = *(const uint64_t *)box++;
1265 b[21] = cmd; *(uint64_t *)(b+22) = *(const uint64_t *)box++;
1266 b += 24;
1267 nbox_this_time -= 8;
1268 }
1269 if (nbox_this_time & 4) {
1270 b[0] = cmd; *(uint64_t *)(b+1) = *(const uint64_t *)box++;
1271 b[3] = cmd; *(uint64_t *)(b+4) = *(const uint64_t *)box++;
1272 b[6] = cmd; *(uint64_t *)(b+7) = *(const uint64_t *)box++;
1273 b[9] = cmd; *(uint64_t *)(b+10) = *(const uint64_t *)box++;
1274 b += 12;
1275 }
1276 if (nbox_this_time & 2) {
1277 b[0] = cmd; *(uint64_t *)(b+1) = *(const uint64_t *)box++;
1278 b[3] = cmd; *(uint64_t *)(b+4) = *(const uint64_t *)box++;
1279 b += 6;
1280 }
1281 if (nbox_this_time & 1) {
1282 b[0] = cmd; *(uint64_t *)(b+1) = *(const uint64_t *)box++;
1283 }
1284
1285 sna_vertex_lock(&sna->render);
1286 sna_vertex_release__locked(&sna->render);
1287 if (!nbox)
1288 break;
1289
1290 sna_vertex_wait__locked(&sna->render);
1291 sna_blt_fill_begin(sna, blt);
1292 } while (1);
1293 sna_vertex_unlock(&sna->render);
1294 }
1295
blt_composite_fill_box(struct sna * sna,const struct sna_composite_op * op,const BoxRec * box)1296 fastcall static void blt_composite_fill_box(struct sna *sna,
1297 const struct sna_composite_op *op,
1298 const BoxRec *box)
1299 {
1300 sna_blt_fill_one(sna, &op->u.blt,
1301 box->x1 + op->dst.x,
1302 box->y1 + op->dst.y,
1303 box->x2 - box->x1,
1304 box->y2 - box->y1);
1305 _sna_blt_maybe_clear(op, box);
1306 }
1307
blt_composite_fill_boxes(struct sna * sna,const struct sna_composite_op * op,const BoxRec * box,int n)1308 static void blt_composite_fill_boxes(struct sna *sna,
1309 const struct sna_composite_op *op,
1310 const BoxRec *box, int n)
1311 {
1312 do {
1313 sna_blt_fill_one(sna, &op->u.blt,
1314 box->x1 + op->dst.x, box->y1 + op->dst.y,
1315 box->x2 - box->x1, box->y2 - box->y1);
1316 box++;
1317 } while (--n);
1318 }
1319
add4(const BoxRec * b,int16_t x,int16_t y)1320 static inline uint64_t add4(const BoxRec *b, int16_t x, int16_t y)
1321 {
1322 union {
1323 uint64_t v;
1324 int16_t i[4];
1325 } vi;
1326 vi.v = *(uint64_t *)b;
1327 vi.i[0] += x;
1328 vi.i[1] += y;
1329 vi.i[2] += x;
1330 vi.i[3] += y;
1331 return vi.v;
1332 }
1333
blt_composite_fill_boxes__thread(struct sna * sna,const struct sna_composite_op * op,const BoxRec * box,int nbox)1334 static void blt_composite_fill_boxes__thread(struct sna *sna,
1335 const struct sna_composite_op *op,
1336 const BoxRec *box, int nbox)
1337 {
1338 struct kgem *kgem = &sna->kgem;
1339 const struct sna_blt_state *blt = &op->u.blt;
1340 uint32_t cmd = blt->cmd;
1341 int16_t dx = op->dst.x;
1342 int16_t dy = op->dst.y;
1343
1344 DBG(("%s: %08x x %d\n", __FUNCTION__, blt->pixel, nbox));
1345
1346 sna_vertex_lock(&sna->render);
1347 assert(kgem->mode == KGEM_BLT);
1348 if (!kgem_check_batch(kgem, 3)) {
1349 sna_vertex_wait__locked(&sna->render);
1350 sna_blt_fill_begin(sna, blt);
1351 }
1352
1353 do {
1354 uint32_t *b = kgem->batch + kgem->nbatch;
1355 int nbox_this_time, rem;
1356
1357 assert(sna->kgem.mode == KGEM_BLT);
1358 nbox_this_time = nbox;
1359 rem = kgem_batch_space(kgem);
1360 if (3*nbox_this_time > rem)
1361 nbox_this_time = rem / 3;
1362 DBG(("%s: emitting %d boxes out of %d (batch space %d)\n",
1363 __FUNCTION__, nbox_this_time, nbox, rem));
1364 assert(nbox_this_time > 0);
1365 nbox -= nbox_this_time;
1366
1367 kgem->nbatch += 3 * nbox_this_time;
1368 assert(kgem->nbatch < kgem->surface);
1369 sna_vertex_acquire__locked(&sna->render);
1370 sna_vertex_unlock(&sna->render);
1371
1372 while (nbox_this_time >= 8) {
1373 b[0] = cmd; *(uint64_t *)(b+1) = add4(box++, dx, dy);
1374 b[3] = cmd; *(uint64_t *)(b+4) = add4(box++, dx, dy);
1375 b[6] = cmd; *(uint64_t *)(b+7) = add4(box++, dx, dy);
1376 b[9] = cmd; *(uint64_t *)(b+10) = add4(box++, dx, dy);
1377 b[12] = cmd; *(uint64_t *)(b+13) = add4(box++, dx, dy);
1378 b[15] = cmd; *(uint64_t *)(b+16) = add4(box++, dx, dy);
1379 b[18] = cmd; *(uint64_t *)(b+19) = add4(box++, dx, dy);
1380 b[21] = cmd; *(uint64_t *)(b+22) = add4(box++, dx, dy);
1381 b += 24;
1382 nbox_this_time -= 8;
1383 }
1384 if (nbox_this_time & 4) {
1385 b[0] = cmd; *(uint64_t *)(b+1) = add4(box++, dx, dy);
1386 b[3] = cmd; *(uint64_t *)(b+4) = add4(box++, dx, dy);
1387 b[6] = cmd; *(uint64_t *)(b+7) = add4(box++, dx, dy);
1388 b[9] = cmd; *(uint64_t *)(b+10) = add4(box++, dx, dy);
1389 b += 12;
1390 }
1391 if (nbox_this_time & 2) {
1392 b[0] = cmd; *(uint64_t *)(b+1) = add4(box++, dx, dy);
1393 b[3] = cmd; *(uint64_t *)(b+4) = add4(box++, dx, dy);
1394 b += 6;
1395 }
1396 if (nbox_this_time & 1) {
1397 b[0] = cmd; *(uint64_t *)(b+1) = add4(box++, dx, dy);
1398 }
1399
1400 sna_vertex_lock(&sna->render);
1401 sna_vertex_release__locked(&sna->render);
1402 if (!nbox)
1403 break;
1404
1405 sna_vertex_wait__locked(&sna->render);
1406 sna_blt_fill_begin(sna, blt);
1407 } while (1);
1408 sna_vertex_unlock(&sna->render);
1409 }
1410
/* No-op composite callback: used when the operation has no visible
 * effect (see prepare_blt_nop).
 */
fastcall
static void blt_composite_nop(struct sna *sna,
			      const struct sna_composite_op *op,
			      const struct sna_composite_rectangles *r)
{
}
1417
blt_composite_nop_box(struct sna * sna,const struct sna_composite_op * op,const BoxRec * box)1418 fastcall static void blt_composite_nop_box(struct sna *sna,
1419 const struct sna_composite_op *op,
1420 const BoxRec *box)
1421 {
1422 }
1423
blt_composite_nop_boxes(struct sna * sna,const struct sna_composite_op * op,const BoxRec * box,int n)1424 static void blt_composite_nop_boxes(struct sna *sna,
1425 const struct sna_composite_op *op,
1426 const BoxRec *box, int n)
1427 {
1428 }
1429
1430 static bool
begin_blt(struct sna * sna,struct sna_composite_op * op)1431 begin_blt(struct sna *sna,
1432 struct sna_composite_op *op)
1433 {
1434 assert(sna->kgem.mode == KGEM_BLT);
1435 if (!kgem_check_bo_fenced(&sna->kgem, op->dst.bo)) {
1436 kgem_submit(&sna->kgem);
1437 if (!kgem_check_bo_fenced(&sna->kgem, op->dst.bo))
1438 return false;
1439
1440 _kgem_set_mode(&sna->kgem, KGEM_BLT);
1441 kgem_bcs_set_tiling(&sna->kgem, NULL, op->dst.bo);
1442 }
1443
1444 return true;
1445 }
1446
/* Set up a composite op whose callbacks all do nothing — used when the
 * render operation cannot affect the destination.  Always succeeds.
 */
static bool
prepare_blt_nop(struct sna *sna,
		struct sna_composite_op *op)
{
	DBG(("%s\n", __FUNCTION__));

	op->blt = blt_composite_nop;
	op->box = blt_composite_nop_box;
	op->boxes = blt_composite_nop_boxes;
	op->done = nop_done;
	return true;
}
1459
/* Set up a composite op that clears the destination to zero.  With no
 * GPU bo the fill is done on the CPU under a sigtrap guard; otherwise
 * the BLT fill state is programmed with GXclear.  The *_no_offset
 * callbacks are chosen when the destination origin is (0, 0).
 */
static bool
prepare_blt_clear(struct sna *sna,
		  struct sna_composite_op *op)
{
	DBG(("%s\n", __FUNCTION__));

	if (op->dst.bo == NULL) {
		/* CPU path: pixman fills of pixel value 0 */
		op->u.blt.pixel = 0;
		op->blt = blt_composite_fill__cpu;
		if (op->dst.x|op->dst.y) {
			op->box = blt_composite_fill_box__cpu;
			op->boxes = blt_composite_fill_boxes__cpu;
			op->thread_boxes = blt_composite_fill_boxes__cpu;
		} else {
			op->box = blt_composite_fill_box_no_offset__cpu;
			op->boxes = blt_composite_fill_boxes_no_offset__cpu;
			op->thread_boxes = blt_composite_fill_boxes_no_offset__cpu;
		}
		op->done = sig_done;
		/* Install the SIGSEGV/SIGBUS trap for the CPU writes */
		return sigtrap_get() == 0;
	}

	op->blt = blt_composite_fill;
	if (op->dst.x|op->dst.y) {
		op->box = blt_composite_fill_box;
		op->boxes = blt_composite_fill_boxes;
		op->thread_boxes = blt_composite_fill_boxes__thread;
	} else {
		op->box = blt_composite_fill_box_no_offset;
		op->boxes = blt_composite_fill_boxes_no_offset;
		op->thread_boxes = blt_composite_fill_boxes_no_offset__thread;
	}
	op->done = nop_done;

	if (!sna_blt_fill_init(sna, &op->u.blt,
			       op->dst.bo,
			       op->dst.pixmap->drawable.bitsPerPixel,
			       GXclear, 0))
		return false;

	return begin_blt(sna, op);
}
1502
/* Set up a composite op that fills the destination with a solid pixel.
 * Identical in structure to prepare_blt_clear, but programs GXcopy with
 * the caller-supplied pixel instead of GXclear.
 */
static bool
prepare_blt_fill(struct sna *sna,
		 struct sna_composite_op *op,
		 uint32_t pixel)
{
	DBG(("%s\n", __FUNCTION__));

	if (op->dst.bo == NULL) {
		/* CPU path: pixman fills under a sigtrap guard */
		op->u.blt.pixel = pixel;
		op->blt = blt_composite_fill__cpu;
		if (op->dst.x|op->dst.y) {
			op->box = blt_composite_fill_box__cpu;
			op->boxes = blt_composite_fill_boxes__cpu;
			op->thread_boxes = blt_composite_fill_boxes__cpu;
		} else {
			op->box = blt_composite_fill_box_no_offset__cpu;
			op->boxes = blt_composite_fill_boxes_no_offset__cpu;
			op->thread_boxes = blt_composite_fill_boxes_no_offset__cpu;
		}
		op->done = sig_done;
		return sigtrap_get() == 0;
	}

	op->blt = blt_composite_fill;
	if (op->dst.x|op->dst.y) {
		op->box = blt_composite_fill_box;
		op->boxes = blt_composite_fill_boxes;
		op->thread_boxes = blt_composite_fill_boxes__thread;
	} else {
		op->box = blt_composite_fill_box_no_offset;
		op->boxes = blt_composite_fill_boxes_no_offset;
		op->thread_boxes = blt_composite_fill_boxes_no_offset__thread;
	}
	op->done = nop_done;

	if (!sna_blt_fill_init(sna, &op->u.blt, op->dst.bo,
			       op->dst.pixmap->drawable.bitsPerPixel,
			       GXcopy, pixel))
		return false;

	return begin_blt(sna, op);
}
1545
/* Copy one composite rectangle with the BLT engine, clipping against
 * the destination bounds.  The source offset is captured relative to
 * the unclipped (x1, y1), so after clamping x1/y1 the source position
 * x1 + src_x still addresses the matching source pixel.
 */
fastcall static void
blt_composite_copy(struct sna *sna,
		   const struct sna_composite_op *op,
		   const struct sna_composite_rectangles *r)
{
	int x1, x2, y1, y2;
	int src_x, src_y;

	DBG(("%s: src=(%d, %d), dst=(%d, %d), size=(%d, %d)\n",
	     __FUNCTION__,
	     r->src.x, r->src.y,
	     r->dst.x, r->dst.y,
	     r->width, r->height));

	/* XXX higher layer should have clipped? */

	x1 = r->dst.x + op->dst.x;
	y1 = r->dst.y + op->dst.y;
	x2 = x1 + r->width;
	y2 = y1 + r->height;

	/* Record src relative to the pre-clip dst corner */
	src_x = r->src.x - x1 + op->u.blt.sx;
	src_y = r->src.y - y1 + op->u.blt.sy;

	/* clip against dst */
	if (x1 < 0)
		x1 = 0;
	if (y1 < 0)
		y1 = 0;

	if (x2 > op->dst.width)
		x2 = op->dst.width;

	if (y2 > op->dst.height)
		y2 = op->dst.height;

	DBG(("%s: box=(%d, %d), (%d, %d)\n", __FUNCTION__, x1, y1, x2, y2));

	/* Entirely clipped away? */
	if (x2 <= x1 || y2 <= y1)
		return;

	sna_blt_copy_one(sna, &op->u.blt,
			 x1 + src_x, y1 + src_y,
			 x2 - x1, y2 - y1,
			 x1, y1);
}
1592
blt_composite_copy_box(struct sna * sna,const struct sna_composite_op * op,const BoxRec * box)1593 fastcall static void blt_composite_copy_box(struct sna *sna,
1594 const struct sna_composite_op *op,
1595 const BoxRec *box)
1596 {
1597 DBG(("%s: box (%d, %d), (%d, %d)\n",
1598 __FUNCTION__, box->x1, box->y1, box->x2, box->y2));
1599 sna_blt_copy_one(sna, &op->u.blt,
1600 box->x1 + op->u.blt.sx,
1601 box->y1 + op->u.blt.sy,
1602 box->x2 - box->x1,
1603 box->y2 - box->y1,
1604 box->x1 + op->dst.x,
1605 box->y1 + op->dst.y);
1606 }
1607
blt_composite_copy_boxes(struct sna * sna,const struct sna_composite_op * op,const BoxRec * box,int nbox)1608 static void blt_composite_copy_boxes(struct sna *sna,
1609 const struct sna_composite_op *op,
1610 const BoxRec *box, int nbox)
1611 {
1612 DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));
1613 do {
1614 DBG(("%s: box (%d, %d), (%d, %d)\n",
1615 __FUNCTION__, box->x1, box->y1, box->x2, box->y2));
1616 sna_blt_copy_one(sna, &op->u.blt,
1617 box->x1 + op->u.blt.sx, box->y1 + op->u.blt.sy,
1618 box->x2 - box->x1, box->y2 - box->y1,
1619 box->x1 + op->dst.x, box->y1 + op->dst.y);
1620 box++;
1621 } while(--nbox);
1622 }
1623
add2(uint32_t v,int16_t x,int16_t y)1624 static inline uint32_t add2(uint32_t v, int16_t x, int16_t y)
1625 {
1626 x += v & 0xffff;
1627 y += v >> 16;
1628 return (uint16_t)y << 16 | x;
1629 }
1630
blt_composite_copy_boxes__thread(struct sna * sna,const struct sna_composite_op * op,const BoxRec * box,int nbox)1631 static void blt_composite_copy_boxes__thread(struct sna *sna,
1632 const struct sna_composite_op *op,
1633 const BoxRec *box, int nbox)
1634 {
1635 struct kgem *kgem = &sna->kgem;
1636 int dst_dx = op->dst.x;
1637 int dst_dy = op->dst.y;
1638 int src_dx = op->src.offset[0];
1639 int src_dy = op->src.offset[1];
1640 uint32_t cmd = op->u.blt.cmd;
1641 uint32_t br13 = op->u.blt.br13;
1642 struct kgem_bo *src_bo = op->u.blt.bo[0];
1643 struct kgem_bo *dst_bo = op->u.blt.bo[1];
1644 int src_pitch = op->u.blt.pitch[0];
1645
1646 DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));
1647
1648 sna_vertex_lock(&sna->render);
1649
1650 if ((dst_dx | dst_dy) == 0) {
1651 uint64_t hdr = (uint64_t)br13 << 32 | cmd;
1652 do {
1653 int nbox_this_time, rem;
1654
1655 nbox_this_time = nbox;
1656 rem = kgem_batch_space(kgem);
1657 if (8*nbox_this_time > rem)
1658 nbox_this_time = rem / 8;
1659 if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc)
1660 nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc)/2;
1661 DBG(("%s: emitting %d boxes out of %d (batch space %d)\n",
1662 __FUNCTION__, nbox_this_time, nbox, rem));
1663 assert(nbox_this_time > 0);
1664 nbox -= nbox_this_time;
1665
1666 assert(sna->kgem.mode == KGEM_BLT);
1667 do {
1668 uint32_t *b = kgem->batch + kgem->nbatch;
1669
1670 DBG((" %s: box=(%d, %d)x(%d, %d)\n",
1671 __FUNCTION__,
1672 box->x1, box->y1,
1673 box->x2 - box->x1, box->y2 - box->y1));
1674
1675 assert(box->x1 + src_dx >= 0);
1676 assert(box->y1 + src_dy >= 0);
1677 assert(box->x1 + src_dx <= INT16_MAX);
1678 assert(box->y1 + src_dy <= INT16_MAX);
1679
1680 assert(box->x1 >= 0);
1681 assert(box->y1 >= 0);
1682
1683 *(uint64_t *)&b[0] = hdr;
1684 *(uint64_t *)&b[2] = *(const uint64_t *)box;
1685 b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, dst_bo,
1686 I915_GEM_DOMAIN_RENDER << 16 |
1687 I915_GEM_DOMAIN_RENDER |
1688 KGEM_RELOC_FENCED,
1689 0);
1690 b[5] = add2(b[2], src_dx, src_dy);
1691 b[6] = src_pitch;
1692 b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, src_bo,
1693 I915_GEM_DOMAIN_RENDER << 16 |
1694 KGEM_RELOC_FENCED,
1695 0);
1696 kgem->nbatch += 8;
1697 assert(kgem->nbatch < kgem->surface);
1698 box++;
1699 } while (--nbox_this_time);
1700
1701 if (!nbox)
1702 break;
1703
1704 _kgem_submit(kgem);
1705 _kgem_set_mode(kgem, KGEM_BLT);
1706 kgem_bcs_set_tiling(&sna->kgem, src_bo, dst_bo);
1707 } while (1);
1708 } else {
1709 do {
1710 int nbox_this_time, rem;
1711
1712 nbox_this_time = nbox;
1713 rem = kgem_batch_space(kgem);
1714 if (8*nbox_this_time > rem)
1715 nbox_this_time = rem / 8;
1716 if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc)
1717 nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc)/2;
1718 DBG(("%s: emitting %d boxes out of %d (batch space %d)\n",
1719 __FUNCTION__, nbox_this_time, nbox, rem));
1720 assert(nbox_this_time > 0);
1721 nbox -= nbox_this_time;
1722
1723 assert(sna->kgem.mode == KGEM_BLT);
1724 do {
1725 uint32_t *b = kgem->batch + kgem->nbatch;
1726
1727 DBG((" %s: box=(%d, %d)x(%d, %d)\n",
1728 __FUNCTION__,
1729 box->x1, box->y1,
1730 box->x2 - box->x1, box->y2 - box->y1));
1731
1732 assert(box->x1 + src_dx >= 0);
1733 assert(box->y1 + src_dy >= 0);
1734
1735 assert(box->x1 + dst_dx >= 0);
1736 assert(box->y1 + dst_dy >= 0);
1737
1738 b[0] = cmd;
1739 b[1] = br13;
1740 b[2] = ((box->y1 + dst_dy) << 16) | (box->x1 + dst_dx);
1741 b[3] = ((box->y2 + dst_dy) << 16) | (box->x2 + dst_dx);
1742 b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, dst_bo,
1743 I915_GEM_DOMAIN_RENDER << 16 |
1744 I915_GEM_DOMAIN_RENDER |
1745 KGEM_RELOC_FENCED,
1746 0);
1747 b[5] = ((box->y1 + src_dy) << 16) | (box->x1 + src_dx);
1748 b[6] = src_pitch;
1749 b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, src_bo,
1750 I915_GEM_DOMAIN_RENDER << 16 |
1751 KGEM_RELOC_FENCED,
1752 0);
1753 kgem->nbatch += 8;
1754 assert(kgem->nbatch < kgem->surface);
1755 box++;
1756 } while (--nbox_this_time);
1757
1758 if (!nbox)
1759 break;
1760
1761 _kgem_submit(kgem);
1762 _kgem_set_mode(kgem, KGEM_BLT);
1763 kgem_bcs_set_tiling(&sna->kgem, src_bo, dst_bo);
1764 } while (1);
1765 }
1766 sna_vertex_unlock(&sna->render);
1767 }
1768
blt_composite_copy_boxes__thread64(struct sna * sna,const struct sna_composite_op * op,const BoxRec * box,int nbox)1769 static void blt_composite_copy_boxes__thread64(struct sna *sna,
1770 const struct sna_composite_op *op,
1771 const BoxRec *box, int nbox)
1772 {
1773 struct kgem *kgem = &sna->kgem;
1774 int dst_dx = op->dst.x;
1775 int dst_dy = op->dst.y;
1776 int src_dx = op->src.offset[0];
1777 int src_dy = op->src.offset[1];
1778 uint32_t cmd = op->u.blt.cmd;
1779 uint32_t br13 = op->u.blt.br13;
1780 struct kgem_bo *src_bo = op->u.blt.bo[0];
1781 struct kgem_bo *dst_bo = op->u.blt.bo[1];
1782 int src_pitch = op->u.blt.pitch[0];
1783
1784 DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));
1785
1786 sna_vertex_lock(&sna->render);
1787
1788 if ((dst_dx | dst_dy) == 0) {
1789 uint64_t hdr = (uint64_t)br13 << 32 | cmd;
1790 do {
1791 int nbox_this_time, rem;
1792
1793 nbox_this_time = nbox;
1794 rem = kgem_batch_space(kgem);
1795 if (10*nbox_this_time > rem)
1796 nbox_this_time = rem / 10;
1797 if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc)
1798 nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc)/2;
1799 DBG(("%s: emitting %d boxes out of %d (batch space %d)\n",
1800 __FUNCTION__, nbox_this_time, nbox, rem));
1801 assert(nbox_this_time > 0);
1802 nbox -= nbox_this_time;
1803
1804 assert(kgem->mode == KGEM_BLT);
1805 do {
1806 uint32_t *b = kgem->batch + kgem->nbatch;
1807
1808 DBG((" %s: box=(%d, %d)x(%d, %d)\n",
1809 __FUNCTION__,
1810 box->x1, box->y1,
1811 box->x2 - box->x1, box->y2 - box->y1));
1812
1813 assert(box->x1 + src_dx >= 0);
1814 assert(box->y1 + src_dy >= 0);
1815 assert(box->x1 + src_dx <= INT16_MAX);
1816 assert(box->y1 + src_dy <= INT16_MAX);
1817
1818 assert(box->x1 >= 0);
1819 assert(box->y1 >= 0);
1820
1821 *(uint64_t *)&b[0] = hdr;
1822 *(uint64_t *)&b[2] = *(const uint64_t *)box;
1823 *(uint64_t *)(b+4) =
1824 kgem_add_reloc64(kgem, kgem->nbatch + 4, dst_bo,
1825 I915_GEM_DOMAIN_RENDER << 16 |
1826 I915_GEM_DOMAIN_RENDER |
1827 KGEM_RELOC_FENCED,
1828 0);
1829 b[6] = add2(b[2], src_dx, src_dy);
1830 b[7] = src_pitch;
1831 *(uint64_t *)(b+8) =
1832 kgem_add_reloc64(kgem, kgem->nbatch + 8, src_bo,
1833 I915_GEM_DOMAIN_RENDER << 16 |
1834 KGEM_RELOC_FENCED,
1835 0);
1836 kgem->nbatch += 10;
1837 assert(kgem->nbatch < kgem->surface);
1838 box++;
1839 } while (--nbox_this_time);
1840
1841 if (!nbox)
1842 break;
1843
1844 _kgem_submit(kgem);
1845 _kgem_set_mode(kgem, KGEM_BLT);
1846 kgem_bcs_set_tiling(&sna->kgem, src_bo, dst_bo);
1847 } while (1);
1848 } else {
1849 do {
1850 int nbox_this_time, rem;
1851
1852 nbox_this_time = nbox;
1853 rem = kgem_batch_space(kgem);
1854 if (10*nbox_this_time > rem)
1855 nbox_this_time = rem / 10;
1856 if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc)
1857 nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc)/2;
1858 DBG(("%s: emitting %d boxes out of %d (batch space %d)\n",
1859 __FUNCTION__, nbox_this_time, nbox, rem));
1860 assert(nbox_this_time > 0);
1861 nbox -= nbox_this_time;
1862
1863 assert(kgem->mode == KGEM_BLT);
1864 do {
1865 uint32_t *b = kgem->batch + kgem->nbatch;
1866
1867 DBG((" %s: box=(%d, %d)x(%d, %d)\n",
1868 __FUNCTION__,
1869 box->x1, box->y1,
1870 box->x2 - box->x1, box->y2 - box->y1));
1871
1872 assert(box->x1 + src_dx >= 0);
1873 assert(box->y1 + src_dy >= 0);
1874
1875 assert(box->x1 + dst_dx >= 0);
1876 assert(box->y1 + dst_dy >= 0);
1877
1878 b[0] = cmd;
1879 b[1] = br13;
1880 b[2] = ((box->y1 + dst_dy) << 16) | (box->x1 + dst_dx);
1881 b[3] = ((box->y2 + dst_dy) << 16) | (box->x2 + dst_dx);
1882 *(uint64_t *)(b+4) =
1883 kgem_add_reloc64(kgem, kgem->nbatch + 4, dst_bo,
1884 I915_GEM_DOMAIN_RENDER << 16 |
1885 I915_GEM_DOMAIN_RENDER |
1886 KGEM_RELOC_FENCED,
1887 0);
1888 b[6] = ((box->y1 + src_dy) << 16) | (box->x1 + src_dx);
1889 b[7] = src_pitch;
1890 *(uint64_t *)(b+8) =
1891 kgem_add_reloc64(kgem, kgem->nbatch + 8, src_bo,
1892 I915_GEM_DOMAIN_RENDER << 16 |
1893 KGEM_RELOC_FENCED,
1894 0);
1895 kgem->nbatch += 10;
1896 assert(kgem->nbatch < kgem->surface);
1897 box++;
1898 } while (--nbox_this_time);
1899
1900 if (!nbox)
1901 break;
1902
1903 _kgem_submit(kgem);
1904 _kgem_set_mode(kgem, KGEM_BLT);
1905 kgem_bcs_set_tiling(&sna->kgem, src_bo, dst_bo);
1906 } while (1);
1907 }
1908 sna_vertex_unlock(&sna->render);
1909 }
1910
/* Copy one composite rectangle while forcing the destination alpha
 * channel (via sna_blt_alpha_fixup_one); clipping mirrors
 * blt_composite_copy exactly.
 */
fastcall static void
blt_composite_copy_with_alpha(struct sna *sna,
			      const struct sna_composite_op *op,
			      const struct sna_composite_rectangles *r)
{
	int x1, x2, y1, y2;
	int src_x, src_y;

	DBG(("%s: src=(%d, %d), dst=(%d, %d), size=(%d, %d)\n",
	     __FUNCTION__,
	     r->src.x, r->src.y,
	     r->dst.x, r->dst.y,
	     r->width, r->height));

	/* XXX higher layer should have clipped? */

	x1 = r->dst.x + op->dst.x;
	y1 = r->dst.y + op->dst.y;
	x2 = x1 + r->width;
	y2 = y1 + r->height;

	/* Record src relative to the pre-clip dst corner */
	src_x = r->src.x - x1 + op->u.blt.sx;
	src_y = r->src.y - y1 + op->u.blt.sy;

	/* clip against dst */
	if (x1 < 0)
		x1 = 0;
	if (y1 < 0)
		y1 = 0;

	if (x2 > op->dst.width)
		x2 = op->dst.width;

	if (y2 > op->dst.height)
		y2 = op->dst.height;

	DBG(("%s: box=(%d, %d), (%d, %d)\n", __FUNCTION__, x1, y1, x2, y2));

	/* Entirely clipped away? */
	if (x2 <= x1 || y2 <= y1)
		return;

	sna_blt_alpha_fixup_one(sna, &op->u.blt,
				x1 + src_x, y1 + src_y,
				x2 - x1, y2 - y1,
				x1, y1);
}
1957
/* Alpha-fixup copy of one pre-clipped box; coordinates translated as in
 * blt_composite_copy_box.
 */
fastcall static void
blt_composite_copy_box_with_alpha(struct sna *sna,
				  const struct sna_composite_op *op,
				  const BoxRec *box)
{
	DBG(("%s: box (%d, %d), (%d, %d)\n",
	     __FUNCTION__, box->x1, box->y1, box->x2, box->y2));
	sna_blt_alpha_fixup_one(sna, &op->u.blt,
				box->x1 + op->u.blt.sx,
				box->y1 + op->u.blt.sy,
				box->x2 - box->x1,
				box->y2 - box->y1,
				box->x1 + op->dst.x,
				box->y1 + op->dst.y);
}
1973
1974 static void
blt_composite_copy_boxes_with_alpha(struct sna * sna,const struct sna_composite_op * op,const BoxRec * box,int nbox)1975 blt_composite_copy_boxes_with_alpha(struct sna *sna,
1976 const struct sna_composite_op *op,
1977 const BoxRec *box, int nbox)
1978 {
1979 DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));
1980 do {
1981 DBG(("%s: box (%d, %d), (%d, %d)\n",
1982 __FUNCTION__, box->x1, box->y1, box->x2, box->y2));
1983 sna_blt_alpha_fixup_one(sna, &op->u.blt,
1984 box->x1 + op->u.blt.sx, box->y1 + op->u.blt.sy,
1985 box->x2 - box->x1, box->y2 - box->y1,
1986 box->x1 + op->dst.x, box->y1 + op->dst.y);
1987 box++;
1988 } while(--nbox);
1989 }
1990
/* Set up a composite op that copies from bo to the destination with
 * the BLT engine.  Falls back to the tiling path if both bos cannot be
 * fenced into the aperture even after a flush.  When alpha_fixup is
 * non-zero the copy goes through the alpha-forcing callbacks instead.
 */
static bool
prepare_blt_copy(struct sna *sna,
		 struct sna_composite_op *op,
		 struct kgem_bo *bo,
		 uint32_t alpha_fixup)
{
	PixmapPtr src = op->u.blt.src_pixmap;

	assert(op->dst.bo);
	assert(kgem_bo_can_blt(&sna->kgem, op->dst.bo));
	assert(kgem_bo_can_blt(&sna->kgem, bo));

	kgem_set_mode(&sna->kgem, KGEM_BLT, op->dst.bo);
	if (!kgem_check_many_bo_fenced(&sna->kgem, op->dst.bo, bo, NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_many_bo_fenced(&sna->kgem,
					       op->dst.bo, bo, NULL)) {
			DBG(("%s: fallback -- no room in aperture\n", __FUNCTION__));
			/* Too large even for an empty batch: split the
			 * copy into tiles.
			 */
			return sna_tiling_blt_composite(sna, op, bo,
							src->drawable.bitsPerPixel,
							alpha_fixup);
		}
		_kgem_set_mode(&sna->kgem, KGEM_BLT);
	}
	kgem_bcs_set_tiling(&sna->kgem, bo, op->dst.bo);

	DBG(("%s\n", __FUNCTION__));

	/* Overlapping src == dst copies on gen6+ need a flush on done */
	if (sna->kgem.gen >= 060 && op->dst.bo == bo)
		op->done = gen6_blt_copy_done;
	else
		op->done = nop_done;

	if (alpha_fixup) {
		op->blt = blt_composite_copy_with_alpha;
		op->box = blt_composite_copy_box_with_alpha;
		op->boxes = blt_composite_copy_boxes_with_alpha;

		if (!sna_blt_alpha_fixup_init(sna, &op->u.blt, bo, op->dst.bo,
					      src->drawable.bitsPerPixel,
					      alpha_fixup))
			return false;
	} else {
		op->blt = blt_composite_copy;
		op->box = blt_composite_copy_box;
		op->boxes = blt_composite_copy_boxes;
		/* gen8+ uses 64-bit relocations */
		if (sna->kgem.gen >= 0100)
			op->thread_boxes = blt_composite_copy_boxes__thread64;
		else
			op->thread_boxes = blt_composite_copy_boxes__thread;

		if (!sna_blt_copy_init(sna, &op->u.blt, bo, op->dst.bo,
				       src->drawable.bitsPerPixel,
				       GXcopy))
			return false;
	}

	return true;
}
2050
/* CPU upload of one composite rectangle: memcpy from the source pixmap
 * into the destination pixmap, applying both the source (sx, sy) and
 * destination origin offsets.
 */
fastcall static void
blt_put_composite__cpu(struct sna *sna,
		       const struct sna_composite_op *op,
		       const struct sna_composite_rectangles *r)
{
	PixmapPtr dst = op->dst.pixmap;
	PixmapPtr src = op->u.blt.src_pixmap;
	assert(src->devPrivate.ptr);
	assert(src->devKind);
	assert(dst->devPrivate.ptr);
	assert(dst->devKind);
	memcpy_blt(src->devPrivate.ptr, dst->devPrivate.ptr,
		   src->drawable.bitsPerPixel, src->devKind, dst->devKind,
		   r->src.x + op->u.blt.sx, r->src.y + op->u.blt.sy,
		   r->dst.x + op->dst.x, r->dst.y + op->dst.y,
		   r->width, r->height);
}
2068
2069 fastcall static void
blt_put_composite_box__cpu(struct sna * sna,const struct sna_composite_op * op,const BoxRec * box)2070 blt_put_composite_box__cpu(struct sna *sna,
2071 const struct sna_composite_op *op,
2072 const BoxRec *box)
2073 {
2074 PixmapPtr dst = op->dst.pixmap;
2075 PixmapPtr src = op->u.blt.src_pixmap;
2076 assert(src->devPrivate.ptr);
2077 assert(src->devKind);
2078 assert(dst->devPrivate.ptr);
2079 assert(dst->devKind);
2080 memcpy_blt(src->devPrivate.ptr, dst->devPrivate.ptr,
2081 src->drawable.bitsPerPixel, src->devKind, dst->devKind,
2082 box->x1 + op->u.blt.sx, box->y1 + op->u.blt.sy,
2083 box->x1 + op->dst.x, box->y1 + op->dst.y,
2084 box->x2-box->x1, box->y2-box->y1);
2085 }
2086
2087 static void
blt_put_composite_boxes__cpu(struct sna * sna,const struct sna_composite_op * op,const BoxRec * box,int n)2088 blt_put_composite_boxes__cpu(struct sna *sna,
2089 const struct sna_composite_op *op,
2090 const BoxRec *box, int n)
2091 {
2092 PixmapPtr dst = op->dst.pixmap;
2093 PixmapPtr src = op->u.blt.src_pixmap;
2094 assert(src->devPrivate.ptr);
2095 assert(src->devKind);
2096 assert(dst->devPrivate.ptr);
2097 assert(dst->devKind);
2098 do {
2099 memcpy_blt(src->devPrivate.ptr, dst->devPrivate.ptr,
2100 src->drawable.bitsPerPixel, src->devKind, dst->devKind,
2101 box->x1 + op->u.blt.sx, box->y1 + op->u.blt.sy,
2102 box->x1 + op->dst.x, box->y1 + op->dst.y,
2103 box->x2-box->x1, box->y2-box->y1);
2104 box++;
2105 } while (--n);
2106 }
2107
2108 fastcall static void
blt_put_composite_with_alpha__cpu(struct sna * sna,const struct sna_composite_op * op,const struct sna_composite_rectangles * r)2109 blt_put_composite_with_alpha__cpu(struct sna *sna,
2110 const struct sna_composite_op *op,
2111 const struct sna_composite_rectangles *r)
2112 {
2113 PixmapPtr dst = op->dst.pixmap;
2114 PixmapPtr src = op->u.blt.src_pixmap;
2115 assert(src->devPrivate.ptr);
2116 assert(src->devKind);
2117 assert(dst->devPrivate.ptr);
2118 assert(dst->devKind);
2119 memcpy_xor(src->devPrivate.ptr, dst->devPrivate.ptr,
2120 src->drawable.bitsPerPixel, src->devKind, dst->devKind,
2121 r->src.x + op->u.blt.sx, r->src.y + op->u.blt.sy,
2122 r->dst.x + op->dst.x, r->dst.y + op->dst.y,
2123 r->width, r->height,
2124 0xffffffff, op->u.blt.pixel);
2125
2126 }
2127
2128 fastcall static void
blt_put_composite_box_with_alpha__cpu(struct sna * sna,const struct sna_composite_op * op,const BoxRec * box)2129 blt_put_composite_box_with_alpha__cpu(struct sna *sna,
2130 const struct sna_composite_op *op,
2131 const BoxRec *box)
2132 {
2133 PixmapPtr dst = op->dst.pixmap;
2134 PixmapPtr src = op->u.blt.src_pixmap;
2135 assert(src->devPrivate.ptr);
2136 assert(src->devKind);
2137 assert(dst->devPrivate.ptr);
2138 assert(dst->devKind);
2139 memcpy_xor(src->devPrivate.ptr, dst->devPrivate.ptr,
2140 src->drawable.bitsPerPixel, src->devKind, dst->devKind,
2141 box->x1 + op->u.blt.sx, box->y1 + op->u.blt.sy,
2142 box->x1 + op->dst.x, box->y1 + op->dst.y,
2143 box->x2-box->x1, box->y2-box->y1,
2144 0xffffffff, op->u.blt.pixel);
2145 }
2146
2147 static void
blt_put_composite_boxes_with_alpha__cpu(struct sna * sna,const struct sna_composite_op * op,const BoxRec * box,int n)2148 blt_put_composite_boxes_with_alpha__cpu(struct sna *sna,
2149 const struct sna_composite_op *op,
2150 const BoxRec *box, int n)
2151 {
2152 PixmapPtr dst = op->dst.pixmap;
2153 PixmapPtr src = op->u.blt.src_pixmap;
2154 assert(src->devPrivate.ptr);
2155 assert(src->devKind);
2156 assert(dst->devPrivate.ptr);
2157 assert(dst->devKind);
2158 do {
2159 memcpy_xor(src->devPrivate.ptr, dst->devPrivate.ptr,
2160 src->drawable.bitsPerPixel, src->devKind, dst->devKind,
2161 box->x1 + op->u.blt.sx, box->y1 + op->u.blt.sy,
2162 box->x1 + op->dst.x, box->y1 + op->dst.y,
2163 box->x2-box->x1, box->y2-box->y1,
2164 0xffffffff, op->u.blt.pixel);
2165 box++;
2166 } while (--n);
2167 }
2168
2169 fastcall static void
blt_put_composite(struct sna * sna,const struct sna_composite_op * op,const struct sna_composite_rectangles * r)2170 blt_put_composite(struct sna *sna,
2171 const struct sna_composite_op *op,
2172 const struct sna_composite_rectangles *r)
2173 {
2174 PixmapPtr dst = op->dst.pixmap;
2175 PixmapPtr src = op->u.blt.src_pixmap;
2176 struct sna_pixmap *dst_priv = sna_pixmap(dst);
2177 int pitch = src->devKind;
2178 char *data = src->devPrivate.ptr;
2179 int bpp = src->drawable.bitsPerPixel;
2180
2181 int16_t dst_x = r->dst.x + op->dst.x;
2182 int16_t dst_y = r->dst.y + op->dst.y;
2183 int16_t src_x = r->src.x + op->u.blt.sx;
2184 int16_t src_y = r->src.y + op->u.blt.sy;
2185
2186 if (!dst_priv->pinned &&
2187 dst_x <= 0 && dst_y <= 0 &&
2188 dst_x + r->width >= op->dst.width &&
2189 dst_y + r->height >= op->dst.height) {
2190 data += (src_x - dst_x) * bpp / 8;
2191 data += (src_y - dst_y) * pitch;
2192
2193 assert(op->dst.bo == dst_priv->gpu_bo);
2194 sna_replace(sna, op->dst.pixmap, data, pitch);
2195 } else {
2196 BoxRec box;
2197 bool ok;
2198
2199 box.x1 = dst_x;
2200 box.y1 = dst_y;
2201 box.x2 = dst_x + r->width;
2202 box.y2 = dst_y + r->height;
2203
2204 ok = sna_write_boxes(sna, dst,
2205 dst_priv->gpu_bo, 0, 0,
2206 data, pitch, src_x, src_y,
2207 &box, 1);
2208 assert(ok);
2209 (void)ok;
2210 }
2211 }
2212
blt_put_composite_box(struct sna * sna,const struct sna_composite_op * op,const BoxRec * box)2213 fastcall static void blt_put_composite_box(struct sna *sna,
2214 const struct sna_composite_op *op,
2215 const BoxRec *box)
2216 {
2217 PixmapPtr src = op->u.blt.src_pixmap;
2218 struct sna_pixmap *dst_priv = sna_pixmap(op->dst.pixmap);
2219
2220 DBG(("%s: src=(%d, %d), dst=(%d, %d)\n", __FUNCTION__,
2221 op->u.blt.sx, op->u.blt.sy,
2222 op->dst.x, op->dst.y));
2223
2224 assert(src->devPrivate.ptr);
2225 assert(src->devKind);
2226 if (!dst_priv->pinned &&
2227 box->x2 - box->x1 == op->dst.width &&
2228 box->y2 - box->y1 == op->dst.height) {
2229 int pitch = src->devKind;
2230 int bpp = src->drawable.bitsPerPixel / 8;
2231 char *data = src->devPrivate.ptr;
2232
2233 data += (box->y1 + op->u.blt.sy) * pitch;
2234 data += (box->x1 + op->u.blt.sx) * bpp;
2235
2236 assert(op->dst.bo == dst_priv->gpu_bo);
2237 sna_replace(sna, op->dst.pixmap, data, pitch);
2238 } else {
2239 bool ok;
2240
2241 ok = sna_write_boxes(sna, op->dst.pixmap,
2242 op->dst.bo, op->dst.x, op->dst.y,
2243 src->devPrivate.ptr,
2244 src->devKind,
2245 op->u.blt.sx, op->u.blt.sy,
2246 box, 1);
2247 assert(ok);
2248 (void)ok;
2249 }
2250 }
2251
blt_put_composite_boxes(struct sna * sna,const struct sna_composite_op * op,const BoxRec * box,int n)2252 static void blt_put_composite_boxes(struct sna *sna,
2253 const struct sna_composite_op *op,
2254 const BoxRec *box, int n)
2255 {
2256 PixmapPtr src = op->u.blt.src_pixmap;
2257 struct sna_pixmap *dst_priv = sna_pixmap(op->dst.pixmap);
2258
2259 DBG(("%s: src=(%d, %d), dst=(%d, %d), [(%d, %d), (%d, %d) x %d]\n", __FUNCTION__,
2260 op->u.blt.sx, op->u.blt.sy,
2261 op->dst.x, op->dst.y,
2262 box->x1, box->y1, box->x2, box->y2, n));
2263
2264 assert(src->devPrivate.ptr);
2265 assert(src->devKind);
2266 if (n == 1 && !dst_priv->pinned &&
2267 box->x2 - box->x1 == op->dst.width &&
2268 box->y2 - box->y1 == op->dst.height) {
2269 int pitch = src->devKind;
2270 int bpp = src->drawable.bitsPerPixel / 8;
2271 char *data = src->devPrivate.ptr;
2272
2273 data += (box->y1 + op->u.blt.sy) * pitch;
2274 data += (box->x1 + op->u.blt.sx) * bpp;
2275
2276 assert(op->dst.bo == dst_priv->gpu_bo);
2277 sna_replace(sna, op->dst.pixmap, data, pitch);
2278 } else {
2279 bool ok;
2280
2281 ok = sna_write_boxes(sna, op->dst.pixmap,
2282 op->dst.bo, op->dst.x, op->dst.y,
2283 src->devPrivate.ptr,
2284 src->devKind,
2285 op->u.blt.sx, op->u.blt.sy,
2286 box, n);
2287 assert(ok);
2288 (void)ok;
2289 }
2290 }
2291
2292 fastcall static void
blt_put_composite_with_alpha(struct sna * sna,const struct sna_composite_op * op,const struct sna_composite_rectangles * r)2293 blt_put_composite_with_alpha(struct sna *sna,
2294 const struct sna_composite_op *op,
2295 const struct sna_composite_rectangles *r)
2296 {
2297 PixmapPtr dst = op->dst.pixmap;
2298 PixmapPtr src = op->u.blt.src_pixmap;
2299 struct sna_pixmap *dst_priv = sna_pixmap(dst);
2300 int pitch = src->devKind;
2301 char *data = src->devPrivate.ptr;
2302
2303 int16_t dst_x = r->dst.x + op->dst.x;
2304 int16_t dst_y = r->dst.y + op->dst.y;
2305 int16_t src_x = r->src.x + op->u.blt.sx;
2306 int16_t src_y = r->src.y + op->u.blt.sy;
2307
2308 assert(src->devPrivate.ptr);
2309 assert(src->devKind);
2310
2311 if (!dst_priv->pinned &&
2312 dst_x <= 0 && dst_y <= 0 &&
2313 dst_x + r->width >= op->dst.width &&
2314 dst_y + r->height >= op->dst.height) {
2315 int bpp = dst->drawable.bitsPerPixel / 8;
2316
2317 data += (src_x - dst_x) * bpp;
2318 data += (src_y - dst_y) * pitch;
2319
2320 assert(op->dst.bo == dst_priv->gpu_bo);
2321 sna_replace__xor(sna, op->dst.pixmap, data, pitch,
2322 0xffffffff, op->u.blt.pixel);
2323 } else {
2324 BoxRec box;
2325
2326 box.x1 = dst_x;
2327 box.y1 = dst_y;
2328 box.x2 = dst_x + r->width;
2329 box.y2 = dst_y + r->height;
2330
2331 sna_write_boxes__xor(sna, dst,
2332 dst_priv->gpu_bo, 0, 0,
2333 data, pitch, src_x, src_y,
2334 &box, 1,
2335 0xffffffff, op->u.blt.pixel);
2336 }
2337 }
2338
2339 fastcall static void
blt_put_composite_box_with_alpha(struct sna * sna,const struct sna_composite_op * op,const BoxRec * box)2340 blt_put_composite_box_with_alpha(struct sna *sna,
2341 const struct sna_composite_op *op,
2342 const BoxRec *box)
2343 {
2344 PixmapPtr src = op->u.blt.src_pixmap;
2345 struct sna_pixmap *dst_priv = sna_pixmap(op->dst.pixmap);
2346
2347 DBG(("%s: src=(%d, %d), dst=(%d, %d)\n", __FUNCTION__,
2348 op->u.blt.sx, op->u.blt.sy,
2349 op->dst.x, op->dst.y));
2350
2351 assert(src->devPrivate.ptr);
2352 assert(src->devKind);
2353
2354 if (!dst_priv->pinned &&
2355 box->x2 - box->x1 == op->dst.width &&
2356 box->y2 - box->y1 == op->dst.height) {
2357 int pitch = src->devKind;
2358 int bpp = src->drawable.bitsPerPixel / 8;
2359 char *data = src->devPrivate.ptr;
2360
2361 data += (box->y1 + op->u.blt.sy) * pitch;
2362 data += (box->x1 + op->u.blt.sx) * bpp;
2363
2364 assert(op->dst.bo == dst_priv->gpu_bo);
2365 sna_replace__xor(sna, op->dst.pixmap, data, pitch,
2366 0xffffffff, op->u.blt.pixel);
2367 } else {
2368 sna_write_boxes__xor(sna, op->dst.pixmap,
2369 op->dst.bo, op->dst.x, op->dst.y,
2370 src->devPrivate.ptr,
2371 src->devKind,
2372 op->u.blt.sx, op->u.blt.sy,
2373 box, 1,
2374 0xffffffff, op->u.blt.pixel);
2375 }
2376 }
2377
2378 static void
blt_put_composite_boxes_with_alpha(struct sna * sna,const struct sna_composite_op * op,const BoxRec * box,int n)2379 blt_put_composite_boxes_with_alpha(struct sna *sna,
2380 const struct sna_composite_op *op,
2381 const BoxRec *box, int n)
2382 {
2383 PixmapPtr src = op->u.blt.src_pixmap;
2384 struct sna_pixmap *dst_priv = sna_pixmap(op->dst.pixmap);
2385
2386 DBG(("%s: src=(%d, %d), dst=(%d, %d), [(%d, %d), (%d, %d) x %d]\n", __FUNCTION__,
2387 op->u.blt.sx, op->u.blt.sy,
2388 op->dst.x, op->dst.y,
2389 box->x1, box->y1, box->x2, box->y2, n));
2390
2391 assert(src->devPrivate.ptr);
2392 assert(src->devKind);
2393
2394 if (n == 1 && !dst_priv->pinned &&
2395 box->x2 - box->x1 == op->dst.width &&
2396 box->y2 - box->y1 == op->dst.height) {
2397 int pitch = src->devKind;
2398 int bpp = src->drawable.bitsPerPixel / 8;
2399 char *data = src->devPrivate.ptr;
2400
2401 data += (box->y1 + op->u.blt.sy) * pitch;
2402 data += (box->x1 + op->u.blt.sx) * bpp;
2403
2404 assert(dst_priv->gpu_bo == op->dst.bo);
2405 sna_replace__xor(sna, op->dst.pixmap, data, pitch,
2406 0xffffffff, op->u.blt.pixel);
2407 } else {
2408 sna_write_boxes__xor(sna, op->dst.pixmap,
2409 op->dst.bo, op->dst.x, op->dst.y,
2410 src->devPrivate.ptr,
2411 src->devKind,
2412 op->u.blt.sx, op->u.blt.sy,
2413 box, n,
2414 0xffffffff, op->u.blt.pixel);
2415 }
2416 }
2417
2418 static bool
prepare_blt_put(struct sna * sna,struct sna_composite_op * op,uint32_t alpha_fixup)2419 prepare_blt_put(struct sna *sna,
2420 struct sna_composite_op *op,
2421 uint32_t alpha_fixup)
2422 {
2423 DBG(("%s\n", __FUNCTION__));
2424
2425 assert(!sna_pixmap(op->dst.pixmap)->clear);
2426
2427 if (op->dst.bo) {
2428 assert(op->dst.bo == sna_pixmap(op->dst.pixmap)->gpu_bo);
2429 if (alpha_fixup) {
2430 op->u.blt.pixel = alpha_fixup;
2431 op->blt = blt_put_composite_with_alpha;
2432 op->box = blt_put_composite_box_with_alpha;
2433 op->boxes = blt_put_composite_boxes_with_alpha;
2434 } else {
2435 op->blt = blt_put_composite;
2436 op->box = blt_put_composite_box;
2437 op->boxes = blt_put_composite_boxes;
2438 }
2439
2440 op->done = nop_done;
2441 return true;
2442 } else {
2443 if (alpha_fixup) {
2444 op->u.blt.pixel = alpha_fixup;
2445 op->blt = blt_put_composite_with_alpha__cpu;
2446 op->box = blt_put_composite_box_with_alpha__cpu;
2447 op->boxes = blt_put_composite_boxes_with_alpha__cpu;
2448 } else {
2449 op->blt = blt_put_composite__cpu;
2450 op->box = blt_put_composite_box__cpu;
2451 op->boxes = blt_put_composite_boxes__cpu;
2452 }
2453
2454 op->done = sig_done;
2455 return sigtrap_get() == 0;
2456 }
2457 }
2458
2459 static bool
is_clear(PixmapPtr pixmap)2460 is_clear(PixmapPtr pixmap)
2461 {
2462 struct sna_pixmap *priv = sna_pixmap(pixmap);
2463 return priv && priv->clear;
2464 }
2465
/* Porter-Duff OVER for two premultiplied a8r8g8b8 pixels:
 * result = src + dst * (1 - src.alpha), computed per channel with
 * correct rounding.  The helper macros below operate on pairs of
 * channels packed into the red/blue lanes (0x00ff00ff) so a 32bit
 * pixel is processed in two halves; they are also reused by add()
 * later in this file, so they must remain defined at file scope.
 */
static inline uint32_t
over(uint32_t src, uint32_t dst)
{
	/* 255 - alpha(src), the factor applied to dst */
	uint32_t a = ~src >> 24;

#define G_SHIFT 8
#define RB_MASK 0xff00ff
#define RB_ONE_HALF 0x800080
#define RB_MASK_PLUS_ONE 0x10000100

/* x.rb = (x.rb * a + 127) / 255, rounded, for both lanes at once */
#define UN8_rb_MUL_UN8(x, a, t) do { \
	t = ((x) & RB_MASK) * (a); \
	t += RB_ONE_HALF; \
	x = (t + ((t >> G_SHIFT) & RB_MASK)) >> G_SHIFT; \
	x &= RB_MASK; \
} while (0)

/* x.rb = min(x.rb + y.rb, 255) -- saturating per-lane add */
#define UN8_rb_ADD_UN8_rb(x, y, t) do { \
	t = ((x) + (y)); \
	t |= RB_MASK_PLUS_ONE - ((t >> G_SHIFT) & RB_MASK); \
	x = (t & RB_MASK); \
} while (0)

/* x = x * a + y, applied to all four 8bit channels of a pixel */
#define UN8x4_MUL_UN8_ADD_UN8x4(x, a, y) do { \
	uint32_t r1__, r2__, r3__, t__; \
	\
	r1__ = (x); \
	r2__ = (y) & RB_MASK; \
	UN8_rb_MUL_UN8(r1__, (a), t__); \
	UN8_rb_ADD_UN8_rb(r1__, r2__, t__); \
	\
	r2__ = (x) >> G_SHIFT; \
	r3__ = ((y) >> G_SHIFT) & RB_MASK; \
	UN8_rb_MUL_UN8(r2__, (a), t__); \
	UN8_rb_ADD_UN8_rb(r2__, r3__, t__); \
	\
	(x) = r1__ | (r2__ << G_SHIFT); \
} while (0)

	UN8x4_MUL_UN8_ADD_UN8x4(dst, a, src);

	return dst;
}
2509
/* Porter-Duff ADD for two premultiplied a8r8g8b8 pixels: per-channel
 * saturating addition.  Relies on the RB_MASK/G_SHIFT lane macros
 * defined inside over() above.
 */
static inline uint32_t
add(uint32_t src, uint32_t dst)
{
/* x = x + y per 8bit channel, saturating at 255 */
#define UN8x4_ADD_UN8x4(x, y) do { \
	uint32_t r1__, r2__, r3__, t__; \
	\
	r1__ = (x) & RB_MASK; \
	r2__ = (y) & RB_MASK; \
	UN8_rb_ADD_UN8_rb(r1__, r2__, t__); \
	\
	r2__ = ((x) >> G_SHIFT) & RB_MASK; \
	r3__ = ((y) >> G_SHIFT) & RB_MASK; \
	UN8_rb_ADD_UN8_rb(r2__, r3__, t__); \
	\
	x = r1__ | (r2__ << G_SHIFT); \
} while (0)

	UN8x4_ADD_UN8x4(src, dst);
	return src;
}
2530
2531 bool
sna_blt_composite(struct sna * sna,uint32_t op,PicturePtr src,PicturePtr dst,int16_t x,int16_t y,int16_t dst_x,int16_t dst_y,int16_t width,int16_t height,unsigned flags,struct sna_composite_op * tmp)2532 sna_blt_composite(struct sna *sna,
2533 uint32_t op,
2534 PicturePtr src,
2535 PicturePtr dst,
2536 int16_t x, int16_t y,
2537 int16_t dst_x, int16_t dst_y,
2538 int16_t width, int16_t height,
2539 unsigned flags,
2540 struct sna_composite_op *tmp)
2541 {
2542 PictFormat src_format = src->format;
2543 PixmapPtr src_pixmap;
2544 struct kgem_bo *bo;
2545 int16_t tx, ty;
2546 BoxRec dst_box, src_box;
2547 uint32_t alpha_fixup;
2548 uint32_t color, hint;
2549 bool was_clear;
2550 bool ret;
2551
2552 #if DEBUG_NO_BLT || NO_BLT_COMPOSITE
2553 return false;
2554 #endif
2555 DBG(("%s (%d, %d), (%d, %d), %dx%d\n",
2556 __FUNCTION__, x, y, dst_x, dst_y, width, height));
2557
2558 switch (dst->pDrawable->bitsPerPixel) {
2559 case 8:
2560 case 16:
2561 case 32:
2562 break;
2563 default:
2564 DBG(("%s: unhandled bpp: %d\n", __FUNCTION__,
2565 dst->pDrawable->bitsPerPixel));
2566 return false;
2567 }
2568
2569 tmp->dst.pixmap = get_drawable_pixmap(dst->pDrawable);
2570 was_clear = is_clear(tmp->dst.pixmap);
2571
2572 if (width | height) {
2573 dst_box.x1 = dst_x;
2574 dst_box.x2 = bound(dst_x, width);
2575 dst_box.y1 = dst_y;
2576 dst_box.y2 = bound(dst_y, height);
2577 } else
2578 sna_render_picture_extents(dst, &dst_box);
2579
2580 tmp->dst.format = dst->format;
2581 tmp->dst.width = tmp->dst.pixmap->drawable.width;
2582 tmp->dst.height = tmp->dst.pixmap->drawable.height;
2583 get_drawable_deltas(dst->pDrawable, tmp->dst.pixmap,
2584 &tmp->dst.x, &tmp->dst.y);
2585
2586 if (op == PictOpClear) {
2587 clear:
2588 if (was_clear && sna_pixmap(tmp->dst.pixmap)->clear_color == 0) {
2589 sna_pixmap(tmp->dst.pixmap)->clear = true;
2590 nop:
2591 return prepare_blt_nop(sna, tmp);
2592 }
2593
2594 hint = 0;
2595 if (can_render(sna)) {
2596 hint |= PREFER_GPU;
2597 if ((flags & COMPOSITE_PARTIAL) == 0) {
2598 hint |= IGNORE_DAMAGE;
2599 if (width == tmp->dst.pixmap->drawable.width &&
2600 height == tmp->dst.pixmap->drawable.height)
2601 hint |= REPLACES;
2602 }
2603 }
2604 tmp->dst.bo = sna_drawable_use_bo(dst->pDrawable, hint,
2605 &dst_box, &tmp->damage);
2606 assert(!tmp->damage || !DAMAGE_IS_ALL(*tmp->damage));
2607 if (tmp->dst.bo) {
2608 if (!kgem_bo_can_blt(&sna->kgem, tmp->dst.bo)) {
2609 DBG(("%s: can not blit to dst, tiling? %d, pitch? %d\n",
2610 __FUNCTION__, tmp->dst.bo->tiling, tmp->dst.bo->pitch));
2611 return false;
2612 }
2613 if (hint & REPLACES)
2614 kgem_bo_undo(&sna->kgem, tmp->dst.bo);
2615 if (flags & COMPOSITE_UPLOAD)
2616 return false;
2617 } else {
2618 RegionRec region;
2619
2620 region.extents = dst_box;
2621 region.data = NULL;
2622
2623 hint = MOVE_WRITE | MOVE_INPLACE_HINT;
2624 if (flags & COMPOSITE_PARTIAL)
2625 hint |= MOVE_READ;
2626 if (!sna_drawable_move_region_to_cpu(dst->pDrawable, ®ion, hint))
2627 return false;
2628 }
2629
2630 return prepare_blt_clear(sna, tmp);
2631 }
2632
2633 if (is_solid(src)) {
2634 if ((op == PictOpOver || op == PictOpAdd) && is_transparent(src)) {
2635 sna_pixmap(tmp->dst.pixmap)->clear = was_clear;
2636 return prepare_blt_nop(sna, tmp);
2637 }
2638 if (op == PictOpOver && is_opaque_solid(src))
2639 op = PictOpSrc;
2640 if (op == PictOpAdd &&
2641 PICT_FORMAT_RGB(src->format) == PICT_FORMAT_RGB(dst->format) &&
2642 is_white(src))
2643 op = PictOpSrc;
2644 if (was_clear && (op == PictOpAdd || op == PictOpOver)) {
2645 if (sna_pixmap(tmp->dst.pixmap)->clear_color == 0)
2646 op = PictOpSrc;
2647 if (op == PictOpOver) {
2648 unsigned dst_color = solid_color(dst->format, sna_pixmap(tmp->dst.pixmap)->clear_color);
2649 color = over(get_solid_color(src, PICT_a8r8g8b8),
2650 dst_color);
2651 op = PictOpSrc;
2652 DBG(("%s: precomputing solid OVER (%08x, %08x) -> %08x\n",
2653 __FUNCTION__, get_solid_color(src, PICT_a8r8g8b8),
2654 solid_color(dst->format, sna_pixmap(tmp->dst.pixmap)->clear_color),
2655 color));
2656 if (color == dst_color)
2657 goto nop;
2658 else
2659 goto fill;
2660 }
2661 if (op == PictOpAdd) {
2662 unsigned dst_color = solid_color(dst->format, sna_pixmap(tmp->dst.pixmap)->clear_color);
2663 color = add(get_solid_color(src, PICT_a8r8g8b8),
2664 dst_color);
2665 op = PictOpSrc;
2666 DBG(("%s: precomputing solid ADD (%08x, %08x) -> %08x\n",
2667 __FUNCTION__, get_solid_color(src, PICT_a8r8g8b8),
2668 solid_color(dst->format, sna_pixmap(tmp->dst.pixmap)->clear_color),
2669 color));
2670 if (color == dst_color)
2671 goto nop;
2672 else
2673 goto fill;
2674 }
2675 }
2676 if (op == PictOpOutReverse && is_opaque_solid(src))
2677 goto clear;
2678
2679 if (op != PictOpSrc) {
2680 DBG(("%s: unsupported op [%d] for blitting\n",
2681 __FUNCTION__, op));
2682 return false;
2683 }
2684
2685 color = get_solid_color(src, tmp->dst.format);
2686 fill:
2687 if (color == 0)
2688 goto clear;
2689
2690 if (was_clear && sna_pixmap(tmp->dst.pixmap)->clear_color == color) {
2691 sna_pixmap(tmp->dst.pixmap)->clear = true;
2692 return prepare_blt_nop(sna, tmp);
2693 }
2694
2695 hint = 0;
2696 if (can_render(sna)) {
2697 hint |= PREFER_GPU;
2698 if ((flags & COMPOSITE_PARTIAL) == 0) {
2699 hint |= IGNORE_DAMAGE;
2700 if (width == tmp->dst.pixmap->drawable.width &&
2701 height == tmp->dst.pixmap->drawable.height)
2702 hint |= REPLACES;
2703 }
2704 }
2705 tmp->dst.bo = sna_drawable_use_bo(dst->pDrawable, hint,
2706 &dst_box, &tmp->damage);
2707 assert(!tmp->damage || !DAMAGE_IS_ALL(*tmp->damage));
2708 if (tmp->dst.bo) {
2709 if (!kgem_bo_can_blt(&sna->kgem, tmp->dst.bo)) {
2710 DBG(("%s: can not blit to dst, tiling? %d, pitch? %d\n",
2711 __FUNCTION__, tmp->dst.bo->tiling, tmp->dst.bo->pitch));
2712 return false;
2713 }
2714 if (hint & REPLACES)
2715 kgem_bo_undo(&sna->kgem, tmp->dst.bo);
2716 if (flags & COMPOSITE_UPLOAD)
2717 return false;
2718 } else {
2719 RegionRec region;
2720
2721 region.extents = dst_box;
2722 region.data = NULL;
2723
2724 hint = MOVE_WRITE | MOVE_INPLACE_HINT;
2725 if (flags & COMPOSITE_PARTIAL)
2726 hint |= MOVE_READ;
2727 if (!sna_drawable_move_region_to_cpu(dst->pDrawable, ®ion, hint))
2728 return false;
2729 }
2730
2731 return prepare_blt_fill(sna, tmp, color);
2732 }
2733
2734 if (!src->pDrawable) {
2735 DBG(("%s: unsupported procedural source\n",
2736 __FUNCTION__));
2737 return false;
2738 }
2739
2740 if (src->filter == PictFilterConvolution) {
2741 DBG(("%s: convolutions filters not handled\n",
2742 __FUNCTION__));
2743 return false;
2744 }
2745
2746 if (op == PictOpOver && PICT_FORMAT_A(src_format) == 0)
2747 op = PictOpSrc;
2748
2749 if (op != PictOpSrc) {
2750 DBG(("%s: unsupported op [%d] for blitting\n",
2751 __FUNCTION__, op));
2752 return false;
2753 }
2754
2755 if (!sna_transform_is_imprecise_integer_translation(src->transform, src->filter,
2756 dst->polyMode == PolyModePrecise,
2757 &tx, &ty)) {
2758 DBG(("%s: source transform is not an integer translation\n",
2759 __FUNCTION__));
2760 return false;
2761 }
2762 DBG(("%s: converting transform to integer translation? (%d, %d)\n",
2763 __FUNCTION__, src->transform != NULL, tx, ty));
2764 x += tx;
2765 y += ty;
2766
2767 if ((x >= src->pDrawable->width ||
2768 y >= src->pDrawable->height ||
2769 x + width <= 0 ||
2770 y + height <= 0) &&
2771 (!src->repeat || src->repeatType == RepeatNone)) {
2772 DBG(("%s: source is outside of valid area, converting to clear\n",
2773 __FUNCTION__));
2774 goto clear;
2775 }
2776
2777 src_pixmap = get_drawable_pixmap(src->pDrawable);
2778 if (is_clear(src_pixmap)) {
2779 if (src->repeat ||
2780 (x >= 0 && y >= 0 &&
2781 x + width <= src_pixmap->drawable.width &&
2782 y + height <= src_pixmap->drawable.height)) {
2783 color = color_convert(sna_pixmap(src_pixmap)->clear_color,
2784 src->format, tmp->dst.format);
2785 goto fill;
2786 }
2787 }
2788
2789 alpha_fixup = 0;
2790 if (!(dst->format == src_format ||
2791 dst->format == alphaless(src_format) ||
2792 (alphaless(dst->format) == alphaless(src_format) &&
2793 sna_get_pixel_from_rgba(&alpha_fixup,
2794 0, 0, 0, 0xffff,
2795 dst->format)))) {
2796 DBG(("%s: incompatible src/dst formats src=%08x, dst=%08x\n",
2797 __FUNCTION__, (unsigned)src_format, dst->format));
2798 return false;
2799 }
2800
2801 /* XXX tiling? fixup extend none? */
2802 if (x < 0 || y < 0 ||
2803 x + width > src->pDrawable->width ||
2804 y + height > src->pDrawable->height) {
2805 DBG(("%s: source extends outside (%d, %d), (%d, %d) of valid drawable %dx%d, repeat=%d\n",
2806 __FUNCTION__,
2807 x, y, x+width, y+width, src->pDrawable->width, src->pDrawable->height, src->repeatType));
2808 if (src->repeat && src->repeatType == RepeatNormal) {
2809 x = x % src->pDrawable->width;
2810 y = y % src->pDrawable->height;
2811 if (x < 0)
2812 x += src->pDrawable->width;
2813 if (y < 0)
2814 y += src->pDrawable->height;
2815 if (x + width > src->pDrawable->width ||
2816 y + height > src->pDrawable->height)
2817 return false;
2818 } else
2819 return false;
2820 }
2821
2822 get_drawable_deltas(src->pDrawable, src_pixmap, &tx, &ty);
2823 x += tx + src->pDrawable->x;
2824 y += ty + src->pDrawable->y;
2825 if (x < 0 || y < 0 ||
2826 x + width > src_pixmap->drawable.width ||
2827 y + height > src_pixmap->drawable.height) {
2828 DBG(("%s: source extends outside (%d, %d), (%d, %d) of valid pixmap %dx%d\n",
2829 __FUNCTION__,
2830 x, y, x+width, y+width, src_pixmap->drawable.width, src_pixmap->drawable.height));
2831 return false;
2832 }
2833
2834 tmp->u.blt.src_pixmap = src_pixmap;
2835 tmp->u.blt.sx = x - dst_x;
2836 tmp->u.blt.sy = y - dst_y;
2837 DBG(("%s: blt dst offset (%d, %d), source offset (%d, %d), with alpha fixup? %x\n",
2838 __FUNCTION__,
2839 tmp->dst.x, tmp->dst.y, tmp->u.blt.sx, tmp->u.blt.sy, alpha_fixup));
2840
2841 src_box.x1 = x;
2842 src_box.y1 = y;
2843 src_box.x2 = x + width;
2844 src_box.y2 = y + height;
2845 bo = __sna_render_pixmap_bo(sna, src_pixmap, &src_box, true);
2846 if (bo && !kgem_bo_can_blt(&sna->kgem, bo)) {
2847 DBG(("%s: can not blit from src size=%dx%d, tiling? %d, pitch? %d\n",
2848 __FUNCTION__,
2849 src_pixmap->drawable.width < sna->render.max_3d_size,
2850 src_pixmap->drawable.height < sna->render.max_3d_size,
2851 bo->tiling, bo->pitch));
2852
2853 if (src_pixmap->drawable.width <= sna->render.max_3d_size &&
2854 src_pixmap->drawable.height <= sna->render.max_3d_size &&
2855 bo->pitch <= sna->render.max_3d_pitch &&
2856 (flags & (COMPOSITE_UPLOAD | COMPOSITE_FALLBACK)) == 0)
2857 {
2858 return false;
2859 }
2860
2861 bo = NULL;
2862 }
2863
2864 hint = 0;
2865 if (bo || can_render(sna)) {
2866 hint |= PREFER_GPU;
2867 if ((flags & COMPOSITE_PARTIAL) == 0) {
2868 hint |= IGNORE_DAMAGE;
2869 if (width == tmp->dst.pixmap->drawable.width &&
2870 height == tmp->dst.pixmap->drawable.height)
2871 hint |= REPLACES;
2872 }
2873 if (bo)
2874 hint |= FORCE_GPU;
2875 }
2876 tmp->dst.bo = sna_drawable_use_bo(dst->pDrawable, hint,
2877 &dst_box, &tmp->damage);
2878 assert(!tmp->damage || !DAMAGE_IS_ALL(*tmp->damage));
2879
2880 if (tmp->dst.bo && hint & REPLACES) {
2881 struct sna_pixmap *priv = sna_pixmap(tmp->dst.pixmap);
2882 kgem_bo_pair_undo(&sna->kgem, priv->gpu_bo, priv->cpu_bo);
2883 }
2884
2885 if (tmp->dst.pixmap == src_pixmap)
2886 bo = __sna_render_pixmap_bo(sna, src_pixmap, &src_box, true);
2887
2888 ret = false;
2889 if (bo) {
2890 if (!tmp->dst.bo) {
2891 DBG(("%s: fallback -- unaccelerated read back\n",
2892 __FUNCTION__));
2893 fallback:
2894 if (flags & COMPOSITE_FALLBACK || !kgem_bo_is_busy(bo))
2895 goto put;
2896 } else if (!kgem_bo_can_blt(&sna->kgem, bo)) {
2897 DBG(("%s: fallback -- cannot blit from source\n",
2898 __FUNCTION__));
2899 goto fallback;
2900 } else if (bo->snoop && tmp->dst.bo->snoop) {
2901 DBG(("%s: fallback -- can not copy between snooped bo\n",
2902 __FUNCTION__));
2903 goto put;
2904 } else if (!kgem_bo_can_blt(&sna->kgem, tmp->dst.bo)) {
2905 DBG(("%s: fallback -- unaccelerated upload\n",
2906 __FUNCTION__));
2907 goto fallback;
2908 } else if ((flags & COMPOSITE_UPLOAD) == 0) {
2909 ret = prepare_blt_copy(sna, tmp, bo, alpha_fixup);
2910 if (!ret)
2911 goto fallback;
2912 }
2913 } else {
2914 RegionRec region;
2915
2916 put:
2917 if (tmp->dst.bo == sna_pixmap(tmp->dst.pixmap)->cpu_bo) {
2918 DBG(("%s: dropping upload into CPU bo\n", __FUNCTION__));
2919 tmp->dst.bo = NULL;
2920 tmp->damage = NULL;
2921 }
2922
2923 if (tmp->dst.bo == NULL) {
2924 hint = MOVE_INPLACE_HINT | MOVE_WRITE;
2925 if (flags & COMPOSITE_PARTIAL)
2926 hint |= MOVE_READ;
2927
2928 region.extents = dst_box;
2929 region.data = NULL;
2930 if (!sna_drawable_move_region_to_cpu(dst->pDrawable,
2931 ®ion, hint))
2932 return false;
2933
2934 assert(tmp->damage == NULL);
2935 }
2936
2937 region.extents = src_box;
2938 region.data = NULL;
2939 if (!sna_drawable_move_region_to_cpu(&src_pixmap->drawable,
2940 ®ion, MOVE_READ))
2941 return false;
2942
2943 ret = prepare_blt_put(sna, tmp, alpha_fixup);
2944 }
2945
2946 return ret;
2947 }
2948
convert_done(struct sna * sna,const struct sna_composite_op * op)2949 static void convert_done(struct sna *sna, const struct sna_composite_op *op)
2950 {
2951 struct kgem *kgem = &sna->kgem;
2952
2953 assert(kgem->nbatch <= KGEM_BATCH_SIZE(kgem));
2954 if (kgem->nexec > 1 && __kgem_ring_empty(kgem)) {
2955 DBG(("%s: flushing BLT operation on empty ring\n", __FUNCTION__));
2956 _kgem_submit(kgem);
2957 }
2958
2959 kgem_bo_destroy(kgem, op->src.bo);
2960 sna_render_composite_redirect_done(sna, op);
2961 }
2962
gen6_convert_done(struct sna * sna,const struct sna_composite_op * op)2963 static void gen6_convert_done(struct sna *sna, const struct sna_composite_op *op)
2964 {
2965 struct kgem *kgem = &sna->kgem;
2966
2967 if (kgem_check_batch(kgem, 3)) {
2968 uint32_t *b = kgem->batch + kgem->nbatch;
2969 assert(sna->kgem.mode == KGEM_BLT);
2970 b[0] = XY_SETUP_CLIP;
2971 b[1] = b[2] = 0;
2972 kgem->nbatch += 3;
2973 assert(kgem->nbatch < kgem->surface);
2974 }
2975
2976 convert_done(sna, op);
2977 }
2978
2979 bool
sna_blt_composite__convert(struct sna * sna,int x,int y,int width,int height,struct sna_composite_op * tmp)2980 sna_blt_composite__convert(struct sna *sna,
2981 int x, int y,
2982 int width, int height,
2983 struct sna_composite_op *tmp)
2984 {
2985 uint32_t alpha_fixup;
2986 int sx, sy;
2987 uint8_t op;
2988
2989 #if DEBUG_NO_BLT || NO_BLT_COMPOSITE
2990 return false;
2991 #endif
2992
2993 DBG(("%s src=%d, dst=%d (redirect? %d)\n", __FUNCTION__,
2994 tmp->src.bo->handle, tmp->dst.bo->handle,
2995 tmp->redirect.real_bo ? tmp->redirect.real_bo->handle : 0));
2996
2997 if (!kgem_bo_can_blt(&sna->kgem, tmp->dst.bo) ||
2998 !kgem_bo_can_blt(&sna->kgem, tmp->src.bo)) {
2999 DBG(("%s: cannot blt from src or to dst\n", __FUNCTION__));
3000 return false;
3001 }
3002
3003 if (tmp->src.transform) {
3004 DBG(("%s: transforms not handled by the BLT\n", __FUNCTION__));
3005 return false;
3006 }
3007
3008 if (tmp->src.filter == PictFilterConvolution) {
3009 DBG(("%s: convolutions filters not handled\n",
3010 __FUNCTION__));
3011 return false;
3012 }
3013
3014 op = tmp->op;
3015 if (op == PictOpOver && PICT_FORMAT_A(tmp->src.pict_format) == 0)
3016 op = PictOpSrc;
3017 if (op != PictOpSrc) {
3018 DBG(("%s: unsupported op [%d] for blitting\n",
3019 __FUNCTION__, op));
3020 return false;
3021 }
3022
3023 alpha_fixup = 0;
3024 if (!(tmp->dst.format == tmp->src.pict_format ||
3025 tmp->dst.format == alphaless(tmp->src.pict_format) ||
3026 (alphaless(tmp->dst.format) == alphaless(tmp->src.pict_format) &&
3027 sna_get_pixel_from_rgba(&alpha_fixup,
3028 0, 0, 0, 0xffff,
3029 tmp->dst.format)))) {
3030 DBG(("%s: incompatible src/dst formats src=%08x, dst=%08x\n",
3031 __FUNCTION__,
3032 (unsigned)tmp->src.pict_format,
3033 (unsigned)tmp->dst.format));
3034 return false;
3035 }
3036
3037 sx = tmp->src.offset[0];
3038 sy = tmp->src.offset[1];
3039
3040 x += sx;
3041 y += sy;
3042 if (x < 0 || y < 0 ||
3043 x + width > tmp->src.width ||
3044 y + height > tmp->src.height) {
3045 DBG(("%s: source extends outside (%d, %d), (%d, %d) of valid drawable %dx%d\n",
3046 __FUNCTION__,
3047 x, y, x+width, y+width, tmp->src.width, tmp->src.height));
3048 if (tmp->src.repeat == RepeatNormal) {
3049 int xx = x % tmp->src.width;
3050 int yy = y % tmp->src.height;
3051 if (xx < 0)
3052 xx += tmp->src.width;
3053 if (yy < 0)
3054 yy += tmp->src.height;
3055 if (xx + width > tmp->src.width ||
3056 yy + height > tmp->src.height)
3057 return false;
3058
3059 sx += xx - x;
3060 sy += yy - y;
3061 } else
3062 return false;
3063 }
3064
3065 DBG(("%s: blt dst offset (%d, %d), source offset (%d, %d), with alpha fixup? %x\n",
3066 __FUNCTION__,
3067 tmp->dst.x, tmp->dst.y, sx, sy, alpha_fixup));
3068
3069 tmp->u.blt.src_pixmap = NULL;
3070 tmp->u.blt.sx = sx;
3071 tmp->u.blt.sy = sy;
3072
3073 kgem_set_mode(&sna->kgem, KGEM_BLT, tmp->dst.bo);
3074 if (!kgem_check_many_bo_fenced(&sna->kgem, tmp->dst.bo, tmp->src.bo, NULL)) {
3075 kgem_submit(&sna->kgem);
3076 if (!kgem_check_many_bo_fenced(&sna->kgem,
3077 tmp->dst.bo, tmp->src.bo, NULL)) {
3078 DBG(("%s: fallback -- no room in aperture\n", __FUNCTION__));
3079 return sna_tiling_blt_composite(sna, tmp, tmp->src.bo,
3080 PICT_FORMAT_BPP(tmp->src.pict_format),
3081 alpha_fixup);
3082 }
3083 _kgem_set_mode(&sna->kgem, KGEM_BLT);
3084 }
3085 kgem_bcs_set_tiling(&sna->kgem, tmp->src.bo, tmp->dst.bo);
3086
3087 if (alpha_fixup) {
3088 tmp->blt = blt_composite_copy_with_alpha;
3089 tmp->box = blt_composite_copy_box_with_alpha;
3090 tmp->boxes = blt_composite_copy_boxes_with_alpha;
3091
3092 if (!sna_blt_alpha_fixup_init(sna, &tmp->u.blt,
3093 tmp->src.bo, tmp->dst.bo,
3094 PICT_FORMAT_BPP(tmp->src.pict_format),
3095 alpha_fixup))
3096 return false;
3097 } else {
3098 tmp->blt = blt_composite_copy;
3099 tmp->box = blt_composite_copy_box;
3100 tmp->boxes = blt_composite_copy_boxes;
3101 if (sna->kgem.gen >= 0100)
3102 tmp->thread_boxes = blt_composite_copy_boxes__thread64;
3103 else
3104 tmp->thread_boxes = blt_composite_copy_boxes__thread;
3105
3106 if (!sna_blt_copy_init(sna, &tmp->u.blt,
3107 tmp->src.bo, tmp->dst.bo,
3108 PICT_FORMAT_BPP(tmp->src.pict_format),
3109 GXcopy))
3110 return false;
3111 }
3112
3113 tmp->done = convert_done;
3114 if (sna->kgem.gen >= 060 && tmp->src.bo == tmp->dst.bo)
3115 tmp->done = gen6_convert_done;
3116
3117 return true;
3118 }
3119
sna_blt_fill_op_blt(struct sna * sna,const struct sna_fill_op * op,int16_t x,int16_t y,int16_t width,int16_t height)3120 static void sna_blt_fill_op_blt(struct sna *sna,
3121 const struct sna_fill_op *op,
3122 int16_t x, int16_t y,
3123 int16_t width, int16_t height)
3124 {
3125 if (sna->blt_state.fill_bo != op->base.u.blt.bo[0]->unique_id) {
3126 const struct sna_blt_state *blt = &op->base.u.blt;
3127
3128 __sna_blt_fill_begin(sna, blt);
3129
3130 sna->blt_state.fill_bo = blt->bo[0]->unique_id;
3131 sna->blt_state.fill_pixel = blt->pixel;
3132 sna->blt_state.fill_alu = blt->alu;
3133 }
3134
3135 sna_blt_fill_one(sna, &op->base.u.blt, x, y, width, height);
3136 }
3137
sna_blt_fill_op_box(struct sna * sna,const struct sna_fill_op * op,const BoxRec * box)3138 fastcall static void sna_blt_fill_op_box(struct sna *sna,
3139 const struct sna_fill_op *op,
3140 const BoxRec *box)
3141 {
3142 if (sna->blt_state.fill_bo != op->base.u.blt.bo[0]->unique_id) {
3143 const struct sna_blt_state *blt = &op->base.u.blt;
3144
3145 __sna_blt_fill_begin(sna, blt);
3146
3147 sna->blt_state.fill_bo = blt->bo[0]->unique_id;
3148 sna->blt_state.fill_pixel = blt->pixel;
3149 sna->blt_state.fill_alu = blt->alu;
3150 }
3151
3152 _sna_blt_fill_box(sna, &op->base.u.blt, box);
3153 }
3154
sna_blt_fill_op_boxes(struct sna * sna,const struct sna_fill_op * op,const BoxRec * box,int nbox)3155 fastcall static void sna_blt_fill_op_boxes(struct sna *sna,
3156 const struct sna_fill_op *op,
3157 const BoxRec *box,
3158 int nbox)
3159 {
3160 if (sna->blt_state.fill_bo != op->base.u.blt.bo[0]->unique_id) {
3161 const struct sna_blt_state *blt = &op->base.u.blt;
3162
3163 __sna_blt_fill_begin(sna, blt);
3164
3165 sna->blt_state.fill_bo = blt->bo[0]->unique_id;
3166 sna->blt_state.fill_pixel = blt->pixel;
3167 sna->blt_state.fill_alu = blt->alu;
3168 }
3169
3170 _sna_blt_fill_boxes(sna, &op->base.u.blt, box, nbox);
3171 }
3172
pt_add(uint32_t cmd,const DDXPointRec * pt,int16_t dx,int16_t dy)3173 static inline uint64_t pt_add(uint32_t cmd, const DDXPointRec *pt, int16_t dx, int16_t dy)
3174 {
3175 union {
3176 DDXPointRec pt;
3177 uint32_t i;
3178 } u;
3179
3180 u.pt.x = pt->x + dx;
3181 u.pt.y = pt->y + dy;
3182
3183 return cmd | (uint64_t)u.i<<32;
3184 }
3185
/* Fill n single pixels, translated by (dx, dy), as a stream of 2-dword
 * XY_PIXEL_BLT commands.  Re-establishes the cached fill setup when the
 * ring is configured for a different bo, and restarts the setup with
 * sna_blt_fill_begin() whenever the batch fills up mid-stream.
 */
fastcall static void sna_blt_fill_op_points(struct sna *sna,
					    const struct sna_fill_op *op,
					    int16_t dx, int16_t dy,
					    const DDXPointRec *p, int n)
{
	const struct sna_blt_state *blt = &op->base.u.blt;
	struct kgem *kgem = &sna->kgem;
	uint32_t cmd;

	DBG(("%s: %08x x %d\n", __FUNCTION__, blt->pixel, n));

	if (sna->blt_state.fill_bo != op->base.u.blt.bo[0]->unique_id) {
		/* Stale fill state: re-emit the setup and refresh the cache. */
		__sna_blt_fill_begin(sna, blt);

		sna->blt_state.fill_bo = blt->bo[0]->unique_id;
		sna->blt_state.fill_pixel = blt->pixel;
		sna->blt_state.fill_alu = blt->alu;
	}

	/* Need room for at least one 2-dword point before starting. */
	if (!kgem_check_batch(kgem, 2))
		sna_blt_fill_begin(sna, blt);

	cmd = XY_PIXEL_BLT;
	if (kgem->gen >= 040 && op->base.u.blt.bo[0]->tiling)
		cmd |= BLT_DST_TILED;

	do {
		uint32_t *b = kgem->batch + kgem->nbatch;
		int n_this_time, rem;

		assert(sna->kgem.mode == KGEM_BLT);
		/* Emit as many points as fit in the remaining batch space;
		 * each point is 2 dwords (cmd + packed coordinate).
		 */
		n_this_time = n;
		rem = kgem_batch_space(kgem);
		if (2*n_this_time > rem)
			n_this_time = rem / 2;
		assert(n_this_time);
		n -= n_this_time;

		/* Reserve the space up front, then write through b. */
		kgem->nbatch += 2 * n_this_time;
		assert(kgem->nbatch < kgem->surface);

		/* Hoist the no-translation case out of the inner loop. */
		if ((dx|dy) == 0) {
			do {
				*(uint64_t *)b = pt_add(cmd, p++, 0, 0);
				b += 2;
			} while (--n_this_time);
		} else {
			do {
				*(uint64_t *)b = pt_add(cmd, p++, dx, dy);
				b += 2;
			} while (--n_this_time);
		}

		if (!n)
			return;

		/* Batch exhausted with points left: flush and re-setup. */
		sna_blt_fill_begin(sna, blt);
	} while (1);
}
3245
sna_blt_fill(struct sna * sna,uint8_t alu,struct kgem_bo * bo,int bpp,uint32_t pixel,struct sna_fill_op * fill)3246 bool sna_blt_fill(struct sna *sna, uint8_t alu,
3247 struct kgem_bo *bo, int bpp,
3248 uint32_t pixel,
3249 struct sna_fill_op *fill)
3250 {
3251 #if DEBUG_NO_BLT || NO_BLT_FILL
3252 return false;
3253 #endif
3254
3255 DBG(("%s(alu=%d, pixel=%x, bpp=%d)\n", __FUNCTION__, alu, pixel, bpp));
3256
3257 if (!kgem_bo_can_blt(&sna->kgem, bo)) {
3258 DBG(("%s: rejected due to incompatible Y-tiling\n",
3259 __FUNCTION__));
3260 return false;
3261 }
3262
3263 if (!sna_blt_fill_init(sna, &fill->base.u.blt,
3264 bo, bpp, alu, pixel))
3265 return false;
3266
3267 assert(sna->kgem.mode == KGEM_BLT);
3268 fill->blt = sna_blt_fill_op_blt;
3269 fill->box = sna_blt_fill_op_box;
3270 fill->boxes = sna_blt_fill_op_boxes;
3271 fill->points = sna_blt_fill_op_points;
3272 fill->done =
3273 (void (*)(struct sna *, const struct sna_fill_op *))nop_done;
3274 return true;
3275 }
3276
sna_blt_copy_op_blt(struct sna * sna,const struct sna_copy_op * op,int16_t src_x,int16_t src_y,int16_t width,int16_t height,int16_t dst_x,int16_t dst_y)3277 static void sna_blt_copy_op_blt(struct sna *sna,
3278 const struct sna_copy_op *op,
3279 int16_t src_x, int16_t src_y,
3280 int16_t width, int16_t height,
3281 int16_t dst_x, int16_t dst_y)
3282 {
3283 sna_blt_copy_one(sna, &op->base.u.blt,
3284 src_x, src_y,
3285 width, height,
3286 dst_x, dst_y);
3287 }
3288
sna_blt_copy(struct sna * sna,uint8_t alu,struct kgem_bo * src,struct kgem_bo * dst,int bpp,struct sna_copy_op * op)3289 bool sna_blt_copy(struct sna *sna, uint8_t alu,
3290 struct kgem_bo *src,
3291 struct kgem_bo *dst,
3292 int bpp,
3293 struct sna_copy_op *op)
3294 {
3295 #if DEBUG_NO_BLT || NO_BLT_COPY
3296 return false;
3297 #endif
3298
3299 if (!kgem_bo_can_blt(&sna->kgem, src))
3300 return false;
3301
3302 if (!kgem_bo_can_blt(&sna->kgem, dst))
3303 return false;
3304
3305 if (!sna_blt_copy_init(sna, &op->base.u.blt,
3306 src, dst,
3307 bpp, alu))
3308 return false;
3309
3310 op->blt = sna_blt_copy_op_blt;
3311 if (sna->kgem.gen >= 060 && src == dst)
3312 op->done = (void (*)(struct sna *, const struct sna_copy_op *))
3313 gen6_blt_copy_done;
3314 else
3315 op->done = (void (*)(struct sna *, const struct sna_copy_op *))
3316 nop_done;
3317 return true;
3318 }
3319
sna_blt_fill_box(struct sna * sna,uint8_t alu,struct kgem_bo * bo,int bpp,uint32_t color,const BoxRec * box)3320 static bool sna_blt_fill_box(struct sna *sna, uint8_t alu,
3321 struct kgem_bo *bo, int bpp,
3322 uint32_t color,
3323 const BoxRec *box)
3324 {
3325 struct kgem *kgem = &sna->kgem;
3326 uint32_t br13, cmd, *b;
3327 bool overwrites;
3328
3329 assert(kgem_bo_can_blt (kgem, bo));
3330
3331 DBG(("%s: box=((%d, %d), (%d, %d))\n", __FUNCTION__,
3332 box->x1, box->y1, box->x2, box->y2));
3333
3334 assert(box->x1 >= 0);
3335 assert(box->y1 >= 0);
3336
3337 cmd = XY_COLOR_BLT | (kgem->gen >= 0100 ? 5 : 4);
3338 br13 = bo->pitch;
3339 if (kgem->gen >= 040 && bo->tiling) {
3340 cmd |= BLT_DST_TILED;
3341 br13 >>= 2;
3342 }
3343 assert(br13 <= MAXSHORT);
3344
3345 br13 |= fill_ROP[alu] << 16;
3346 br13 |= sna_br13_color_depth(bpp);
3347 if (bpp == 32)
3348 cmd |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
3349
3350 /* All too frequently one blt completely overwrites the previous */
3351 overwrites = alu == GXcopy || alu == GXclear || alu == GXset;
3352 if (overwrites) {
3353 if (sna->kgem.gen >= 0100) {
3354 if (kgem->nbatch >= 7 &&
3355 kgem->batch[kgem->nbatch-7] == cmd &&
3356 *(uint64_t *)&kgem->batch[kgem->nbatch-5] == *(const uint64_t *)box &&
3357 kgem->reloc[kgem->nreloc-1].target_handle == bo->target_handle) {
3358 DBG(("%s: replacing last fill\n", __FUNCTION__));
3359 kgem->batch[kgem->nbatch-6] = br13;
3360 kgem->batch[kgem->nbatch-1] = color;
3361 return true;
3362 }
3363 if (kgem->nbatch >= 10 &&
3364 (kgem->batch[kgem->nbatch-10] & 0xffc00000) == XY_SRC_COPY_BLT_CMD &&
3365 *(uint64_t *)&kgem->batch[kgem->nbatch-8] == *(const uint64_t *)box &&
3366 kgem->reloc[kgem->nreloc-2].target_handle == bo->target_handle) {
3367 DBG(("%s: replacing last copy\n", __FUNCTION__));
3368 kgem->batch[kgem->nbatch-10] = cmd;
3369 kgem->batch[kgem->nbatch-8] = br13;
3370 kgem->batch[kgem->nbatch-4] = color;
3371 /* Keep the src bo as part of the execlist, just remove
3372 * its relocation entry.
3373 */
3374 kgem->nreloc--;
3375 kgem->nbatch -= 3;
3376 return true;
3377 }
3378 } else {
3379 if (kgem->nbatch >= 6 &&
3380 kgem->batch[kgem->nbatch-6] == cmd &&
3381 *(uint64_t *)&kgem->batch[kgem->nbatch-4] == *(const uint64_t *)box &&
3382 kgem->reloc[kgem->nreloc-1].target_handle == bo->target_handle) {
3383 DBG(("%s: replacing last fill\n", __FUNCTION__));
3384 kgem->batch[kgem->nbatch-5] = br13;
3385 kgem->batch[kgem->nbatch-1] = color;
3386 return true;
3387 }
3388 if (kgem->nbatch >= 8 &&
3389 (kgem->batch[kgem->nbatch-8] & 0xffc00000) == XY_SRC_COPY_BLT_CMD &&
3390 *(uint64_t *)&kgem->batch[kgem->nbatch-6] == *(const uint64_t *)box &&
3391 kgem->reloc[kgem->nreloc-2].target_handle == bo->target_handle) {
3392 DBG(("%s: replacing last copy\n", __FUNCTION__));
3393 kgem->batch[kgem->nbatch-8] = cmd;
3394 kgem->batch[kgem->nbatch-7] = br13;
3395 kgem->batch[kgem->nbatch-3] = color;
3396 /* Keep the src bo as part of the execlist, just remove
3397 * its relocation entry.
3398 */
3399 kgem->nreloc--;
3400 kgem->nbatch -= 2;
3401 return true;
3402 }
3403 }
3404 }
3405
3406 /* If we are currently emitting SCANLINES, keep doing so */
3407 if (sna->blt_state.fill_bo == bo->unique_id &&
3408 sna->blt_state.fill_pixel == color &&
3409 (sna->blt_state.fill_alu == alu ||
3410 sna->blt_state.fill_alu == ~alu)) {
3411 DBG(("%s: matching last fill, converting to scanlines\n",
3412 __FUNCTION__));
3413 return false;
3414 }
3415
3416 kgem_set_mode(kgem, KGEM_BLT, bo);
3417 if (!kgem_check_batch(kgem, 7) ||
3418 !kgem_check_reloc(kgem, 1) ||
3419 !kgem_check_bo_fenced(kgem, bo)) {
3420 kgem_submit(kgem);
3421 if (!kgem_check_bo_fenced(&sna->kgem, bo))
3422 return false;
3423
3424 _kgem_set_mode(kgem, KGEM_BLT);
3425 }
3426 kgem_bcs_set_tiling(&sna->kgem, NULL, bo);
3427
3428 assert(kgem_check_batch(kgem, 6));
3429 assert(kgem_check_reloc(kgem, 1));
3430
3431 assert(sna->kgem.mode == KGEM_BLT);
3432 b = kgem->batch + kgem->nbatch;
3433 b[0] = cmd;
3434 b[1] = br13;
3435 *(uint64_t *)(b+2) = *(const uint64_t *)box;
3436 if (kgem->gen >= 0100) {
3437 *(uint64_t *)(b+4) =
3438 kgem_add_reloc64(kgem, kgem->nbatch + 4, bo,
3439 I915_GEM_DOMAIN_RENDER << 16 |
3440 I915_GEM_DOMAIN_RENDER |
3441 KGEM_RELOC_FENCED,
3442 0);
3443 b[6] = color;
3444 kgem->nbatch += 7;
3445 } else {
3446 b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, bo,
3447 I915_GEM_DOMAIN_RENDER << 16 |
3448 I915_GEM_DOMAIN_RENDER |
3449 KGEM_RELOC_FENCED,
3450 0);
3451 b[5] = color;
3452 kgem->nbatch += 6;
3453 }
3454 assert(kgem->nbatch < kgem->surface);
3455
3456 sna->blt_state.fill_bo = bo->unique_id;
3457 sna->blt_state.fill_pixel = color;
3458 sna->blt_state.fill_alu = ~alu;
3459 return true;
3460 }
3461
/* Fill nbox rectangles with a solid pixel on the BLT ring.
 *
 * A single box is first attempted as an immediate XY_COLOR_BLT via
 * sna_blt_fill_box().  Otherwise an XY_SETUP_MONO_PATTERN_SL_BLT
 * establishes the fill state (colour, ROP, destination) once, and each
 * box is then emitted as a 3-dword XY_SCANLINE_BLT.  The setup block is
 * re-emitted whenever the batch must be flushed mid-stream.  Returns
 * false to request a fallback.
 */
bool sna_blt_fill_boxes(struct sna *sna, uint8_t alu,
			struct kgem_bo *bo, int bpp,
			uint32_t pixel,
			const BoxRec *box, int nbox)
{
	struct kgem *kgem = &sna->kgem;
	uint32_t br13, cmd;

#if DEBUG_NO_BLT || NO_BLT_FILL_BOXES
	return false;
#endif

	DBG(("%s (%d, %08x, %d) x %d\n",
	     __FUNCTION__, bpp, pixel, alu, nbox));

	if (!kgem_bo_can_blt(kgem, bo)) {
		DBG(("%s: fallback -- cannot blt to dst\n", __FUNCTION__));
		return false;
	}

	/* Canonicalise alu/pixel so equivalent fills share cached state:
	 * clear forces pixel 0; copying all-zeros/all-ones is the same as
	 * GXclear/GXset.
	 */
	if (alu == GXclear)
		pixel = 0;
	else if (alu == GXcopy) {
		if (pixel == 0)
			alu = GXclear;
		else if (pixel == -1)
			alu = GXset;
	}

	/* Single box: try the immediate XY_COLOR_BLT fast path. */
	if (nbox == 1 && sna_blt_fill_box(sna, alu, bo, bpp, pixel, box))
		return true;

	br13 = bo->pitch;
	cmd = XY_SCANLINE_BLT;
	if (kgem->gen >= 040 && bo->tiling) {
		cmd |= 1 << 11;		/* destination tiled */
		br13 >>= 2;		/* tiled pitch in dwords */
	}
	assert(br13 <= MAXSHORT);

	br13 |= 1<<31 | fill_ROP[alu] << 16;
	br13 |= sna_br13_color_depth(bpp);

	kgem_set_mode(kgem, KGEM_BLT, bo);
	if (!kgem_check_batch(kgem, 14) ||
	    !kgem_check_bo_fenced(kgem, bo)) {
		kgem_submit(kgem);
		if (!kgem_check_bo_fenced(&sna->kgem, bo))
			return false;
		_kgem_set_mode(kgem, KGEM_BLT);
	}

	/* Only (re-)emit the scanline setup when the cached state does not
	 * already match this bo/pixel/alu combination.
	 */
	if (sna->blt_state.fill_bo != bo->unique_id ||
	    sna->blt_state.fill_pixel != pixel ||
	    sna->blt_state.fill_alu != alu)
	{
		uint32_t *b;

		if (!kgem_check_batch(kgem, 24) ||
		    !kgem_check_reloc(kgem, 1)) {
			_kgem_submit(kgem);
			if (!kgem_check_bo_fenced(&sna->kgem, bo))
				return false;
			_kgem_set_mode(kgem, KGEM_BLT);
		}

		kgem_bcs_set_tiling(&sna->kgem, NULL, bo);

		assert(sna->kgem.mode == KGEM_BLT);
		b = kgem->batch + kgem->nbatch;
		if (kgem->gen >= 0100) {
			/* 10-dword setup with a 64-bit relocation. */
			b[0] = XY_SETUP_MONO_PATTERN_SL_BLT | 8;
			if (bpp == 32)
				b[0] |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
			if (bo->tiling)
				b[0] |= BLT_DST_TILED;
			b[1] = br13;
			b[2] = 0;
			b[3] = 0;
			*(uint64_t *)(b+4) =
				kgem_add_reloc64(kgem, kgem->nbatch + 4, bo,
						 I915_GEM_DOMAIN_RENDER << 16 |
						 I915_GEM_DOMAIN_RENDER |
						 KGEM_RELOC_FENCED,
						 0);
			b[6] = pixel;
			b[7] = pixel;
			b[8] = 0;
			b[9] = 0;
			kgem->nbatch += 10;
		} else {
			/* 9-dword setup with a 32-bit relocation. */
			b[0] = XY_SETUP_MONO_PATTERN_SL_BLT | 7;
			if (bpp == 32)
				b[0] |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
			if (bo->tiling && kgem->gen >= 040)
				b[0] |= BLT_DST_TILED;
			b[1] = br13;
			b[2] = 0;
			b[3] = 0;
			b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, bo,
					      I915_GEM_DOMAIN_RENDER << 16 |
					      I915_GEM_DOMAIN_RENDER |
					      KGEM_RELOC_FENCED,
					      0);
			b[5] = pixel;
			b[6] = pixel;
			b[7] = 0;
			b[8] = 0;
			kgem->nbatch += 9;
		}
		assert(kgem->nbatch < kgem->surface);

		sna->blt_state.fill_bo = bo->unique_id;
		sna->blt_state.fill_pixel = pixel;
		sna->blt_state.fill_alu = alu;
	}

	do {
		int nbox_this_time, rem;

		/* Emit as many 3-dword scanline blts as the batch holds. */
		nbox_this_time = nbox;
		rem = kgem_batch_space(kgem);
		if (3*nbox_this_time > rem)
			nbox_this_time = rem / 3;
		DBG(("%s: emitting %d boxes out of %d (batch space %d)\n",
		     __FUNCTION__, nbox_this_time, nbox, rem));
		assert(nbox_this_time > 0);
		nbox -= nbox_this_time;

		assert(sna->kgem.mode == KGEM_BLT);
		do {
			uint32_t *b;

			DBG(("%s: (%d, %d), (%d, %d): %08x\n",
			     __FUNCTION__,
			     box->x1, box->y1,
			     box->x2, box->y2,
			     pixel));

			assert(box->x1 >= 0);
			assert(box->y1 >= 0);
			assert(box->y2 * bo->pitch <= kgem_bo_size(bo));

			b = kgem->batch + kgem->nbatch;
			kgem->nbatch += 3;
			assert(kgem->nbatch < kgem->surface);
			b[0] = cmd;
			/* Both box corners written as one 64-bit store. */
			*(uint64_t *)(b+1) = *(const uint64_t *)box;
			box++;
		} while (--nbox_this_time);

		if (nbox) {
			uint32_t *b;

			/* Out of batch space: flush, then re-emit the
			 * scanline setup before continuing (duplicate of
			 * the setup block above).
			 */
			_kgem_submit(kgem);
			_kgem_set_mode(kgem, KGEM_BLT);
			kgem_bcs_set_tiling(&sna->kgem, NULL, bo);

			assert(sna->kgem.mode == KGEM_BLT);
			b = kgem->batch + kgem->nbatch;
			if (kgem->gen >= 0100) {
				b[0] = XY_SETUP_MONO_PATTERN_SL_BLT | 8;
				if (bpp == 32)
					b[0] |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
				if (bo->tiling)
					b[0] |= BLT_DST_TILED;
				b[1] = br13;
				b[2] = 0;
				b[3] = 0;
				*(uint64_t *)(b+4) =
					kgem_add_reloc64(kgem, kgem->nbatch + 4, bo,
							 I915_GEM_DOMAIN_RENDER << 16 |
							 I915_GEM_DOMAIN_RENDER |
							 KGEM_RELOC_FENCED,
							 0);
				b[6] = pixel;
				b[7] = pixel;
				b[8] = 0;
				b[9] = 0;
				kgem->nbatch += 10;
			} else {
				b[0] = XY_SETUP_MONO_PATTERN_SL_BLT | 7;
				if (bpp == 32)
					b[0] |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
				if (bo->tiling && kgem->gen >= 040)
					b[0] |= BLT_DST_TILED;
				b[1] = br13;
				b[2] = 0;
				b[3] = 0;
				b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, bo,
						      I915_GEM_DOMAIN_RENDER << 16 |
						      I915_GEM_DOMAIN_RENDER |
						      KGEM_RELOC_FENCED,
						      0);
				b[5] = pixel;
				b[6] = pixel;
				b[7] = 0;
				b[8] = 0;
				kgem->nbatch += 9;
			}
			assert(kgem->nbatch < kgem->surface);
			assert(kgem_check_batch(kgem, 3));
		}
	} while (nbox);

	/* If the GPU is idle, submit eagerly rather than batching further. */
	if (kgem->nexec > 1 && __kgem_ring_empty(kgem)) {
		DBG(("%s: flushing BLT operation on empty ring\n", __FUNCTION__));
		_kgem_submit(kgem);
	}

	return true;
}
3674
/* Copy nbox rectangles from src_bo to dst_bo using XY_SRC_COPY_BLT.
 *
 * Boxes are translated by (src_dx, src_dy) and (dst_dx, dst_dy).  When
 * the destination offset is zero the box can be stored into the batch
 * untranslated as a single 64-bit write, so four emission loops cover
 * {zero, non-zero dst offset} x {gen8+ 10-dword, older 8-dword} command
 * layouts.  Before emitting, a peephole deletes an immediately preceding
 * solid fill of exactly the first destination box, since this copy would
 * overwrite it.  Falls back to sna_tiling_blt_copy_boxes() when src and
 * dst cannot both fit in the aperture; returns false when blitting is
 * not possible at all.
 */
bool sna_blt_copy_boxes(struct sna *sna, uint8_t alu,
			struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
			struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
			int bpp, const BoxRec *box, int nbox)
{
	struct kgem *kgem = &sna->kgem;
	unsigned src_pitch, br13, cmd;

#if DEBUG_NO_BLT || NO_BLT_COPY_BOXES
	return false;
#endif

	DBG(("%s src=(%d, %d) -> (%d, %d) x %d, tiling=(%d, %d), pitch=(%d, %d)\n",
	     __FUNCTION__, src_dx, src_dy, dst_dx, dst_dy, nbox,
	     src_bo->tiling, dst_bo->tiling,
	     src_bo->pitch, dst_bo->pitch));
	assert(nbox);

	if (wedged(sna) || !kgem_bo_can_blt(kgem, src_bo) || !kgem_bo_can_blt(kgem, dst_bo)) {
		DBG(("%s: cannot blt to src? %d or dst? %d\n",
		     __FUNCTION__,
		     kgem_bo_can_blt(kgem, src_bo),
		     kgem_bo_can_blt(kgem, dst_bo)));
		return false;
	}

	cmd = XY_SRC_COPY_BLT_CMD;
	if (bpp == 32)
		cmd |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;

	src_pitch = src_bo->pitch;
	if (kgem->gen >= 040 && src_bo->tiling) {
		cmd |= BLT_SRC_TILED;
		src_pitch >>= 2;	/* tiled pitch in dwords */
	}
	assert(src_pitch <= MAXSHORT);

	br13 = dst_bo->pitch;
	if (kgem->gen >= 040 && dst_bo->tiling) {
		cmd |= BLT_DST_TILED;
		br13 >>= 2;		/* tiled pitch in dwords */
	}
	assert(br13 <= MAXSHORT);

	br13 |= copy_ROP[alu] << 16;
	br13 |= sna_br13_color_depth(bpp);

	/* Compare first box against a previous fill */
	if ((alu == GXcopy || alu == GXclear || alu == GXset) &&
	    kgem->reloc[kgem->nreloc-1].target_handle == dst_bo->target_handle) {
		if (kgem->gen >= 0100) {
			/* gen8+ fill is 7 dwords; if it targets exactly the
			 * first destination box, this copy will overwrite
			 * it, so drop it from the batch.
			 */
			if (kgem->nbatch >= 7 &&
			    kgem->batch[kgem->nbatch-7] == (XY_COLOR_BLT | (cmd & (BLT_DST_TILED | BLT_WRITE_ALPHA | BLT_WRITE_RGB)) | 5) &&
			    kgem->batch[kgem->nbatch-5] == ((uint32_t)(box->y1 + dst_dy) << 16 | (uint16_t)(box->x1 + dst_dx)) &&
			    kgem->batch[kgem->nbatch-4] == ((uint32_t)(box->y2 + dst_dy) << 16 | (uint16_t)(box->x2 + dst_dx))) {
				DBG(("%s: deleting last fill\n", __FUNCTION__));
				kgem->nbatch -= 7;
				kgem->nreloc--;
			}
		} else {
			/* Pre-gen8 fill is 6 dwords. */
			if (kgem->nbatch >= 6 &&
			    kgem->batch[kgem->nbatch-6] == (XY_COLOR_BLT | (cmd & (BLT_DST_TILED | BLT_WRITE_ALPHA | BLT_WRITE_RGB)) | 4) &&
			    kgem->batch[kgem->nbatch-4] == ((uint32_t)(box->y1 + dst_dy) << 16 | (uint16_t)(box->x1 + dst_dx)) &&
			    kgem->batch[kgem->nbatch-3] == ((uint32_t)(box->y2 + dst_dy) << 16 | (uint16_t)(box->x2 + dst_dx))) {
				DBG(("%s: deleting last fill\n", __FUNCTION__));
				kgem->nbatch -= 6;
				kgem->nreloc--;
			}
		}
	}

	kgem_set_mode(kgem, KGEM_BLT, dst_bo);
	if (!kgem_check_batch(kgem, 10) ||
	    !kgem_check_reloc(kgem, 2) ||
	    !kgem_check_many_bo_fenced(kgem, dst_bo, src_bo, NULL)) {
		kgem_submit(kgem);
		if (!kgem_check_many_bo_fenced(kgem, dst_bo, src_bo, NULL)) {
			DBG(("%s: not enough room in aperture, fallback to tiling copy\n", __FUNCTION__));
			return sna_tiling_blt_copy_boxes(sna, alu,
							 src_bo, src_dx, src_dy,
							 dst_bo, dst_dx, dst_dy,
							 bpp, box, nbox);
		}
		_kgem_set_mode(kgem, KGEM_BLT);
	}
	kgem_bcs_set_tiling(&sna->kgem, src_bo, dst_bo);

	if ((dst_dx | dst_dy) == 0) {
		/* No destination translation: the box can be copied into
		 * the batch verbatim; only the source offset needs adding.
		 */
		if (kgem->gen >= 0100) {
			/* 10-dword gen8+ layout with 64-bit relocations. */
			uint64_t hdr = (uint64_t)br13 << 32 | cmd | 8;
			do {
				int nbox_this_time, rem;

				/* Bound the chunk by both batch space and
				 * relocation table space (2 relocs per box).
				 */
				nbox_this_time = nbox;
				rem = kgem_batch_space(kgem);
				if (10*nbox_this_time > rem)
					nbox_this_time = rem / 10;
				if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc)
					nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc)/2;
				DBG(("%s: emitting %d boxes out of %d (batch space %d)\n",
				     __FUNCTION__, nbox_this_time, nbox, rem));
				assert(nbox_this_time > 0);
				nbox -= nbox_this_time;

				assert(sna->kgem.mode == KGEM_BLT);
				do {
					uint32_t *b = kgem->batch + kgem->nbatch;

					DBG(("  %s: box=(%d, %d)x(%d, %d)\n",
					     __FUNCTION__,
					     box->x1, box->y1,
					     box->x2 - box->x1, box->y2 - box->y1));

					assert(box->x1 + src_dx >= 0);
					assert(box->y1 + src_dy >= 0);
					assert(box->x1 + src_dx <= INT16_MAX);
					assert(box->y1 + src_dy <= INT16_MAX);

					assert(box->x1 >= 0);
					assert(box->y1 >= 0);

					*(uint64_t *)&b[0] = hdr;
					*(uint64_t *)&b[2] = *(const uint64_t *)box;
					*(uint64_t *)(b+4) =
						kgem_add_reloc64(kgem, kgem->nbatch + 4, dst_bo,
								 I915_GEM_DOMAIN_RENDER << 16 |
								 I915_GEM_DOMAIN_RENDER |
								 KGEM_RELOC_FENCED,
								 0);
					b[6] = add2(b[2], src_dx, src_dy);
					b[7] = src_pitch;
					*(uint64_t *)(b+8) =
						kgem_add_reloc64(kgem, kgem->nbatch + 8, src_bo,
								 I915_GEM_DOMAIN_RENDER << 16 |
								 KGEM_RELOC_FENCED,
								 0);
					kgem->nbatch += 10;
					assert(kgem->nbatch < kgem->surface);
					box++;
				} while (--nbox_this_time);

				if (!nbox)
					break;

				/* Batch full: flush and continue. */
				_kgem_submit(kgem);
				_kgem_set_mode(kgem, KGEM_BLT);
				kgem_bcs_set_tiling(&sna->kgem, src_bo, dst_bo);
			} while (1);
		} else {
			/* 8-dword pre-gen8 layout with 32-bit relocations. */
			uint64_t hdr = (uint64_t)br13 << 32 | cmd | 6;
			do {
				int nbox_this_time, rem;

				nbox_this_time = nbox;
				rem = kgem_batch_space(kgem);
				if (8*nbox_this_time > rem)
					nbox_this_time = rem / 8;
				if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc)
					nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc)/2;
				DBG(("%s: emitting %d boxes out of %d (batch space %d)\n",
				     __FUNCTION__, nbox_this_time, nbox, rem));
				assert(nbox_this_time > 0);
				nbox -= nbox_this_time;

				assert(sna->kgem.mode == KGEM_BLT);
				do {
					uint32_t *b = kgem->batch + kgem->nbatch;

					DBG(("  %s: box=(%d, %d)x(%d, %d)\n",
					     __FUNCTION__,
					     box->x1, box->y1,
					     box->x2 - box->x1, box->y2 - box->y1));

					assert(box->x1 + src_dx >= 0);
					assert(box->y1 + src_dy >= 0);
					assert(box->x1 + src_dx <= INT16_MAX);
					assert(box->y1 + src_dy <= INT16_MAX);

					assert(box->x1 >= 0);
					assert(box->y1 >= 0);

					*(uint64_t *)&b[0] = hdr;
					*(uint64_t *)&b[2] = *(const uint64_t *)box;
					b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, dst_bo,
							      I915_GEM_DOMAIN_RENDER << 16 |
							      I915_GEM_DOMAIN_RENDER |
							      KGEM_RELOC_FENCED,
							      0);
					b[5] = add2(b[2], src_dx, src_dy);
					b[6] = src_pitch;
					b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, src_bo,
							      I915_GEM_DOMAIN_RENDER << 16 |
							      KGEM_RELOC_FENCED,
							      0);
					kgem->nbatch += 8;
					assert(kgem->nbatch < kgem->surface);
					box++;
				} while (--nbox_this_time);

				if (!nbox)
					break;

				_kgem_submit(kgem);
				_kgem_set_mode(kgem, KGEM_BLT);
				kgem_bcs_set_tiling(&sna->kgem, src_bo, dst_bo);
			} while (1);
		}
	} else {
		/* Destination offset is non-zero: both corners must be
		 * translated per box before writing into the batch.
		 */
		if (kgem->gen >= 0100) {
			cmd |= 8;	/* gen8+ dword length */
			do {
				int nbox_this_time, rem;

				nbox_this_time = nbox;
				rem = kgem_batch_space(kgem);
				if (10*nbox_this_time > rem)
					nbox_this_time = rem / 10;
				if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc)
					nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc)/2;
				DBG(("%s: emitting %d boxes out of %d (batch space %d)\n",
				     __FUNCTION__, nbox_this_time, nbox, rem));
				assert(nbox_this_time > 0);
				nbox -= nbox_this_time;

				assert(sna->kgem.mode == KGEM_BLT);
				do {
					uint32_t *b = kgem->batch + kgem->nbatch;

					DBG(("  %s: box=(%d, %d)x(%d, %d)\n",
					     __FUNCTION__,
					     box->x1, box->y1,
					     box->x2 - box->x1, box->y2 - box->y1));

					assert(box->x1 + src_dx >= 0);
					assert(box->y1 + src_dy >= 0);

					assert(box->x1 + dst_dx >= 0);
					assert(box->y1 + dst_dy >= 0);

					b[0] = cmd;
					b[1] = br13;
					b[2] = ((box->y1 + dst_dy) << 16) | (box->x1 + dst_dx);
					b[3] = ((box->y2 + dst_dy) << 16) | (box->x2 + dst_dx);
					*(uint64_t *)(b+4) =
						kgem_add_reloc64(kgem, kgem->nbatch + 4, dst_bo,
								 I915_GEM_DOMAIN_RENDER << 16 |
								 I915_GEM_DOMAIN_RENDER |
								 KGEM_RELOC_FENCED,
								 0);
					b[6] = ((box->y1 + src_dy) << 16) | (box->x1 + src_dx);
					b[7] = src_pitch;
					*(uint64_t *)(b+8) =
						kgem_add_reloc64(kgem, kgem->nbatch + 8, src_bo,
								 I915_GEM_DOMAIN_RENDER << 16 |
								 KGEM_RELOC_FENCED,
								 0);
					kgem->nbatch += 10;
					assert(kgem->nbatch < kgem->surface);
					box++;
				} while (--nbox_this_time);

				if (!nbox)
					break;

				_kgem_submit(kgem);
				_kgem_set_mode(kgem, KGEM_BLT);
				kgem_bcs_set_tiling(&sna->kgem, src_bo, dst_bo);
			} while (1);
		} else {
			cmd |= 6;	/* pre-gen8 dword length */
			do {
				int nbox_this_time, rem;

				nbox_this_time = nbox;
				rem = kgem_batch_space(kgem);
				if (8*nbox_this_time > rem)
					nbox_this_time = rem / 8;
				if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc)
					nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc)/2;
				DBG(("%s: emitting %d boxes out of %d (batch space %d)\n",
				     __FUNCTION__, nbox_this_time, nbox, rem));
				assert(nbox_this_time > 0);
				nbox -= nbox_this_time;

				assert(sna->kgem.mode == KGEM_BLT);
				do {
					uint32_t *b = kgem->batch + kgem->nbatch;

					DBG(("  %s: box=(%d, %d)x(%d, %d)\n",
					     __FUNCTION__,
					     box->x1, box->y1,
					     box->x2 - box->x1, box->y2 - box->y1));

					assert(box->x1 + src_dx >= 0);
					assert(box->y1 + src_dy >= 0);

					assert(box->x1 + dst_dx >= 0);
					assert(box->y1 + dst_dy >= 0);

					b[0] = cmd;
					b[1] = br13;
					b[2] = ((box->y1 + dst_dy) << 16) | (box->x1 + dst_dx);
					b[3] = ((box->y2 + dst_dy) << 16) | (box->x2 + dst_dx);
					b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, dst_bo,
							      I915_GEM_DOMAIN_RENDER << 16 |
							      I915_GEM_DOMAIN_RENDER |
							      KGEM_RELOC_FENCED,
							      0);
					b[5] = ((box->y1 + src_dy) << 16) | (box->x1 + src_dx);
					b[6] = src_pitch;
					b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, src_bo,
							      I915_GEM_DOMAIN_RENDER << 16 |
							      KGEM_RELOC_FENCED,
							      0);
					kgem->nbatch += 8;
					assert(kgem->nbatch < kgem->surface);
					box++;
				} while (--nbox_this_time);

				if (!nbox)
					break;

				_kgem_submit(kgem);
				_kgem_set_mode(kgem, KGEM_BLT);
				kgem_bcs_set_tiling(&sna->kgem, src_bo, dst_bo);
			} while (1);
		}
	}

	if (kgem->nexec > 1 && __kgem_ring_empty(kgem)) {
		DBG(("%s: flushing BLT operation on empty ring\n", __FUNCTION__));
		_kgem_submit(kgem);
	} else if (kgem->gen >= 060 && src_bo == dst_bo && kgem_check_batch(kgem, 3)) {
		/* Trailing XY_SETUP_CLIP after a gen6+ self-copy — same
		 * workaround as gen6_blt_copy_done.  NOTE(review): intent
		 * appears to be serialising overlapping self-blits on the
		 * BLT ring; confirm against the gen6 workaround notes.
		 */
		uint32_t *b = kgem->batch + kgem->nbatch;
		assert(sna->kgem.mode == KGEM_BLT);
		b[0] = XY_SETUP_CLIP;
		b[1] = b[2] = 0;
		kgem->nbatch += 3;
		assert(kgem->nbatch < kgem->surface);
	}

	/* The copy invalidates any cached fill state. */
	sna->blt_state.fill_bo = 0;
	return true;
}
4019
sna_blt_copy_boxes__with_alpha(struct sna * sna,uint8_t alu,struct kgem_bo * src_bo,int16_t src_dx,int16_t src_dy,struct kgem_bo * dst_bo,int16_t dst_dx,int16_t dst_dy,int bpp,int alpha_fixup,const BoxRec * box,int nbox)4020 bool sna_blt_copy_boxes__with_alpha(struct sna *sna, uint8_t alu,
4021 struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
4022 struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
4023 int bpp, int alpha_fixup,
4024 const BoxRec *box, int nbox)
4025 {
4026 struct kgem *kgem = &sna->kgem;
4027 unsigned src_pitch, br13, cmd;
4028
4029 #if DEBUG_NO_BLT || NO_BLT_COPY_BOXES
4030 return false;
4031 #endif
4032
4033 DBG(("%s src=(%d, %d) -> (%d, %d) x %d, tiling=(%d, %d), pitch=(%d, %d)\n",
4034 __FUNCTION__, src_dx, src_dy, dst_dx, dst_dy, nbox,
4035 src_bo->tiling, dst_bo->tiling,
4036 src_bo->pitch, dst_bo->pitch));
4037
4038 if (wedged(sna) || !kgem_bo_can_blt(kgem, src_bo) || !kgem_bo_can_blt(kgem, dst_bo)) {
4039 DBG(("%s: cannot blt to src? %d or dst? %d\n",
4040 __FUNCTION__,
4041 kgem_bo_can_blt(kgem, src_bo),
4042 kgem_bo_can_blt(kgem, dst_bo)));
4043 return false;
4044 }
4045
4046 cmd = XY_FULL_MONO_PATTERN_BLT | (kgem->gen >= 0100 ? 12 : 10);
4047 src_pitch = src_bo->pitch;
4048 if (kgem->gen >= 040 && src_bo->tiling) {
4049 cmd |= BLT_SRC_TILED;
4050 src_pitch >>= 2;
4051 }
4052 assert(src_pitch <= MAXSHORT);
4053
4054 br13 = dst_bo->pitch;
4055 if (kgem->gen >= 040 && dst_bo->tiling) {
4056 cmd |= BLT_DST_TILED;
4057 br13 >>= 2;
4058 }
4059 assert(br13 <= MAXSHORT);
4060
4061 br13 |= copy_ROP[alu] << 16;
4062 br13 |= sna_br13_color_depth(bpp);
4063
4064 kgem_set_mode(kgem, KGEM_BLT, dst_bo);
4065 if (!kgem_check_many_bo_fenced(kgem, dst_bo, src_bo, NULL)) {
4066 DBG(("%s: cannot fit src+dst into aperture\n", __FUNCTION__));
4067 return false;
4068 }
4069
4070 /* Compare first box against a previous fill */
4071 if ((alu == GXcopy || alu == GXclear || alu == GXset) &&
4072 kgem->reloc[kgem->nreloc-1].target_handle == dst_bo->target_handle) {
4073 if (kgem->gen >= 0100) {
4074 if (kgem->nbatch >= 7 &&
4075 kgem->batch[kgem->nbatch-7] == (XY_COLOR_BLT | (cmd & (BLT_WRITE_ALPHA | BLT_WRITE_RGB)) | 5) &&
4076 kgem->batch[kgem->nbatch-5] == ((uint32_t)(box->y1 + dst_dy) << 16 | (uint16_t)(box->x1 + dst_dx)) &&
4077 kgem->batch[kgem->nbatch-4] == ((uint32_t)(box->y2 + dst_dy) << 16 | (uint16_t)(box->x2 + dst_dx))) {
4078 DBG(("%s: deleting last fill\n", __FUNCTION__));
4079 kgem->nbatch -= 7;
4080 kgem->nreloc--;
4081 }
4082 } else {
4083 if (kgem->nbatch >= 6 &&
4084 kgem->batch[kgem->nbatch-6] == (XY_COLOR_BLT | (cmd & (BLT_WRITE_ALPHA | BLT_WRITE_RGB)) | 4) &&
4085 kgem->batch[kgem->nbatch-4] == ((uint32_t)(box->y1 + dst_dy) << 16 | (uint16_t)(box->x1 + dst_dx)) &&
4086 kgem->batch[kgem->nbatch-3] == ((uint32_t)(box->y2 + dst_dy) << 16 | (uint16_t)(box->x2 + dst_dx))) {
4087 DBG(("%s: deleting last fill\n", __FUNCTION__));
4088 kgem->nbatch -= 6;
4089 kgem->nreloc--;
4090 }
4091 }
4092 }
4093
4094 while (nbox--) {
4095 uint32_t *b;
4096
4097 if (!kgem_check_batch(kgem, 14) ||
4098 !kgem_check_reloc(kgem, 2)) {
4099 _kgem_submit(kgem);
4100 _kgem_set_mode(kgem, KGEM_BLT);
4101 kgem_bcs_set_tiling(&sna->kgem, src_bo, dst_bo);
4102 }
4103
4104 assert(sna->kgem.mode == KGEM_BLT);
4105 b = kgem->batch + kgem->nbatch;
4106 b[0] = cmd;
4107 b[1] = br13;
4108 b[2] = (box->y1 + dst_dy) << 16 | (box->x1 + dst_dx);
4109 b[3] = (box->y2 + dst_dy) << 16 | (box->x2 + dst_dx);
4110 if (sna->kgem.gen >= 0100) {
4111 *(uint64_t *)(b+4) =
4112 kgem_add_reloc64(kgem, kgem->nbatch + 4, dst_bo,
4113 I915_GEM_DOMAIN_RENDER << 16 |
4114 I915_GEM_DOMAIN_RENDER |
4115 KGEM_RELOC_FENCED,
4116 0);
4117 b[6] = src_pitch;
4118 b[7] = (box->y1 + src_dy) << 16 | (box->x1 + src_dx);
4119 *(uint64_t *)(b+8) =
4120 kgem_add_reloc64(kgem, kgem->nbatch + 8, src_bo,
4121 I915_GEM_DOMAIN_RENDER << 16 |
4122 KGEM_RELOC_FENCED,
4123 0);
4124 b[10] = alpha_fixup;
4125 b[11] = alpha_fixup;
4126 b[12] = 0;
4127 b[13] = 0;
4128 kgem->nbatch += 14;
4129 } else {
4130 b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, dst_bo,
4131 I915_GEM_DOMAIN_RENDER << 16 |
4132 I915_GEM_DOMAIN_RENDER |
4133 KGEM_RELOC_FENCED,
4134 0);
4135 b[5] = src_pitch;
4136 b[6] = (box->y1 + src_dy) << 16 | (box->x1 + src_dx);
4137 b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, src_bo,
4138 I915_GEM_DOMAIN_RENDER << 16 |
4139 KGEM_RELOC_FENCED,
4140 0);
4141 b[8] = alpha_fixup;
4142 b[9] = alpha_fixup;
4143 b[10] = 0;
4144 b[11] = 0;
4145 kgem->nbatch += 12;
4146 }
4147 assert(kgem->nbatch < kgem->surface);
4148 box++;
4149 }
4150
4151 if (kgem->nexec > 1 && __kgem_ring_empty(kgem)) {
4152 DBG(("%s: flushing BLT operation on empty ring\n", __FUNCTION__));
4153 _kgem_submit(kgem);
4154 }
4155
4156 sna->blt_state.fill_bo = 0;
4157 return true;
4158 }
4159
/* Compute the bounding box enclosing an array of boxes.
 *
 * @param box     array of boxes; must contain at least one entry
 *                (the first box is read unconditionally)
 * @param n       number of boxes in the array (expected n >= 1)
 * @param extents receives the union (smallest enclosing box) of all
 *                n input boxes
 */
static void box_extents(const BoxRec *box, int n, BoxRec *extents)
{
	/* Seed the extents with the first box, then grow to cover the rest. */
	*extents = *box;

	/* Use an explicit "> 0" guard so that a bogus n <= 0 cannot
	 * underflow the counter and walk past the end of the array
	 * (a bare "while (--n)" would keep looping for n == 0).
	 */
	while (--n > 0) {
		box++;

		if (box->x1 < extents->x1)
			extents->x1 = box->x1;
		if (box->y1 < extents->y1)
			extents->y1 = box->y1;

		if (box->x2 > extents->x2)
			extents->x2 = box->x2;
		if (box->y2 > extents->y2)
			extents->y2 = box->y2;
	}
}
4176
sna_blt_copy_boxes_fallback(struct sna * sna,uint8_t alu,const DrawableRec * src,struct kgem_bo * src_bo,int16_t src_dx,int16_t src_dy,const DrawableRec * dst,struct kgem_bo * dst_bo,int16_t dst_dx,int16_t dst_dy,const BoxRec * box,int nbox)4177 bool sna_blt_copy_boxes_fallback(struct sna *sna, uint8_t alu,
4178 const DrawableRec *src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
4179 const DrawableRec *dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
4180 const BoxRec *box, int nbox)
4181 {
4182 struct kgem_bo *free_bo = NULL;
4183 bool ret;
4184
4185 DBG(("%s: alu=%d, n=%d\n", __FUNCTION__, alu, nbox));
4186
4187 if (!sna_blt_compare_depth(src, dst)) {
4188 DBG(("%s: mismatching depths %d -> %d\n",
4189 __FUNCTION__, src->depth, dst->depth));
4190 return false;
4191 }
4192
4193 if (src_bo == dst_bo) {
4194 DBG(("%s: dst == src\n", __FUNCTION__));
4195
4196 if (src_bo->tiling == I915_TILING_Y &&
4197 !sna->kgem.can_blt_y &&
4198 kgem_bo_blt_pitch_is_ok(&sna->kgem, src_bo)) {
4199 struct kgem_bo *bo;
4200
4201 DBG(("%s: src is Y-tiled\n", __FUNCTION__));
4202
4203 if (src->type != DRAWABLE_PIXMAP)
4204 return false;
4205
4206 assert(sna_pixmap((PixmapPtr)src)->gpu_bo == src_bo);
4207 bo = sna_pixmap_change_tiling((PixmapPtr)src, I915_TILING_X);
4208 if (bo == NULL) {
4209 BoxRec extents;
4210
4211 DBG(("%s: y-tiling conversion failed\n",
4212 __FUNCTION__));
4213
4214 box_extents(box, nbox, &extents);
4215 free_bo = kgem_create_2d(&sna->kgem,
4216 extents.x2 - extents.x1,
4217 extents.y2 - extents.y1,
4218 src->bitsPerPixel,
4219 I915_TILING_X, 0);
4220 if (free_bo == NULL) {
4221 DBG(("%s: fallback -- temp allocation failed\n",
4222 __FUNCTION__));
4223 return false;
4224 }
4225
4226 if (!sna_blt_copy_boxes(sna, GXcopy,
4227 src_bo, src_dx, src_dy,
4228 free_bo, -extents.x1, -extents.y1,
4229 src->bitsPerPixel,
4230 box, nbox)) {
4231 DBG(("%s: fallback -- temp copy failed\n",
4232 __FUNCTION__));
4233 kgem_bo_destroy(&sna->kgem, free_bo);
4234 return false;
4235 }
4236
4237 src_dx = -extents.x1;
4238 src_dy = -extents.y1;
4239 src_bo = free_bo;
4240 } else
4241 dst_bo = src_bo = bo;
4242 }
4243 } else {
4244 if (src_bo->tiling == I915_TILING_Y &&
4245 !sna->kgem.can_blt_y &&
4246 kgem_bo_blt_pitch_is_ok(&sna->kgem, src_bo)) {
4247 DBG(("%s: src is y-tiled\n", __FUNCTION__));
4248 if (src->type != DRAWABLE_PIXMAP)
4249 return false;
4250 assert(sna_pixmap((PixmapPtr)src)->gpu_bo == src_bo);
4251 src_bo = sna_pixmap_change_tiling((PixmapPtr)src, I915_TILING_X);
4252 if (src_bo == NULL) {
4253 DBG(("%s: fallback -- src y-tiling conversion failed\n",
4254 __FUNCTION__));
4255 return false;
4256 }
4257 }
4258
4259 if (dst_bo->tiling == I915_TILING_Y &&
4260 !sna->kgem.can_blt_y &&
4261 kgem_bo_blt_pitch_is_ok(&sna->kgem, dst_bo)) {
4262 DBG(("%s: dst is y-tiled\n", __FUNCTION__));
4263 if (dst->type != DRAWABLE_PIXMAP)
4264 return false;
4265 assert(sna_pixmap((PixmapPtr)dst)->gpu_bo == dst_bo);
4266 dst_bo = sna_pixmap_change_tiling((PixmapPtr)dst, I915_TILING_X);
4267 if (dst_bo == NULL) {
4268 DBG(("%s: fallback -- dst y-tiling conversion failed\n",
4269 __FUNCTION__));
4270 return false;
4271 }
4272 }
4273 }
4274
4275 ret = sna_blt_copy_boxes(sna, alu,
4276 src_bo, src_dx, src_dy,
4277 dst_bo, dst_dx, dst_dy,
4278 dst->bitsPerPixel,
4279 box, nbox);
4280
4281 if (free_bo)
4282 kgem_bo_destroy(&sna->kgem, free_bo);
4283
4284 return ret;
4285 }
4286