1 /*
2 * Copyright (c) 2007 David Turner
3 * Copyright (c) 2008 M Joonas Pihlaja
4 * Copyright (c) 2011 Intel Corporation
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the next
14 * paragraph) shall be included in all copies or substantial portions of the
15 * Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 * SOFTWARE.
24 *
25 * Authors:
26 * Chris Wilson <chris@chris-wilson.co.uk>
27 *
28 */
29
30 #ifdef HAVE_CONFIG_H
31 #include "config.h"
32 #endif
33
34 #include "sna.h"
35 #include "sna_render.h"
36 #include "sna_render_inline.h"
37 #include "sna_trapezoids.h"
38 #include "fb/fbpict.h"
39
40 #include <mipict.h>
41
42 #undef SAMPLES_X
43 #undef SAMPLES_Y
44
45 /* TODO: Emit unantialiased and MSAA triangles. */
46
47 #ifndef MAX
48 #define MAX(x,y) ((x) >= (y) ? (x) : (y))
49 #endif
50
51 #ifndef MIN
52 #define MIN(x,y) ((x) <= (y) ? (x) : (y))
53 #endif
54
55 typedef void (*span_func_t)(struct sna *sna,
56 struct sna_composite_spans_op *op,
57 pixman_region16_t *clip,
58 const BoxRec *box,
59 int coverage);
60
#if HAS_DEBUG_FULL
/* Debug-only sanity check: abort via FatalError() if @box lies outside the
 * pixmap's drawable bounds. Compiled to a no-op unless HAS_DEBUG_FULL. */
static void _assert_pixmap_contains_box(PixmapPtr pixmap, BoxPtr box, const char *function)
{
	if (box->x1 < 0 || box->y1 < 0 ||
	    box->x2 > pixmap->drawable.width ||
	    box->y2 > pixmap->drawable.height)
	{
		FatalError("%s: damage box is beyond the pixmap: box=(%d, %d), (%d, %d), pixmap=(%d, %d)\n",
			   function,
			   box->x1, box->y1, box->x2, box->y2,
			   pixmap->drawable.width,
			   pixmap->drawable.height);
	}
}
#define assert_pixmap_contains_box(p, b) _assert_pixmap_contains_box(p, b, __FUNCTION__)
#else
#define assert_pixmap_contains_box(p, b)
#endif
79
/* Translate @region into destination pixmap space and accumulate it into
 * the operation's damage tracking. No-op when damage is disabled. */
static void apply_damage(struct sna_composite_op *op, RegionPtr region)
{
	DBG(("%s: damage=%p, region=%dx[(%d, %d), (%d, %d)]\n",
	     __FUNCTION__, op->damage,
	     region_num_rects(region),
	     region->extents.x1, region->extents.y1,
	     region->extents.x2, region->extents.y2));

	if (op->damage != NULL) {
		RegionTranslate(region, op->dst.x, op->dst.y);
		assert_pixmap_contains_box(op->dst.pixmap, RegionExtents(region));
		sna_damage_add(op->damage, region);
	}
}
96
_apply_damage_box(struct sna_composite_op * op,const BoxRec * box)97 static void _apply_damage_box(struct sna_composite_op *op, const BoxRec *box)
98 {
99 BoxRec r;
100
101 r.x1 = box->x1 + op->dst.x;
102 r.x2 = box->x2 + op->dst.x;
103 r.y1 = box->y1 + op->dst.y;
104 r.y2 = box->y2 + op->dst.y;
105
106 assert_pixmap_contains_box(op->dst.pixmap, &r);
107 sna_damage_add_box(op->damage, &r);
108 }
109
apply_damage_box(struct sna_composite_op * op,const BoxRec * box)110 inline static void apply_damage_box(struct sna_composite_op *op, const BoxRec *box)
111 {
112 if (op->damage)
113 _apply_damage_box(op, box);
114 }
115
/* Split a grid x-coordinate into whole-pixel (i) and subpixel (f) parts. */
#define FAST_SAMPLES_X_TO_INT_FRAC(x, i, f) \
	_GRID_TO_INT_FRAC_shift(x, i, f, FAST_SAMPLES_shift)

#define FAST_SAMPLES_INT(x) ((x) >> (FAST_SAMPLES_shift))
#define FAST_SAMPLES_FRAC(x) ((x) & (FAST_SAMPLES_mask))

#define _GRID_TO_INT_FRAC_shift(t, i, f, b) do { \
	(f) = FAST_SAMPLES_FRAC(t); \
	(i) = FAST_SAMPLES_INT(t); \
} while (0)

#define FAST_SAMPLES_XY (FAST_SAMPLES_X*FAST_SAMPLES_Y) /* Unit area on the grid. */
/* Convert an accumulated coverage area into a normalised alpha value. */
#define AREA_TO_ALPHA(c) ((c) / (float)FAST_SAMPLES_XY)
129
/* Quotient/remainder pair: edge x-intercepts are stepped with an exact
 * DDA (quo plus rem/dy) so no rounding error accumulates. */
struct quorem {
	int32_t quo;
	int64_t rem;
};

/* A polygon edge, linked into either a y-bucket or the active list. */
struct edge {
	struct edge *next, *prev;

	int dir;		/* winding contribution: +1 (left) or -1 (right) */
	int cell;		/* x.quo rounded to the nearest subpixel cell */
	int height_left;	/* remaining subsample rows spanned by the edge */

	struct quorem x;	/* current x-intercept on the scan line */

	/* Advance of the current x when moving down a subsample line. */
	struct quorem dxdy;
	int64_t dy;		/* DDA denominator; 0 for vertical edges */

	/* The clipped y of the top of the edge. */
	int ytop;

	/* y2-y1 after orienting the edge downwards. */
};
153
/* Number of subsample rows per y-bucket. Must be SAMPLES_Y. */
#define EDGE_Y_BUCKET_HEIGHT FAST_SAMPLES_Y
#define EDGE_Y_BUCKET_INDEX(y, ymin) (((y) - (ymin))/EDGE_Y_BUCKET_HEIGHT)

/* A collection of sorted and vertically clipped edges of the polygon.
 * Edges are moved from the polygon to an active list while scan
 * converting. */
struct polygon {
	/* The vertical clip extents. */
	int ymin, ymax;

	/* Array of edges all starting in the same bucket. An edge is put
	 * into bucket EDGE_BUCKET_INDEX(edge->ytop, polygon->ymin) when
	 * it is added to the polygon. */
	struct edge **y_buckets;
	struct edge *y_buckets_embedded[64];	/* inline storage, avoids malloc */

	struct edge edges_embedded[32];		/* inline storage, avoids malloc */
	struct edge *edges;
	int num_edges;
};
175
176 /* A cell records the effect on pixel coverage of polygon edges
177 * passing through a pixel. It contains two accumulators of pixel
178 * coverage.
179 *
180 * Consider the effects of a polygon edge on the coverage of a pixel
181 * it intersects and that of the following one. The coverage of the
182 * following pixel is the height of the edge multiplied by the width
183 * of the pixel, and the coverage of the pixel itself is the area of
184 * the trapezoid formed by the edge and the right side of the pixel.
185 *
186 * +-----------------------+-----------------------+
187 * | | |
188 * | | |
189 * |_______________________|_______________________|
190 * | \...................|.......................|\
191 * | \..................|.......................| |
192 * | \.................|.......................| |
193 * | \....covered.....|.......................| |
194 * | \....area.......|.......................| } covered height
195 * | \..............|.......................| |
196 * |uncovered\.............|.......................| |
197 * | area \............|.......................| |
198 * |___________\...........|.......................|/
199 * | | |
200 * | | |
201 * | | |
202 * +-----------------------+-----------------------+
203 *
204 * Since the coverage of the following pixel will always be a multiple
205 * of the width of the pixel, we can store the height of the covered
206 * area instead. The coverage of the pixel itself is the total
207 * coverage minus the area of the uncovered area to the left of the
208 * edge. As it's faster to compute the uncovered area we only store
209 * that and subtract it from the total coverage later when forming
210 * spans to blit.
211 *
212 * The heights and areas are signed, with left edges of the polygon
213 * having positive sign and right edges having negative sign. When
214 * two edges intersect they swap their left/rightness so their
215 * contribution above and below the intersection point must be
216 * computed separately. */
struct cell {
	struct cell *next;
	int x;
	int16_t uncovered_area;	/* subpixel area left of the edge (see above) */
	int16_t covered_height;	/* signed sum of edge heights through this pixel */
};

/* A cell list represents the scan line sparsely as cells ordered by
 * ascending x. It is geared towards scanning the cells in order
 * using an internal cursor. */
struct cell_list {
	struct cell *cursor;	/* current position for cell_list_find() */

	/* Points to the left-most cell in the scan line. */
	struct cell head, tail;

	int16_t x1, x2;		/* horizontal extents of the scan line */
	int16_t count, size;	/* cells in use / capacity of the pool */
	struct cell *cells;
	struct cell embedded[256];	/* inline pool, avoids malloc */
};

/* The active list contains edges in the current scan line ordered by
 * the x-coordinate of the intercept of the edge and the scan line. */
struct active_list {
	/* Leftmost edge on the current scan line. */
	struct edge head, tail;
};

/* Complete scan-converter state: pending edges, the active edge list,
 * and the coverage accumulator for the current pixel row. */
struct tor {
	struct polygon polygon[1];
	struct active_list active[1];
	struct cell_list coverages[1];

	BoxRec extents;		/* pixel-space bounds of the rasterisation */
};
253
/* Rewinds the cell list's cursor to the beginning. After rewinding
 * we're good to cell_list_find() the cell any x coordinate. */
inline static void
cell_list_rewind(struct cell_list *cells)
{
	cells->cursor = &cells->head;
}
263
264 static bool
cell_list_init(struct cell_list * cells,int x1,int x2)265 cell_list_init(struct cell_list *cells, int x1, int x2)
266 {
267 cells->tail.next = NULL;
268 cells->tail.x = INT_MAX;
269 cells->head.x = INT_MIN;
270 cells->head.next = &cells->tail;
271 cells->head.covered_height = 0;
272 cell_list_rewind(cells);
273 cells->count = 0;
274 cells->x1 = x1;
275 cells->x2 = x2;
276 cells->size = x2 - x1 + 1;
277 cells->cells = cells->embedded;
278 if (cells->size > ARRAY_SIZE(cells->embedded))
279 cells->cells = malloc(cells->size * sizeof(struct cell));
280 return cells->cells != NULL;
281 }
282
283 static void
cell_list_fini(struct cell_list * cells)284 cell_list_fini(struct cell_list *cells)
285 {
286 if (cells->cells != cells->embedded)
287 free(cells->cells);
288 }
289
290 inline static void
cell_list_reset(struct cell_list * cells)291 cell_list_reset(struct cell_list *cells)
292 {
293 cell_list_rewind(cells);
294 cells->head.next = &cells->tail;
295 cells->head.covered_height = 0;
296 cells->count = 0;
297 }
298
299 inline static struct cell *
cell_list_alloc(struct cell_list * cells,struct cell * tail,int x)300 cell_list_alloc(struct cell_list *cells,
301 struct cell *tail,
302 int x)
303 {
304 struct cell *cell;
305
306 assert(cells->count < cells->size);
307 cell = cells->cells + cells->count++;
308 cell->next = tail->next;
309 tail->next = cell;
310
311 cell->x = x;
312 cell->covered_height = 0;
313 cell->uncovered_area = 0;
314 return cell;
315 }
316
/* Find a cell at the given x-coordinate. Returns %NULL if a new cell
 * needed to be allocated but couldn't be. Cells must be found with
 * non-decreasing x-coordinate until the cell list is rewound using
 * cell_list_rewind(). Ownership of the returned cell is retained by
 * the cell list. */
inline static struct cell *
cell_list_find(struct cell_list *cells, int x)
{
	struct cell *tail;

	/* Coordinates outside [x1, x2) collapse into the sentinels. */
	if (x >= cells->x2)
		return &cells->tail;

	if (x < cells->x1)
		return &cells->head;

	tail = cells->cursor;
	if (tail->x == x)
		return tail;

	/* Walk forward from the cursor; manually unrolled 3x. */
	do {
		if (tail->next->x > x)
			break;

		tail = tail->next;
		if (tail->next->x > x)
			break;

		tail = tail->next;
		if (tail->next->x > x)
			break;

		tail = tail->next;
	} while (1);

	/* No cell at exactly @x yet: insert one after @tail. */
	if (tail->x != x)
		tail = cell_list_alloc(cells, tail, x);

	return cells->cursor = tail;
}
357
/* Add a subpixel span covering [x1, x2) to the coverage cells. */
inline static void
cell_list_add_subspan(struct cell_list *cells, int x1, int x2)
{
	struct cell *cell;
	int ix1, fx1;	/* whole pixel and subpixel fraction of x1 */
	int ix2, fx2;	/* whole pixel and subpixel fraction of x2 */

	if (x1 == x2)
		return;

	FAST_SAMPLES_X_TO_INT_FRAC(x1, ix1, fx1);
	FAST_SAMPLES_X_TO_INT_FRAC(x2, ix2, fx2);

	__DBG(("%s: x1=%d (%d+%d), x2=%d (%d+%d)\n", __FUNCTION__,
	       x1, ix1, fx1, x2, ix2, fx2));

	cell = cell_list_find(cells, ix1);
	if (ix1 != ix2) {
		/* Entry pixel: one subrow of height, minus the partial
		 * area left of the span start; exit pixel: the inverse. */
		cell->uncovered_area += fx1;
		++cell->covered_height;

		cell = cell_list_find(cells, ix2);
		cell->uncovered_area -= fx2;
		--cell->covered_height;
	} else
		/* Span starts and ends within a single pixel. */
		cell->uncovered_area += (fx1-fx2);
}
386
/* Add a full-pixel-row span covering [x1, x2): equivalent to
 * FAST_SAMPLES_Y stacked calls to cell_list_add_subspan(). */
inline static void
cell_list_add_span(struct cell_list *cells, int x1, int x2)
{
	struct cell *cell;
	int ix1, fx1;	/* whole pixel and subpixel fraction of x1 */
	int ix2, fx2;	/* whole pixel and subpixel fraction of x2 */

	FAST_SAMPLES_X_TO_INT_FRAC(x1, ix1, fx1);
	FAST_SAMPLES_X_TO_INT_FRAC(x2, ix2, fx2);

	__DBG(("%s: x1=%d (%d+%d), x2=%d (%d+%d)\n", __FUNCTION__,
	       x1, ix1, fx1, x2, ix2, fx2));

	cell = cell_list_find(cells, ix1);
	if (ix1 != ix2) {
		cell->uncovered_area += fx1*FAST_SAMPLES_Y;
		cell->covered_height += FAST_SAMPLES_Y;

		cell = cell_list_find(cells, ix2);
		cell->uncovered_area -= fx2*FAST_SAMPLES_Y;
		cell->covered_height -= FAST_SAMPLES_Y;
	} else
		cell->uncovered_area += (fx1-fx2)*FAST_SAMPLES_Y;
}
411
412 static void
polygon_fini(struct polygon * polygon)413 polygon_fini(struct polygon *polygon)
414 {
415 if (polygon->y_buckets != polygon->y_buckets_embedded)
416 free(polygon->y_buckets);
417
418 if (polygon->edges != polygon->edges_embedded)
419 free(polygon->edges);
420 }
421
/* Initialise the polygon for up to @num_edges edges spanning grid rows
 * [ymin, ymax): size the edge pool and the per-bucket table, preferring
 * the embedded arrays and falling back to the heap for large inputs.
 * Returns false on arithmetic overflow or allocation failure. */
static bool
polygon_init(struct polygon *polygon, int num_edges, int ymin, int ymax)
{
	unsigned num_buckets = EDGE_Y_BUCKET_INDEX(ymax-1, ymin) + 1;

	/* Reject heights that would overflow the bucket index math. */
	if (unlikely(ymax - ymin > 0x7FFFFFFFU - EDGE_Y_BUCKET_HEIGHT))
		return false;

	polygon->edges = polygon->edges_embedded;
	polygon->y_buckets = polygon->y_buckets_embedded;

	polygon->num_edges = 0;
	if (num_edges > (int)ARRAY_SIZE(polygon->edges_embedded)) {
		polygon->edges = malloc(sizeof(struct edge)*num_edges);
		if (unlikely(NULL == polygon->edges))
			goto bail_no_mem;
	}

	/* One extra slot is reserved for the end-of-table sentinel. */
	if (num_buckets >= ARRAY_SIZE(polygon->y_buckets_embedded)) {
		polygon->y_buckets = malloc((1+num_buckets)*sizeof(struct edge *));
		if (unlikely(NULL == polygon->y_buckets))
			goto bail_no_mem;
	}
	memset(polygon->y_buckets, 0, num_buckets * sizeof(struct edge *));
	polygon->y_buckets[num_buckets] = (void *)-1;	/* end sentinel */

	polygon->ymin = ymin;
	polygon->ymax = ymax;
	return true;

bail_no_mem:
	polygon_fini(polygon);
	return false;
}
456
457 static void
_polygon_insert_edge_into_its_y_bucket(struct polygon * polygon,struct edge * e)458 _polygon_insert_edge_into_its_y_bucket(struct polygon *polygon, struct edge *e)
459 {
460 unsigned ix = EDGE_Y_BUCKET_INDEX(e->ytop, polygon->ymin);
461 struct edge **ptail = &polygon->y_buckets[ix];
462 assert(e->ytop < polygon->ymax);
463 e->next = *ptail;
464 *ptail = e;
465 }
466
/* Add one side of a trapezoid to the polygon. The edge is clipped
 * vertically to the trapezoid's top/bottom (offset by @dy pixels) and
 * an exact DDA is set up for its x-intercept on successive subsample
 * rows. @dir is the winding contribution: +1 (left) or -1 (right). */
inline static void
polygon_add_edge(struct polygon *polygon,
		 const xTrapezoid *t,
		 const xLineFixed *edge,
		 int dir, int dx, int dy)
{
	struct edge *e = &polygon->edges[polygon->num_edges];
	int ytop, ybot;

	assert(t->bottom > t->top);
	assert(edge->p2.y > edge->p1.y);

	e->dir = dir;

	/* Clip to the polygon's vertical extents (grid coordinates). */
	ytop = pixman_fixed_to_fast(t->top) + dy;
	if (ytop < polygon->ymin)
		ytop = polygon->ymin;

	ybot = pixman_fixed_to_fast(t->bottom) + dy;
	if (ybot > polygon->ymax)
		ybot = polygon->ymax;

	e->ytop = ytop;
	e->height_left = ybot - ytop;
	if (e->height_left <= 0)
		return;		/* fully clipped away */

	if (pixman_fixed_to_fast(edge->p1.x) == pixman_fixed_to_fast(edge->p2.x)) {
		/* Vertical edge: constant x, no DDA needed. */
		e->cell = e->x.quo = pixman_fixed_to_fast(edge->p1.x) + dx;
		e->x.rem = 0;
		e->dxdy.quo = 0;
		e->dxdy.rem = 0;
		e->dy = 0;
	} else {
		int64_t Ex, Ey, tmp;

		/* dxdy = Ex/Ey per subsample row, kept exact as quo+rem. */
		Ex = ((int64_t)edge->p2.x - edge->p1.x) * FAST_SAMPLES_X;
		Ey = ((int64_t)edge->p2.y - edge->p1.y) * FAST_SAMPLES_Y * (2 << 16);
		assert(Ey > 0);

		e->dxdy.quo = Ex * (2 << 16) / Ey;
		e->dxdy.rem = Ex * (2 << 16) % Ey;

		/* Evaluate x at the vertical centre of the first subrow. */
		tmp = (int64_t)(2 * (ytop - dy) + 1) << 16;
		tmp -= (int64_t)edge->p1.y * FAST_SAMPLES_Y * 2;
		tmp *= Ex;
		e->x.quo = tmp / Ey;
		e->x.rem = tmp % Ey;

		tmp = (int64_t)edge->p1.x * FAST_SAMPLES_X;
		e->x.quo += tmp / (1 << 16) + dx;
		tmp &= (1 << 16) - 1;
		if (tmp) {
			if (Ey < INT64_MAX >> 16)
				tmp = (tmp * Ey) / (1 << 16);
			else /* Handle overflow by losing precision */
				tmp = tmp * (Ey / (1 << 16));
			e->x.rem += tmp;
		}

		/* Normalise the remainder into [0, Ey). */
		if (e->x.rem < 0) {
			--e->x.quo;
			e->x.rem += Ey;
		} else if (e->x.rem >= Ey) {
			++e->x.quo;
			e->x.rem -= Ey;
		}
		assert(e->x.rem >= 0 && e->x.rem < Ey);

		e->cell = e->x.quo + (e->x.rem >= Ey/2);
		e->dy = Ey;
	}

	_polygon_insert_edge_into_its_y_bucket(polygon, e);
	polygon->num_edges++;
}
543
/* Add the line p1-p2 (pixman fixed point, offset by dx/dy pixels) to
 * the polygon as a single edge: orient it downwards (recording the
 * original direction in dir), clip it vertically, and set up the exact
 * DDA for its x-intercept. A new edge that exactly cancels the
 * previously added one (a degenerate triangle in a tristrip) removes
 * both instead. */
inline static void
polygon_add_line(struct polygon *polygon,
		 const xPointFixed *p1,
		 const xPointFixed *p2,
		 int dx, int dy)
{
	struct edge *e = &polygon->edges[polygon->num_edges];
	int top, bot;

	if (p1->y == p2->y)
		return;		/* horizontal: no winding contribution */

	__DBG(("%s: line=(%d, %d), (%d, %d)\n",
	       __FUNCTION__, (int)p1->x, (int)p1->y, (int)p2->x, (int)p2->y));

	/* Orient downwards, remembering the winding direction. */
	e->dir = 1;
	if (p2->y < p1->y) {
		const xPointFixed *t;

		e->dir = -1;

		t = p1;
		p1 = p2;
		p2 = t;
	}

	/* Clip to the polygon's vertical extents (grid coordinates). */
	top = pixman_fixed_to_fast(p1->y) + dy;
	if (top < polygon->ymin)
		top = polygon->ymin;

	bot = pixman_fixed_to_fast(p2->y) + dy;
	if (bot > polygon->ymax)
		bot = polygon->ymax;

	if (bot <= top)
		return;		/* fully clipped away */

	e->ytop = top;
	e->height_left = bot - top;
	if (e->height_left <= 0)
		return;

	__DBG(("%s: edge height=%d\n", __FUNCTION__, e->dir * e->height_left));

	if (pixman_fixed_to_fast(p1->x) == pixman_fixed_to_fast(p2->x)) {
		/* Vertical edge: constant x, no DDA needed. */
		e->cell = e->x.quo = pixman_fixed_to_fast(p1->x) + dx;
		e->x.rem = 0;
		e->dxdy.quo = 0;
		e->dxdy.rem = 0;
		e->dy = 0;
	} else {
		int64_t Ex, Ey, tmp;

		/* dxdy = Ex/Ey per subsample row, kept exact as quo+rem;
		 * x is evaluated at the centre of the first subrow. */
		Ex = ((int64_t)p2->x - p1->x) * FAST_SAMPLES_X;
		Ey = ((int64_t)p2->y - p1->y) * FAST_SAMPLES_Y * (2 << 16);

		e->dxdy.quo = Ex * (2 << 16) / Ey;
		e->dxdy.rem = Ex * (2 << 16) % Ey;

		tmp = (int64_t)(2 * (top - dy) + 1) << 16;
		tmp -= (int64_t)p1->y * FAST_SAMPLES_Y * 2;
		tmp *= Ex;
		e->x.quo = tmp / Ey;
		e->x.rem = tmp % Ey;

		tmp = (int64_t)p1->x * FAST_SAMPLES_X;
		e->x.quo += tmp / (1 << 16) + dx;
		e->x.rem += ((tmp & ((1 << 16) - 1)) * Ey) / (1 << 16);

		/* Normalise the remainder into [0, Ey). */
		if (e->x.rem < 0) {
			--e->x.quo;
			e->x.rem += Ey;
		} else if (e->x.rem >= Ey) {
			++e->x.quo;
			e->x.rem -= Ey;
		}

		e->cell = e->x.quo + (e->x.rem >= Ey/2);
		e->dy = Ey;
	}

	if (polygon->num_edges > 0) {
		struct edge *prev = &polygon->edges[polygon->num_edges-1];
		/* detect degenerate triangles inserted into tristrips */
		if (e->dir == -prev->dir &&
		    e->ytop == prev->ytop &&
		    e->height_left == prev->height_left &&
		    e->x.quo == prev->x.quo &&
		    e->x.rem == prev->x.rem &&
		    e->dxdy.quo == prev->dxdy.quo &&
		    e->dxdy.rem == prev->dxdy.rem) {
			/* NOTE(review): this pops prev off the front of its
			 * y-bucket; prev was the most recent insertion, so
			 * it is still the bucket head. */
			unsigned ix = EDGE_Y_BUCKET_INDEX(e->ytop,
							  polygon->ymin);
			polygon->y_buckets[ix] = prev->next;
			polygon->num_edges--;
			return;
		}
	}

	_polygon_insert_edge_into_its_y_bucket(polygon, e);
	polygon->num_edges++;
}
646
647 static void
active_list_reset(struct active_list * active)648 active_list_reset(struct active_list *active)
649 {
650 active->head.height_left = INT_MAX;
651 active->head.cell = INT_MIN;
652 active->head.dy = 0;
653 active->head.prev = NULL;
654 active->head.next = &active->tail;
655 active->tail.prev = &active->head;
656 active->tail.next = NULL;
657 active->tail.cell = INT_MAX;
658 active->tail.height_left = INT_MAX;
659 active->tail.dy = 0;
660 }
661
/* Merge two edge lists, each already sorted by ascending cell, into a
 * single sorted list. @head_a must be non-NULL; @head_b may be NULL.
 * prev pointers are maintained throughout so the merged list can be
 * edited in place while scanning. */
static struct edge *
merge_sorted_edges(struct edge *head_a, struct edge *head_b)
{
	struct edge *head, **next, *prev;
	int32_t x;

	if (head_b == NULL)
		return head_a;

	prev = head_a->prev;
	next = &head;
	if (head_a->cell <= head_b->cell) {
		head = head_a;
	} else {
		head = head_b;
		head_b->prev = prev;
		goto start_with_b;
	}

	do {
		/* Consume from list a while its head sorts before b's... */
		x = head_b->cell;
		while (head_a != NULL && head_a->cell <= x) {
			prev = head_a;
			next = &head_a->next;
			head_a = head_a->next;
		}

		head_b->prev = prev;
		*next = head_b;
		if (head_a == NULL)
			return head;

start_with_b:
		/* ...then from list b while its head sorts before a's. */
		x = head_a->cell;
		while (head_b != NULL && head_b->cell <= x) {
			prev = head_b;
			next = &head_b->next;
			head_b = head_b->next;
		}

		head_a->prev = prev;
		*next = head_a;
		if (head_b == NULL)
			return head;
	} while (1);
}
708
/* Bottom-up merge sort of a next-linked edge list keyed on cell.
 * Sorts the first 2^(level+1) edges of @list into *head_out and
 * returns the unsorted remainder (NULL once the list is consumed).
 * Pass level=UINT_MAX to sort an entire list in one call. */
static struct edge *
sort_edges(struct edge *list,
	   unsigned int level,
	   struct edge **head_out)
{
	struct edge *head_other, *remaining;
	unsigned int i;

	head_other = list->next;
	if (head_other == NULL) {
		*head_out = list;
		return NULL;
	}

	/* Sort the leading pair by hand. */
	remaining = head_other->next;
	if (list->cell <= head_other->cell) {
		*head_out = list;
		head_other->next = NULL;
	} else {
		*head_out = head_other;
		head_other->prev = list->prev;
		head_other->next = list;
		list->prev = head_other;
		list->next = NULL;
	}

	/* Fold in successively larger sorted runs from the remainder. */
	for (i = 0; i < level && remaining; i++) {
		remaining = sort_edges(remaining, i, &head_other);
		*head_out = merge_sorted_edges(*head_out, head_other);
	}

	return remaining;
}
742
filter(struct edge * edges)743 static struct edge *filter(struct edge *edges)
744 {
745 struct edge *e;
746
747 e = edges;
748 while (e->next) {
749 struct edge *n = e->next;
750 if (e->dir == -n->dir &&
751 e->height_left == n->height_left &&
752 &e->x.quo == &n->x.quo &&
753 &e->x.rem == &n->x.rem &&
754 &e->dxdy.quo == &n->dxdy.quo &&
755 &e->dxdy.rem == &n->dxdy.rem) {
756 if (e->prev)
757 e->prev->next = n->next;
758 else
759 edges = n->next;
760 if (n->next)
761 n->next->prev = e->prev;
762 else
763 break;
764 e = n->next;
765 } else
766 e = n;
767 }
768
769 return edges;
770 }
771
/* Sort a freshly-started batch of edges, drop degenerate pairs, and
 * merge the result into the already-sorted list @head. */
static struct edge *
merge_unsorted_edges(struct edge *head, struct edge *unsorted)
{
	sort_edges(unsorted, UINT_MAX, &unsorted);
	return merge_sorted_edges(head, filter(unsorted));
}
778
779 /* Test if the edges on the active list can be safely advanced by a
780 * full row without intersections or any edges ending. */
781 inline static int
can_full_step(struct active_list * active)782 can_full_step(struct active_list *active)
783 {
784 const struct edge *e;
785 int min_height = INT_MAX;
786
787 assert(active->head.next != &active->tail);
788 for (e = active->head.next; &active->tail != e; e = e->next) {
789 assert(e->height_left > 0);
790
791 if (e->dy != 0)
792 return 0;
793
794 if (e->height_left < min_height) {
795 min_height = e->height_left;
796 if (min_height < FAST_SAMPLES_Y)
797 return 0;
798 }
799 }
800
801 return min_height;
802 }
803
/* Insert freshly-started edges into the active list, keeping it sorted
 * by cell. */
inline static void
merge_edges(struct active_list *active, struct edge *edges)
{
	active->head.next = merge_unsorted_edges(active->head.next, edges);
}
809
810 inline static int
fill_buckets(struct active_list * active,struct edge * edge,struct edge ** buckets)811 fill_buckets(struct active_list *active,
812 struct edge *edge,
813 struct edge **buckets)
814 {
815 int ymax = 0;
816
817 while (edge) {
818 int y = edge->ytop & (FAST_SAMPLES_Y-1);
819 struct edge *next = edge->next;
820 struct edge **b = &buckets[y];
821 __DBG(("%s: ytop %d -> bucket %d\n", __FUNCTION__,
822 edge->ytop, y));
823 if (*b)
824 (*b)->prev = edge;
825 edge->next = *b;
826 edge->prev = NULL;
827 *b = edge;
828 edge = next;
829 if (y > ymax)
830 ymax = y;
831 }
832
833 return ymax;
834 }
835
/* Rasterise a single subsample row: sweep the active list accumulating
 * the nonzero winding count, emitting a subspan for each interval where
 * the winding is nonzero, then step every surviving edge down to the
 * next subrow (re-sorting in place if an edge crossed its neighbour). */
inline static void
nonzero_subrow(struct active_list *active, struct cell_list *coverages)
{
	struct edge *edge = active->head.next;
	int prev_x = INT_MIN;
	int winding = 0, xstart = edge->cell;

	cell_list_rewind(coverages);

	while (&active->tail != edge) {
		struct edge *next = edge->next;

		winding += edge->dir;
		if (0 == winding && edge->next->cell != edge->cell) {
			/* Winding returned to zero: emit the interval. */
			cell_list_add_subspan(coverages, xstart, edge->cell);
			xstart = edge->next->cell;
		}

		assert(edge->height_left > 0);
		if (--edge->height_left) {
			if (edge->dy) {
				/* DDA step: advance x by dxdy, carrying
				 * the remainder to stay exact. */
				edge->x.quo += edge->dxdy.quo;
				edge->x.rem += edge->dxdy.rem;
				if (edge->x.rem < 0) {
					--edge->x.quo;
					edge->x.rem += edge->dy;
				} else if (edge->x.rem >= edge->dy) {
					++edge->x.quo;
					edge->x.rem -= edge->dy;
				}
				edge->cell = edge->x.quo + (edge->x.rem >= edge->dy/2);
			}

			if (edge->cell < prev_x) {
				/* Edge moved left past a neighbour:
				 * re-insert to keep the list sorted. */
				struct edge *pos = edge->prev;
				pos->next = next;
				next->prev = pos;
				do
					pos = pos->prev;
				while (edge->cell < pos->cell);
				pos->next->prev = edge;
				edge->next = pos->next;
				edge->prev = pos;
				pos->next = edge;
			} else
				prev_x = edge->cell;
		} else {
			/* Edge has ended: unlink it from the active list. */
			edge->prev->next = next;
			next->prev = edge->prev;
		}

		edge = next;
	}
}
890
/* Fast path for one whole pixel row when every active edge is vertical
 * (see can_full_step()): pair left and right edges by winding count,
 * add a full-height span per covered interval, and retire edges whose
 * remaining height reaches zero. */
static void
nonzero_row(struct active_list *active, struct cell_list *coverages)
{
	struct edge *left = active->head.next;

	while (&active->tail != left) {
		struct edge *right;
		int winding = left->dir;

		left->height_left -= FAST_SAMPLES_Y;
		assert(left->height_left >= 0);
		if (!left->height_left) {
			left->prev->next = left->next;
			left->next->prev = left->prev;
		}

		/* Scan right until the winding count returns to zero. */
		right = left->next;
		do {
			right->height_left -= FAST_SAMPLES_Y;
			assert(right->height_left >= 0);
			if (!right->height_left) {
				right->prev->next = right->next;
				right->next->prev = right->prev;
			}

			winding += right->dir;
			if (0 == winding)
				break;

			right = right->next;
		} while (1);

		cell_list_add_span(coverages, left->cell, right->cell);
		left = right->next;
	}
}
927
928 static void
tor_fini(struct tor * converter)929 tor_fini(struct tor *converter)
930 {
931 polygon_fini(converter->polygon);
932 cell_list_fini(converter->coverages);
933 }
934
935 static bool
tor_init(struct tor * converter,const BoxRec * box,int num_edges)936 tor_init(struct tor *converter, const BoxRec *box, int num_edges)
937 {
938 __DBG(("%s: (%d, %d),(%d, %d) x (%d, %d), num_edges=%d\n",
939 __FUNCTION__,
940 box->x1, box->y1, box->x2, box->y2,
941 FAST_SAMPLES_X, FAST_SAMPLES_Y,
942 num_edges));
943
944 converter->extents = *box;
945
946 if (!cell_list_init(converter->coverages, box->x1, box->x2))
947 return false;
948
949 active_list_reset(converter->active);
950 if (!polygon_init(converter->polygon, num_edges,
951 (int)box->y1 * FAST_SAMPLES_Y,
952 (int)box->y2 * FAST_SAMPLES_Y)) {
953 cell_list_fini(converter->coverages);
954 return false;
955 }
956
957 return true;
958 }
959
960 static void
tor_add_trapezoid(struct tor * tor,const xTrapezoid * t,int dx,int dy)961 tor_add_trapezoid(struct tor *tor,
962 const xTrapezoid *t,
963 int dx, int dy)
964 {
965 if (!xTrapezoidValid(t)) {
966 __DBG(("%s: skipping invalid trapezoid: top=%d, bottom=%d, left=(%d, %d), (%d, %d), right=(%d, %d), (%d, %d)\n",
967 __FUNCTION__,
968 t->top, t->bottom,
969 t->left.p1.x, t->left.p1.y,
970 t->left.p2.x, t->left.p2.y,
971 t->right.p1.x, t->right.p1.y,
972 t->right.p2.x, t->right.p2.y));
973 return;
974 }
975 polygon_add_edge(tor->polygon, t, &t->left, 1, dx, dy);
976 polygon_add_edge(tor->polygon, t, &t->right, -1, dx, dy);
977 }
978
979 static void
step_edges(struct active_list * active,int count)980 step_edges(struct active_list *active, int count)
981 {
982 struct edge *edge;
983
984 count *= FAST_SAMPLES_Y;
985 for (edge = active->head.next; edge != &active->tail; edge = edge->next) {
986 edge->height_left -= count;
987 assert(edge->height_left >= 0);
988 if (!edge->height_left) {
989 edge->prev->next = edge->next;
990 edge->next->prev = edge->prev;
991 }
992 }
993 }
994
/* Emit one unclipped span at the given coverage (in grid-area units)
 * and record the damaged box. @clip is unused in this variant. */
static void
tor_blt_span(struct sna *sna,
	     struct sna_composite_spans_op *op,
	     pixman_region16_t *clip,
	     const BoxRec *box,
	     int coverage)
{
	__DBG(("%s: %d -> %d @ %d\n", __FUNCTION__, box->x1, box->x2, coverage));

	op->box(sna, op, box, AREA_TO_ALPHA(coverage));
	apply_damage_box(&op->base, box);
}
1007
/* As tor_blt_span(), but without damage tracking. */
static void
tor_blt_span__no_damage(struct sna *sna,
			struct sna_composite_spans_op *op,
			pixman_region16_t *clip,
			const BoxRec *box,
			int coverage)
{
	__DBG(("%s: %d -> %d @ %d\n", __FUNCTION__, box->x1, box->x2, coverage));

	op->box(sna, op, box, AREA_TO_ALPHA(coverage));
}
1019
1020 static void
tor_blt_span_clipped(struct sna * sna,struct sna_composite_spans_op * op,pixman_region16_t * clip,const BoxRec * box,int coverage)1021 tor_blt_span_clipped(struct sna *sna,
1022 struct sna_composite_spans_op *op,
1023 pixman_region16_t *clip,
1024 const BoxRec *box,
1025 int coverage)
1026 {
1027 pixman_region16_t region;
1028 float opacity;
1029
1030 opacity = AREA_TO_ALPHA(coverage);
1031 __DBG(("%s: %d -> %d @ %f\n", __FUNCTION__, box->x1, box->x2, opacity));
1032
1033 pixman_region_init_rects(®ion, box, 1);
1034 RegionIntersect(®ion, ®ion, clip);
1035 if (region_num_rects(®ion)) {
1036 op->boxes(sna, op,
1037 region_rects(®ion),
1038 region_num_rects(®ion),
1039 opacity);
1040 apply_damage(&op->base, ®ion);
1041 }
1042 pixman_region_fini(®ion);
1043 }
1044
1045 static void
tor_blt_span_mono(struct sna * sna,struct sna_composite_spans_op * op,pixman_region16_t * clip,const BoxRec * box,int coverage)1046 tor_blt_span_mono(struct sna *sna,
1047 struct sna_composite_spans_op *op,
1048 pixman_region16_t *clip,
1049 const BoxRec *box,
1050 int coverage)
1051 {
1052 if (coverage < FAST_SAMPLES_XY/2)
1053 return;
1054
1055 tor_blt_span(sna, op, clip, box, FAST_SAMPLES_XY);
1056 }
1057
1058 static void
tor_blt_span_mono_clipped(struct sna * sna,struct sna_composite_spans_op * op,pixman_region16_t * clip,const BoxRec * box,int coverage)1059 tor_blt_span_mono_clipped(struct sna *sna,
1060 struct sna_composite_spans_op *op,
1061 pixman_region16_t *clip,
1062 const BoxRec *box,
1063 int coverage)
1064 {
1065 if (coverage < FAST_SAMPLES_XY/2)
1066 return;
1067
1068 tor_blt_span_clipped(sna, op, clip, box, FAST_SAMPLES_XY);
1069 }
1070
1071 static void
tor_blt_span_mono_unbounded(struct sna * sna,struct sna_composite_spans_op * op,pixman_region16_t * clip,const BoxRec * box,int coverage)1072 tor_blt_span_mono_unbounded(struct sna *sna,
1073 struct sna_composite_spans_op *op,
1074 pixman_region16_t *clip,
1075 const BoxRec *box,
1076 int coverage)
1077 {
1078 tor_blt_span(sna, op, clip, box,
1079 coverage < FAST_SAMPLES_XY/2 ? 0 : FAST_SAMPLES_XY);
1080 }
1081
1082 static void
tor_blt_span_mono_unbounded_clipped(struct sna * sna,struct sna_composite_spans_op * op,pixman_region16_t * clip,const BoxRec * box,int coverage)1083 tor_blt_span_mono_unbounded_clipped(struct sna *sna,
1084 struct sna_composite_spans_op *op,
1085 pixman_region16_t *clip,
1086 const BoxRec *box,
1087 int coverage)
1088 {
1089 tor_blt_span_clipped(sna, op, clip, box,
1090 coverage < FAST_SAMPLES_XY/2 ? 0 : FAST_SAMPLES_XY);
1091 }
1092
/* Convert the accumulated per-cell coverage of one pixel row (replicated
 * over 'height' identical rows) into horizontal spans and emit each via
 * the supplied span callback.  With 'unbounded' set, zero-coverage runs
 * are emitted as well (required by unbounded operators).
 */
static void
tor_blt(struct sna *sna,
	struct tor *converter,
	struct sna_composite_spans_op *op,
	pixman_region16_t *clip,
	void (*span)(struct sna *sna,
		     struct sna_composite_spans_op *op,
		     pixman_region16_t *clip,
		     const BoxRec *box,
		     int coverage),
	int y, int height,
	int unbounded)
{
	struct cell_list *cells = converter->coverages;
	struct cell *cell;
	BoxRec box;
	int cover;

	/* 'y' is relative to the converter's extents. */
	box.y1 = y + converter->extents.y1;
	box.y2 = box.y1 + height;
	assert(box.y2 <= converter->extents.y2);
	box.x1 = converter->extents.x1;

	/* Form the spans from the coverages and areas. */
	cover = cells->head.covered_height*FAST_SAMPLES_X;
	assert(cover >= 0);
	for (cell = cells->head.next; cell != &cells->tail; cell = cell->next) {
		int x = cell->x;

		assert(x >= converter->extents.x1);
		assert(x < converter->extents.x2);
		__DBG(("%s: cell=(%d, %d, %d), cover=%d\n", __FUNCTION__,
		       cell->x, cell->covered_height, cell->uncovered_area,
		       cover));

		if (cell->covered_height || cell->uncovered_area) {
			/* Flush the constant-coverage run ending at this
			 * cell; bounded operators skip zero coverage. */
			box.x2 = x;
			if (box.x2 > box.x1 && (unbounded || cover)) {
				__DBG(("%s: span (%d, %d)x(%d, %d) @ %d\n", __FUNCTION__,
				       box.x1, box.y1,
				       box.x2 - box.x1,
				       box.y2 - box.y1,
				       cover));
				span(sna, op, clip, &box, cover);
			}
			box.x1 = box.x2;
			cover += cell->covered_height*FAST_SAMPLES_X;
		}

		if (cell->uncovered_area) {
			/* The single partially-covered pixel at this cell. */
			int area = cover - cell->uncovered_area;
			box.x2 = x + 1;
			if (unbounded || area) {
				__DBG(("%s: span (%d, %d)x(%d, %d) @ %d\n", __FUNCTION__,
				       box.x1, box.y1,
				       box.x2 - box.x1,
				       box.y2 - box.y1,
				       area));
				span(sna, op, clip, &box, area);
			}
			box.x1 = box.x2;
		}
	}

	/* Trailing run out to the right-hand edge of the extents. */
	box.x2 = converter->extents.x2;
	if (box.x2 > box.x1 && (unbounded || cover)) {
		__DBG(("%s: span (%d, %d)x(%d, %d) @ %d\n", __FUNCTION__,
		       box.x1, box.y1,
		       box.x2 - box.x1,
		       box.y2 - box.y1,
		       cover));
		span(sna, op, clip, &box, cover);
	}
}
1167
/* Rasterise the converter's polygon row by row, emitting spans through
 * the supplied callback.  Rows with no active edges are skipped (or
 * emitted with zero coverage when 'unbounded'); runs of rows crossed only
 * by vertical edges are rendered once and replicated.
 */
flatten static void
tor_render(struct sna *sna,
	   struct tor *converter,
	   struct sna_composite_spans_op *op,
	   pixman_region16_t *clip,
	   void (*span)(struct sna *sna,
			struct sna_composite_spans_op *op,
			pixman_region16_t *clip,
			const BoxRec *box,
			int coverage),
	   int unbounded)
{
	struct polygon *polygon = converter->polygon;
	struct cell_list *coverages = converter->coverages;
	struct active_list *active = converter->active;
	struct edge *buckets[FAST_SAMPLES_Y] = { 0 };
	int16_t i, j, h = converter->extents.y2 - converter->extents.y1;

	__DBG(("%s: unbounded=%d\n", __FUNCTION__, unbounded));

	/* Render each pixel row. */
	for (i = 0; i < h; i = j) {
		int do_full_step = 0;

		j = i + 1;

		/* Determine if we can ignore this row or use the full pixel
		 * stepper. */
		if (fill_buckets(active, polygon->y_buckets[i], buckets) == 0) {
			if (buckets[0]) {
				merge_edges(active, buckets[0]);
				buckets[0] = NULL;
			}
			if (active->head.next == &active->tail) {
				/* No active edges: fast-forward to the next
				 * bucket holding new edges.  NOTE(review):
				 * the unguarded scan presumably relies on a
				 * sentinel entry in y_buckets[] at index h —
				 * confirm against tor_init. */
				for (; polygon->y_buckets[j] == NULL; j++)
					;
				__DBG(("%s: no new edges and no exisiting edges, skipping, %d -> %d\n",
				       __FUNCTION__, i, j));

				assert(j <= h);
				if (unbounded) {
					/* Unbounded operators still need the
					 * uncovered rows written. */
					BoxRec box;

					box = converter->extents;
					box.y1 += i;
					box.y2 = converter->extents.y1 + j;

					span(sna, op, clip, &box, 0);
				}
				continue;
			}

			do_full_step = can_full_step(active);
		}

		__DBG(("%s: y=%d-%d, do_full_step=%d, new edges=%d\n",
		       __FUNCTION__,
		       i, j, do_full_step,
		       polygon->y_buckets[i] != NULL));
		if (do_full_step) {
			/* All active edges are vertical: one sub-row fully
			 * describes the pixel row. */
			nonzero_row(active, coverages);

			/* Extend the step across identical following rows. */
			while (polygon->y_buckets[j] == NULL &&
			       do_full_step >= 2*FAST_SAMPLES_Y) {
				do_full_step -= FAST_SAMPLES_Y;
				j++;
			}
			assert(j >= i + 1 && j <= h);
			if (j != i + 1)
				step_edges(active, j - (i + 1));

			__DBG(("%s: vertical edges, full step (%d, %d)\n",
			       __FUNCTION__, i, j));
		} else {
			int suby;

			/* Subsample this row. */
			for (suby = 0; suby < FAST_SAMPLES_Y; suby++) {
				if (buckets[suby]) {
					merge_edges(active, buckets[suby]);
					buckets[suby] = NULL;
				}

				nonzero_subrow(active, coverages);
			}
		}

		assert(j > i);
		tor_blt(sna, converter, op, clip, span, i, j-i, unbounded);
		cell_list_reset(coverages);
	}
}
1260
/* Render one fully-vertical pixel row directly into an a8 scanline using
 * the nonzero winding rule.  Each left/right edge pair bounds a span:
 * the boundary pixels get their fractional coverage added, the interior
 * is filled with 0xff using progressively wider aligned stores.
 */
static void
inplace_row(struct active_list *active, uint8_t *row, int width)
{
	struct edge *left = active->head.next;

	while (&active->tail != left) {
		struct edge *right;
		int winding = left->dir;
		int lfx, rfx;	/* fractional (sub-pixel) parts */
		int lix, rix;	/* integer pixel positions */

		/* Consume one full pixel row of this edge; unlink when done. */
		left->height_left -= FAST_SAMPLES_Y;
		assert(left->height_left >= 0);
		if (!left->height_left) {
			left->prev->next = left->next;
			left->next->prev = left->prev;
		}

		/* Walk right until the winding returns to zero at a distinct
		 * x, closing the span. */
		right = left->next;
		do {
			right->height_left -= FAST_SAMPLES_Y;
			assert(right->height_left >= 0);
			if (!right->height_left) {
				right->prev->next = right->next;
				right->next->prev = right->prev;
			}

			winding += right->dir;
			if (0 == winding && right->cell != right->next->cell)
				break;

			right = right->next;
		} while (1);

		/* Clamp both ends to [0, width] and split into int+frac. */
		if (left->cell < 0) {
			lix = lfx = 0;
		} else if (left->cell >= width * FAST_SAMPLES_X) {
			lix = width;
			lfx = 0;
		} else
			FAST_SAMPLES_X_TO_INT_FRAC(left->cell, lix, lfx);

		if (right->cell < 0) {
			rix = rfx = 0;
		} else if (right->cell >= width * FAST_SAMPLES_X) {
			rix = width;
			rfx = 0;
		} else
			FAST_SAMPLES_X_TO_INT_FRAC(right->cell, rix, rfx);
		if (lix == rix) {
			/* Span starts and ends within a single pixel. */
			if (rfx != lfx) {
				assert(lix < width);
				row[lix] += (rfx-lfx) * 256 / FAST_SAMPLES_X;
			}
		} else {
			/* Partial coverage on the leading pixel... */
			assert(lix < width);
			if (lfx == 0)
				row[lix] = 0xff;
			else
				row[lix] += 256 - lfx * 256 / FAST_SAMPLES_X;

			/* ...and on the trailing pixel. */
			assert(rix <= width);
			if (rfx) {
				assert(rix < width);
				row[rix] += rfx * 256 / FAST_SAMPLES_X;
			}

			if (rix > ++lix) {
				uint8_t *r = row + lix;
				rix -= lix;
#if 0
				if (rix == 1)
					*row = 0xff;
				else
					memset(row, 0xff, rix);
#else
				/* Fill the interior with 0xff: align up to
				 * 8 bytes, fill word-at-a-time, then mop up
				 * the tail.  NOTE(review): the aligned
				 * uint16/32/64 stores assume such type
				 * punning of the byte row is acceptable on
				 * the target (x86) — strict aliasing is not
				 * honoured here. */
				if ((uintptr_t)r & 1 && rix) {
					*r++ = 0xff;
					rix--;
				}
				if ((uintptr_t)r & 2 && rix >= 2) {
					*(uint16_t *)r = 0xffff;
					r += 2;
					rix -= 2;
				}
				if ((uintptr_t)r & 4 && rix >= 4) {
					*(uint32_t *)r = 0xffffffff;
					r += 4;
					rix -= 4;
				}
				while (rix >= 8) {
					*(uint64_t *)r = 0xffffffffffffffff;
					r += 8;
					rix -= 8;
				}
				if (rix & 4) {
					*(uint32_t *)r = 0xffffffff;
					r += 4;
				}
				if (rix & 2) {
					*(uint16_t *)r = 0xffff;
					r += 2;
				}
				if (rix & 1)
					*r = 0xff;
#endif
			}
		}

		left = right->next;
	}
}
1373
1374 inline static void
inplace_subrow(struct active_list * active,int8_t * row,int width,int * min,int * max)1375 inplace_subrow(struct active_list *active, int8_t *row,
1376 int width, int *min, int *max)
1377 {
1378 struct edge *edge = active->head.next;
1379 int prev_x = INT_MIN;
1380 int winding = 0, xstart = INT_MIN;
1381
1382 while (&active->tail != edge) {
1383 struct edge *next = edge->next;
1384
1385 winding += edge->dir;
1386 if (0 == winding) {
1387 if (edge->next->cell != edge->cell) {
1388 if (edge->cell <= xstart) {
1389 xstart = INT_MIN;
1390 } else {
1391 int fx;
1392 int ix;
1393
1394 if (xstart < FAST_SAMPLES_X * width) {
1395 FAST_SAMPLES_X_TO_INT_FRAC(xstart, ix, fx);
1396 if (ix < *min)
1397 *min = ix;
1398
1399 row[ix++] += FAST_SAMPLES_X - fx;
1400 if (fx && ix < width)
1401 row[ix] += fx;
1402 }
1403
1404 xstart = edge->cell;
1405 if (xstart < FAST_SAMPLES_X * width) {
1406 FAST_SAMPLES_X_TO_INT_FRAC(xstart, ix, fx);
1407 row[ix] -= FAST_SAMPLES_X - fx;
1408 if (fx && ix + 1 < width)
1409 row[++ix] -= fx;
1410
1411 if (ix >= *max)
1412 *max = ix + 1;
1413
1414 xstart = INT_MIN;
1415 } else
1416 *max = width;
1417 }
1418 }
1419 } else if (xstart < 0) {
1420 xstart = MAX(edge->cell, 0);
1421 }
1422
1423 assert(edge->height_left > 0);
1424 if (--edge->height_left) {
1425 if (edge->dy) {
1426 edge->x.quo += edge->dxdy.quo;
1427 edge->x.rem += edge->dxdy.rem;
1428 if (edge->x.rem < 0) {
1429 --edge->x.quo;
1430 edge->x.rem += edge->dy;
1431 } else if (edge->x.rem >= 0) {
1432 ++edge->x.quo;
1433 edge->x.rem -= edge->dy;
1434 }
1435 edge->cell = edge->x.quo + (edge->x.rem >= edge->dy/2);
1436 }
1437
1438 if (edge->cell < prev_x) {
1439 struct edge *pos = edge->prev;
1440 pos->next = next;
1441 next->prev = pos;
1442 do
1443 pos = pos->prev;
1444 while (edge->cell < pos->cell);
1445 pos->next->prev = edge;
1446 edge->next = pos->next;
1447 edge->prev = pos;
1448 pos->next = edge;
1449 } else
1450 prev_x = edge->cell;
1451 } else {
1452 edge->prev->next = next;
1453 next->prev = edge->prev;
1454 }
1455
1456 edge = next;
1457 }
1458 }
1459
/* Integrate the signed per-pixel coverage deltas in buf[] (accumulated by
 * inplace_subrow over FAST_SAMPLES_Y sub-rows) into final 0..255 alpha
 * values in row[].  The main loop processes four pixels at a time through
 * a uint32 word; the zero-delta fast path replicates the current cover.
 * NOTE(review): the byte order of the word processing (low byte first,
 * results inserted at bits 24..31) assumes a little-endian target — fine
 * for x86, verify before reuse elsewhere.
 */
inline static void
inplace_end_subrows(struct active_list *active, uint8_t *row,
		    int8_t *buf, int width)
{
	int cover = 0;	/* running sum of deltas, in sub-sample units */

	while (width >= 4) {
		uint32_t dw;
		int v;

		dw = *(uint32_t *)buf;
		buf += 4;

		if (dw == 0) {
			/* No deltas: all four pixels share the same cover.
			 * Scale to 0..255 (v -= v >> 8 maps 256 to 255). */
			v = cover * 256 / FAST_SAMPLES_XY;
			v -= v >> 8;
			v |= v << 8;
			dw = v | v << 16;
		} else {
			/* Consume one delta byte and emit one alpha byte,
			 * four times, rotating through the word. */
			cover += (int8_t)(dw & 0xff);
			if (cover) {
				assert(cover > 0);
				v = cover * 256 / FAST_SAMPLES_XY;
				v -= v >> 8;
				dw >>= 8;
				dw |= v << 24;
			} else
				dw >>= 8;

			cover += (int8_t)(dw & 0xff);
			if (cover) {
				assert(cover > 0);
				v = cover * 256 / FAST_SAMPLES_XY;
				v -= v >> 8;
				dw >>= 8;
				dw |= v << 24;
			} else
				dw >>= 8;

			cover += (int8_t)(dw & 0xff);
			if (cover) {
				assert(cover > 0);
				v = cover * 256 / FAST_SAMPLES_XY;
				v -= v >> 8;
				dw >>= 8;
				dw |= v << 24;
			} else
				dw >>= 8;

			cover += (int8_t)(dw & 0xff);
			if (cover) {
				assert(cover > 0);
				v = cover * 256 / FAST_SAMPLES_XY;
				v -= v >> 8;
				dw >>= 8;
				dw |= v << 24;
			} else
				dw >>= 8;
		}

		*(uint32_t *)row = dw;
		row += 4;
		width -= 4;
	}

	/* Remaining 0-3 pixels, one at a time. */
	while (width--) {
		int v;

		cover += *buf++;
		assert(cover >= 0);

		v = cover * 256 / FAST_SAMPLES_XY;
		v -= v >> 8;
		*row++ = v;
	}
}
1536
/* Threshold an a8 scanline to mono: values of 0xf0 and above become fully
 * opaque, everything below becomes transparent.
 */
static void
convert_mono(uint8_t *ptr, int w)
{
	int i;

	for (i = 0; i < w; i++)
		ptr[i] = ptr[i] >= 0xf0 ? 0xff : 0x00;
}
1545
1546 static void
tor_inplace(struct tor * converter,PixmapPtr scratch,int mono,uint8_t * buf)1547 tor_inplace(struct tor *converter, PixmapPtr scratch, int mono, uint8_t *buf)
1548 {
1549 int i, j, h = converter->extents.y2;
1550 struct polygon *polygon = converter->polygon;
1551 struct active_list *active = converter->active;
1552 struct edge *buckets[FAST_SAMPLES_Y] = { 0 };
1553 uint8_t *row = scratch->devPrivate.ptr;
1554 int stride = scratch->devKind;
1555 int width = scratch->drawable.width;
1556
1557 __DBG(("%s: mono=%d, buf?=%d\n", __FUNCTION__, mono, buf != NULL));
1558 assert(converter->extents.y1 == 0);
1559 assert(converter->extents.x1 == 0);
1560 assert(scratch->drawable.depth == 8);
1561
1562 /* Render each pixel row. */
1563 for (i = 0; i < h; i = j) {
1564 int do_full_step = 0;
1565 void *ptr = buf ?: row;
1566
1567 j = i + 1;
1568
1569 /* Determine if we can ignore this row or use the full pixel
1570 * stepper. */
1571 if (fill_buckets(active, polygon->y_buckets[i], buckets) == 0) {
1572 if (buckets[0]) {
1573 merge_edges(active, buckets[0]);
1574 buckets[0] = NULL;
1575 }
1576 if (active->head.next == &active->tail) {
1577 for (; !polygon->y_buckets[j]; j++)
1578 ;
1579 __DBG(("%s: no new edges and no exisiting edges, skipping, %d -> %d\n",
1580 __FUNCTION__, i, j));
1581
1582 memset(row, 0, stride*(j-i));
1583 row += stride*(j-i);
1584 continue;
1585 }
1586
1587 do_full_step = can_full_step(active);
1588 }
1589
1590 __DBG(("%s: y=%d, do_full_step=%d, new edges=%d, min_height=%d, vertical=%d\n",
1591 __FUNCTION__,
1592 i, do_full_step,
1593 polygon->y_buckets[i] != NULL));
1594 if (do_full_step) {
1595 memset(ptr, 0, width);
1596 inplace_row(active, ptr, width);
1597 if (mono)
1598 convert_mono(ptr, width);
1599 if (row != ptr)
1600 memcpy(row, ptr, width);
1601
1602 while (polygon->y_buckets[j] == NULL &&
1603 do_full_step >= 2*FAST_SAMPLES_Y) {
1604 do_full_step -= FAST_SAMPLES_Y;
1605 row += stride;
1606 memcpy(row, ptr, width);
1607 j++;
1608 }
1609 if (j != i + 1)
1610 step_edges(active, j - (i + 1));
1611
1612 __DBG(("%s: vertical edges, full step (%d, %d)\n",
1613 __FUNCTION__, i, j));
1614 } else {
1615 int min = width, max = 0, suby;
1616
1617 /* Subsample this row. */
1618 memset(ptr, 0, width);
1619 for (suby = 0; suby < FAST_SAMPLES_Y; suby++) {
1620 if (buckets[suby]) {
1621 merge_edges(active, buckets[suby]);
1622 buckets[suby] = NULL;
1623 }
1624
1625 inplace_subrow(active, ptr, width, &min, &max);
1626 }
1627 assert(min >= 0 && max <= width);
1628 memset(row, 0, min);
1629 if (max > min) {
1630 inplace_end_subrows(active, row+min, (int8_t*)ptr+min, max-min);
1631 if (mono)
1632 convert_mono(row+min, max-min);
1633 }
1634 if (max < width)
1635 memset(row+max, 0, width-max);
1636 }
1637
1638 row += stride;
1639 }
1640 }
1641
operator_is_bounded(uint8_t op)1642 static int operator_is_bounded(uint8_t op)
1643 {
1644 switch (op) {
1645 case PictOpOver:
1646 case PictOpOutReverse:
1647 case PictOpAdd:
1648 return true;
1649 default:
1650 return false;
1651 }
1652 }
1653
1654 static span_func_t
choose_span(struct sna_composite_spans_op * tmp,PicturePtr dst,PictFormatPtr maskFormat,RegionPtr clip)1655 choose_span(struct sna_composite_spans_op *tmp,
1656 PicturePtr dst,
1657 PictFormatPtr maskFormat,
1658 RegionPtr clip)
1659 {
1660 span_func_t span;
1661
1662 if (is_mono(dst, maskFormat)) {
1663 /* XXX An imprecise approximation */
1664 if (maskFormat && !operator_is_bounded(tmp->base.op)) {
1665 span = tor_blt_span_mono_unbounded;
1666 if (clip->data)
1667 span = tor_blt_span_mono_unbounded_clipped;
1668 } else {
1669 span = tor_blt_span_mono;
1670 if (clip->data)
1671 span = tor_blt_span_mono_clipped;
1672 }
1673 } else {
1674 if (clip->data)
1675 span = tor_blt_span_clipped;
1676 else if (tmp->base.damage == NULL)
1677 span = tor_blt_span__no_damage;
1678 else
1679 span = tor_blt_span;
1680 }
1681
1682 return span;
1683 }
1684
/* Per-thread work description for parallel span conversion; each thread
 * rasterises one horizontal band of the trapezoids.
 */
struct span_thread {
	struct sna *sna;
	const struct sna_composite_spans_op *op;
	const xTrapezoid *traps;	/* shared, read-only trapezoid array */
	RegionPtr clip;
	span_func_t span;		/* emitter chosen by thread_choose_span() */
	BoxRec extents;			/* the y-band this thread covers */
	int dx, dy, draw_y;		/* sub-sample offsets and drawable origin */
	int ntrap;
	bool unbounded;			/* emit zero-coverage spans too */
};
1696
/* Number of spans buffered per thread before flushing (sized to 8KiB). */
#define SPAN_THREAD_MAX_BOXES (8192/sizeof(struct sna_opacity_box))
/* Thread-local span accumulator; passed to the span callbacks disguised
 * as the sna_composite_spans_op pointer (see span_thread_box). */
struct span_thread_boxes {
	const struct sna_composite_spans_op *op;
	const BoxRec *clip_start, *clip_end;	/* clip boxes for this band */
	int num_boxes;
	struct sna_opacity_box boxes[SPAN_THREAD_MAX_BOXES];
};
1704
span_thread_add_box(struct sna * sna,void * data,const BoxRec * box,float alpha)1705 static void span_thread_add_box(struct sna *sna, void *data,
1706 const BoxRec *box, float alpha)
1707 {
1708 struct span_thread_boxes *b = data;
1709
1710 __DBG(("%s: adding box with alpha=%f\n", __FUNCTION__, alpha));
1711
1712 if (unlikely(b->num_boxes == SPAN_THREAD_MAX_BOXES)) {
1713 DBG(("%s: flushing %d boxes\n", __FUNCTION__, b->num_boxes));
1714 b->op->thread_boxes(sna, b->op, b->boxes, b->num_boxes);
1715 b->num_boxes = 0;
1716 }
1717
1718 b->boxes[b->num_boxes].box = *box++;
1719 b->boxes[b->num_boxes].alpha = alpha;
1720 b->num_boxes++;
1721 assert(b->num_boxes <= SPAN_THREAD_MAX_BOXES);
1722 }
1723
1724 static void
span_thread_box(struct sna * sna,struct sna_composite_spans_op * op,pixman_region16_t * clip,const BoxRec * box,int coverage)1725 span_thread_box(struct sna *sna,
1726 struct sna_composite_spans_op *op,
1727 pixman_region16_t *clip,
1728 const BoxRec *box,
1729 int coverage)
1730 {
1731 struct span_thread_boxes *b = (struct span_thread_boxes *)op;
1732
1733 __DBG(("%s: %d -> %d @ %d\n", __FUNCTION__, box->x1, box->x2, coverage));
1734 if (b->num_boxes) {
1735 struct sna_opacity_box *bb = &b->boxes[b->num_boxes-1];
1736 if (bb->box.x1 == box->x1 &&
1737 bb->box.x2 == box->x2 &&
1738 bb->box.y2 == box->y1 &&
1739 bb->alpha == AREA_TO_ALPHA(coverage)) {
1740 bb->box.y2 = box->y2;
1741 __DBG(("%s: contracted double row: %d -> %d\n", __func__, bb->box.y1, bb->box.y2));
1742 return;
1743 }
1744 }
1745
1746 span_thread_add_box(sna, op, box, AREA_TO_ALPHA(coverage));
1747 }
1748
1749 static void
span_thread_clipped_box(struct sna * sna,struct sna_composite_spans_op * op,pixman_region16_t * clip,const BoxRec * box,int coverage)1750 span_thread_clipped_box(struct sna *sna,
1751 struct sna_composite_spans_op *op,
1752 pixman_region16_t *clip,
1753 const BoxRec *box,
1754 int coverage)
1755 {
1756 struct span_thread_boxes *b = (struct span_thread_boxes *)op;
1757 const BoxRec *c;
1758
1759 __DBG(("%s: %d -> %d @ %f\n", __FUNCTION__, box->x1, box->x2,
1760 AREA_TO_ALPHA(coverage)));
1761
1762 b->clip_start =
1763 find_clip_box_for_y(b->clip_start, b->clip_end, box->y1);
1764
1765 c = b->clip_start;
1766 while (c != b->clip_end) {
1767 BoxRec clipped;
1768
1769 if (box->y2 <= c->y1)
1770 break;
1771
1772 clipped = *box;
1773 if (!box_intersect(&clipped, c++))
1774 continue;
1775
1776 span_thread_add_box(sna, op, &clipped, AREA_TO_ALPHA(coverage));
1777 }
1778 }
1779
1780 static span_func_t
thread_choose_span(struct sna_composite_spans_op * tmp,PicturePtr dst,PictFormatPtr maskFormat,RegionPtr clip)1781 thread_choose_span(struct sna_composite_spans_op *tmp,
1782 PicturePtr dst,
1783 PictFormatPtr maskFormat,
1784 RegionPtr clip)
1785 {
1786 span_func_t span;
1787
1788 if (tmp->base.damage) {
1789 DBG(("%s: damaged -> no thread support\n", __FUNCTION__));
1790 return NULL;
1791 }
1792
1793 if (is_mono(dst, maskFormat)) {
1794 DBG(("%s: mono rendering -> no thread support\n", __FUNCTION__));
1795 return NULL;
1796 } else {
1797 assert(tmp->thread_boxes);
1798 DBG(("%s: clipped? %d\n", __FUNCTION__, clip->data != NULL));
1799 if (clip->data)
1800 span = span_thread_clipped_box;
1801 else
1802 span = span_thread_box;
1803 }
1804
1805 return span;
1806 }
1807
1808 inline static void
span_thread_boxes_init(struct span_thread_boxes * boxes,const struct sna_composite_spans_op * op,const RegionRec * clip)1809 span_thread_boxes_init(struct span_thread_boxes *boxes,
1810 const struct sna_composite_spans_op *op,
1811 const RegionRec *clip)
1812 {
1813 boxes->op = op;
1814 region_get_boxes(clip, &boxes->clip_start, &boxes->clip_end);
1815 boxes->num_boxes = 0;
1816 }
1817
/* Thread entry point: rasterise the trapezoids overlapping this thread's
 * y-band into a local box buffer and flush any remainder at the end.
 */
static void
span_thread(void *arg)
{
	struct span_thread *thread = arg;
	struct span_thread_boxes boxes;
	struct tor tor;
	const xTrapezoid *t;
	int n, y1, y2;

	if (!tor_init(&tor, &thread->extents, 2*thread->ntrap))
		return;

	span_thread_boxes_init(&boxes, thread->op, thread->clip);

	/* Only add trapezoids that intersect this thread's band. */
	y1 = thread->extents.y1 - thread->draw_y;
	y2 = thread->extents.y2 - thread->draw_y;
	for (n = thread->ntrap, t = thread->traps; n--; t++) {
		if (pixman_fixed_integer_floor(t->top) >= y2 ||
		    pixman_fixed_integer_ceil(t->bottom) <= y1)
			continue;

		tor_add_trapezoid(&tor, t, thread->dx, thread->dy);
	}

	/* The boxes accumulator impersonates the spans op (see
	 * span_thread_box). */
	tor_render(thread->sna, &tor,
		   (struct sna_composite_spans_op *)&boxes, thread->clip,
		   thread->span, thread->unbounded);

	tor_fini(&tor);

	if (boxes.num_boxes) {
		DBG(("%s: flushing %d boxes\n", __FUNCTION__, boxes.num_boxes));
		assert(boxes.num_boxes <= SPAN_THREAD_MAX_BOXES);
		thread->op->thread_boxes(thread->sna, thread->op,
					 boxes.boxes, boxes.num_boxes);
	}
}
1855
/* Composite a set of trapezoids through the hardware span interface,
 * optionally splitting the work across threads by horizontal bands.
 * Returns false to request fallback to another path; true when handled
 * (including trivial no-op cases).
 */
bool
imprecise_trapezoid_span_converter(struct sna *sna,
				   CARD8 op, PicturePtr src, PicturePtr dst,
				   PictFormatPtr maskFormat, unsigned int flags,
				   INT16 src_x, INT16 src_y,
				   int ntrap, xTrapezoid *traps)
{
	struct sna_composite_spans_op tmp;
	pixman_region16_t clip;
	int16_t dst_x, dst_y;
	bool was_clear;
	int dx, dy, n;
	int num_threads;

	if (NO_IMPRECISE)
		return false;

	if (!sna->render.check_composite_spans(sna, op, src, dst, 0, 0, flags)) {
		DBG(("%s: fallback -- composite spans not supported\n",
		     __FUNCTION__));
		return false;
	}

	/* Empty bounds: nothing to draw. */
	if (!trapezoids_bounds(ntrap, traps, &clip.extents))
		return true;

#if 0
	if (clip.extents.y2 - clip.extents.y1 < 64 &&
	    clip.extents.x2 - clip.extents.x1 < 64) {
		DBG(("%s: fallback -- traps extents too small %dx%d\n",
		     __FUNCTION__, extents.y2 - extents.y1, extents.x2 - extents.x1));
		return false;
	}
#endif

	DBG(("%s: extents (%d, %d), (%d, %d)\n",
	     __FUNCTION__,
	     clip.extents.x1, clip.extents.y1,
	     clip.extents.x2, clip.extents.y2));

	trapezoid_origin(&traps[0].left, &dst_x, &dst_y);

	/* Intersect the trapezoid bounds against the destination clip;
	 * bail out (handled) if nothing remains. */
	if (!sna_compute_composite_region(&clip,
					  src, NULL, dst,
					  src_x + clip.extents.x1 - dst_x,
					  src_y + clip.extents.y1 - dst_y,
					  0, 0,
					  clip.extents.x1, clip.extents.y1,
					  clip.extents.x2 - clip.extents.x1,
					  clip.extents.y2 - clip.extents.y1)) {
		DBG(("%s: trapezoids do not intersect drawable clips\n",
		     __FUNCTION__));
		return true;
	}

	/* Re-check with the final operation size. */
	if (!sna->render.check_composite_spans(sna, op, src, dst,
					       clip.extents.x2 - clip.extents.x1,
					       clip.extents.y2 - clip.extents.y1,
					       flags)) {
		DBG(("%s: fallback -- composite spans not supported\n",
		     __FUNCTION__));
		return false;
	}

	dx = dst->pDrawable->x;
	dy = dst->pDrawable->y;

	DBG(("%s: after clip -- extents (%d, %d), (%d, %d), delta=(%d, %d) src -> (%d, %d)\n",
	     __FUNCTION__,
	     clip.extents.x1, clip.extents.y1,
	     clip.extents.x2, clip.extents.y2,
	     dx, dy,
	     src_x + clip.extents.x1 - dst_x - dx,
	     src_y + clip.extents.y1 - dst_y - dy));

	/* Simplify the operator against a known-clear destination. */
	was_clear = sna_drawable_is_clear(dst->pDrawable);
	switch (op) {
	case PictOpAdd:
	case PictOpOver:
		if (was_clear)
			op = PictOpSrc;
		break;
	case PictOpIn:
		if (was_clear)
			return true;
		break;
	}

	if (!sna->render.composite_spans(sna, op, src, dst,
					 src_x + clip.extents.x1 - dst_x - dx,
					 src_y + clip.extents.y1 - dst_y - dy,
					 clip.extents.x1, clip.extents.y1,
					 clip.extents.x2 - clip.extents.x1,
					 clip.extents.y2 - clip.extents.y1,
					 flags, memset(&tmp, 0, sizeof(tmp)))) {
		DBG(("%s: fallback -- composite spans render op not supported\n",
		     __FUNCTION__));
		return false;
	}

	/* Convert the drawable offset into sub-sample units. */
	dx *= FAST_SAMPLES_X;
	dy *= FAST_SAMPLES_Y;

	num_threads = 1;
	if (!NO_GPU_THREADS &&
	    (flags & COMPOSITE_SPANS_RECTILINEAR) == 0 &&
	    tmp.thread_boxes &&
	    thread_choose_span(&tmp, dst, maskFormat, &clip))
		num_threads = sna_use_threads(clip.extents.x2-clip.extents.x1,
					      clip.extents.y2-clip.extents.y1,
					      16);
	DBG(("%s: using %d threads\n", __FUNCTION__, num_threads));
	if (num_threads == 1) {
		struct tor tor;

		if (!tor_init(&tor, &clip.extents, 2*ntrap))
			goto skip;

		/* Only add trapezoids that can intersect the clip band. */
		for (n = 0; n < ntrap; n++) {
			if (pixman_fixed_integer_floor(traps[n].top) + dst->pDrawable->y >= clip.extents.y2 ||
			    pixman_fixed_integer_ceil(traps[n].bottom) + dst->pDrawable->y <= clip.extents.y1)
				continue;

			tor_add_trapezoid(&tor, &traps[n], dx, dy);
		}

		tor_render(sna, &tor, &tmp, &clip,
			   choose_span(&tmp, dst, maskFormat, &clip),
			   !was_clear && maskFormat && !operator_is_bounded(op));

		tor_fini(&tor);
	} else {
		struct span_thread threads[num_threads];
		int y, h;

		DBG(("%s: using %d threads for span compositing %dx%d\n",
		     __FUNCTION__, num_threads,
		     clip.extents.x2 - clip.extents.x1,
		     clip.extents.y2 - clip.extents.y1));

		threads[0].sna = sna;
		threads[0].op = &tmp;
		threads[0].traps = traps;
		threads[0].ntrap = ntrap;
		threads[0].extents = clip.extents;
		threads[0].clip = &clip;
		threads[0].dx = dx;
		threads[0].dy = dy;
		threads[0].draw_y = dst->pDrawable->y;
		threads[0].unbounded = !was_clear && maskFormat && !operator_is_bounded(op);
		threads[0].span = thread_choose_span(&tmp, dst, maskFormat, &clip);

		/* Split the clip into equal-height bands; drop a thread if
		 * rounding makes the last band empty. */
		y = clip.extents.y1;
		h = clip.extents.y2 - clip.extents.y1;
		h = (h + num_threads - 1) / num_threads;
		num_threads -= (num_threads-1) * h >= clip.extents.y2 - clip.extents.y1;

		for (n = 1; n < num_threads; n++) {
			threads[n] = threads[0];
			threads[n].extents.y1 = y;
			threads[n].extents.y2 = y += h;

			sna_threads_run(n, span_thread, &threads[n]);
		}

		/* The calling thread processes the final band itself. */
		assert(y < threads[0].extents.y2);
		threads[0].extents.y1 = y;
		span_thread(&threads[0]);

		sna_threads_wait();
	}
skip:
	tmp.done(sna, &tmp);

	REGION_UNINIT(NULL, &clip);
	return true;
}
2033
/* Span writer for rendering into an a8 mask.  NOTE: 'op' secretly carries
 * the mask base pointer and 'clip' the row stride (see the tor_render()
 * call in imprecise_trapezoid_mask_converter) — neither is used for its
 * nominal purpose here.
 */
static void
tor_blt_mask(struct sna *sna,
	     struct sna_composite_spans_op *op,
	     pixman_region16_t *clip,
	     const BoxRec *box,
	     int coverage)
{
	uint8_t *ptr = (uint8_t *)op;
	int stride = (intptr_t)clip;
	int h, w;

	/* Scale sub-sample coverage to 0..255 (v -= v >> 8 maps 256 to 255). */
	coverage = 256 * coverage / FAST_SAMPLES_XY;
	coverage -= coverage >> 8;

	ptr += box->y1 * stride + box->x1;

	h = box->y2 - box->y1;
	w = box->x2 - box->x1;
	if ((w | h) == 1) {
		/* Single pixel. */
		*ptr = coverage;
	} else if (w == 1) {
		/* Single column. */
		do {
			*ptr = coverage;
			ptr += stride;
		} while (--h);
	} else do {
		/* General case: fill row by row. */
		memset(ptr, coverage, w);
		ptr += stride;
	} while (--h);
}
2064
2065 static void
tor_blt_mask_mono(struct sna * sna,struct sna_composite_spans_op * op,pixman_region16_t * clip,const BoxRec * box,int coverage)2066 tor_blt_mask_mono(struct sna *sna,
2067 struct sna_composite_spans_op *op,
2068 pixman_region16_t *clip,
2069 const BoxRec *box,
2070 int coverage)
2071 {
2072 tor_blt_mask(sna, op, clip, box,
2073 coverage < FAST_SAMPLES_XY/2 ? 0 : FAST_SAMPLES_XY);
2074 }
2075
/* Composite trapezoids by rasterising them into a temporary a8 mask
 * pixmap on the CPU and then compositing src through that mask.  With no
 * maskFormat, each trapezoid is rendered individually (per the Render
 * protocol).  Returns true when handled (including no-op cases), false to
 * request fallback.
 */
bool
imprecise_trapezoid_mask_converter(CARD8 op, PicturePtr src, PicturePtr dst,
				   PictFormatPtr maskFormat, unsigned flags,
				   INT16 src_x, INT16 src_y,
				   int ntrap, xTrapezoid *traps)
{
	struct tor tor;
	ScreenPtr screen = dst->pDrawable->pScreen;
	PixmapPtr scratch;
	PicturePtr mask;
	BoxRec extents;
	int16_t dst_x, dst_y;
	int dx, dy;
	int error, n;

	if (NO_IMPRECISE)
		return false;

	if (maskFormat == NULL && ntrap > 1) {
		DBG(("%s: individual rasterisation requested\n",
		     __FUNCTION__));
		do {
			/* XXX unwind errors? */
			if (!imprecise_trapezoid_mask_converter(op, src, dst, NULL, flags,
								src_x, src_y, 1, traps++))
				return false;
		} while (--ntrap);
		return true;
	}

	if (!trapezoids_bounds(ntrap, traps, &extents))
		return true;

	DBG(("%s: ntraps=%d, extents (%d, %d), (%d, %d)\n",
	     __FUNCTION__, ntrap, extents.x1, extents.y1, extents.x2, extents.y2));

	if (!sna_compute_composite_extents(&extents,
					   src, NULL, dst,
					   src_x, src_y,
					   0, 0,
					   extents.x1, extents.y1,
					   extents.x2 - extents.x1,
					   extents.y2 - extents.y1))
		return true;

	DBG(("%s: extents (%d, %d), (%d, %d)\n",
	     __FUNCTION__, extents.x1, extents.y1, extents.x2, extents.y2));

	/* Rebase the extents so the mask starts at (0, 0): x2/y2 become the
	 * mask size, dst_x/dst_y the composite offset, and dx/dy the
	 * sub-sample translation applied to each trapezoid. */
	extents.y2 -= extents.y1;
	extents.x2 -= extents.x1;
	extents.x1 -= dst->pDrawable->x;
	extents.y1 -= dst->pDrawable->y;
	dst_x = extents.x1;
	dst_y = extents.y1;
	dx = -extents.x1 * FAST_SAMPLES_X;
	dy = -extents.y1 * FAST_SAMPLES_Y;
	extents.x1 = extents.y1 = 0;

	DBG(("%s: mask (%dx%d), dx=(%d, %d)\n",
	     __FUNCTION__, extents.x2, extents.y2, dx, dy));
	scratch = sna_pixmap_create_upload(screen,
					   extents.x2, extents.y2, 8,
					   KGEM_BUFFER_WRITE_INPLACE);
	if (!scratch)
		return true;

	DBG(("%s: created buffer %p, stride %d\n",
	     __FUNCTION__, scratch->devPrivate.ptr, scratch->devKind));

	if (!tor_init(&tor, &extents, 2*ntrap)) {
		sna_pixmap_destroy(scratch);
		return true;
	}

	/* Skip trapezoids entirely outside the mask vertically. */
	for (n = 0; n < ntrap; n++) {
		if (pixman_fixed_to_int(traps[n].top) - dst_y >= extents.y2 ||
		    pixman_fixed_to_int(traps[n].bottom) - dst_y < 0)
			continue;

		tor_add_trapezoid(&tor, &traps[n], dx, dy);
	}

	if (extents.x2 <= TOR_INPLACE_SIZE) {
		/* Narrow masks: rasterise scanline-at-a-time in place,
		 * staging through buf unless the scratch is cachable. */
		uint8_t buf[TOR_INPLACE_SIZE];
		tor_inplace(&tor, scratch, is_mono(dst, maskFormat),
			    scratch->usage_hint ? NULL : buf);
	} else {
		/* Wide masks: reuse the span machinery, smuggling the mask
		 * pointer/stride through the op/clip arguments. */
		tor_render(NULL, &tor,
			   scratch->devPrivate.ptr,
			   (void *)(intptr_t)scratch->devKind,
			   is_mono(dst, maskFormat) ? tor_blt_mask_mono : tor_blt_mask,
			   true);
	}
	tor_fini(&tor);

	mask = CreatePicture(0, &scratch->drawable,
			     PictureMatchFormat(screen, 8, PICT_a8),
			     0, 0, serverClient, &error);
	if (mask) {
		int16_t x0, y0;

		trapezoid_origin(&traps[0].left, &x0, &y0);

		CompositePicture(op, src, mask, dst,
				 src_x + dst_x - x0,
				 src_y + dst_y - y0,
				 0, 0,
				 dst_x, dst_y,
				 extents.x2, extents.y2);
		FreePicture(mask, 0);
	}
	sna_pixmap_destroy(scratch);

	return true;
}
2191
/* Destination description for in-place (CPU) span writing; smuggled to
 * the span callbacks through the sna_composite_spans_op pointer.
 */
struct inplace {
	uint8_t *ptr;		/* base of the destination pixels */
	uint32_t stride;	/* bytes per row */
	union {
		uint8_t opacity;	/* used by the a8 span writers */
		uint32_t color;		/* presumably for 32bpp writers — see later users */
	};
};
2200
coverage_opacity(int coverage,uint8_t opacity)2201 static force_inline uint8_t coverage_opacity(int coverage, uint8_t opacity)
2202 {
2203 coverage = coverage * 256 / FAST_SAMPLES_XY;
2204 coverage -= coverage >> 8;
2205 return opacity == 255 ? coverage : mul_8_8(coverage, opacity);
2206 }
2207
_tor_blt_src(struct inplace * in,const BoxRec * box,uint8_t v)2208 static void _tor_blt_src(struct inplace *in, const BoxRec *box, uint8_t v)
2209 {
2210 uint8_t *ptr = in->ptr;
2211 int h, w;
2212
2213 ptr += box->y1 * in->stride + box->x1;
2214
2215 h = box->y2 - box->y1;
2216 w = box->x2 - box->x1;
2217 if ((w | h) == 1) {
2218 *ptr = v;
2219 } else if (w == 1) {
2220 do {
2221 *ptr = v;
2222 ptr += in->stride;
2223 } while (--h);
2224 } else do {
2225 memset(ptr, v, w);
2226 ptr += in->stride;
2227 } while (--h);
2228 }
2229
/* State threaded through tor_blt_clipped(): the real span emitter and the
 * current window into the clip region's box array. */
struct clipped_span {
	span_func_t span;			/* invoked once per clipped sub-box */
	const BoxRec *clip_start, *clip_end;	/* [start, end) clip boxes */
};
2234
2235 static void
tor_blt_clipped(struct sna * sna,struct sna_composite_spans_op * op,pixman_region16_t * clip,const BoxRec * box,int coverage)2236 tor_blt_clipped(struct sna *sna,
2237 struct sna_composite_spans_op *op,
2238 pixman_region16_t *clip,
2239 const BoxRec *box,
2240 int coverage)
2241 {
2242 struct clipped_span *cs = (struct clipped_span *)clip;
2243 const BoxRec *c;
2244
2245 cs->clip_start =
2246 find_clip_box_for_y(cs->clip_start, cs->clip_end, box->y1);
2247
2248 c = cs->clip_start;
2249 while (c != cs->clip_end) {
2250 BoxRec clipped;
2251
2252 if (box->y2 <= c->y1)
2253 break;
2254
2255 clipped = *box;
2256 if (!box_intersect(&clipped, c++))
2257 continue;
2258
2259 cs->span(sna, op, NULL, &clipped, coverage);
2260 }
2261 }
2262
2263 inline static span_func_t
clipped_span(struct clipped_span * cs,span_func_t span,const RegionRec * clip)2264 clipped_span(struct clipped_span *cs,
2265 span_func_t span,
2266 const RegionRec *clip)
2267 {
2268 if (clip->data) {
2269 cs->span = span;
2270 region_get_boxes(clip, &cs->clip_start, &cs->clip_end);
2271 span = tor_blt_clipped;
2272 }
2273 return span;
2274 }
2275
2276 static void
tor_blt_src(struct sna * sna,struct sna_composite_spans_op * op,pixman_region16_t * clip,const BoxRec * box,int coverage)2277 tor_blt_src(struct sna *sna,
2278 struct sna_composite_spans_op *op,
2279 pixman_region16_t *clip,
2280 const BoxRec *box,
2281 int coverage)
2282 {
2283 struct inplace *in = (struct inplace *)op;
2284
2285 _tor_blt_src(in, box, coverage_opacity(coverage, in->opacity));
2286 }
2287
2288 static void
tor_blt_in(struct sna * sna,struct sna_composite_spans_op * op,pixman_region16_t * clip,const BoxRec * box,int coverage)2289 tor_blt_in(struct sna *sna,
2290 struct sna_composite_spans_op *op,
2291 pixman_region16_t *clip,
2292 const BoxRec *box,
2293 int coverage)
2294 {
2295 struct inplace *in = (struct inplace *)op;
2296 uint8_t *ptr = in->ptr;
2297 int h, w, i;
2298
2299 if (coverage == 0) {
2300 _tor_blt_src(in, box, 0);
2301 return;
2302 }
2303
2304 coverage = coverage_opacity(coverage, in->opacity);
2305 if (coverage == 0xff)
2306 return;
2307
2308 ptr += box->y1 * in->stride + box->x1;
2309
2310 h = box->y2 - box->y1;
2311 w = box->x2 - box->x1;
2312 do {
2313 for (i = 0; i < w; i++)
2314 ptr[i] = mul_8_8(ptr[i], coverage);
2315 ptr += in->stride;
2316 } while (--h);
2317 }
2318
2319 static void
tor_blt_add(struct sna * sna,struct sna_composite_spans_op * op,pixman_region16_t * clip,const BoxRec * box,int coverage)2320 tor_blt_add(struct sna *sna,
2321 struct sna_composite_spans_op *op,
2322 pixman_region16_t *clip,
2323 const BoxRec *box,
2324 int coverage)
2325 {
2326 struct inplace *in = (struct inplace *)op;
2327 uint8_t *ptr = in->ptr;
2328 int h, w, v, i;
2329
2330 if (coverage == 0)
2331 return;
2332
2333 coverage = coverage_opacity(coverage, in->opacity);
2334 if (coverage == 0xff) {
2335 _tor_blt_src(in, box, 0xff);
2336 return;
2337 }
2338
2339 ptr += box->y1 * in->stride + box->x1;
2340
2341 h = box->y2 - box->y1;
2342 w = box->x2 - box->x1;
2343 if ((w | h) == 1) {
2344 v = coverage + *ptr;
2345 *ptr = v >= 255 ? 255 : v;
2346 } else {
2347 do {
2348 for (i = 0; i < w; i++) {
2349 v = coverage + ptr[i];
2350 ptr[i] = v >= 255 ? 255 : v;
2351 }
2352 ptr += in->stride;
2353 } while (--h);
2354 }
2355 }
2356
2357 static void
tor_blt_lerp32(struct sna * sna,struct sna_composite_spans_op * op,pixman_region16_t * clip,const BoxRec * box,int coverage)2358 tor_blt_lerp32(struct sna *sna,
2359 struct sna_composite_spans_op *op,
2360 pixman_region16_t *clip,
2361 const BoxRec *box,
2362 int coverage)
2363 {
2364 struct inplace *in = (struct inplace *)op;
2365 uint32_t *ptr = (uint32_t *)in->ptr;
2366 int stride = in->stride / sizeof(uint32_t);
2367 int h, w, i;
2368
2369 if (coverage == 0)
2370 return;
2371
2372 sigtrap_assert_active();
2373 ptr += box->y1 * stride + box->x1;
2374
2375 h = box->y2 - box->y1;
2376 w = box->x2 - box->x1;
2377 if (coverage == FAST_SAMPLES_XY) {
2378 if ((w | h) == 1) {
2379 *ptr = in->color;
2380 } else {
2381 if (w < 16) {
2382 do {
2383 for (i = 0; i < w; i++)
2384 ptr[i] = in->color;
2385 ptr += stride;
2386 } while (--h);
2387 } else {
2388 pixman_fill(ptr, stride, 32,
2389 0, 0, w, h, in->color);
2390 }
2391 }
2392 } else {
2393 coverage = coverage * 256 / FAST_SAMPLES_XY;
2394 coverage -= coverage >> 8;
2395
2396 if ((w | h) == 1) {
2397 *ptr = lerp8x4(in->color, coverage, *ptr);
2398 } else if (w == 1) {
2399 do {
2400 *ptr = lerp8x4(in->color, coverage, *ptr);
2401 ptr += stride;
2402 } while (--h);
2403 } else{
2404 do {
2405 for (i = 0; i < w; i++)
2406 ptr[i] = lerp8x4(in->color, coverage, ptr[i]);
2407 ptr += stride;
2408 } while (--h);
2409 }
2410 }
2411 }
2412
/* State for compositing spans through pixman using a 1x1 repeating
 * source or mask image whose single texel is rewritten per span. */
struct pixman_inplace {
	pixman_image_t *image, *source, *mask;	/* dst, src and mask images */
	uint32_t color;		/* solid source colour (ARGB) */
	uint32_t *bits;		/* the 1x1 texel rewritten for each span;
				 * NOTE(review): the solid path points this at
				 * &sx, reusing that storage as pixel data */
	int dx, dy;		/* drawable -> dst-image translation */
	int sx, sy;		/* drawable -> src-image translation */
	uint8_t op;		/* Render/pixman operator */
};
2421
2422 static void
pixmask_span_solid(struct sna * sna,struct sna_composite_spans_op * op,pixman_region16_t * clip,const BoxRec * box,int coverage)2423 pixmask_span_solid(struct sna *sna,
2424 struct sna_composite_spans_op *op,
2425 pixman_region16_t *clip,
2426 const BoxRec *box,
2427 int coverage)
2428 {
2429 struct pixman_inplace *pi = (struct pixman_inplace *)op;
2430 if (coverage != FAST_SAMPLES_XY) {
2431 coverage = coverage * 256 / FAST_SAMPLES_XY;
2432 coverage -= coverage >> 8;
2433 *pi->bits = mul_4x8_8(pi->color, coverage);
2434 } else
2435 *pi->bits = pi->color;
2436 pixman_image_composite(pi->op, pi->source, NULL, pi->image,
2437 box->x1, box->y1,
2438 0, 0,
2439 pi->dx + box->x1, pi->dy + box->y1,
2440 box->x2 - box->x1, box->y2 - box->y1);
2441 }
2442
2443 static void
pixmask_span(struct sna * sna,struct sna_composite_spans_op * op,pixman_region16_t * clip,const BoxRec * box,int coverage)2444 pixmask_span(struct sna *sna,
2445 struct sna_composite_spans_op *op,
2446 pixman_region16_t *clip,
2447 const BoxRec *box,
2448 int coverage)
2449 {
2450 struct pixman_inplace *pi = (struct pixman_inplace *)op;
2451 pixman_image_t *mask = NULL;
2452 if (coverage != FAST_SAMPLES_XY) {
2453 coverage = coverage * 256 / FAST_SAMPLES_XY;
2454 coverage -= coverage >> 8;
2455 *pi->bits = coverage;
2456 mask = pi->mask;
2457 }
2458 pixman_image_composite(pi->op, pi->source, mask, pi->image,
2459 pi->sx + box->x1, pi->sy + box->y1,
2460 0, 0,
2461 pi->dx + box->x1, pi->dy + box->y1,
2462 box->x2 - box->x1, box->y2 - box->y1);
2463 }
2464
/* Arguments handed to each inplace_x8r8g8b8_thread() worker. */
struct inplace_x8r8g8b8_thread {
	xTrapezoid *traps;	/* all trapezoids; workers filter by band */
	PicturePtr dst, src;
	BoxRec extents;		/* this worker's y-band of the render extents */
	int dx, dy;		/* drawable offset in FAST_SAMPLES units */
	int ntrap;
	bool lerp, is_solid;	/* which of the three render paths to take */
	uint32_t color;		/* solid source colour when is_solid */
	int16_t src_x, src_y;
	uint8_t op;
};
2476
/* Per-thread worker for trapezoid_span_inplace__x8r8g8b8(): rasterises
 * the trapezoids that overlap this thread's y-band, then renders the
 * spans via the lerp, solid-pixman or general-pixman path. */
static void inplace_x8r8g8b8_thread(void *arg)
{
	struct inplace_x8r8g8b8_thread *thread = arg;
	struct tor tor;
	span_func_t span;
	struct clipped_span clipped;
	RegionPtr clip;
	int y1, y2, n;

	if (!tor_init(&tor, &thread->extents, 2*thread->ntrap))
		return;

	/* Band limits converted into drawable-relative coordinates so they
	 * can be compared against the trapezoid fixed-point y values. */
	y1 = thread->extents.y1 - thread->dst->pDrawable->y;
	y2 = thread->extents.y2 - thread->dst->pDrawable->y;
	for (n = 0; n < thread->ntrap; n++) {
		/* Skip trapezoids entirely outside this band. */
		if (pixman_fixed_to_int(thread->traps[n].top) >= y2 ||
		    pixman_fixed_to_int(thread->traps[n].bottom) < y1)
			continue;

		tor_add_trapezoid(&tor, &thread->traps[n], thread->dx, thread->dy);
	}

	clip = thread->dst->pCompositeClip;
	if (thread->lerp) {
		/* Solid SRC: lerp directly into the mapped pixels. */
		struct inplace inplace;
		int16_t dst_x, dst_y;
		PixmapPtr pixmap;

		pixmap = get_drawable_pixmap(thread->dst->pDrawable);

		inplace.ptr = pixmap->devPrivate.ptr;
		if (get_drawable_deltas(thread->dst->pDrawable, pixmap, &dst_x, &dst_y))
			inplace.ptr += dst_y * pixmap->devKind + dst_x * 4;
		inplace.stride = pixmap->devKind;
		inplace.color = thread->color;

		span = clipped_span(&clipped, tor_blt_lerp32, clip);

		tor_render(NULL, &tor,
			   (void*)&inplace, (void*)&clipped,
			   span, false);
	} else if (thread->is_solid) {
		/* Solid colour but a non-SRC op: composite each span via
		 * pixman with a repeating 1x1 source. */
		struct pixman_inplace pi;

		pi.image = image_from_pict(thread->dst, false, &pi.dx, &pi.dy);
		pi.op = thread->op;
		pi.color = thread->color;

		/* The 1x1 source texel lives in the otherwise unused sx/sy. */
		pi.bits = (uint32_t *)&pi.sx;
		pi.source = pixman_image_create_bits(PIXMAN_a8r8g8b8,
						     1, 1, pi.bits, 0);
		pixman_image_set_repeat(pi.source, PIXMAN_REPEAT_NORMAL);

		span = clipped_span(&clipped, pixmask_span_solid, clip);

		tor_render(NULL, &tor,
			   (void*)&pi, (void *)&clipped,
			   span, false);

		pixman_image_unref(pi.source);
		pixman_image_unref(pi.image);
	} else {
		/* General source: composite through a repeating 1x1 a8 mask. */
		struct pixman_inplace pi;
		int16_t x0, y0;

		trapezoid_origin(&thread->traps[0].left, &x0, &y0);

		pi.image = image_from_pict(thread->dst, false, &pi.dx, &pi.dy);
		pi.source = image_from_pict(thread->src, false, &pi.sx, &pi.sy);
		pi.sx += thread->src_x - x0;
		pi.sy += thread->src_y - y0;
		pi.mask = pixman_image_create_bits(PIXMAN_a8, 1, 1, NULL, 0);
		pixman_image_set_repeat(pi.mask, PIXMAN_REPEAT_NORMAL);
		pi.bits = pixman_image_get_data(pi.mask);
		pi.op = thread->op;

		span = clipped_span(&clipped, pixmask_span, clip);

		tor_render(NULL, &tor,
			   (void*)&pi, (void *)&clipped,
			   span, false);

		pixman_image_unref(pi.mask);
		pixman_image_unref(pi.source);
		pixman_image_unref(pi.image);
	}

	tor_fini(&tor);
}
2566
2567 static bool
trapezoid_span_inplace__x8r8g8b8(CARD8 op,PicturePtr dst,PicturePtr src,int16_t src_x,int16_t src_y,PictFormatPtr maskFormat,int ntrap,xTrapezoid * traps)2568 trapezoid_span_inplace__x8r8g8b8(CARD8 op,
2569 PicturePtr dst,
2570 PicturePtr src, int16_t src_x, int16_t src_y,
2571 PictFormatPtr maskFormat,
2572 int ntrap, xTrapezoid *traps)
2573 {
2574 uint32_t color;
2575 bool lerp, is_solid;
2576 RegionRec region;
2577 int dx, dy;
2578 int num_threads, n;
2579
2580 lerp = false;
2581 is_solid = sna_picture_is_solid(src, &color);
2582 if (is_solid) {
2583 if (op == PictOpOver && (color >> 24) == 0xff)
2584 op = PictOpSrc;
2585 if (op == PictOpOver && sna_drawable_is_clear(dst->pDrawable))
2586 op = PictOpSrc;
2587 lerp = op == PictOpSrc;
2588 }
2589 if (!lerp) {
2590 switch (op) {
2591 case PictOpOver:
2592 case PictOpAdd:
2593 case PictOpOutReverse:
2594 break;
2595 case PictOpSrc:
2596 if (!sna_drawable_is_clear(dst->pDrawable))
2597 return false;
2598 break;
2599 default:
2600 return false;
2601 }
2602 }
2603
2604 if (maskFormat == NULL && ntrap > 1) {
2605 DBG(("%s: individual rasterisation requested\n",
2606 __FUNCTION__));
2607 do {
2608 /* XXX unwind errors? */
2609 if (!trapezoid_span_inplace__x8r8g8b8(op, dst,
2610 src, src_x, src_y,
2611 NULL, 1, traps++))
2612 return false;
2613 } while (--ntrap);
2614 return true;
2615 }
2616
2617 if (!trapezoids_bounds(ntrap, traps, ®ion.extents))
2618 return true;
2619
2620 DBG(("%s: extents (%d, %d), (%d, %d)\n",
2621 __FUNCTION__,
2622 region.extents.x1, region.extents.y1,
2623 region.extents.x2, region.extents.y2));
2624
2625 if (!sna_compute_composite_extents(®ion.extents,
2626 src, NULL, dst,
2627 src_x, src_y,
2628 0, 0,
2629 region.extents.x1, region.extents.y1,
2630 region.extents.x2 - region.extents.x1,
2631 region.extents.y2 - region.extents.y1))
2632 return true;
2633
2634 DBG(("%s: clipped extents (%d, %d), (%d, %d)\n",
2635 __FUNCTION__,
2636 region.extents.x1, region.extents.y1,
2637 region.extents.x2, region.extents.y2));
2638
2639 region.data = NULL;
2640 if (!sna_drawable_move_region_to_cpu(dst->pDrawable, ®ion,
2641 MOVE_WRITE | MOVE_READ))
2642 return true;
2643
2644 if (!is_solid && src->pDrawable) {
2645 if (!sna_drawable_move_to_cpu(src->pDrawable,
2646 MOVE_READ))
2647 return true;
2648
2649 if (src->alphaMap &&
2650 !sna_drawable_move_to_cpu(src->alphaMap->pDrawable,
2651 MOVE_READ))
2652 return true;
2653 }
2654
2655 dx = dst->pDrawable->x * FAST_SAMPLES_X;
2656 dy = dst->pDrawable->y * FAST_SAMPLES_Y;
2657
2658 num_threads = sna_use_threads(4*(region.extents.x2 - region.extents.x1),
2659 region.extents.y2 - region.extents.y1,
2660 16);
2661
2662 DBG(("%s: %dx%d, format=%x, op=%d, lerp?=%d, num_threads=%d\n",
2663 __FUNCTION__,
2664 region.extents.x2 - region.extents.x1,
2665 region.extents.y2 - region.extents.y1,
2666 dst->format, op, lerp, num_threads));
2667
2668 if (num_threads == 1) {
2669 struct tor tor;
2670 span_func_t span;
2671 struct clipped_span clipped;
2672
2673 if (!tor_init(&tor, ®ion.extents, 2*ntrap))
2674 return true;
2675
2676 for (n = 0; n < ntrap; n++) {
2677 if (pixman_fixed_to_int(traps[n].top) >= region.extents.y2 - dst->pDrawable->y ||
2678 pixman_fixed_to_int(traps[n].bottom) < region.extents.y1 - dst->pDrawable->y)
2679 continue;
2680
2681 tor_add_trapezoid(&tor, &traps[n], dx, dy);
2682 }
2683
2684 if (lerp) {
2685 struct inplace inplace;
2686 PixmapPtr pixmap;
2687 int16_t dst_x, dst_y;
2688
2689 pixmap = get_drawable_pixmap(dst->pDrawable);
2690
2691 inplace.ptr = pixmap->devPrivate.ptr;
2692 if (get_drawable_deltas(dst->pDrawable, pixmap, &dst_x, &dst_y))
2693 inplace.ptr += dst_y * pixmap->devKind + dst_x * 4;
2694 inplace.stride = pixmap->devKind;
2695 inplace.color = color;
2696
2697 span = clipped_span(&clipped, tor_blt_lerp32, dst->pCompositeClip);
2698
2699 DBG(("%s: render inplace op=%d, color=%08x\n",
2700 __FUNCTION__, op, color));
2701
2702 if (sigtrap_get() == 0) {
2703 tor_render(NULL, &tor,
2704 (void*)&inplace, (void*)&clipped,
2705 span, false);
2706 sigtrap_put();
2707 }
2708 } else if (is_solid) {
2709 struct pixman_inplace pi;
2710
2711 pi.image = image_from_pict(dst, false, &pi.dx, &pi.dy);
2712 pi.op = op;
2713 pi.color = color;
2714
2715 pi.bits = (uint32_t *)&pi.sx;
2716 pi.source = pixman_image_create_bits(PIXMAN_a8r8g8b8,
2717 1, 1, pi.bits, 0);
2718 pixman_image_set_repeat(pi.source, PIXMAN_REPEAT_NORMAL);
2719
2720 span = clipped_span(&clipped, pixmask_span_solid, dst->pCompositeClip);
2721
2722 if (sigtrap_get() == 0) {
2723 tor_render(NULL, &tor,
2724 (void*)&pi, (void*)&clipped,
2725 span, false);
2726 sigtrap_put();
2727 }
2728
2729 pixman_image_unref(pi.source);
2730 pixman_image_unref(pi.image);
2731 } else {
2732 struct pixman_inplace pi;
2733 int16_t x0, y0;
2734
2735 trapezoid_origin(&traps[0].left, &x0, &y0);
2736
2737 pi.image = image_from_pict(dst, false, &pi.dx, &pi.dy);
2738 pi.source = image_from_pict(src, false, &pi.sx, &pi.sy);
2739 pi.sx += src_x - x0;
2740 pi.sy += src_y - y0;
2741 pi.mask = pixman_image_create_bits(PIXMAN_a8, 1, 1, NULL, 0);
2742 pixman_image_set_repeat(pi.mask, PIXMAN_REPEAT_NORMAL);
2743 pi.bits = pixman_image_get_data(pi.mask);
2744 pi.op = op;
2745
2746 span = clipped_span(&clipped, pixmask_span, dst->pCompositeClip);
2747
2748 if (sigtrap_get() == 0) {
2749 tor_render(NULL, &tor,
2750 (void*)&pi, (void*)&clipped,
2751 span, false);
2752 sigtrap_put();
2753 }
2754
2755 pixman_image_unref(pi.mask);
2756 pixman_image_unref(pi.source);
2757 pixman_image_unref(pi.image);
2758 }
2759
2760 tor_fini(&tor);
2761 } else {
2762 struct inplace_x8r8g8b8_thread threads[num_threads];
2763 int y, h;
2764
2765 DBG(("%s: using %d threads for inplace compositing %dx%d\n",
2766 __FUNCTION__, num_threads,
2767 region.extents.x2 - region.extents.x1,
2768 region.extents.y2 - region.extents.y1));
2769
2770 threads[0].traps = traps;
2771 threads[0].ntrap = ntrap;
2772 threads[0].extents = region.extents;
2773 threads[0].lerp = lerp;
2774 threads[0].is_solid = is_solid;
2775 threads[0].color = color;
2776 threads[0].dx = dx;
2777 threads[0].dy = dy;
2778 threads[0].dst = dst;
2779 threads[0].src = src;
2780 threads[0].op = op;
2781 threads[0].src_x = src_x;
2782 threads[0].src_y = src_y;
2783
2784 y = region.extents.y1;
2785 h = region.extents.y2 - region.extents.y1;
2786 h = (h + num_threads - 1) / num_threads;
2787 num_threads -= (num_threads-1) * h >= region.extents.y2 - region.extents.y1;
2788
2789 if (sigtrap_get() == 0) {
2790 for (n = 1; n < num_threads; n++) {
2791 threads[n] = threads[0];
2792 threads[n].extents.y1 = y;
2793 threads[n].extents.y2 = y += h;
2794
2795 sna_threads_run(n, inplace_x8r8g8b8_thread, &threads[n]);
2796 }
2797
2798 assert(y < threads[0].extents.y2);
2799 threads[0].extents.y1 = y;
2800 inplace_x8r8g8b8_thread(&threads[0]);
2801
2802 sna_threads_wait();
2803 sigtrap_put();
2804 } else
2805 sna_threads_kill(); /* leaks thread allocations */
2806 }
2807
2808 return true;
2809 }
2810
/* Arguments handed to each inplace_thread() worker for a8 destinations. */
struct inplace_thread {
	xTrapezoid *traps;	/* all trapezoids; workers filter by band */
	span_func_t span;	/* span emitter (src/in/add, maybe clipped) */
	struct inplace inplace;	/* destination pixels + opacity */
	struct clipped_span clipped;	/* clip state for tor_blt_clipped */
	BoxRec extents;		/* this worker's y-band */
	int dx, dy;		/* drawable offset in FAST_SAMPLES units */
	int draw_x, draw_y;	/* drawable origin in pixels */
	bool unbounded;		/* render zero-coverage spans too */
	int ntrap;
};
2822
inplace_thread(void * arg)2823 static void inplace_thread(void *arg)
2824 {
2825 struct inplace_thread *thread = arg;
2826 struct tor tor;
2827 int n;
2828
2829 if (!tor_init(&tor, &thread->extents, 2*thread->ntrap))
2830 return;
2831
2832 for (n = 0; n < thread->ntrap; n++) {
2833 if (pixman_fixed_to_int(thread->traps[n].top) >= thread->extents.y2 - thread->draw_y ||
2834 pixman_fixed_to_int(thread->traps[n].bottom) < thread->extents.y1 - thread->draw_y)
2835 continue;
2836
2837 tor_add_trapezoid(&tor, &thread->traps[n], thread->dx, thread->dy);
2838 }
2839
2840 tor_render(NULL, &tor,
2841 (void*)&thread->inplace, (void*)&thread->clipped,
2842 thread->span, thread->unbounded);
2843
2844 tor_fini(&tor);
2845 }
2846
2847 bool
imprecise_trapezoid_span_inplace(struct sna * sna,CARD8 op,PicturePtr src,PicturePtr dst,PictFormatPtr maskFormat,unsigned flags,INT16 src_x,INT16 src_y,int ntrap,xTrapezoid * traps,bool fallback)2848 imprecise_trapezoid_span_inplace(struct sna *sna,
2849 CARD8 op, PicturePtr src, PicturePtr dst,
2850 PictFormatPtr maskFormat, unsigned flags,
2851 INT16 src_x, INT16 src_y,
2852 int ntrap, xTrapezoid *traps,
2853 bool fallback)
2854 {
2855 struct inplace inplace;
2856 struct clipped_span clipped;
2857 span_func_t span;
2858 PixmapPtr pixmap;
2859 struct sna_pixmap *priv;
2860 RegionRec region;
2861 uint32_t color;
2862 bool unbounded;
2863 int16_t dst_x, dst_y;
2864 int dx, dy;
2865 int num_threads, n;
2866
2867 if (NO_IMPRECISE)
2868 return false;
2869
2870 if (dst->format == PICT_a8r8g8b8 || dst->format == PICT_x8r8g8b8)
2871 return trapezoid_span_inplace__x8r8g8b8(op, dst,
2872 src, src_x, src_y,
2873 maskFormat,
2874 ntrap, traps);
2875
2876 if (!sna_picture_is_solid(src, &color)) {
2877 DBG(("%s: fallback -- can not perform operation in place, requires solid source\n",
2878 __FUNCTION__));
2879 return false;
2880 }
2881
2882 if (dst->format != PICT_a8) {
2883 DBG(("%s: fallback -- can not perform operation in place, format=%x\n",
2884 __FUNCTION__, dst->format));
2885 return false;
2886 }
2887
2888 pixmap = get_drawable_pixmap(dst->pDrawable);
2889
2890 unbounded = false;
2891 priv = sna_pixmap(pixmap);
2892 if (priv) {
2893 switch (op) {
2894 case PictOpAdd:
2895 if (priv->clear && priv->clear_color == 0) {
2896 unbounded = true;
2897 op = PictOpSrc;
2898 }
2899 if ((color >> 24) == 0)
2900 return true;
2901 break;
2902 case PictOpIn:
2903 if (priv->clear && priv->clear_color == 0)
2904 return true;
2905 if (priv->clear && priv->clear_color == 0xff)
2906 op = PictOpSrc;
2907 unbounded = true;
2908 break;
2909 case PictOpSrc:
2910 unbounded = true;
2911 break;
2912 default:
2913 DBG(("%s: fallback -- can not perform op [%d] in place\n",
2914 __FUNCTION__, op));
2915 return false;
2916 }
2917 } else {
2918 switch (op) {
2919 case PictOpAdd:
2920 if ((color >> 24) == 0)
2921 return true;
2922 break;
2923 case PictOpIn:
2924 case PictOpSrc:
2925 unbounded = true;
2926 break;
2927 default:
2928 DBG(("%s: fallback -- can not perform op [%d] in place\n",
2929 __FUNCTION__, op));
2930 return false;
2931 }
2932 }
2933
2934 DBG(("%s: format=%x, op=%d, color=%x\n",
2935 __FUNCTION__, dst->format, op, color));
2936
2937 if (maskFormat == NULL && ntrap > 1) {
2938 DBG(("%s: individual rasterisation requested\n",
2939 __FUNCTION__));
2940 do {
2941 /* XXX unwind errors? */
2942 if (!imprecise_trapezoid_span_inplace(sna, op, src, dst, NULL, flags,
2943 src_x, src_y, 1, traps++,
2944 fallback))
2945 return false;
2946 } while (--ntrap);
2947 return true;
2948 }
2949
2950 if (!trapezoids_bounds(ntrap, traps, ®ion.extents))
2951 return true;
2952
2953 DBG(("%s: extents (%d, %d), (%d, %d)\n",
2954 __FUNCTION__,
2955 region.extents.x1, region.extents.y1,
2956 region.extents.x2, region.extents.y2));
2957
2958 if (!sna_compute_composite_extents(®ion.extents,
2959 NULL, NULL, dst,
2960 0, 0,
2961 0, 0,
2962 region.extents.x1, region.extents.y1,
2963 region.extents.x2 - region.extents.x1,
2964 region.extents.y2 - region.extents.y1))
2965 return true;
2966
2967 DBG(("%s: clipped extents (%d, %d), (%d, %d)\n",
2968 __FUNCTION__,
2969 region.extents.x1, region.extents.y1,
2970 region.extents.x2, region.extents.y2));
2971
2972 if (op == PictOpSrc) {
2973 span = tor_blt_src;
2974 } else if (op == PictOpIn) {
2975 span = tor_blt_in;
2976 } else {
2977 assert(op == PictOpAdd);
2978 span = tor_blt_add;
2979 }
2980
2981 DBG(("%s: move-to-cpu\n", __FUNCTION__));
2982 region.data = NULL;
2983 if (!sna_drawable_move_region_to_cpu(dst->pDrawable, ®ion,
2984 op == PictOpSrc ? MOVE_WRITE | MOVE_INPLACE_HINT : MOVE_WRITE | MOVE_READ))
2985 return true;
2986
2987 dx = dst->pDrawable->x * FAST_SAMPLES_X;
2988 dy = dst->pDrawable->y * FAST_SAMPLES_Y;
2989
2990 inplace.ptr = pixmap->devPrivate.ptr;
2991 if (get_drawable_deltas(dst->pDrawable, pixmap, &dst_x, &dst_y))
2992 inplace.ptr += dst_y * pixmap->devKind + dst_x;
2993 inplace.stride = pixmap->devKind;
2994 inplace.opacity = color >> 24;
2995
2996 span = clipped_span(&clipped, span, dst->pCompositeClip);
2997
2998 num_threads = 1;
2999 if ((flags & COMPOSITE_SPANS_RECTILINEAR) == 0)
3000 num_threads = sna_use_threads(region.extents.x2 - region.extents.x1,
3001 region.extents.y2 - region.extents.y1,
3002 16);
3003 if (num_threads == 1) {
3004 struct tor tor;
3005
3006 if (!tor_init(&tor, ®ion.extents, 2*ntrap))
3007 return true;
3008
3009 for (n = 0; n < ntrap; n++) {
3010 if (pixman_fixed_to_int(traps[n].top) >= region.extents.y2 - dst->pDrawable->y ||
3011 pixman_fixed_to_int(traps[n].bottom) < region.extents.y1 - dst->pDrawable->y)
3012 continue;
3013
3014 tor_add_trapezoid(&tor, &traps[n], dx, dy);
3015 }
3016
3017 if (sigtrap_get() == 0) {
3018 tor_render(NULL, &tor,
3019 (void*)&inplace, (void *)&clipped,
3020 span, unbounded);
3021 sigtrap_put();
3022 }
3023
3024 tor_fini(&tor);
3025 } else {
3026 struct inplace_thread threads[num_threads];
3027 int y, h;
3028
3029 DBG(("%s: using %d threads for inplace compositing %dx%d\n",
3030 __FUNCTION__, num_threads,
3031 region.extents.x2 - region.extents.x1,
3032 region.extents.y2 - region.extents.y1));
3033
3034 threads[0].traps = traps;
3035 threads[0].ntrap = ntrap;
3036 threads[0].inplace = inplace;
3037 threads[0].clipped = clipped;
3038 threads[0].extents = region.extents;
3039 threads[0].span = span;
3040 threads[0].unbounded = unbounded;
3041 threads[0].dx = dx;
3042 threads[0].dy = dy;
3043 threads[0].draw_x = dst->pDrawable->x;
3044 threads[0].draw_y = dst->pDrawable->y;
3045
3046 y = region.extents.y1;
3047 h = region.extents.y2 - region.extents.y1;
3048 h = (h + num_threads - 1) / num_threads;
3049 num_threads -= (num_threads-1) * h >= region.extents.y2 - region.extents.y1;
3050
3051 if (sigtrap_get() == 0) {
3052 for (n = 1; n < num_threads; n++) {
3053 threads[n] = threads[0];
3054 threads[n].extents.y1 = y;
3055 threads[n].extents.y2 = y += h;
3056
3057 sna_threads_run(n, inplace_thread, &threads[n]);
3058 }
3059
3060 assert(y < threads[0].extents.y2);
3061 threads[0].extents.y1 = y;
3062 inplace_thread(&threads[0]);
3063
3064 sna_threads_wait();
3065 sigtrap_put();
3066 } else
3067 sna_threads_kill(); /* leaks thread allocations */
3068 }
3069
3070 return true;
3071 }
3072
3073 bool
imprecise_trapezoid_span_fallback(CARD8 op,PicturePtr src,PicturePtr dst,PictFormatPtr maskFormat,unsigned flags,INT16 src_x,INT16 src_y,int ntrap,xTrapezoid * traps)3074 imprecise_trapezoid_span_fallback(CARD8 op, PicturePtr src, PicturePtr dst,
3075 PictFormatPtr maskFormat, unsigned flags,
3076 INT16 src_x, INT16 src_y,
3077 int ntrap, xTrapezoid *traps)
3078 {
3079 struct tor tor;
3080 ScreenPtr screen = dst->pDrawable->pScreen;
3081 PixmapPtr scratch;
3082 PicturePtr mask;
3083 BoxRec extents;
3084 int16_t dst_x, dst_y;
3085 int dx, dy;
3086 int error, n;
3087
3088 if (NO_IMPRECISE)
3089 return false;
3090
3091 if (maskFormat == NULL && ntrap > 1) {
3092 DBG(("%s: individual rasterisation requested\n",
3093 __FUNCTION__));
3094 do {
3095 /* XXX unwind errors? */
3096 if (!imprecise_trapezoid_span_fallback(op, src, dst, NULL, flags,
3097 src_x, src_y, 1, traps++))
3098 return false;
3099 } while (--ntrap);
3100 return true;
3101 }
3102
3103 if (!trapezoids_bounds(ntrap, traps, &extents))
3104 return true;
3105
3106 DBG(("%s: ntraps=%d, extents (%d, %d), (%d, %d)\n",
3107 __FUNCTION__, ntrap, extents.x1, extents.y1, extents.x2, extents.y2));
3108
3109 if (!sna_compute_composite_extents(&extents,
3110 src, NULL, dst,
3111 src_x, src_y,
3112 0, 0,
3113 extents.x1, extents.y1,
3114 extents.x2 - extents.x1,
3115 extents.y2 - extents.y1))
3116 return true;
3117
3118 DBG(("%s: extents (%d, %d), (%d, %d)\n",
3119 __FUNCTION__, extents.x1, extents.y1, extents.x2, extents.y2));
3120
3121 extents.y2 -= extents.y1;
3122 extents.x2 -= extents.x1;
3123 extents.x1 -= dst->pDrawable->x;
3124 extents.y1 -= dst->pDrawable->y;
3125 dst_x = extents.x1;
3126 dst_y = extents.y1;
3127 dx = -extents.x1 * FAST_SAMPLES_X;
3128 dy = -extents.y1 * FAST_SAMPLES_Y;
3129 extents.x1 = extents.y1 = 0;
3130
3131 DBG(("%s: mask (%dx%d), dx=(%d, %d)\n",
3132 __FUNCTION__, extents.x2, extents.y2, dx, dy));
3133 scratch = sna_pixmap_create_unattached(screen,
3134 extents.x2, extents.y2, 8);
3135 if (!scratch)
3136 return true;
3137
3138 DBG(("%s: created buffer %p, stride %d\n",
3139 __FUNCTION__, scratch->devPrivate.ptr, scratch->devKind));
3140
3141 if (!tor_init(&tor, &extents, 2*ntrap)) {
3142 sna_pixmap_destroy(scratch);
3143 return true;
3144 }
3145
3146 for (n = 0; n < ntrap; n++) {
3147 if (pixman_fixed_to_int(traps[n].top) - dst_y >= extents.y2 ||
3148 pixman_fixed_to_int(traps[n].bottom) - dst_y < 0)
3149 continue;
3150
3151 tor_add_trapezoid(&tor, &traps[n], dx, dy);
3152 }
3153
3154 if (extents.x2 <= TOR_INPLACE_SIZE) {
3155 tor_inplace(&tor, scratch, is_mono(dst, maskFormat), NULL);
3156 } else {
3157 tor_render(NULL, &tor,
3158 scratch->devPrivate.ptr,
3159 (void *)(intptr_t)scratch->devKind,
3160 is_mono(dst, maskFormat) ? tor_blt_mask_mono : tor_blt_mask,
3161 true);
3162 }
3163 tor_fini(&tor);
3164
3165 mask = CreatePicture(0, &scratch->drawable,
3166 PictureMatchFormat(screen, 8, PICT_a8),
3167 0, 0, serverClient, &error);
3168 if (mask) {
3169 RegionRec region;
3170 int16_t x0, y0;
3171
3172 region.extents.x1 = dst_x + dst->pDrawable->x;
3173 region.extents.y1 = dst_y + dst->pDrawable->y;
3174 region.extents.x2 = region.extents.x1 + extents.x2;
3175 region.extents.y2 = region.extents.y1 + extents.y2;
3176 region.data = NULL;
3177
3178 trapezoid_origin(&traps[0].left, &x0, &y0);
3179
3180 DBG(("%s: fbComposite()\n", __FUNCTION__));
3181 sna_composite_fb(op, src, mask, dst, ®ion,
3182 src_x + dst_x - x0, src_y + dst_y - y0,
3183 0, 0,
3184 dst_x, dst_y,
3185 extents.x2, extents.y2);
3186
3187 FreePicture(mask, 0);
3188 }
3189 sna_pixmap_destroy(scratch);
3190
3191 return true;
3192 }
3193
3194 bool
imprecise_trap_span_converter(struct sna * sna,PicturePtr dst,INT16 src_x,INT16 src_y,int ntrap,xTrap * trap)3195 imprecise_trap_span_converter(struct sna *sna,
3196 PicturePtr dst,
3197 INT16 src_x, INT16 src_y,
3198 int ntrap, xTrap *trap)
3199 {
3200 struct sna_composite_spans_op tmp;
3201 struct tor tor;
3202 BoxRec extents;
3203 pixman_region16_t *clip;
3204 int dx, dy, n;
3205
3206 if (dst->pDrawable->depth < 8)
3207 return false;
3208
3209 if (!sna->render.check_composite_spans(sna, PictOpAdd, sna->render.white_picture, dst,
3210 dst->pCompositeClip->extents.x2 - dst->pCompositeClip->extents.x1,
3211 dst->pCompositeClip->extents.y2 - dst->pCompositeClip->extents.y1,
3212 0)) {
3213 DBG(("%s: fallback -- composite spans not supported\n",
3214 __FUNCTION__));
3215 return false;
3216 }
3217
3218 clip = dst->pCompositeClip;
3219 extents = *RegionExtents(clip);
3220 dx = dst->pDrawable->x;
3221 dy = dst->pDrawable->y;
3222
3223 DBG(("%s: after clip -- extents (%d, %d), (%d, %d), delta=(%d, %d)\n",
3224 __FUNCTION__,
3225 extents.x1, extents.y1,
3226 extents.x2, extents.y2,
3227 dx, dy));
3228
3229 memset(&tmp, 0, sizeof(tmp));
3230 if (!sna->render.composite_spans(sna, PictOpAdd, sna->render.white_picture, dst,
3231 0, 0,
3232 extents.x1, extents.y1,
3233 extents.x2 - extents.x1,
3234 extents.y2 - extents.y1,
3235 0,
3236 &tmp)) {
3237 DBG(("%s: fallback -- composite spans render op not supported\n",
3238 __FUNCTION__));
3239 return false;
3240 }
3241
3242 dx *= FAST_SAMPLES_X;
3243 dy *= FAST_SAMPLES_Y;
3244 if (!tor_init(&tor, &extents, 2*ntrap))
3245 goto skip;
3246
3247 for (n = 0; n < ntrap; n++) {
3248 xPointFixed p1, p2;
3249
3250 if (pixman_fixed_to_int(trap[n].top.y) + dst->pDrawable->y >= extents.y2 ||
3251 pixman_fixed_to_int(trap[n].bot.y) + dst->pDrawable->y < extents.y1)
3252 continue;
3253
3254 p1.y = trap[n].top.y;
3255 p2.y = trap[n].bot.y;
3256 p1.x = trap[n].top.l;
3257 p2.x = trap[n].bot.l;
3258 polygon_add_line(tor.polygon, &p1, &p2, dx, dy);
3259
3260 p1.y = trap[n].bot.y;
3261 p2.y = trap[n].top.y;
3262 p1.x = trap[n].top.r;
3263 p2.x = trap[n].bot.r;
3264 polygon_add_line(tor.polygon, &p1, &p2, dx, dy);
3265 }
3266
3267 tor_render(sna, &tor, &tmp, clip,
3268 choose_span(&tmp, dst, NULL, clip), false);
3269
3270 tor_fini(&tor);
3271 skip:
3272 tmp.done(sna, &tmp);
3273 return true;
3274 }
3275
/* Translate box by (x, y) into pixmap space and record it as GPU damage,
 * retiring any overlapping CPU damage. */
static void mark_damaged(PixmapPtr pixmap, struct sna_pixmap *priv,
			 BoxPtr box, int16_t x, int16_t y)
{
	bool whole;

	box->x1 += x;
	box->x2 += x;
	box->y1 += y;
	box->y2 += y;

	whole = box->x1 <= 0 && box->y1 <= 0 &&
		box->x2 >= pixmap->drawable.width &&
		box->y2 >= pixmap->drawable.height;
	if (whole) {
		/* The box covers the whole pixmap: everything is GPU-valid. */
		sna_damage_destroy(&priv->cpu_damage);
		sna_damage_all(&priv->gpu_damage, pixmap);
		list_del(&priv->flush_list);
	} else {
		sna_damage_add_box(&priv->gpu_damage, box);
		sna_damage_subtract_box(&priv->cpu_damage, box);
	}
}
3292
3293 bool
trap_mask_converter(struct sna * sna,PicturePtr picture,INT16 x,INT16 y,int ntrap,xTrap * trap)3294 trap_mask_converter(struct sna *sna,
3295 PicturePtr picture,
3296 INT16 x, INT16 y,
3297 int ntrap, xTrap *trap)
3298 {
3299 struct tor tor;
3300 ScreenPtr screen = picture->pDrawable->pScreen;
3301 PixmapPtr scratch, pixmap;
3302 struct sna_pixmap *priv;
3303 BoxRec extents;
3304 span_func_t span;
3305 int dx, dy, n;
3306
3307 if (NO_SCAN_CONVERTER)
3308 return false;
3309
3310 pixmap = get_drawable_pixmap(picture->pDrawable);
3311 priv = sna_pixmap_move_to_gpu(pixmap, MOVE_READ | MOVE_WRITE);
3312 if (priv == NULL)
3313 return false;
3314
3315 /* XXX strict adherence to the Render specification */
3316 if (picture->polyMode == PolyModePrecise &&
3317 picture->polyEdge != PolyEdgeSharp) {
3318 DBG(("%s: fallback -- precise rasterisation requested\n",
3319 __FUNCTION__));
3320 return false;
3321 }
3322
3323 extents = *RegionExtents(picture->pCompositeClip);
3324 for (n = 0; n < ntrap; n++) {
3325 int v;
3326
3327 v = x + pixman_fixed_integer_floor (MIN(trap[n].top.l, trap[n].bot.l));
3328 if (v < extents.x1)
3329 extents.x1 = v;
3330
3331 v = x + pixman_fixed_integer_ceil (MAX(trap[n].top.r, trap[n].bot.r));
3332 if (v > extents.x2)
3333 extents.x2 = v;
3334
3335 v = y + pixman_fixed_integer_floor (trap[n].top.y);
3336 if (v < extents.y1)
3337 extents.y1 = v;
3338
3339 v = y + pixman_fixed_integer_ceil (trap[n].bot.y);
3340 if (v > extents.y2)
3341 extents.y2 = v;
3342 }
3343
3344 DBG(("%s: extents (%d, %d), (%d, %d)\n",
3345 __FUNCTION__, extents.x1, extents.y1, extents.x2, extents.y2));
3346
3347 scratch = sna_pixmap_create_upload(screen,
3348 extents.x2-extents.x1,
3349 extents.y2-extents.y1,
3350 8, KGEM_BUFFER_WRITE_INPLACE);
3351 if (!scratch)
3352 return true;
3353
3354 dx = picture->pDrawable->x;
3355 dy = picture->pDrawable->y;
3356 dx *= FAST_SAMPLES_X;
3357 dy *= FAST_SAMPLES_Y;
3358 if (!tor_init(&tor, &extents, 2*ntrap)) {
3359 sna_pixmap_destroy(scratch);
3360 return true;
3361 }
3362
3363 for (n = 0; n < ntrap; n++) {
3364 xPointFixed p1, p2;
3365
3366 if (pixman_fixed_to_int(trap[n].top.y) + picture->pDrawable->y >= extents.y2 ||
3367 pixman_fixed_to_int(trap[n].bot.y) + picture->pDrawable->y < extents.y1)
3368 continue;
3369
3370 p1.y = trap[n].top.y;
3371 p2.y = trap[n].bot.y;
3372 p1.x = trap[n].top.l;
3373 p2.x = trap[n].bot.l;
3374 polygon_add_line(tor.polygon, &p1, &p2, dx, dy);
3375
3376 p1.y = trap[n].bot.y;
3377 p2.y = trap[n].top.y;
3378 p1.x = trap[n].top.r;
3379 p2.x = trap[n].bot.r;
3380 polygon_add_line(tor.polygon, &p1, &p2, dx, dy);
3381 }
3382
3383 if (picture->polyEdge == PolyEdgeSharp)
3384 span = tor_blt_mask_mono;
3385 else
3386 span = tor_blt_mask;
3387
3388 tor_render(NULL, &tor,
3389 scratch->devPrivate.ptr,
3390 (void *)(intptr_t)scratch->devKind,
3391 span, true);
3392
3393 tor_fini(&tor);
3394
3395 /* XXX clip boxes */
3396 get_drawable_deltas(picture->pDrawable, pixmap, &x, &y);
3397 sna = to_sna_from_screen(screen);
3398 sna->render.copy_boxes(sna, GXcopy,
3399 &scratch->drawable, __sna_pixmap_get_bo(scratch), -extents.x1, -extents.x1,
3400 &pixmap->drawable, priv->gpu_bo, x, y,
3401 &extents, 1, 0);
3402 mark_damaged(pixmap, priv, &extents ,x, y);
3403 sna_pixmap_destroy(scratch);
3404 return true;
3405 }
3406
/* Composite a list of triangles directly as spans on the GPU: the
 * triangles are scan-converted with the tor rasteriser and the
 * resulting coverage spans are fed straight into the backend's
 * composite_spans operation, avoiding an intermediate mask.
 *
 * Returns false to request a software (or other) fallback; returns
 * true when the request was handled, including trivially (empty
 * extents, no clip intersection).
 */
bool
triangles_span_converter(struct sna *sna,
			 CARD8 op, PicturePtr src, PicturePtr dst,
			 PictFormatPtr maskFormat, INT16 src_x, INT16 src_y,
			 int count, xTriangle *tri)
{
	struct sna_composite_spans_op tmp;
	struct tor tor;
	BoxRec extents;
	pixman_region16_t clip;
	int16_t dst_x, dst_y;
	int dx, dy, n;
	bool was_clear;

	if (NO_SCAN_CONVERTER)
		return false;

	/* 1-bit style output is handled by the dedicated mono path. */
	if (is_mono(dst, maskFormat))
		return mono_triangles_span_converter(sna, op, src, dst,
						     src_x, src_y,
						     count, tri);

	/* XXX strict adherence to the Render specification */
	if (dst->polyMode == PolyModePrecise) {
		DBG(("%s: fallback -- precise rasterisation requested\n",
		     __FUNCTION__));
		return false;
	}

	if (!sna->render.check_composite_spans(sna, op, src, dst, 0, 0, 0)) {
		DBG(("%s: fallback -- composite spans not supported\n",
		     __FUNCTION__));
		return false;
	}

	/* The first vertex anchors the source-to-destination offset
	 * (Render positions src relative to the first point).
	 */
	dst_x = pixman_fixed_to_int(tri[0].p1.x);
	dst_y = pixman_fixed_to_int(tri[0].p1.y);

	miTriangleBounds(count, tri, &extents);
	DBG(("%s: extents (%d, %d), (%d, %d)\n",
	     __FUNCTION__, extents.x1, extents.y1, extents.x2, extents.y2));

	/* Degenerate bounds: nothing to draw. */
	if (extents.y1 >= extents.y2 || extents.x1 >= extents.x2)
		return true;

#if 0
	if (extents.y2 - extents.y1 < 64 && extents.x2 - extents.x1 < 64) {
		DBG(("%s: fallback -- traps extents too small %dx%d\n",
		     __FUNCTION__, extents.y2 - extents.y1, extents.x2 - extents.x1));
		return false;
	}
#endif

	/* Intersect the triangle bounds with the drawable's clips;
	 * bail out (successfully) if nothing remains visible.
	 */
	if (!sna_compute_composite_region(&clip,
					  src, NULL, dst,
					  src_x + extents.x1 - dst_x,
					  src_y + extents.y1 - dst_y,
					  0, 0,
					  extents.x1, extents.y1,
					  extents.x2 - extents.x1,
					  extents.y2 - extents.y1)) {
		DBG(("%s: triangles do not intersect drawable clips\n",
		     __FUNCTION__)) ;
		return true;
	}

	/* Re-check span support against the actual clipped size. */
	if (!sna->render.check_composite_spans(sna, op, src, dst,
					       clip.extents.x2 - clip.extents.x1,
					       clip.extents.y2 - clip.extents.y1,
					       0)) {
		DBG(("%s: fallback -- composite spans not supported\n",
		     __FUNCTION__));
		return false;
	}

	extents = *RegionExtents(&clip);
	dx = dst->pDrawable->x;
	dy = dst->pDrawable->y;

	DBG(("%s: after clip -- extents (%d, %d), (%d, %d), delta=(%d, %d) src -> (%d, %d)\n",
	     __FUNCTION__,
	     extents.x1, extents.y1,
	     extents.x2, extents.y2,
	     dx, dy,
	     src_x + extents.x1 - dst_x - dx,
	     src_y + extents.y1 - dst_y - dy));

	was_clear = sna_drawable_is_clear(dst->pDrawable);

	memset(&tmp, 0, sizeof(tmp));
	if (!sna->render.composite_spans(sna, op, src, dst,
					 src_x + extents.x1 - dst_x - dx,
					 src_y + extents.y1 - dst_y - dy,
					 extents.x1, extents.y1,
					 extents.x2 - extents.x1,
					 extents.y2 - extents.y1,
					 0,
					 &tmp)) {
		DBG(("%s: fallback -- composite spans render op not supported\n",
		     __FUNCTION__));
		return false;
	}

	/* Convert the drawable offset into sub-pixel sample units for
	 * the edge coordinates.
	 */
	dx *= FAST_SAMPLES_X;
	dy *= FAST_SAMPLES_Y;
	if (!tor_init(&tor, &extents, 3*count))
		goto skip;

	/* Each triangle contributes its three edges to the polygon. */
	for (n = 0; n < count; n++) {
		polygon_add_line(tor.polygon, &tri[n].p1, &tri[n].p2, dx, dy);
		polygon_add_line(tor.polygon, &tri[n].p2, &tri[n].p3, dx, dy);
		polygon_add_line(tor.polygon, &tri[n].p3, &tri[n].p1, dx, dy);
	}

	/* Unbounded operators need zero-coverage spans emitted unless
	 * the destination was already clear.
	 */
	tor_render(sna, &tor, &tmp, &clip,
		   choose_span(&tmp, dst, maskFormat, &clip),
		   !was_clear && maskFormat && !operator_is_bounded(op));

	tor_fini(&tor);
skip:
	tmp.done(sna, &tmp);

	REGION_UNINIT(NULL, &clip);
	return true;
}
3532
3533 bool
triangles_mask_converter(CARD8 op,PicturePtr src,PicturePtr dst,PictFormatPtr maskFormat,INT16 src_x,INT16 src_y,int count,xTriangle * tri)3534 triangles_mask_converter(CARD8 op, PicturePtr src, PicturePtr dst,
3535 PictFormatPtr maskFormat, INT16 src_x, INT16 src_y,
3536 int count, xTriangle *tri)
3537 {
3538 struct tor tor;
3539 void (*span)(struct sna *sna,
3540 struct sna_composite_spans_op *op,
3541 pixman_region16_t *clip,
3542 const BoxRec *box,
3543 int coverage);
3544 ScreenPtr screen = dst->pDrawable->pScreen;
3545 PixmapPtr scratch;
3546 PicturePtr mask;
3547 BoxRec extents;
3548 int16_t dst_x, dst_y;
3549 int dx, dy;
3550 int error, n;
3551
3552 if (NO_SCAN_CONVERTER)
3553 return false;
3554
3555 if (is_precise(dst, maskFormat)) {
3556 DBG(("%s: fallback -- precise rasterisation requested\n",
3557 __FUNCTION__));
3558 return false;
3559 }
3560
3561 if (maskFormat == NULL && count > 1) {
3562 DBG(("%s: fallback -- individual rasterisation requested\n",
3563 __FUNCTION__));
3564 return false;
3565 }
3566
3567 miTriangleBounds(count, tri, &extents);
3568 DBG(("%s: extents (%d, %d), (%d, %d)\n",
3569 __FUNCTION__, extents.x1, extents.y1, extents.x2, extents.y2));
3570
3571 if (extents.y1 >= extents.y2 || extents.x1 >= extents.x2)
3572 return true;
3573
3574 if (!sna_compute_composite_extents(&extents,
3575 src, NULL, dst,
3576 src_x, src_y,
3577 0, 0,
3578 extents.x1, extents.y1,
3579 extents.x2 - extents.x1,
3580 extents.y2 - extents.y1))
3581 return true;
3582
3583 DBG(("%s: extents (%d, %d), (%d, %d)\n",
3584 __FUNCTION__, extents.x1, extents.y1, extents.x2, extents.y2));
3585
3586 extents.y2 -= extents.y1;
3587 extents.x2 -= extents.x1;
3588 extents.x1 -= dst->pDrawable->x;
3589 extents.y1 -= dst->pDrawable->y;
3590 dst_x = extents.x1;
3591 dst_y = extents.y1;
3592 dx = -extents.x1 * FAST_SAMPLES_X;
3593 dy = -extents.y1 * FAST_SAMPLES_Y;
3594 extents.x1 = extents.y1 = 0;
3595
3596 DBG(("%s: mask (%dx%d)\n",
3597 __FUNCTION__, extents.x2, extents.y2));
3598 scratch = sna_pixmap_create_upload(screen,
3599 extents.x2, extents.y2, 8,
3600 KGEM_BUFFER_WRITE_INPLACE);
3601 if (!scratch)
3602 return true;
3603
3604 DBG(("%s: created buffer %p, stride %d\n",
3605 __FUNCTION__, scratch->devPrivate.ptr, scratch->devKind));
3606
3607 if (!tor_init(&tor, &extents, 3*count)) {
3608 sna_pixmap_destroy(scratch);
3609 return true;
3610 }
3611
3612 for (n = 0; n < count; n++) {
3613 polygon_add_line(tor.polygon, &tri[n].p1, &tri[n].p2, dx, dy);
3614 polygon_add_line(tor.polygon, &tri[n].p2, &tri[n].p3, dx, dy);
3615 polygon_add_line(tor.polygon, &tri[n].p3, &tri[n].p1, dx, dy);
3616 }
3617
3618 if (maskFormat ? maskFormat->depth < 8 : dst->polyEdge == PolyEdgeSharp)
3619 span = tor_blt_mask_mono;
3620 else
3621 span = tor_blt_mask;
3622
3623 tor_render(NULL, &tor,
3624 scratch->devPrivate.ptr,
3625 (void *)(intptr_t)scratch->devKind,
3626 span, true);
3627
3628 mask = CreatePicture(0, &scratch->drawable,
3629 PictureMatchFormat(screen, 8, PICT_a8),
3630 0, 0, serverClient, &error);
3631 if (mask) {
3632 CompositePicture(op, src, mask, dst,
3633 src_x + dst_x - pixman_fixed_to_int(tri[0].p1.x),
3634 src_y + dst_y - pixman_fixed_to_int(tri[0].p1.y),
3635 0, 0,
3636 dst_x, dst_y,
3637 extents.x2, extents.y2);
3638 FreePicture(mask, 0);
3639 }
3640 tor_fini(&tor);
3641 sna_pixmap_destroy(scratch);
3642
3643 return true;
3644 }
3645
/* Per-thread work description for rasterising one horizontal band of a
 * triangle strip; see tristrip_thread().
 */
struct tristrip_thread {
	struct sna *sna;
	const struct sna_composite_spans_op *op;	/* shared render op */
	const xPointFixed *points;	/* full vertex list of the strip */
	RegionPtr clip;
	span_func_t span;	/* span emission callback */
	BoxRec extents;		/* this thread's band of the destination */
	int dx, dy, draw_y;	/* dx/dy: edge offset in FAST_SAMPLES units;
				 * draw_y: drawable's y origin */
	int count;		/* number of points in the strip */
	bool unbounded;		/* emit zero-coverage spans (unbounded op) */
};
3657
/* Thread entry point: scan-convert the entire triangle strip with a
 * rasteriser restricted to this thread's band (thread->extents),
 * accumulating coverage spans into a local box buffer that is flushed
 * to the GPU in a single thread_boxes call at the end.
 */
static void
tristrip_thread(void *arg)
{
	struct tristrip_thread *thread = arg;
	struct span_thread_boxes boxes;
	struct tor tor;
	int n, cw, ccw;

	if (!tor_init(&tor, &thread->extents, 2*thread->count))
		return;

	span_thread_boxes_init(&boxes, thread->op, thread->clip);

	/* A triangle strip with vertices 0,1,2,...,count-1 is bounded
	 * by two edge chains, one through the even vertices and one
	 * through the odd.  Walk the vertices once, alternately
	 * extending each chain (cw/ccw remember the last vertex added
	 * to either side), then close the outline with a final edge
	 * joining the two chain ends.
	 */
	cw = 0; ccw = 1;
	polygon_add_line(tor.polygon,
			 &thread->points[ccw], &thread->points[cw],
			 thread->dx, thread->dy);
	n = 2;
	do {
		polygon_add_line(tor.polygon,
				 &thread->points[cw], &thread->points[n],
				 thread->dx, thread->dy);
		cw = n;
		if (++n == thread->count)
			break;

		polygon_add_line(tor.polygon,
				 &thread->points[n], &thread->points[ccw],
				 thread->dx, thread->dy);
		ccw = n;
		if (++n == thread->count)
			break;
	} while (1);
	/* Closing edge between the two boundary chains. */
	polygon_add_line(tor.polygon,
			 &thread->points[cw], &thread->points[ccw],
			 thread->dx, thread->dy);
	assert(tor.polygon->num_edges <= 2*thread->count);

	tor_render(thread->sna, &tor,
		   (struct sna_composite_spans_op *)&boxes, thread->clip,
		   thread->span, thread->unbounded);

	tor_fini(&tor);

	/* Submit every accumulated span in one batch. */
	if (boxes.num_boxes) {
		DBG(("%s: flushing %d boxes\n", __FUNCTION__, boxes.num_boxes));
		assert(boxes.num_boxes <= SPAN_THREAD_MAX_BOXES);
		thread->op->thread_boxes(thread->sna, thread->op,
					 boxes.boxes, boxes.num_boxes);
	}
}
3709
/* Composite a triangle strip as GPU spans using the imprecise (fast
 * sample) rasteriser.  The strip is rendered either single-threaded,
 * or split into horizontal bands with one tristrip_thread() worker per
 * band when the backend supports threaded box emission.
 *
 * Returns false to request a fallback; true once handled (including
 * the trivial empty/fully-clipped cases).
 */
bool
imprecise_tristrip_span_converter(struct sna *sna,
				  CARD8 op, PicturePtr src, PicturePtr dst,
				  PictFormatPtr maskFormat, INT16 src_x, INT16 src_y,
				  int count, xPointFixed *points)
{
	struct sna_composite_spans_op tmp;
	BoxRec extents;
	pixman_region16_t clip;
	int16_t dst_x, dst_y;
	int dx, dy, num_threads;
	bool was_clear;

	if (!sna->render.check_composite_spans(sna, op, src, dst, 0, 0, 0)) {
		DBG(("%s: fallback -- composite spans not supported\n",
		     __FUNCTION__));
		return false;
	}

	/* The first vertex anchors the source offset. */
	dst_x = pixman_fixed_to_int(points[0].x);
	dst_y = pixman_fixed_to_int(points[0].y);

	miPointFixedBounds(count, points, &extents);
	DBG(("%s: extents (%d, %d), (%d, %d)\n",
	     __FUNCTION__, extents.x1, extents.y1, extents.x2, extents.y2));

	/* Degenerate bounds: nothing to draw. */
	if (extents.y1 >= extents.y2 || extents.x1 >= extents.x2)
		return true;

#if 0
	if (extents.y2 - extents.y1 < 64 && extents.x2 - extents.x1 < 64) {
		DBG(("%s: fallback -- traps extents too small %dx%d\n",
		     __FUNCTION__, extents.y2 - extents.y1, extents.x2 - extents.x1));
		return false;
	}
#endif

	/* Intersect with the drawable's clips; done if invisible. */
	if (!sna_compute_composite_region(&clip,
					  src, NULL, dst,
					  src_x + extents.x1 - dst_x,
					  src_y + extents.y1 - dst_y,
					  0, 0,
					  extents.x1, extents.y1,
					  extents.x2 - extents.x1,
					  extents.y2 - extents.y1)) {
		DBG(("%s: triangles do not intersect drawable clips\n",
		     __FUNCTION__)) ;
		return true;
	}

	/* Re-check span support against the clipped size. */
	if (!sna->render.check_composite_spans(sna, op, src, dst,
					       clip.extents.x2 - clip.extents.x1,
					       clip.extents.y2 - clip.extents.y1,
					       0)) {
		DBG(("%s: fallback -- composite spans not supported\n",
		     __FUNCTION__));
		return false;
	}

	extents = *RegionExtents(&clip);
	dx = dst->pDrawable->x;
	dy = dst->pDrawable->y;

	DBG(("%s: after clip -- extents (%d, %d), (%d, %d), delta=(%d, %d) src -> (%d, %d)\n",
	     __FUNCTION__,
	     extents.x1, extents.y1,
	     extents.x2, extents.y2,
	     dx, dy,
	     src_x + extents.x1 - dst_x - dx,
	     src_y + extents.y1 - dst_y - dy));

	was_clear = sna_drawable_is_clear(dst->pDrawable);

	memset(&tmp, 0, sizeof(tmp));
	if (!sna->render.composite_spans(sna, op, src, dst,
					 src_x + extents.x1 - dst_x - dx,
					 src_y + extents.y1 - dst_y - dy,
					 extents.x1, extents.y1,
					 extents.x2 - extents.x1,
					 extents.y2 - extents.y1,
					 0,
					 &tmp)) {
		DBG(("%s: fallback -- composite spans render op not supported\n",
		     __FUNCTION__));
		return false;
	}

	/* Drawable offset in sub-pixel sample units for the edges. */
	dx *= FAST_SAMPLES_X;
	dy *= FAST_SAMPLES_Y;

	/* Thread only when the backend can emit boxes from worker
	 * threads and the area is large enough to amortise the cost.
	 */
	num_threads = 1;
	if (!NO_GPU_THREADS &&
	    tmp.thread_boxes &&
	    thread_choose_span(&tmp, dst, maskFormat, &clip))
		num_threads = sna_use_threads(extents.x2 - extents.x1,
					      extents.y2 - extents.y1,
					      16);
	if (num_threads == 1) {
		struct tor tor;
		int cw, ccw, n;

		if (!tor_init(&tor, &extents, 2*count))
			goto skip;

		/* Build the strip outline from its two boundary edge
		 * chains (even vertices on one side, odd on the
		 * other), then close it; see tristrip_thread() for the
		 * same traversal.
		 */
		cw = 0; ccw = 1;
		polygon_add_line(tor.polygon,
				 &points[ccw], &points[cw],
				 dx, dy);
		n = 2;
		do {
			polygon_add_line(tor.polygon,
					 &points[cw], &points[n],
					 dx, dy);
			cw = n;
			if (++n == count)
				break;

			polygon_add_line(tor.polygon,
					 &points[n], &points[ccw],
					 dx, dy);
			ccw = n;
			if (++n == count)
				break;
		} while (1);
		polygon_add_line(tor.polygon,
				 &points[cw], &points[ccw],
				 dx, dy);
		assert(tor.polygon->num_edges <= 2*count);

		tor_render(sna, &tor, &tmp, &clip,
			   choose_span(&tmp, dst, maskFormat, &clip),
			   !was_clear && maskFormat && !operator_is_bounded(op));

		tor_fini(&tor);
	} else {
		struct tristrip_thread threads[num_threads];
		int y, h, n;

		DBG(("%s: using %d threads for tristrip compositing %dx%d\n",
		     __FUNCTION__, num_threads,
		     clip.extents.x2 - clip.extents.x1,
		     clip.extents.y2 - clip.extents.y1));

		threads[0].sna = sna;
		threads[0].op = &tmp;
		threads[0].points = points;
		threads[0].count = count;
		threads[0].extents = clip.extents;
		threads[0].clip = &clip;
		threads[0].dx = dx;
		threads[0].dy = dy;
		threads[0].draw_y = dst->pDrawable->y;
		threads[0].unbounded = !was_clear && maskFormat && !operator_is_bounded(op);
		threads[0].span = thread_choose_span(&tmp, dst, maskFormat, &clip);

		/* Split the clip into num_threads bands of height h,
		 * dropping one thread if rounding up h means the last
		 * band would be empty.
		 */
		y = clip.extents.y1;
		h = clip.extents.y2 - clip.extents.y1;
		h = (h + num_threads - 1) / num_threads;
		num_threads -= (num_threads-1) * h >= clip.extents.y2 - clip.extents.y1;

		/* Workers 1..n-1 take the leading bands ... */
		for (n = 1; n < num_threads; n++) {
			threads[n] = threads[0];
			threads[n].extents.y1 = y;
			threads[n].extents.y2 = y += h;

			sna_threads_run(n, tristrip_thread, &threads[n]);
		}

		/* ... and this thread renders the final band itself. */
		assert(y < threads[0].extents.y2);
		threads[0].extents.y1 = y;
		tristrip_thread(&threads[0]);

		sna_threads_wait();
	}
skip:
	tmp.done(sna, &tmp);

	REGION_UNINIT(NULL, &clip);
	return true;
}
3890