1 /*
2  * Copyright (c) 2007  David Turner
3  * Copyright (c) 2008  M Joonas Pihlaja
4  * Copyright (c) 2011 Intel Corporation
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice (including the next
14  * paragraph) shall be included in all copies or substantial portions of the
15  * Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23  * SOFTWARE.
24  *
25  * Authors:
26  *    Chris Wilson <chris@chris-wilson.co.uk>
27  *
28  */
29 
30 #ifdef HAVE_CONFIG_H
31 #include "config.h"
32 #endif
33 
34 #include "sna.h"
35 #include "sna_render.h"
36 #include "sna_render_inline.h"
37 #include "sna_trapezoids.h"
38 #include "fb/fbpict.h"
39 
40 #include <mipict.h>
41 
42 #undef SAMPLES_X
43 #undef SAMPLES_Y
44 
45 /* TODO: Emit unantialiased and MSAA triangles. */
46 
47 #ifndef MAX
48 #define MAX(x,y) ((x) >= (y) ? (x) : (y))
49 #endif
50 
51 #ifndef MIN
52 #define MIN(x,y) ((x) <= (y) ? (x) : (y))
53 #endif
54 
55 typedef void (*span_func_t)(struct sna *sna,
56 			    struct sna_composite_spans_op *op,
57 			    pixman_region16_t *clip,
58 			    const BoxRec *box,
59 			    int coverage);
60 
61 #if HAS_DEBUG_FULL
62 static void _assert_pixmap_contains_box(PixmapPtr pixmap, BoxPtr box, const char *function)
63 {
64 	if (box->x1 < 0 || box->y1 < 0 ||
65 	    box->x2 > pixmap->drawable.width ||
66 	    box->y2 > pixmap->drawable.height)
67 	{
68 		FatalError("%s: damage box is beyond the pixmap: box=(%d, %d), (%d, %d), pixmap=(%d, %d)\n",
69 			   function,
70 			   box->x1, box->y1, box->x2, box->y2,
71 			   pixmap->drawable.width,
72 			   pixmap->drawable.height);
73 	}
74 }
75 #define assert_pixmap_contains_box(p, b) _assert_pixmap_contains_box(p, b, __FUNCTION__)
76 #else
77 #define assert_pixmap_contains_box(p, b)
78 #endif
79 
80 static void apply_damage(struct sna_composite_op *op, RegionPtr region)
81 {
82 	DBG(("%s: damage=%p, region=%dx[(%d, %d), (%d, %d)]\n",
83 	     __FUNCTION__, op->damage,
84 	     region_num_rects(region),
85 	     region->extents.x1, region->extents.y1,
86 	     region->extents.x2, region->extents.y2));
87 
88 	if (op->damage == NULL)
89 		return;
90 
91 	RegionTranslate(region, op->dst.x, op->dst.y);
92 
93 	assert_pixmap_contains_box(op->dst.pixmap, RegionExtents(region));
94 	sna_damage_add(op->damage, region);
95 }
96 
97 static void _apply_damage_box(struct sna_composite_op *op, const BoxRec *box)
98 {
99 	BoxRec r;
100 
101 	r.x1 = box->x1 + op->dst.x;
102 	r.x2 = box->x2 + op->dst.x;
103 	r.y1 = box->y1 + op->dst.y;
104 	r.y2 = box->y2 + op->dst.y;
105 
106 	assert_pixmap_contains_box(op->dst.pixmap, &r);
107 	sna_damage_add_box(op->damage, &r);
108 }
109 
110 inline static void apply_damage_box(struct sna_composite_op *op, const BoxRec *box)
111 {
112 	if (op->damage)
113 		_apply_damage_box(op, box);
114 }
115 
116 #define FAST_SAMPLES_X_TO_INT_FRAC(x, i, f) \
117 	_GRID_TO_INT_FRAC_shift(x, i, f, FAST_SAMPLES_shift)
118 
119 #define FAST_SAMPLES_INT(x) ((x) >> (FAST_SAMPLES_shift))
120 #define FAST_SAMPLES_FRAC(x) ((x) & (FAST_SAMPLES_mask))
121 
122 #define _GRID_TO_INT_FRAC_shift(t, i, f, b) do {	\
123     (f) = FAST_SAMPLES_FRAC(t);				\
124     (i) = FAST_SAMPLES_INT(t);				\
125 } while (0)
126 
127 #define FAST_SAMPLES_XY (FAST_SAMPLES_X*FAST_SAMPLES_Y) /* Unit area on the grid. */
128 #define AREA_TO_ALPHA(c)  ((c) / (float)FAST_SAMPLES_XY)
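
/* For example, with a 4x4 subsample grid (FAST_SAMPLES_X = FAST_SAMPLES_Y = 4,
 * so FAST_SAMPLES_XY = 16, used here purely as an illustration -- the real
 * values are whatever the FAST_SAMPLES_* macros are configured to), a fully
 * covered pixel accumulates a coverage of 16 and AREA_TO_ALPHA() maps it to
 * 1.0f, while half coverage (8) maps to 0.5f.
 */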
129 
130 struct quorem {
131 	int32_t quo;
132 	int64_t rem;
133 };
134 
135 struct edge {
136 	struct edge *next, *prev;
137 
138 	int dir;
139 	int cell;
140 	int height_left;	/* y2-y1 after orienting the edge downwards */
141 
142 	struct quorem x;
143 
144 	/* Advance of the current x when moving down a subsample line. */
145 	struct quorem dxdy;
146 	int64_t dy;
147 
148 	/* The clipped y of the top of the edge. */
149 	int ytop;
152 };
153 
154 /* Number of subsample rows per y-bucket. Must be FAST_SAMPLES_Y. */
155 #define EDGE_Y_BUCKET_HEIGHT FAST_SAMPLES_Y
156 #define EDGE_Y_BUCKET_INDEX(y, ymin) (((y) - (ymin))/EDGE_Y_BUCKET_HEIGHT)
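
/* For instance, with EDGE_Y_BUCKET_HEIGHT = 4 (i.e. FAST_SAMPLES_Y = 4,
 * illustrative only) and ymin = 0, an edge whose clipped top sits on
 * subsample row 13 lands in bucket EDGE_Y_BUCKET_INDEX(13, 0) = 3, the
 * bucket for pixel row 3.
 */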
157 
158 /* A collection of sorted and vertically clipped edges of the polygon.
159  * Edges are moved from the polygon to an active list while scan
160  * converting. */
161 struct polygon {
162 	/* The vertical clip extents. */
163 	int ymin, ymax;
164 
165 	/* Array of edges all starting in the same bucket.	An edge is put
166  * into bucket EDGE_Y_BUCKET_INDEX(edge->ytop, polygon->ymin) when
167 	 * it is added to the polygon. */
168 	struct edge **y_buckets;
169 	struct edge *y_buckets_embedded[64];
170 
171 	struct edge edges_embedded[32];
172 	struct edge *edges;
173 	int num_edges;
174 };
175 
176 /* A cell records the effect on pixel coverage of polygon edges
177  * passing through a pixel.  It contains two accumulators of pixel
178  * coverage.
179  *
180  * Consider the effects of a polygon edge on the coverage of a pixel
181  * it intersects and that of the following one.  The coverage of the
182  * following pixel is the height of the edge multiplied by the width
183  * of the pixel, and the coverage of the pixel itself is the area of
184  * the trapezoid formed by the edge and the right side of the pixel.
185  *
186  * +-----------------------+-----------------------+
187  * |                       |                       |
188  * |                       |                       |
189  * |_______________________|_______________________|
190  * |   \...................|.......................|\
191  * |    \..................|.......................| |
192  * |     \.................|.......................| |
193  * |      \....covered.....|.......................| |
194  * |       \....area.......|.......................| } covered height
195  * |        \..............|.......................| |
196  * |uncovered\.............|.......................| |
197  * |  area    \............|.......................| |
198  * |___________\...........|.......................|/
199  * |                       |                       |
200  * |                       |                       |
201  * |                       |                       |
202  * +-----------------------+-----------------------+
203  *
204  * Since the coverage of the following pixel will always be a multiple
205  * of the width of the pixel, we can store the height of the covered
206  * area instead.  The coverage of the pixel itself is the total
207  * coverage minus the area of the uncovered area to the left of the
208  * edge.  As it's faster to compute the uncovered area we only store
209  * that and subtract it from the total coverage later when forming
210  * spans to blit.
211  *
212  * The heights and areas are signed, with left edges of the polygon
213  * having positive sign and right edges having negative sign.  When
214  * two edges intersect they swap their left/rightness so their
215  * contribution above and below the intersection point must be
216  * computed separately. */
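
/* A small worked example (assuming a 4x4 subsample grid): a left edge
 * dropping vertically through x = 1.25 adds, on each of the four subsample
 * rows, covered_height += 1 and uncovered_area += 1 to the cell at x = 1.
 * After the full pixel row the cell holds covered_height = 4 and
 * uncovered_area = 4: the full column of 4x4 = 16 samples starts here, but
 * 4 of them lie to the left of the edge, leaving 12/16 = 0.75 coverage for
 * the pixel itself, exactly the area to the right of x = 1.25.
 */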
217 struct cell {
218 	struct cell *next;
219 	int x;
220 	int16_t uncovered_area;
221 	int16_t covered_height;
222 };
223 
224 /* A cell list represents the scan line sparsely as cells ordered by
225  * ascending x.  It is geared towards scanning the cells in order
226  * using an internal cursor. */
227 struct cell_list {
228 	struct cell *cursor;	/* points to the left-most cell in the scan line */
229 
230 	/* Sentinel cells bounding the scan line. */
231 	struct cell head, tail;
232 
233 	int16_t x1, x2;
234 	int16_t count, size;
235 	struct cell *cells;
236 	struct cell embedded[256];
237 };
238 
239 /* The active list contains edges in the current scan line ordered by
240  * the x-coordinate of the intercept of the edge and the scan line. */
241 struct active_list {
242 	/* Leftmost edge on the current scan line. */
243 	struct edge head, tail;
244 };
245 
246 struct tor {
247     struct polygon	polygon[1];
248     struct active_list	active[1];
249     struct cell_list	coverages[1];
250 
251     BoxRec extents;
252 };
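
/* Typical use of the converter, as in span_thread() below (a sketch only):
 *
 *	struct tor tor;
 *
 *	if (tor_init(&tor, &extents, 2*ntrap)) {
 *		for (n = 0; n < ntrap; n++)
 *			tor_add_trapezoid(&tor, &traps[n], dx, dy);
 *		tor_render(sna, &tor, op, clip, span, unbounded);
 *		tor_fini(&tor);
 *	}
 */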
253 
254 /* Rewinds the cell list's cursor to the beginning.  After rewinding
255  * we're good to cell_list_find() the cell at any x coordinate. */
258 inline static void
259 cell_list_rewind(struct cell_list *cells)
260 {
261 	cells->cursor = &cells->head;
262 }
263 
264 static bool
265 cell_list_init(struct cell_list *cells, int x1, int x2)
266 {
267 	cells->tail.next = NULL;
268 	cells->tail.x = INT_MAX;
269 	cells->head.x = INT_MIN;
270 	cells->head.next = &cells->tail;
271 	cells->head.covered_height = 0;
272 	cell_list_rewind(cells);
273 	cells->count = 0;
274 	cells->x1 = x1;
275 	cells->x2 = x2;
276 	cells->size = x2 - x1 + 1;
277 	cells->cells = cells->embedded;
278 	if (cells->size > ARRAY_SIZE(cells->embedded))
279 		cells->cells = malloc(cells->size * sizeof(struct cell));
280 	return cells->cells != NULL;
281 }
282 
283 static void
284 cell_list_fini(struct cell_list *cells)
285 {
286 	if (cells->cells != cells->embedded)
287 		free(cells->cells);
288 }
289 
290 inline static void
291 cell_list_reset(struct cell_list *cells)
292 {
293 	cell_list_rewind(cells);
294 	cells->head.next = &cells->tail;
295 	cells->head.covered_height = 0;
296 	cells->count = 0;
297 }
298 
299 inline static struct cell *
300 cell_list_alloc(struct cell_list *cells,
301 		struct cell *tail,
302 		int x)
303 {
304 	struct cell *cell;
305 
306 	assert(cells->count < cells->size);
307 	cell = cells->cells + cells->count++;
308 	cell->next = tail->next;
309 	tail->next = cell;
310 
311 	cell->x = x;
312 	cell->covered_height = 0;
313 	cell->uncovered_area = 0;
314 	return cell;
315 }
316 
317 /* Find a cell at the given x-coordinate.  Returns %NULL if a new cell
318  * needed to be allocated but couldn't be.  Cells must be found with
319  * non-decreasing x-coordinate until the cell list is rewound using
320  * cell_list_rewind(). Ownership of the returned cell is retained by
321  * the cell list. */
322 inline static struct cell *
323 cell_list_find(struct cell_list *cells, int x)
324 {
325 	struct cell *tail;
326 
327 	if (x >= cells->x2)
328 		return &cells->tail;
329 
330 	if (x < cells->x1)
331 		return &cells->head;
332 
333 	tail = cells->cursor;
334 	if (tail->x == x)
335 		return tail;
336 
337 	do {
338 		if (tail->next->x > x)
339 			break;
340 
341 		tail = tail->next;
342 		if (tail->next->x > x)
343 			break;
344 
345 		tail = tail->next;
346 		if (tail->next->x > x)
347 			break;
348 
349 		tail = tail->next;
350 	} while (1);
351 
352 	if (tail->x != x)
353 		tail = cell_list_alloc(cells, tail, x);
354 
355 	return cells->cursor = tail;
356 }
357 
358 /* Add a subpixel span covering [x1, x2) to the coverage cells. */
359 inline static void
360 cell_list_add_subspan(struct cell_list *cells, int x1, int x2)
361 {
362 	struct cell *cell;
363 	int ix1, fx1;
364 	int ix2, fx2;
365 
366 	if (x1 == x2)
367 		return;
368 
369 	FAST_SAMPLES_X_TO_INT_FRAC(x1, ix1, fx1);
370 	FAST_SAMPLES_X_TO_INT_FRAC(x2, ix2, fx2);
371 
372 	__DBG(("%s: x1=%d (%d+%d), x2=%d (%d+%d)\n", __FUNCTION__,
373 	       x1, ix1, fx1, x2, ix2, fx2));
374 
375 	cell = cell_list_find(cells, ix1);
376 	if (ix1 != ix2) {
377 		cell->uncovered_area += fx1;
378 		++cell->covered_height;
379 
380 		cell = cell_list_find(cells, ix2);
381 		cell->uncovered_area -= fx2;
382 		--cell->covered_height;
383 	} else
384 		cell->uncovered_area += (fx1-fx2);
385 }
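
/* For example (assuming FAST_SAMPLES_X = 4), cell_list_add_subspan(cells,
 * 10, 30) covers grid columns [10, 30), i.e. the subrow from pixel 2.5 to
 * pixel 7.5: the cell at x = 2 gains uncovered_area += 2, covered_height += 1,
 * and the cell at x = 7 gains uncovered_area -= 2, covered_height -= 1.
 * Pixels 3..6 in between get no cell of their own; they pick up the running
 * covered_height when the spans are formed in tor_blt().
 */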
386 
387 inline static void
388 cell_list_add_span(struct cell_list *cells, int x1, int x2)
389 {
390 	struct cell *cell;
391 	int ix1, fx1;
392 	int ix2, fx2;
393 
394 	FAST_SAMPLES_X_TO_INT_FRAC(x1, ix1, fx1);
395 	FAST_SAMPLES_X_TO_INT_FRAC(x2, ix2, fx2);
396 
397 	__DBG(("%s: x1=%d (%d+%d), x2=%d (%d+%d)\n", __FUNCTION__,
398 	       x1, ix1, fx1, x2, ix2, fx2));
399 
400 	cell = cell_list_find(cells, ix1);
401 	if (ix1 != ix2) {
402 		cell->uncovered_area += fx1*FAST_SAMPLES_Y;
403 		cell->covered_height += FAST_SAMPLES_Y;
404 
405 		cell = cell_list_find(cells, ix2);
406 		cell->uncovered_area -= fx2*FAST_SAMPLES_Y;
407 		cell->covered_height -= FAST_SAMPLES_Y;
408 	} else
409 		cell->uncovered_area += (fx1-fx2)*FAST_SAMPLES_Y;
410 }
411 
412 static void
413 polygon_fini(struct polygon *polygon)
414 {
415 	if (polygon->y_buckets != polygon->y_buckets_embedded)
416 		free(polygon->y_buckets);
417 
418 	if (polygon->edges != polygon->edges_embedded)
419 		free(polygon->edges);
420 }
421 
422 static bool
423 polygon_init(struct polygon *polygon, int num_edges, int ymin, int ymax)
424 {
425 	unsigned num_buckets = EDGE_Y_BUCKET_INDEX(ymax-1, ymin) + 1;
426 
427 	if (unlikely(ymax - ymin > 0x7FFFFFFFU - EDGE_Y_BUCKET_HEIGHT))
428 		return false;
429 
430 	polygon->edges = polygon->edges_embedded;
431 	polygon->y_buckets = polygon->y_buckets_embedded;
432 
433 	polygon->num_edges = 0;
434 	if (num_edges > (int)ARRAY_SIZE(polygon->edges_embedded)) {
435 		polygon->edges = malloc(sizeof(struct edge)*num_edges);
436 		if (unlikely(NULL == polygon->edges))
437 			goto bail_no_mem;
438 	}
439 
440 	if (num_buckets >= ARRAY_SIZE(polygon->y_buckets_embedded)) {
441 		polygon->y_buckets = malloc((1+num_buckets)*sizeof(struct edge *));
442 		if (unlikely(NULL == polygon->y_buckets))
443 			goto bail_no_mem;
444 	}
445 	memset(polygon->y_buckets, 0, num_buckets * sizeof(struct edge *));
446 	polygon->y_buckets[num_buckets] = (void *)-1;
447 
448 	polygon->ymin = ymin;
449 	polygon->ymax = ymax;
450 	return true;
451 
452 bail_no_mem:
453 	polygon_fini(polygon);
454 	return false;
455 }
456 
457 static void
458 _polygon_insert_edge_into_its_y_bucket(struct polygon *polygon, struct edge *e)
459 {
460 	unsigned ix = EDGE_Y_BUCKET_INDEX(e->ytop, polygon->ymin);
461 	struct edge **ptail = &polygon->y_buckets[ix];
462 	assert(e->ytop < polygon->ymax);
463 	e->next = *ptail;
464 	*ptail = e;
465 }
466 
467 inline static void
468 polygon_add_edge(struct polygon *polygon,
469 		 const xTrapezoid *t,
470 		 const xLineFixed *edge,
471 		 int dir, int dx, int dy)
472 {
473 	struct edge *e = &polygon->edges[polygon->num_edges];
474 	int ytop, ybot;
475 
476 	assert(t->bottom > t->top);
477 	assert(edge->p2.y > edge->p1.y);
478 
479 	e->dir = dir;
480 
481 	ytop = pixman_fixed_to_fast(t->top) + dy;
482 	if (ytop < polygon->ymin)
483 		ytop = polygon->ymin;
484 
485 	ybot = pixman_fixed_to_fast(t->bottom) + dy;
486 	if (ybot > polygon->ymax)
487 		ybot = polygon->ymax;
488 
489 	e->ytop = ytop;
490 	e->height_left = ybot - ytop;
491 	if (e->height_left <= 0)
492 		return;
493 
494 	if (pixman_fixed_to_fast(edge->p1.x) == pixman_fixed_to_fast(edge->p2.x)) {
495 		e->cell = e->x.quo = pixman_fixed_to_fast(edge->p1.x) + dx;
496 		e->x.rem = 0;
497 		e->dxdy.quo = 0;
498 		e->dxdy.rem = 0;
499 		e->dy = 0;
500 	} else {
501 		int64_t Ex, Ey, tmp;
502 
503 		Ex = ((int64_t)edge->p2.x - edge->p1.x) * FAST_SAMPLES_X;
504 		Ey = ((int64_t)edge->p2.y - edge->p1.y) * FAST_SAMPLES_Y * (2 << 16);
505 		assert(Ey > 0);
506 
507 		e->dxdy.quo = Ex * (2 << 16) / Ey;
508 		e->dxdy.rem = Ex * (2 << 16) % Ey;
509 
510 		tmp = (int64_t)(2 * (ytop - dy) + 1) << 16;
511 		tmp -= (int64_t)edge->p1.y * FAST_SAMPLES_Y * 2;
512 		tmp *= Ex;
513 		e->x.quo = tmp / Ey;
514 		e->x.rem = tmp % Ey;
515 
516 		tmp = (int64_t)edge->p1.x * FAST_SAMPLES_X;
517 		e->x.quo += tmp / (1 << 16) + dx;
518 		tmp &= (1 << 16) - 1;
519 		if (tmp) {
520 			if (Ey < INT64_MAX >> 16)
521 				tmp = (tmp * Ey) / (1 << 16);
522 			else /* Handle overflow by losing precision */
523 				tmp = tmp * (Ey / (1 << 16));
524 			e->x.rem += tmp;
525 		}
526 
527 		if (e->x.rem < 0) {
528 			--e->x.quo;
529 			e->x.rem += Ey;
530 		} else if (e->x.rem >= Ey) {
531 			++e->x.quo;
532 			e->x.rem -= Ey;
533 		}
534 		assert(e->x.rem >= 0 && e->x.rem < Ey);
535 
536 		e->cell = e->x.quo + (e->x.rem >= Ey/2);
537 		e->dy = Ey;
538 	}
539 
540 	_polygon_insert_edge_into_its_y_bucket(polygon, e);
541 	polygon->num_edges++;
542 }
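
/* A note on the fixed-point set-up above (a sketch of the arithmetic): Ex
 * and Ey are the edge deltas scaled so that dxdy = Ex*(2<<16)/Ey is the
 * exact x advance, in grid columns, per subsample row, held as a
 * quotient/remainder pair so repeated stepping never drifts.  The
 * 2*(ytop - dy) + 1 term evaluates the initial x at the vertical midpoint
 * of the first subsample row, and e->cell is that x rounded to the nearest
 * grid column for sorting the active list.
 */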
543 
544 inline static void
545 polygon_add_line(struct polygon *polygon,
546 		 const xPointFixed *p1,
547 		 const xPointFixed *p2,
548 		 int dx, int dy)
549 {
550 	struct edge *e = &polygon->edges[polygon->num_edges];
551 	int top, bot;
552 
553 	if (p1->y == p2->y)
554 		return;
555 
556 	__DBG(("%s: line=(%d, %d), (%d, %d)\n",
557 	       __FUNCTION__, (int)p1->x, (int)p1->y, (int)p2->x, (int)p2->y));
558 
559 	e->dir = 1;
560 	if (p2->y < p1->y) {
561 		const xPointFixed *t;
562 
563 		e->dir = -1;
564 
565 		t = p1;
566 		p1 = p2;
567 		p2 = t;
568 	}
569 
570 	top = pixman_fixed_to_fast(p1->y) + dy;
571 	if (top < polygon->ymin)
572 		top = polygon->ymin;
573 
574 	bot = pixman_fixed_to_fast(p2->y) + dy;
575 	if (bot > polygon->ymax)
576 		bot = polygon->ymax;
577 
578 	if (bot <= top)
579 		return;
580 
581 	e->ytop = top;
582 	e->height_left = bot - top;
583 	if (e->height_left <= 0)
584 		return;
585 
586 	__DBG(("%s: edge height=%d\n", __FUNCTION__, e->dir * e->height_left));
587 
588 	if (pixman_fixed_to_fast(p1->x) == pixman_fixed_to_fast(p2->x)) {
589 		e->cell = e->x.quo = pixman_fixed_to_fast(p1->x) + dx;
590 		e->x.rem = 0;
591 		e->dxdy.quo = 0;
592 		e->dxdy.rem = 0;
593 		e->dy = 0;
594 	} else {
595 		int64_t Ex, Ey, tmp;
596 
597 		Ex = ((int64_t)p2->x - p1->x) * FAST_SAMPLES_X;
598 		Ey = ((int64_t)p2->y - p1->y) * FAST_SAMPLES_Y * (2 << 16);
599 
600 		e->dxdy.quo = Ex * (2 << 16) / Ey;
601 		e->dxdy.rem = Ex * (2 << 16) % Ey;
602 
603 		tmp = (int64_t)(2 * (top - dy) + 1) << 16;
604 		tmp -= (int64_t)p1->y * FAST_SAMPLES_Y * 2;
605 		tmp *= Ex;
606 		e->x.quo = tmp / Ey;
607 		e->x.rem = tmp % Ey;
608 
609 		tmp = (int64_t)p1->x * FAST_SAMPLES_X;
610 		e->x.quo += tmp / (1 << 16) + dx;
611 		e->x.rem += ((tmp & ((1 << 16) - 1)) * Ey) / (1 << 16);
612 
613 		if (e->x.rem < 0) {
614 			--e->x.quo;
615 			e->x.rem += Ey;
616 		} else if (e->x.rem >= Ey) {
617 			++e->x.quo;
618 			e->x.rem -= Ey;
619 		}
620 
621 		e->cell = e->x.quo + (e->x.rem >= Ey/2);
622 		e->dy = Ey;
623 	}
624 
625 	if (polygon->num_edges > 0) {
626 		struct edge *prev = &polygon->edges[polygon->num_edges-1];
627 		/* detect degenerate triangles inserted into tristrips */
628 		if (e->dir == -prev->dir &&
629 		    e->ytop == prev->ytop &&
630 		    e->height_left == prev->height_left &&
631 		    e->x.quo == prev->x.quo &&
632 		    e->x.rem == prev->x.rem &&
633 		    e->dxdy.quo == prev->dxdy.quo &&
634 		    e->dxdy.rem == prev->dxdy.rem) {
635 			unsigned ix = EDGE_Y_BUCKET_INDEX(e->ytop,
636 							  polygon->ymin);
637 			polygon->y_buckets[ix] = prev->next;
638 			polygon->num_edges--;
639 			return;
640 		}
641 	}
642 
643 	_polygon_insert_edge_into_its_y_bucket(polygon, e);
644 	polygon->num_edges++;
645 }
646 
647 static void
648 active_list_reset(struct active_list *active)
649 {
650 	active->head.height_left = INT_MAX;
651 	active->head.cell = INT_MIN;
652 	active->head.dy = 0;
653 	active->head.prev = NULL;
654 	active->head.next = &active->tail;
655 	active->tail.prev = &active->head;
656 	active->tail.next = NULL;
657 	active->tail.cell = INT_MAX;
658 	active->tail.height_left = INT_MAX;
659 	active->tail.dy = 0;
660 }
661 
662 static struct edge *
663 merge_sorted_edges(struct edge *head_a, struct edge *head_b)
664 {
665 	struct edge *head, **next, *prev;
666 	int32_t x;
667 
668 	if (head_b == NULL)
669 		return head_a;
670 
671 	prev = head_a->prev;
672 	next = &head;
673 	if (head_a->cell <= head_b->cell) {
674 		head = head_a;
675 	} else {
676 		head = head_b;
677 		head_b->prev = prev;
678 		goto start_with_b;
679 	}
680 
681 	do {
682 		x = head_b->cell;
683 		while (head_a != NULL && head_a->cell <= x) {
684 			prev = head_a;
685 			next = &head_a->next;
686 			head_a = head_a->next;
687 		}
688 
689 		head_b->prev = prev;
690 		*next = head_b;
691 		if (head_a == NULL)
692 			return head;
693 
694 start_with_b:
695 		x = head_a->cell;
696 		while (head_b != NULL && head_b->cell <= x) {
697 			prev = head_b;
698 			next = &head_b->next;
699 			head_b = head_b->next;
700 		}
701 
702 		head_a->prev = prev;
703 		*next = head_a;
704 		if (head_b == NULL)
705 			return head;
706 	} while (1);
707 }
708 
709 static struct edge *
710 sort_edges(struct edge  *list,
711 	   unsigned int  level,
712 	   struct edge **head_out)
713 {
714 	struct edge *head_other, *remaining;
715 	unsigned int i;
716 
717 	head_other = list->next;
718 	if (head_other == NULL) {
719 		*head_out = list;
720 		return NULL;
721 	}
722 
723 	remaining = head_other->next;
724 	if (list->cell <= head_other->cell) {
725 		*head_out = list;
726 		head_other->next = NULL;
727 	} else {
728 		*head_out = head_other;
729 		head_other->prev = list->prev;
730 		head_other->next = list;
731 		list->prev = head_other;
732 		list->next = NULL;
733 	}
734 
735 	for (i = 0; i < level && remaining; i++) {
736 		remaining = sort_edges(remaining, i, &head_other);
737 		*head_out = merge_sorted_edges(*head_out, head_other);
738 	}
739 
740 	return remaining;
741 }
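
/* sort_edges() is a list merge sort: each pass of the loop above merges
 * progressively longer sorted runs, and the still-unsorted tail is returned
 * so the caller can keep going.  Passing level = UINT_MAX, as
 * merge_unsorted_edges() does below, sorts the whole list in a single call:
 *
 *	sort_edges(unsorted, UINT_MAX, &unsorted);
 */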
742 
743 static struct edge *filter(struct edge *edges)
744 {
745 	struct edge *e;
746 
747 	e = edges;
748 	while (e->next) {
749 		struct edge *n = e->next;
750 		if (e->dir == -n->dir &&
751 		    e->height_left == n->height_left &&
752 		    e->x.quo == n->x.quo &&
753 		    e->x.rem == n->x.rem &&
754 		    e->dxdy.quo == n->dxdy.quo &&
755 		    e->dxdy.rem == n->dxdy.rem) {
756 			if (e->prev)
757 				e->prev->next = n->next;
758 			else
759 				edges = n->next;
760 			if (n->next)
761 				n->next->prev = e->prev;
762 			else
763 				break;
764 			e = n->next;
765 		} else
766 			e = n;
767 	}
768 
769 	return edges;
770 }
771 
772 static struct edge *
773 merge_unsorted_edges(struct edge *head, struct edge *unsorted)
774 {
775 	sort_edges(unsorted, UINT_MAX, &unsorted);
776 	return merge_sorted_edges(head, filter(unsorted));
777 }
778 
779 /* Test if the edges on the active list can be safely advanced by a
780  * full row without intersections or any edges ending. */
781 inline static int
782 can_full_step(struct active_list *active)
783 {
784 	const struct edge *e;
785 	int min_height = INT_MAX;
786 
787 	assert(active->head.next != &active->tail);
788 	for (e = active->head.next; &active->tail != e; e = e->next) {
789 		assert(e->height_left > 0);
790 
791 		if (e->dy != 0)
792 			return 0;
793 
794 		if (e->height_left < min_height) {
795 			min_height = e->height_left;
796 			if (min_height < FAST_SAMPLES_Y)
797 				return 0;
798 		}
799 	}
800 
801 	return min_height;
802 }
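
/* That is, a full-row step is only possible when every active edge is
 * vertical (dy == 0) and none of them terminates within the next pixel row;
 * the returned minimum height lets tor_render() and tor_inplace() reuse the
 * resulting row for several scanlines before stepping the edges again.
 */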
803 
804 inline static void
805 merge_edges(struct active_list *active, struct edge *edges)
806 {
807 	active->head.next = merge_unsorted_edges(active->head.next, edges);
808 }
809 
810 inline static int
811 fill_buckets(struct active_list *active,
812 	     struct edge *edge,
813 	     struct edge **buckets)
814 {
815 	int ymax = 0;
816 
817 	while (edge) {
818 		int y = edge->ytop & (FAST_SAMPLES_Y-1);
819 		struct edge *next = edge->next;
820 		struct edge **b = &buckets[y];
821 		__DBG(("%s: ytop %d -> bucket %d\n", __FUNCTION__,
822 		       edge->ytop, y));
823 		if (*b)
824 			(*b)->prev = edge;
825 		edge->next = *b;
826 		edge->prev = NULL;
827 		*b = edge;
828 		edge = next;
829 		if (y > ymax)
830 			ymax = y;
831 	}
832 
833 	return ymax;
834 }
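
/* fill_buckets() scatters the edges starting in this pixel row into
 * per-subrow buckets (keyed by ytop modulo FAST_SAMPLES_Y) and returns the
 * highest occupied subrow; a return of 0 therefore means every new edge
 * begins on the first subsample row, which is what lets the callers even
 * consider the fast full-row path.
 */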
835 
836 inline static void
837 nonzero_subrow(struct active_list *active, struct cell_list *coverages)
838 {
839 	struct edge *edge = active->head.next;
840 	int prev_x = INT_MIN;
841 	int winding = 0, xstart = edge->cell;
842 
843 	cell_list_rewind(coverages);
844 
845 	while (&active->tail != edge) {
846 		struct edge *next = edge->next;
847 
848 		winding += edge->dir;
849 		if (0 == winding && edge->next->cell != edge->cell) {
850 			cell_list_add_subspan(coverages, xstart, edge->cell);
851 			xstart = edge->next->cell;
852 		}
853 
854 		assert(edge->height_left > 0);
855 		if (--edge->height_left) {
856 			if (edge->dy) {
857 				edge->x.quo += edge->dxdy.quo;
858 				edge->x.rem += edge->dxdy.rem;
859 				if (edge->x.rem < 0) {
860 					--edge->x.quo;
861 					edge->x.rem += edge->dy;
862 				} else if (edge->x.rem >= edge->dy) {
863 					++edge->x.quo;
864 					edge->x.rem -= edge->dy;
865 				}
866 				edge->cell = edge->x.quo + (edge->x.rem >= edge->dy/2);
867 			}
868 
869 			if (edge->cell < prev_x) {
870 				struct edge *pos = edge->prev;
871 				pos->next = next;
872 				next->prev = pos;
873 				do
874 					pos = pos->prev;
875 				while (edge->cell < pos->cell);
876 				pos->next->prev = edge;
877 				edge->next = pos->next;
878 				edge->prev = pos;
879 				pos->next = edge;
880 			} else
881 				prev_x = edge->cell;
882 		} else {
883 			edge->prev->next = next;
884 			next->prev = edge->prev;
885 		}
886 
887 		edge = next;
888 	}
889 }
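
/* nonzero_subrow() makes one pass over the active list per subsample row:
 * edge directions accumulate into a winding count and a subspan is emitted
 * each time the count drops back to zero (the non-zero winding rule).
 * After an edge's x is advanced by dxdy, the small insertion-sort shuffle
 * above keeps the list ordered by cell for the next pass.
 */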
890 
891 static void
892 nonzero_row(struct active_list *active, struct cell_list *coverages)
893 {
894 	struct edge *left = active->head.next;
895 
896 	while (&active->tail != left) {
897 		struct edge *right;
898 		int winding = left->dir;
899 
900 		left->height_left -= FAST_SAMPLES_Y;
901 		assert(left->height_left >= 0);
902 		if (!left->height_left) {
903 			left->prev->next = left->next;
904 			left->next->prev = left->prev;
905 		}
906 
907 		right = left->next;
908 		do {
909 			right->height_left -= FAST_SAMPLES_Y;
910 			assert(right->height_left >= 0);
911 			if (!right->height_left) {
912 				right->prev->next = right->next;
913 				right->next->prev = right->prev;
914 			}
915 
916 			winding += right->dir;
917 			if (0 == winding)
918 				break;
919 
920 			right = right->next;
921 		} while (1);
922 
923 		cell_list_add_span(coverages, left->cell, right->cell);
924 		left = right->next;
925 	}
926 }
927 
928 static void
929 tor_fini(struct tor *converter)
930 {
931 	polygon_fini(converter->polygon);
932 	cell_list_fini(converter->coverages);
933 }
934 
935 static bool
936 tor_init(struct tor *converter, const BoxRec *box, int num_edges)
937 {
938 	__DBG(("%s: (%d, %d),(%d, %d) x (%d, %d), num_edges=%d\n",
939 	       __FUNCTION__,
940 	       box->x1, box->y1, box->x2, box->y2,
941 	       FAST_SAMPLES_X, FAST_SAMPLES_Y,
942 	       num_edges));
943 
944 	converter->extents = *box;
945 
946 	if (!cell_list_init(converter->coverages, box->x1, box->x2))
947 		return false;
948 
949 	active_list_reset(converter->active);
950 	if (!polygon_init(converter->polygon, num_edges,
951 			  (int)box->y1 * FAST_SAMPLES_Y,
952 			  (int)box->y2 * FAST_SAMPLES_Y)) {
953 		cell_list_fini(converter->coverages);
954 		return false;
955 	}
956 
957 	return true;
958 }
959 
960 static void
961 tor_add_trapezoid(struct tor *tor,
962 		  const xTrapezoid *t,
963 		  int dx, int dy)
964 {
965 	if (!xTrapezoidValid(t)) {
966 		__DBG(("%s: skipping invalid trapezoid: top=%d, bottom=%d, left=(%d, %d), (%d, %d), right=(%d, %d), (%d, %d)\n",
967 		       __FUNCTION__,
968 		       t->top, t->bottom,
969 		       t->left.p1.x, t->left.p1.y,
970 		       t->left.p2.x, t->left.p2.y,
971 		       t->right.p1.x, t->right.p1.y,
972 		       t->right.p2.x, t->right.p2.y));
973 		return;
974 	}
975 	polygon_add_edge(tor->polygon, t, &t->left, 1, dx, dy);
976 	polygon_add_edge(tor->polygon, t, &t->right, -1, dx, dy);
977 }
978 
979 static void
980 step_edges(struct active_list *active, int count)
981 {
982 	struct edge *edge;
983 
984 	count *= FAST_SAMPLES_Y;
985 	for (edge = active->head.next; edge != &active->tail; edge = edge->next) {
986 		edge->height_left -= count;
987 		assert(edge->height_left >= 0);
988 		if (!edge->height_left) {
989 			edge->prev->next = edge->next;
990 			edge->next->prev = edge->prev;
991 		}
992 	}
993 }
994 
995 static void
996 tor_blt_span(struct sna *sna,
997 	     struct sna_composite_spans_op *op,
998 	     pixman_region16_t *clip,
999 	     const BoxRec *box,
1000 	     int coverage)
1001 {
1002 	__DBG(("%s: %d -> %d @ %d\n", __FUNCTION__, box->x1, box->x2, coverage));
1003 
1004 	op->box(sna, op, box, AREA_TO_ALPHA(coverage));
1005 	apply_damage_box(&op->base, box);
1006 }
1007 
1008 static void
1009 tor_blt_span__no_damage(struct sna *sna,
1010 			struct sna_composite_spans_op *op,
1011 			pixman_region16_t *clip,
1012 			const BoxRec *box,
1013 			int coverage)
1014 {
1015 	__DBG(("%s: %d -> %d @ %d\n", __FUNCTION__, box->x1, box->x2, coverage));
1016 
1017 	op->box(sna, op, box, AREA_TO_ALPHA(coverage));
1018 }
1019 
1020 static void
1021 tor_blt_span_clipped(struct sna *sna,
1022 		     struct sna_composite_spans_op *op,
1023 		     pixman_region16_t *clip,
1024 		     const BoxRec *box,
1025 		     int coverage)
1026 {
1027 	pixman_region16_t region;
1028 	float opacity;
1029 
1030 	opacity = AREA_TO_ALPHA(coverage);
1031 	__DBG(("%s: %d -> %d @ %f\n", __FUNCTION__, box->x1, box->x2, opacity));
1032 
1033 	pixman_region_init_rects(&region, box, 1);
1034 	RegionIntersect(&region, &region, clip);
1035 	if (region_num_rects(&region)) {
1036 		op->boxes(sna, op,
1037 			  region_rects(&region),
1038 			  region_num_rects(&region),
1039 			  opacity);
1040 		apply_damage(&op->base, &region);
1041 	}
1042 	pixman_region_fini(&region);
1043 }
1044 
1045 static void
1046 tor_blt_span_mono(struct sna *sna,
1047 		  struct sna_composite_spans_op *op,
1048 		  pixman_region16_t *clip,
1049 		  const BoxRec *box,
1050 		  int coverage)
1051 {
1052 	if (coverage < FAST_SAMPLES_XY/2)
1053 		return;
1054 
1055 	tor_blt_span(sna, op, clip, box, FAST_SAMPLES_XY);
1056 }
1057 
1058 static void
1059 tor_blt_span_mono_clipped(struct sna *sna,
1060 			  struct sna_composite_spans_op *op,
1061 			  pixman_region16_t *clip,
1062 			  const BoxRec *box,
1063 			  int coverage)
1064 {
1065 	if (coverage < FAST_SAMPLES_XY/2)
1066 		return;
1067 
1068 	tor_blt_span_clipped(sna, op, clip, box, FAST_SAMPLES_XY);
1069 }
1070 
1071 static void
1072 tor_blt_span_mono_unbounded(struct sna *sna,
1073 			    struct sna_composite_spans_op *op,
1074 			    pixman_region16_t *clip,
1075 			    const BoxRec *box,
1076 			    int coverage)
1077 {
1078 	tor_blt_span(sna, op, clip, box,
1079 		     coverage < FAST_SAMPLES_XY/2 ? 0 : FAST_SAMPLES_XY);
1080 }
1081 
1082 static void
1083 tor_blt_span_mono_unbounded_clipped(struct sna *sna,
1084 				    struct sna_composite_spans_op *op,
1085 				    pixman_region16_t *clip,
1086 				    const BoxRec *box,
1087 				    int coverage)
1088 {
1089 	tor_blt_span_clipped(sna, op, clip, box,
1090 			     coverage < FAST_SAMPLES_XY/2 ? 0 : FAST_SAMPLES_XY);
1091 }
1092 
1093 static void
1094 tor_blt(struct sna *sna,
1095 	struct tor *converter,
1096 	struct sna_composite_spans_op *op,
1097 	pixman_region16_t *clip,
1098 	void (*span)(struct sna *sna,
1099 		     struct sna_composite_spans_op *op,
1100 		     pixman_region16_t *clip,
1101 		     const BoxRec *box,
1102 		     int coverage),
1103 	int y, int height,
1104 	int unbounded)
1105 {
1106 	struct cell_list *cells = converter->coverages;
1107 	struct cell *cell;
1108 	BoxRec box;
1109 	int cover;
1110 
1111 	box.y1 = y + converter->extents.y1;
1112 	box.y2 = box.y1 + height;
1113 	assert(box.y2 <= converter->extents.y2);
1114 	box.x1 = converter->extents.x1;
1115 
1116 	/* Form the spans from the coverages and areas. */
1117 	cover = cells->head.covered_height*FAST_SAMPLES_X;
1118 	assert(cover >= 0);
1119 	for (cell = cells->head.next; cell != &cells->tail; cell = cell->next) {
1120 		int x = cell->x;
1121 
1122 		assert(x >= converter->extents.x1);
1123 		assert(x < converter->extents.x2);
1124 		__DBG(("%s: cell=(%d, %d, %d), cover=%d\n", __FUNCTION__,
1125 		       cell->x, cell->covered_height, cell->uncovered_area,
1126 		       cover));
1127 
1128 		if (cell->covered_height || cell->uncovered_area) {
1129 			box.x2 = x;
1130 			if (box.x2 > box.x1 && (unbounded || cover)) {
1131 				__DBG(("%s: span (%d, %d)x(%d, %d) @ %d\n", __FUNCTION__,
1132 				       box.x1, box.y1,
1133 				       box.x2 - box.x1,
1134 				       box.y2 - box.y1,
1135 				       cover));
1136 				span(sna, op, clip, &box, cover);
1137 			}
1138 			box.x1 = box.x2;
1139 			cover += cell->covered_height*FAST_SAMPLES_X;
1140 		}
1141 
1142 		if (cell->uncovered_area) {
1143 			int area = cover - cell->uncovered_area;
1144 			box.x2 = x + 1;
1145 			if (unbounded || area) {
1146 				__DBG(("%s: span (%d, %d)x(%d, %d) @ %d\n", __FUNCTION__,
1147 				       box.x1, box.y1,
1148 				       box.x2 - box.x1,
1149 				       box.y2 - box.y1,
1150 				       area));
1151 				span(sna, op, clip, &box, area);
1152 			}
1153 			box.x1 = box.x2;
1154 		}
1155 	}
1156 
1157 	box.x2 = converter->extents.x2;
1158 	if (box.x2 > box.x1 && (unbounded || cover)) {
1159 		__DBG(("%s: span (%d, %d)x(%d, %d) @ %d\n", __FUNCTION__,
1160 		       box.x1, box.y1,
1161 		       box.x2 - box.x1,
1162 		       box.y2 - box.y1,
1163 		       cover));
1164 		span(sna, op, clip, &box, cover);
1165 	}
1166 }
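
/* Continuing the cell_list_add_subspan() example (4x4 grid, edges at pixel
 * x = 2.5 and x = 7.5 over a whole pixel row): the cell at x = 2 ends up
 * with covered_height = 4, uncovered_area = 8 and the cell at x = 7 with
 * covered_height = -4, uncovered_area = -8.  Walking the cells, cover rises
 * to 16 at x = 2 so pixel 2 receives 16 - 8 = 8 (alpha 0.5), the run [3, 7)
 * is emitted at full cover 16 (alpha 1.0), pixel 7 receives 0 - (-8) = 8
 * (alpha 0.5), and cover returns to zero for the remainder of the row.
 */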
1167 
1168 flatten static void
1169 tor_render(struct sna *sna,
1170 	   struct tor *converter,
1171 	   struct sna_composite_spans_op *op,
1172 	   pixman_region16_t *clip,
1173 	   void (*span)(struct sna *sna,
1174 			struct sna_composite_spans_op *op,
1175 			pixman_region16_t *clip,
1176 			const BoxRec *box,
1177 			int coverage),
1178 	   int unbounded)
1179 {
1180 	struct polygon *polygon = converter->polygon;
1181 	struct cell_list *coverages = converter->coverages;
1182 	struct active_list *active = converter->active;
1183 	struct edge *buckets[FAST_SAMPLES_Y] = { 0 };
1184 	int16_t i, j, h = converter->extents.y2 - converter->extents.y1;
1185 
1186 	__DBG(("%s: unbounded=%d\n", __FUNCTION__, unbounded));
1187 
1188 	/* Render each pixel row. */
1189 	for (i = 0; i < h; i = j) {
1190 		int do_full_step = 0;
1191 
1192 		j = i + 1;
1193 
1194 		/* Determine if we can ignore this row or use the full pixel
1195 		 * stepper. */
1196 		if (fill_buckets(active, polygon->y_buckets[i], buckets) == 0) {
1197 			if (buckets[0]) {
1198 				merge_edges(active, buckets[0]);
1199 				buckets[0] = NULL;
1200 			}
1201 			if (active->head.next == &active->tail) {
1202 				for (; polygon->y_buckets[j] == NULL; j++)
1203 					;
1204 				__DBG(("%s: no new edges and no existing edges, skipping, %d -> %d\n",
1205 				       __FUNCTION__, i, j));
1206 
1207 				assert(j <= h);
1208 				if (unbounded) {
1209 					BoxRec box;
1210 
1211 					box = converter->extents;
1212 					box.y1 += i;
1213 					box.y2 = converter->extents.y1 + j;
1214 
1215 					span(sna, op, clip, &box, 0);
1216 				}
1217 				continue;
1218 			}
1219 
1220 			do_full_step = can_full_step(active);
1221 		}
1222 
1223 		__DBG(("%s: y=%d-%d, do_full_step=%d, new edges=%d\n",
1224 		       __FUNCTION__,
1225 		       i, j, do_full_step,
1226 		       polygon->y_buckets[i] != NULL));
1227 		if (do_full_step) {
1228 			nonzero_row(active, coverages);
1229 
1230 			while (polygon->y_buckets[j] == NULL &&
1231 			       do_full_step >= 2*FAST_SAMPLES_Y) {
1232 				do_full_step -= FAST_SAMPLES_Y;
1233 				j++;
1234 			}
1235 			assert(j >= i + 1 && j <= h);
1236 			if (j != i + 1)
1237 				step_edges(active, j - (i + 1));
1238 
1239 			__DBG(("%s: vertical edges, full step (%d, %d)\n",
1240 			       __FUNCTION__,  i, j));
1241 		} else {
1242 			int suby;
1243 
1244 			/* Subsample this row. */
1245 			for (suby = 0; suby < FAST_SAMPLES_Y; suby++) {
1246 				if (buckets[suby]) {
1247 					merge_edges(active, buckets[suby]);
1248 					buckets[suby] = NULL;
1249 				}
1250 
1251 				nonzero_subrow(active, coverages);
1252 			}
1253 		}
1254 
1255 		assert(j > i);
1256 		tor_blt(sna, converter, op, clip, span, i, j-i, unbounded);
1257 		cell_list_reset(coverages);
1258 	}
1259 }
1260 
1261 static void
1262 inplace_row(struct active_list *active, uint8_t *row, int width)
1263 {
1264 	struct edge *left = active->head.next;
1265 
1266 	while (&active->tail != left) {
1267 		struct edge *right;
1268 		int winding = left->dir;
1269 		int lfx, rfx;
1270 		int lix, rix;
1271 
1272 		left->height_left -= FAST_SAMPLES_Y;
1273 		assert(left->height_left >= 0);
1274 		if (!left->height_left) {
1275 			left->prev->next = left->next;
1276 			left->next->prev = left->prev;
1277 		}
1278 
1279 		right = left->next;
1280 		do {
1281 			right->height_left -= FAST_SAMPLES_Y;
1282 			assert(right->height_left >= 0);
1283 			if (!right->height_left) {
1284 				right->prev->next = right->next;
1285 				right->next->prev = right->prev;
1286 			}
1287 
1288 			winding += right->dir;
1289 			if (0 == winding && right->cell != right->next->cell)
1290 				break;
1291 
1292 			right = right->next;
1293 		} while (1);
1294 
1295 		if (left->cell < 0) {
1296 			lix = lfx = 0;
1297 		} else if (left->cell >= width * FAST_SAMPLES_X) {
1298 			lix = width;
1299 			lfx = 0;
1300 		} else
1301 			FAST_SAMPLES_X_TO_INT_FRAC(left->cell, lix, lfx);
1302 
1303 		if (right->cell < 0) {
1304 			rix = rfx = 0;
1305 		} else if (right->cell >= width * FAST_SAMPLES_X) {
1306 			rix = width;
1307 			rfx = 0;
1308 		} else
1309 			FAST_SAMPLES_X_TO_INT_FRAC(right->cell, rix, rfx);
1310 		if (lix == rix) {
1311 			if (rfx != lfx) {
1312 				assert(lix < width);
1313 				row[lix] += (rfx-lfx) * 256 / FAST_SAMPLES_X;
1314 			}
1315 		} else {
1316 			assert(lix < width);
1317 			if (lfx == 0)
1318 				row[lix] = 0xff;
1319 			else
1320 				row[lix] += 256 - lfx * 256 / FAST_SAMPLES_X;
1321 
1322 			assert(rix <= width);
1323 			if (rfx) {
1324 				assert(rix < width);
1325 				row[rix] += rfx * 256 / FAST_SAMPLES_X;
1326 			}
1327 
1328 			if (rix > ++lix) {
1329 				uint8_t *r = row + lix;
1330 				rix -= lix;
1331 #if 0
1332 				if (rix == 1)
1333 					*row = 0xff;
1334 				else
1335 					memset(row, 0xff, rix);
1336 #else
1337 				if ((uintptr_t)r & 1 && rix) {
1338 					*r++ = 0xff;
1339 					rix--;
1340 				}
1341 				if ((uintptr_t)r & 2 && rix >= 2) {
1342 					*(uint16_t *)r = 0xffff;
1343 					r += 2;
1344 					rix -= 2;
1345 				}
1346 				if ((uintptr_t)r & 4 && rix >= 4) {
1347 					*(uint32_t *)r = 0xffffffff;
1348 					r += 4;
1349 					rix -= 4;
1350 				}
1351 				while (rix >= 8) {
1352 					*(uint64_t *)r = 0xffffffffffffffff;
1353 					r += 8;
1354 					rix -= 8;
1355 				}
1356 				if (rix & 4) {
1357 					*(uint32_t *)r = 0xffffffff;
1358 					r += 4;
1359 				}
1360 				if (rix & 2) {
1361 					*(uint16_t *)r = 0xffff;
1362 					r += 2;
1363 				}
1364 				if (rix & 1)
1365 					*r = 0xff;
1366 #endif
1367 			}
1368 		}
1369 
1370 		left = right->next;
1371 	}
1372 }
1373 
1374 inline static void
1375 inplace_subrow(struct active_list *active, int8_t *row,
1376 	       int width, int *min, int *max)
1377 {
1378 	struct edge *edge = active->head.next;
1379 	int prev_x = INT_MIN;
1380 	int winding = 0, xstart = INT_MIN;
1381 
1382 	while (&active->tail != edge) {
1383 		struct edge *next = edge->next;
1384 
1385 		winding += edge->dir;
1386 		if (0 == winding) {
1387 			if (edge->next->cell != edge->cell) {
1388 				if (edge->cell <= xstart) {
1389 					xstart = INT_MIN;
1390 				} else  {
1391 					int fx;
1392 					int ix;
1393 
1394 					if (xstart < FAST_SAMPLES_X * width) {
1395 						FAST_SAMPLES_X_TO_INT_FRAC(xstart, ix, fx);
1396 						if (ix < *min)
1397 							*min = ix;
1398 
1399 						row[ix++] += FAST_SAMPLES_X - fx;
1400 						if (fx && ix < width)
1401 							row[ix] += fx;
1402 					}
1403 
1404 					xstart = edge->cell;
1405 					if (xstart < FAST_SAMPLES_X * width) {
1406 						FAST_SAMPLES_X_TO_INT_FRAC(xstart, ix, fx);
1407 						row[ix] -= FAST_SAMPLES_X - fx;
1408 						if (fx && ix + 1 < width)
1409 							row[++ix] -= fx;
1410 
1411 						if (ix >= *max)
1412 							*max = ix + 1;
1413 
1414 						xstart = INT_MIN;
1415 					} else
1416 						*max = width;
1417 				}
1418 			}
1419 		} else if (xstart < 0) {
1420 			xstart = MAX(edge->cell, 0);
1421 		}
1422 
1423 		assert(edge->height_left > 0);
1424 		if (--edge->height_left) {
1425 			if (edge->dy) {
1426 				edge->x.quo += edge->dxdy.quo;
1427 				edge->x.rem += edge->dxdy.rem;
1428 				if (edge->x.rem < 0) {
1429 					--edge->x.quo;
1430 					edge->x.rem += edge->dy;
1431 				} else if (edge->x.rem >= edge->dy) {
1432 					++edge->x.quo;
1433 					edge->x.rem -= edge->dy;
1434 				}
1435 				edge->cell = edge->x.quo + (edge->x.rem >= edge->dy/2);
1436 			}
1437 
1438 			if (edge->cell < prev_x) {
1439 				struct edge *pos = edge->prev;
1440 				pos->next = next;
1441 				next->prev = pos;
1442 				do
1443 					pos = pos->prev;
1444 				while (edge->cell < pos->cell);
1445 				pos->next->prev = edge;
1446 				edge->next = pos->next;
1447 				edge->prev = pos;
1448 				pos->next = edge;
1449 			} else
1450 				prev_x = edge->cell;
1451 		} else {
1452 			edge->prev->next = next;
1453 			next->prev = edge->prev;
1454 		}
1455 
1456 		edge = next;
1457 	}
1458 }
1459 
1460 inline static void
1461 inplace_end_subrows(struct active_list *active, uint8_t *row,
1462 		    int8_t *buf, int width)
1463 {
1464 	int cover = 0;
1465 
1466 	while (width >= 4) {
1467 		uint32_t dw;
1468 		int v;
1469 
1470 		dw = *(uint32_t *)buf;
1471 		buf += 4;
1472 
1473 		if (dw == 0) {
1474 			v = cover * 256 / FAST_SAMPLES_XY;
1475 			v -= v >> 8;
1476 			v |= v << 8;
1477 			dw = v | v << 16;
1478 		} else {
1479 			cover += (int8_t)(dw & 0xff);
1480 			if (cover) {
1481 				assert(cover > 0);
1482 				v = cover * 256 / FAST_SAMPLES_XY;
1483 				v -= v >> 8;
1484 				dw >>= 8;
1485 				dw |= v << 24;
1486 			} else
1487 				dw >>= 8;
1488 
1489 			cover += (int8_t)(dw & 0xff);
1490 			if (cover) {
1491 				assert(cover > 0);
1492 				v = cover * 256 / FAST_SAMPLES_XY;
1493 				v -= v >> 8;
1494 				dw >>= 8;
1495 				dw |= v << 24;
1496 			} else
1497 				dw >>= 8;
1498 
1499 			cover += (int8_t)(dw & 0xff);
1500 			if (cover) {
1501 				assert(cover > 0);
1502 				v = cover * 256 / FAST_SAMPLES_XY;
1503 				v -= v >> 8;
1504 				dw >>= 8;
1505 				dw |= v << 24;
1506 			} else
1507 				dw >>= 8;
1508 
1509 			cover += (int8_t)(dw & 0xff);
1510 			if (cover) {
1511 				assert(cover > 0);
1512 				v = cover * 256 / FAST_SAMPLES_XY;
1513 				v -= v >> 8;
1514 				dw >>= 8;
1515 				dw |= v << 24;
1516 			} else
1517 				dw >>= 8;
1518 		}
1519 
1520 		*(uint32_t *)row = dw;
1521 		row += 4;
1522 		width -= 4;
1523 	}
1524 
1525 	while (width--) {
1526 		int v;
1527 
1528 		cover += *buf++;
1529 		assert(cover >= 0);
1530 
1531 		v = cover * 256 / FAST_SAMPLES_XY;
1532 		v -= v >> 8;
1533 		*row++ = v;
1534 	}
1535 }
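
/* The conversion above uses the usual 0..255 folding trick: with a 4x4 grid
 * (illustrative values) a fully covered pixel accumulates cover = 16, so
 * v = 16*256/16 = 256 and "v -= v >> 8" pulls that single overflow back down
 * to 255, while partial covers such as 8 -> 128 pass through unchanged.  The
 * four-at-a-time loop simply unrolls this over a uint32_t of signed coverage
 * deltas (as written it assumes a little-endian byte order, as on the Intel
 * targets this driver serves).
 */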
1536 
1537 static void
1538 convert_mono(uint8_t *ptr, int w)
1539 {
1540 	while (w--) {
1541 		*ptr = 0xff * (*ptr >= 0xf0);
1542 		ptr++;
1543 	}
1544 }
1545 
1546 static void
1547 tor_inplace(struct tor *converter, PixmapPtr scratch, int mono, uint8_t *buf)
1548 {
1549 	int i, j, h = converter->extents.y2;
1550 	struct polygon *polygon = converter->polygon;
1551 	struct active_list *active = converter->active;
1552 	struct edge *buckets[FAST_SAMPLES_Y] = { 0 };
1553 	uint8_t *row = scratch->devPrivate.ptr;
1554 	int stride = scratch->devKind;
1555 	int width = scratch->drawable.width;
1556 
1557 	__DBG(("%s: mono=%d, buf?=%d\n", __FUNCTION__, mono, buf != NULL));
1558 	assert(converter->extents.y1 == 0);
1559 	assert(converter->extents.x1 == 0);
1560 	assert(scratch->drawable.depth == 8);
1561 
1562 	/* Render each pixel row. */
1563 	for (i = 0; i < h; i = j) {
1564 		int do_full_step = 0;
1565 		void *ptr = buf ?: row;
1566 
1567 		j = i + 1;
1568 
1569 		/* Determine if we can ignore this row or use the full pixel
1570 		 * stepper. */
1571 		if (fill_buckets(active, polygon->y_buckets[i], buckets) == 0) {
1572 			if (buckets[0]) {
1573 				merge_edges(active, buckets[0]);
1574 				buckets[0] = NULL;
1575 			}
1576 			if (active->head.next == &active->tail) {
1577 				for (; !polygon->y_buckets[j]; j++)
1578 					;
1579 				__DBG(("%s: no new edges and no existing edges, skipping, %d -> %d\n",
1580 				       __FUNCTION__, i, j));
1581 
1582 				memset(row, 0, stride*(j-i));
1583 				row += stride*(j-i);
1584 				continue;
1585 			}
1586 
1587 			do_full_step = can_full_step(active);
1588 		}
1589 
1590 		__DBG(("%s: y=%d, do_full_step=%d, new edges=%d\n",
1591 		       __FUNCTION__,
1592 		       i, do_full_step,
1593 		       polygon->y_buckets[i] != NULL));
1594 		if (do_full_step) {
1595 			memset(ptr, 0, width);
1596 			inplace_row(active, ptr, width);
1597 			if (mono)
1598 				convert_mono(ptr, width);
1599 			if (row != ptr)
1600 				memcpy(row, ptr, width);
1601 
1602 			while (polygon->y_buckets[j] == NULL &&
1603 			       do_full_step >= 2*FAST_SAMPLES_Y) {
1604 				do_full_step -= FAST_SAMPLES_Y;
1605 				row += stride;
1606 				memcpy(row, ptr, width);
1607 				j++;
1608 			}
1609 			if (j != i + 1)
1610 				step_edges(active, j - (i + 1));
1611 
1612 			__DBG(("%s: vertical edges, full step (%d, %d)\n",
1613 			       __FUNCTION__,  i, j));
1614 		} else {
1615 			int min = width, max = 0, suby;
1616 
1617 			/* Subsample this row. */
1618 			memset(ptr, 0, width);
1619 			for (suby = 0; suby < FAST_SAMPLES_Y; suby++) {
1620 				if (buckets[suby]) {
1621 					merge_edges(active, buckets[suby]);
1622 					buckets[suby] = NULL;
1623 				}
1624 
1625 				inplace_subrow(active, ptr, width, &min, &max);
1626 			}
1627 			assert(min >= 0 && max <= width);
1628 			memset(row, 0, min);
1629 			if (max > min) {
1630 				inplace_end_subrows(active, row+min, (int8_t*)ptr+min, max-min);
1631 				if (mono)
1632 					convert_mono(row+min, max-min);
1633 			}
1634 			if (max < width)
1635 				memset(row+max, 0, width-max);
1636 		}
1637 
1638 		row += stride;
1639 	}
1640 }
1641 
1642 static int operator_is_bounded(uint8_t op)
1643 {
1644 	switch (op) {
1645 	case PictOpOver:
1646 	case PictOpOutReverse:
1647 	case PictOpAdd:
1648 		return true;
1649 	default:
1650 		return false;
1651 	}
1652 }
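
/* Bounded operators only affect pixels where the trapezoid mask is non-zero,
 * so zero-coverage spans can simply be skipped; operators outside the list
 * above (PictOpSrc, PictOpIn, ...) must also write the uncovered portion of
 * the extents, which is why the span routines take an "unbounded" flag and
 * emit zero-coverage spans when it is set.
 */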
1653 
1654 static span_func_t
1655 choose_span(struct sna_composite_spans_op *tmp,
1656 	    PicturePtr dst,
1657 	    PictFormatPtr maskFormat,
1658 	    RegionPtr clip)
1659 {
1660 	span_func_t span;
1661 
1662 	if (is_mono(dst, maskFormat)) {
1663 		/* XXX An imprecise approximation */
1664 		if (maskFormat && !operator_is_bounded(tmp->base.op)) {
1665 			span = tor_blt_span_mono_unbounded;
1666 			if (clip->data)
1667 				span = tor_blt_span_mono_unbounded_clipped;
1668 		} else {
1669 			span = tor_blt_span_mono;
1670 			if (clip->data)
1671 				span = tor_blt_span_mono_clipped;
1672 		}
1673 	} else {
1674 		if (clip->data)
1675 			span = tor_blt_span_clipped;
1676 		else if (tmp->base.damage == NULL)
1677 			span = tor_blt_span__no_damage;
1678 		else
1679 			span = tor_blt_span;
1680 	}
1681 
1682 	return span;
1683 }
1684 
1685 struct span_thread {
1686 	struct sna *sna;
1687 	const struct sna_composite_spans_op *op;
1688 	const xTrapezoid *traps;
1689 	RegionPtr clip;
1690 	span_func_t span;
1691 	BoxRec extents;
1692 	int dx, dy, draw_y;
1693 	int ntrap;
1694 	bool unbounded;
1695 };
1696 
1697 #define SPAN_THREAD_MAX_BOXES (8192/sizeof(struct sna_opacity_box))
1698 struct span_thread_boxes {
1699 	const struct sna_composite_spans_op *op;
1700 	const BoxRec *clip_start, *clip_end;
1701 	int num_boxes;
1702 	struct sna_opacity_box boxes[SPAN_THREAD_MAX_BOXES];
1703 };
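
/* SPAN_THREAD_MAX_BOXES keeps the per-thread staging array at roughly 8KiB
 * of sna_opacity_box entries; span_thread_add_box() flushes a full batch
 * through op->thread_boxes(), and span_thread() flushes whatever remains
 * once the converter has finished.
 */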
1704 
1705 static void span_thread_add_box(struct sna *sna, void *data,
1706 				const BoxRec *box, float alpha)
1707 {
1708 	struct span_thread_boxes *b = data;
1709 
1710 	__DBG(("%s: adding box with alpha=%f\n", __FUNCTION__, alpha));
1711 
1712 	if (unlikely(b->num_boxes == SPAN_THREAD_MAX_BOXES)) {
1713 		DBG(("%s: flushing %d boxes\n", __FUNCTION__, b->num_boxes));
1714 		b->op->thread_boxes(sna, b->op, b->boxes, b->num_boxes);
1715 		b->num_boxes = 0;
1716 	}
1717 
1718 	b->boxes[b->num_boxes].box = *box++;
1719 	b->boxes[b->num_boxes].alpha = alpha;
1720 	b->num_boxes++;
1721 	assert(b->num_boxes <= SPAN_THREAD_MAX_BOXES);
1722 }
1723 
1724 static void
1725 span_thread_box(struct sna *sna,
1726 		struct sna_composite_spans_op *op,
1727 		pixman_region16_t *clip,
1728 		const BoxRec *box,
1729 		int coverage)
1730 {
1731 	struct span_thread_boxes *b = (struct span_thread_boxes *)op;
1732 
1733 	__DBG(("%s: %d -> %d @ %d\n", __FUNCTION__, box->x1, box->x2, coverage));
1734 	if (b->num_boxes) {
1735 		struct sna_opacity_box *bb = &b->boxes[b->num_boxes-1];
1736 		if (bb->box.x1 == box->x1 &&
1737 		    bb->box.x2 == box->x2 &&
1738 		    bb->box.y2 == box->y1 &&
1739 		    bb->alpha == AREA_TO_ALPHA(coverage)) {
1740 			bb->box.y2 = box->y2;
1741 			__DBG(("%s: contracted double row: %d -> %d\n", __func__, bb->box.y1, bb->box.y2));
1742 			return;
1743 		}
1744 	}
1745 
1746 	span_thread_add_box(sna, op, box, AREA_TO_ALPHA(coverage));
1747 }
1748 
1749 static void
1750 span_thread_clipped_box(struct sna *sna,
1751 			struct sna_composite_spans_op *op,
1752 			pixman_region16_t *clip,
1753 			const BoxRec *box,
1754 			int coverage)
1755 {
1756 	struct span_thread_boxes *b = (struct span_thread_boxes *)op;
1757 	const BoxRec *c;
1758 
1759 	__DBG(("%s: %d -> %d @ %f\n", __FUNCTION__, box->x1, box->x2,
1760 	       AREA_TO_ALPHA(coverage)));
1761 
1762 	b->clip_start =
1763 		find_clip_box_for_y(b->clip_start, b->clip_end, box->y1);
1764 
1765 	c = b->clip_start;
1766 	while (c != b->clip_end) {
1767 		BoxRec clipped;
1768 
1769 		if (box->y2 <= c->y1)
1770 			break;
1771 
1772 		clipped = *box;
1773 		if (!box_intersect(&clipped, c++))
1774 			continue;
1775 
1776 		span_thread_add_box(sna, op, &clipped, AREA_TO_ALPHA(coverage));
1777 	}
1778 }
1779 
1780 static span_func_t
1781 thread_choose_span(struct sna_composite_spans_op *tmp,
1782 		   PicturePtr dst,
1783 		   PictFormatPtr maskFormat,
1784 		   RegionPtr clip)
1785 {
1786 	span_func_t span;
1787 
1788 	if (tmp->base.damage) {
1789 		DBG(("%s: damaged -> no thread support\n", __FUNCTION__));
1790 		return NULL;
1791 	}
1792 
1793 	if (is_mono(dst, maskFormat)) {
1794 		DBG(("%s: mono rendering -> no thread support\n", __FUNCTION__));
1795 		return NULL;
1796 	} else {
1797 		assert(tmp->thread_boxes);
1798 		DBG(("%s: clipped? %d\n", __FUNCTION__, clip->data != NULL));
1799 		if (clip->data)
1800 			span = span_thread_clipped_box;
1801 		else
1802 			span = span_thread_box;
1803 	}
1804 
1805 	return span;
1806 }
1807 
1808 inline static void
1809 span_thread_boxes_init(struct span_thread_boxes *boxes,
1810 		       const struct sna_composite_spans_op *op,
1811 		       const RegionRec *clip)
1812 {
1813 	boxes->op = op;
1814 	region_get_boxes(clip, &boxes->clip_start, &boxes->clip_end);
1815 	boxes->num_boxes = 0;
1816 }
1817 
1818 static void
1819 span_thread(void *arg)
1820 {
1821 	struct span_thread *thread = arg;
1822 	struct span_thread_boxes boxes;
1823 	struct tor tor;
1824 	const xTrapezoid *t;
1825 	int n, y1, y2;
1826 
1827 	if (!tor_init(&tor, &thread->extents, 2*thread->ntrap))
1828 		return;
1829 
1830 	span_thread_boxes_init(&boxes, thread->op, thread->clip);
1831 
1832 	y1 = thread->extents.y1 - thread->draw_y;
1833 	y2 = thread->extents.y2 - thread->draw_y;
1834 	for (n = thread->ntrap, t = thread->traps; n--; t++) {
1835 		if (pixman_fixed_integer_floor(t->top) >= y2 ||
1836 		    pixman_fixed_integer_ceil(t->bottom) <= y1)
1837 			continue;
1838 
1839 		tor_add_trapezoid(&tor, t, thread->dx, thread->dy);
1840 	}
1841 
1842 	tor_render(thread->sna, &tor,
1843 		   (struct sna_composite_spans_op *)&boxes, thread->clip,
1844 		   thread->span, thread->unbounded);
1845 
1846 	tor_fini(&tor);
1847 
1848 	if (boxes.num_boxes) {
1849 		DBG(("%s: flushing %d boxes\n", __FUNCTION__, boxes.num_boxes));
1850 		assert(boxes.num_boxes <= SPAN_THREAD_MAX_BOXES);
1851 		thread->op->thread_boxes(thread->sna, thread->op,
1852 					 boxes.boxes, boxes.num_boxes);
1853 	}
1854 }
1855 
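/* Span-converter path: clip the trapezoid extents against the destination,
 * set up a composite-spans operation on the GPU and feed it coverage spans
 * straight from the scan converter, splitting the work into horizontal
 * bands across several threads when the area is large enough.
 */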
1856 bool
1857 imprecise_trapezoid_span_converter(struct sna *sna,
1858 				   CARD8 op, PicturePtr src, PicturePtr dst,
1859 				   PictFormatPtr maskFormat, unsigned int flags,
1860 				   INT16 src_x, INT16 src_y,
1861 				   int ntrap, xTrapezoid *traps)
1862 {
1863 	struct sna_composite_spans_op tmp;
1864 	pixman_region16_t clip;
1865 	int16_t dst_x, dst_y;
1866 	bool was_clear;
1867 	int dx, dy, n;
1868 	int num_threads;
1869 
1870 	if (NO_IMPRECISE)
1871 		return false;
1872 
1873 	if (!sna->render.check_composite_spans(sna, op, src, dst, 0, 0, flags)) {
1874 		DBG(("%s: fallback -- composite spans not supported\n",
1875 		     __FUNCTION__));
1876 		return false;
1877 	}
1878 
1879 	if (!trapezoids_bounds(ntrap, traps, &clip.extents))
1880 		return true;
1881 
1882 #if 0
1883 	if (clip.extents.y2 - clip.extents.y1 < 64 &&
1884 	    clip.extents.x2 - clip.extents.x1 < 64) {
1885 		DBG(("%s: fallback -- traps extents too small %dx%d\n",
1886 		     __FUNCTION__, clip.extents.y2 - clip.extents.y1, clip.extents.x2 - clip.extents.x1));
1887 		return false;
1888 	}
1889 #endif
1890 
1891 	DBG(("%s: extents (%d, %d), (%d, %d)\n",
1892 	     __FUNCTION__,
1893 	     clip.extents.x1, clip.extents.y1,
1894 	     clip.extents.x2, clip.extents.y2));
1895 
1896 	trapezoid_origin(&traps[0].left, &dst_x, &dst_y);
1897 
1898 	if (!sna_compute_composite_region(&clip,
1899 					  src, NULL, dst,
1900 					  src_x + clip.extents.x1 - dst_x,
1901 					  src_y + clip.extents.y1 - dst_y,
1902 					  0, 0,
1903 					  clip.extents.x1, clip.extents.y1,
1904 					  clip.extents.x2 - clip.extents.x1,
1905 					  clip.extents.y2 - clip.extents.y1)) {
1906 		DBG(("%s: trapezoids do not intersect drawable clips\n",
1907 		     __FUNCTION__));
1908 		return true;
1909 	}
1910 
1911 	if (!sna->render.check_composite_spans(sna, op, src, dst,
1912 					       clip.extents.x2 - clip.extents.x1,
1913 					       clip.extents.y2 - clip.extents.y1,
1914 					       flags)) {
1915 		DBG(("%s: fallback -- composite spans not supported\n",
1916 		     __FUNCTION__));
1917 		return false;
1918 	}
1919 
1920 	dx = dst->pDrawable->x;
1921 	dy = dst->pDrawable->y;
1922 
1923 	DBG(("%s: after clip -- extents (%d, %d), (%d, %d), delta=(%d, %d) src -> (%d, %d)\n",
1924 	     __FUNCTION__,
1925 	     clip.extents.x1, clip.extents.y1,
1926 	     clip.extents.x2, clip.extents.y2,
1927 	     dx, dy,
1928 	     src_x + clip.extents.x1 - dst_x - dx,
1929 	     src_y + clip.extents.y1 - dst_y - dy));
1930 
1931 	was_clear = sna_drawable_is_clear(dst->pDrawable);
1932 	switch (op) {
1933 	case PictOpAdd:
1934 	case PictOpOver:
1935 		if (was_clear)
1936 			op = PictOpSrc;
1937 		break;
1938 	case PictOpIn:
1939 		if (was_clear)
1940 			return true;
1941 		break;
1942 	}
1943 
1944 	if (!sna->render.composite_spans(sna, op, src, dst,
1945 					 src_x + clip.extents.x1 - dst_x - dx,
1946 					 src_y + clip.extents.y1 - dst_y - dy,
1947 					 clip.extents.x1,  clip.extents.y1,
1948 					 clip.extents.x2 - clip.extents.x1,
1949 					 clip.extents.y2 - clip.extents.y1,
1950 					 flags, memset(&tmp, 0, sizeof(tmp)))) {
1951 		DBG(("%s: fallback -- composite spans render op not supported\n",
1952 		     __FUNCTION__));
1953 		return false;
1954 	}
1955 
1956 	dx *= FAST_SAMPLES_X;
1957 	dy *= FAST_SAMPLES_Y;
1958 
1959 	num_threads = 1;
1960 	if (!NO_GPU_THREADS &&
1961 	    (flags & COMPOSITE_SPANS_RECTILINEAR) == 0 &&
1962 	    tmp.thread_boxes &&
1963 	    thread_choose_span(&tmp, dst, maskFormat, &clip))
1964 		num_threads = sna_use_threads(clip.extents.x2-clip.extents.x1,
1965 					      clip.extents.y2-clip.extents.y1,
1966 					      16);
1967 	DBG(("%s: using %d threads\n", __FUNCTION__, num_threads));
1968 	if (num_threads == 1) {
1969 		struct tor tor;
1970 
1971 		if (!tor_init(&tor, &clip.extents, 2*ntrap))
1972 			goto skip;
1973 
1974 		for (n = 0; n < ntrap; n++) {
1975 			if (pixman_fixed_integer_floor(traps[n].top) + dst->pDrawable->y >= clip.extents.y2 ||
1976 			    pixman_fixed_integer_ceil(traps[n].bottom) + dst->pDrawable->y <= clip.extents.y1)
1977 				continue;
1978 
1979 			tor_add_trapezoid(&tor, &traps[n], dx, dy);
1980 		}
1981 
1982 		tor_render(sna, &tor, &tmp, &clip,
1983 			   choose_span(&tmp, dst, maskFormat, &clip),
1984 			   !was_clear && maskFormat && !operator_is_bounded(op));
1985 
1986 		tor_fini(&tor);
1987 	} else {
1988 		struct span_thread threads[num_threads];
1989 		int y, h;
1990 
1991 		DBG(("%s: using %d threads for span compositing %dx%d\n",
1992 		     __FUNCTION__, num_threads,
1993 		     clip.extents.x2 - clip.extents.x1,
1994 		     clip.extents.y2 - clip.extents.y1));
1995 
1996 		threads[0].sna = sna;
1997 		threads[0].op = &tmp;
1998 		threads[0].traps = traps;
1999 		threads[0].ntrap = ntrap;
2000 		threads[0].extents = clip.extents;
2001 		threads[0].clip = &clip;
2002 		threads[0].dx = dx;
2003 		threads[0].dy = dy;
2004 		threads[0].draw_y = dst->pDrawable->y;
2005 		threads[0].unbounded = !was_clear && maskFormat && !operator_is_bounded(op);
2006 		threads[0].span = thread_choose_span(&tmp, dst, maskFormat, &clip);
2007 
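		/* Split the clip height into bands of h rows, with h rounded
		 * up so every row is covered; if the rounding would leave the
		 * final band (processed on this thread) empty, drop one
		 * worker.  E.g. 30 rows over 4 threads gives h = 8 and bands
		 * of 8+8+8+6 rows.
		 */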
2008 		y = clip.extents.y1;
2009 		h = clip.extents.y2 - clip.extents.y1;
2010 		h = (h + num_threads - 1) / num_threads;
2011 		num_threads -= (num_threads-1) * h >= clip.extents.y2 - clip.extents.y1;
2012 
2013 		for (n = 1; n < num_threads; n++) {
2014 			threads[n] = threads[0];
2015 			threads[n].extents.y1 = y;
2016 			threads[n].extents.y2 = y += h;
2017 
2018 			sna_threads_run(n, span_thread, &threads[n]);
2019 		}
2020 
2021 		assert(y < threads[0].extents.y2);
2022 		threads[0].extents.y1 = y;
2023 		span_thread(&threads[0]);
2024 
2025 		sna_threads_wait();
2026 	}
2027 skip:
2028 	tmp.done(sna, &tmp);
2029 
2030 	REGION_UNINIT(NULL, &clip);
2031 	return true;
2032 }
2033 
2034 static void
2035 tor_blt_mask(struct sna *sna,
2036 	     struct sna_composite_spans_op *op,
2037 	     pixman_region16_t *clip,
2038 	     const BoxRec *box,
2039 	     int coverage)
2040 {
2041 	uint8_t *ptr = (uint8_t *)op;
2042 	int stride = (intptr_t)clip;
2043 	int h, w;
2044 
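	/* Map coverage from [0, FAST_SAMPLES_XY] onto [0, 255] without a
	 * division by 255: scale by 256 and then subtract the top byte, so
	 * full coverage becomes 256 - 1 = 255 while partial values pass
	 * through unchanged.
	 */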
2045 	coverage = 256 * coverage / FAST_SAMPLES_XY;
2046 	coverage -= coverage >> 8;
2047 
2048 	ptr += box->y1 * stride + box->x1;
2049 
2050 	h = box->y2 - box->y1;
2051 	w = box->x2 - box->x1;
2052 	if ((w | h) == 1) {
2053 		*ptr = coverage;
2054 	} else if (w == 1) {
2055 		do {
2056 			*ptr = coverage;
2057 			ptr += stride;
2058 		} while (--h);
2059 	} else do {
2060 		memset(ptr, coverage, w);
2061 		ptr += stride;
2062 	} while (--h);
2063 }
2064 
2065 static void
2066 tor_blt_mask_mono(struct sna *sna,
2067 		  struct sna_composite_spans_op *op,
2068 		  pixman_region16_t *clip,
2069 		  const BoxRec *box,
2070 		  int coverage)
2071 {
2072 	tor_blt_mask(sna, op, clip, box,
2073 		     coverage < FAST_SAMPLES_XY/2 ? 0 : FAST_SAMPLES_XY);
2074 }
2075 
2076 bool
2077 imprecise_trapezoid_mask_converter(CARD8 op, PicturePtr src, PicturePtr dst,
2078 				   PictFormatPtr maskFormat, unsigned flags,
2079 				   INT16 src_x, INT16 src_y,
2080 				   int ntrap, xTrapezoid *traps)
2081 {
2082 	struct tor tor;
2083 	ScreenPtr screen = dst->pDrawable->pScreen;
2084 	PixmapPtr scratch;
2085 	PicturePtr mask;
2086 	BoxRec extents;
2087 	int16_t dst_x, dst_y;
2088 	int dx, dy;
2089 	int error, n;
2090 
2091 	if (NO_IMPRECISE)
2092 		return false;
2093 
2094 	if (maskFormat == NULL && ntrap > 1) {
2095 		DBG(("%s: individual rasterisation requested\n",
2096 		     __FUNCTION__));
2097 		do {
2098 			/* XXX unwind errors? */
2099 			if (!imprecise_trapezoid_mask_converter(op, src, dst, NULL, flags,
2100 								src_x, src_y, 1, traps++))
2101 				return false;
2102 		} while (--ntrap);
2103 		return true;
2104 	}
2105 
2106 	if (!trapezoids_bounds(ntrap, traps, &extents))
2107 		return true;
2108 
2109 	DBG(("%s: ntraps=%d, extents (%d, %d), (%d, %d)\n",
2110 	     __FUNCTION__, ntrap, extents.x1, extents.y1, extents.x2, extents.y2));
2111 
2112 	if (!sna_compute_composite_extents(&extents,
2113 					   src, NULL, dst,
2114 					   src_x, src_y,
2115 					   0, 0,
2116 					   extents.x1, extents.y1,
2117 					   extents.x2 - extents.x1,
2118 					   extents.y2 - extents.y1))
2119 		return true;
2120 
2121 	DBG(("%s: extents (%d, %d), (%d, %d)\n",
2122 	     __FUNCTION__, extents.x1, extents.y1, extents.x2, extents.y2));
2123 
2124 	extents.y2 -= extents.y1;
2125 	extents.x2 -= extents.x1;
2126 	extents.x1 -= dst->pDrawable->x;
2127 	extents.y1 -= dst->pDrawable->y;
2128 	dst_x = extents.x1;
2129 	dst_y = extents.y1;
2130 	dx = -extents.x1 * FAST_SAMPLES_X;
2131 	dy = -extents.y1 * FAST_SAMPLES_Y;
2132 	extents.x1 = extents.y1 = 0;
2133 
2134 	DBG(("%s: mask (%dx%d), dx=(%d, %d)\n",
2135 	     __FUNCTION__, extents.x2, extents.y2, dx, dy));
2136 	scratch = sna_pixmap_create_upload(screen,
2137 					   extents.x2, extents.y2, 8,
2138 					   KGEM_BUFFER_WRITE_INPLACE);
2139 	if (!scratch)
2140 		return true;
2141 
2142 	DBG(("%s: created buffer %p, stride %d\n",
2143 	     __FUNCTION__, scratch->devPrivate.ptr, scratch->devKind));
2144 
2145 	if (!tor_init(&tor, &extents, 2*ntrap)) {
2146 		sna_pixmap_destroy(scratch);
2147 		return true;
2148 	}
2149 
2150 	for (n = 0; n < ntrap; n++) {
2151 		if (pixman_fixed_to_int(traps[n].top) - dst_y >= extents.y2 ||
2152 		    pixman_fixed_to_int(traps[n].bottom) - dst_y < 0)
2153 			continue;
2154 
2155 		tor_add_trapezoid(&tor, &traps[n], dx, dy);
2156 	}
2157 
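	/* Rasterise into the scratch a8 mask: narrow masks can be filled row
	 * by row directly in place (tor_inplace), wider ones go through
	 * tor_render with tor_blt_mask, which unpacks the pointer and stride
	 * smuggled through the op and clip parameters.
	 */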
2158 	if (extents.x2 <= TOR_INPLACE_SIZE) {
2159 		uint8_t buf[TOR_INPLACE_SIZE];
2160 		tor_inplace(&tor, scratch, is_mono(dst, maskFormat),
2161 			    scratch->usage_hint ? NULL : buf);
2162 	} else {
2163 		tor_render(NULL, &tor,
2164 			   scratch->devPrivate.ptr,
2165 			   (void *)(intptr_t)scratch->devKind,
2166 			   is_mono(dst, maskFormat) ? tor_blt_mask_mono : tor_blt_mask,
2167 			   true);
2168 	}
2169 	tor_fini(&tor);
2170 
2171 	mask = CreatePicture(0, &scratch->drawable,
2172 			     PictureMatchFormat(screen, 8, PICT_a8),
2173 			     0, 0, serverClient, &error);
2174 	if (mask) {
2175 		int16_t x0, y0;
2176 
2177 		trapezoid_origin(&traps[0].left, &x0, &y0);
2178 
2179 		CompositePicture(op, src, mask, dst,
2180 				 src_x + dst_x - x0,
2181 				 src_y + dst_y - y0,
2182 				 0, 0,
2183 				 dst_x, dst_y,
2184 				 extents.x2, extents.y2);
2185 		FreePicture(mask, 0);
2186 	}
2187 	sna_pixmap_destroy(scratch);
2188 
2189 	return true;
2190 }
2191 
2192 struct inplace {
2193 	uint8_t *ptr;
2194 	uint32_t stride;
2195 	union {
2196 		uint8_t opacity;
2197 		uint32_t color;
2198 	};
2199 };
2200 
2201 static force_inline uint8_t coverage_opacity(int coverage, uint8_t opacity)
2202 {
2203 	coverage = coverage * 256 / FAST_SAMPLES_XY;
2204 	coverage -= coverage >> 8;
2205 	return opacity == 255 ? coverage : mul_8_8(coverage, opacity);
2206 }
2207 
2208 static void _tor_blt_src(struct inplace *in, const BoxRec *box, uint8_t v)
2209 {
2210 	uint8_t *ptr = in->ptr;
2211 	int h, w;
2212 
2213 	ptr += box->y1 * in->stride + box->x1;
2214 
2215 	h = box->y2 - box->y1;
2216 	w = box->x2 - box->x1;
2217 	if ((w | h) == 1) {
2218 		*ptr = v;
2219 	} else if (w == 1) {
2220 		do {
2221 			*ptr = v;
2222 			ptr += in->stride;
2223 		} while (--h);
2224 	} else do {
2225 		memset(ptr, v, w);
2226 		ptr += in->stride;
2227 	} while (--h);
2228 }
2229 
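/* For the in-place (CPU) paths the clip pointer passed to the span
 * functions is reused to carry a struct clipped_span: tor_blt_clipped
 * intersects each incoming span with the composite-clip boxes before
 * forwarding it to the real span writer stored in cs->span.
 */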
2230 struct clipped_span {
2231 	span_func_t span;
2232 	const BoxRec *clip_start, *clip_end;
2233 };
2234 
2235 static void
2236 tor_blt_clipped(struct sna *sna,
2237 		struct sna_composite_spans_op *op,
2238 		pixman_region16_t *clip,
2239 		const BoxRec *box,
2240 		int coverage)
2241 {
2242 	struct clipped_span *cs = (struct clipped_span *)clip;
2243 	const BoxRec *c;
2244 
2245 	cs->clip_start =
2246 		find_clip_box_for_y(cs->clip_start, cs->clip_end, box->y1);
2247 
2248 	c = cs->clip_start;
2249 	while (c != cs->clip_end) {
2250 		BoxRec clipped;
2251 
2252 		if (box->y2 <= c->y1)
2253 			break;
2254 
2255 		clipped = *box;
2256 		if (!box_intersect(&clipped, c++))
2257 			continue;
2258 
2259 		cs->span(sna, op, NULL, &clipped, coverage);
2260 	}
2261 }
2262 
2263 inline static span_func_t
2264 clipped_span(struct clipped_span *cs,
2265 	     span_func_t span,
2266 	     const RegionRec *clip)
2267 {
2268 	if (clip->data) {
2269 		cs->span = span;
2270 		region_get_boxes(clip, &cs->clip_start, &cs->clip_end);
2271 		span = tor_blt_clipped;
2272 	}
2273 	return span;
2274 }
2275 
2276 static void
2277 tor_blt_src(struct sna *sna,
2278 	    struct sna_composite_spans_op *op,
2279 	    pixman_region16_t *clip,
2280 	    const BoxRec *box,
2281 	    int coverage)
2282 {
2283 	struct inplace *in = (struct inplace *)op;
2284 
2285 	_tor_blt_src(in, box, coverage_opacity(coverage, in->opacity));
2286 }
2287 
2288 static void
2289 tor_blt_in(struct sna *sna,
2290 	   struct sna_composite_spans_op *op,
2291 	   pixman_region16_t *clip,
2292 	   const BoxRec *box,
2293 	   int coverage)
2294 {
2295 	struct inplace *in = (struct inplace *)op;
2296 	uint8_t *ptr = in->ptr;
2297 	int h, w, i;
2298 
2299 	if (coverage == 0) {
2300 		_tor_blt_src(in, box, 0);
2301 		return;
2302 	}
2303 
2304 	coverage = coverage_opacity(coverage, in->opacity);
2305 	if (coverage == 0xff)
2306 		return;
2307 
2308 	ptr += box->y1 * in->stride + box->x1;
2309 
2310 	h = box->y2 - box->y1;
2311 	w = box->x2 - box->x1;
2312 	do {
2313 		for (i = 0; i < w; i++)
2314 			ptr[i] = mul_8_8(ptr[i], coverage);
2315 		ptr += in->stride;
2316 	} while (--h);
2317 }
2318 
2319 static void
2320 tor_blt_add(struct sna *sna,
2321 	    struct sna_composite_spans_op *op,
2322 	    pixman_region16_t *clip,
2323 	    const BoxRec *box,
2324 	    int coverage)
2325 {
2326 	struct inplace *in = (struct inplace *)op;
2327 	uint8_t *ptr = in->ptr;
2328 	int h, w, v, i;
2329 
2330 	if (coverage == 0)
2331 		return;
2332 
2333 	coverage = coverage_opacity(coverage, in->opacity);
2334 	if (coverage == 0xff) {
2335 		_tor_blt_src(in, box, 0xff);
2336 		return;
2337 	}
2338 
2339 	ptr += box->y1 * in->stride + box->x1;
2340 
2341 	h = box->y2 - box->y1;
2342 	w = box->x2 - box->x1;
2343 	if ((w | h) == 1) {
2344 		v = coverage + *ptr;
2345 		*ptr = v >= 255 ? 255 : v;
2346 	} else {
2347 		do {
2348 			for (i = 0; i < w; i++) {
2349 				v = coverage + ptr[i];
2350 				ptr[i] = v >= 255 ? 255 : v;
2351 			}
2352 			ptr += in->stride;
2353 		} while (--h);
2354 	}
2355 }
2356 
2357 static void
2358 tor_blt_lerp32(struct sna *sna,
2359 	       struct sna_composite_spans_op *op,
2360 	       pixman_region16_t *clip,
2361 	       const BoxRec *box,
2362 	       int coverage)
2363 {
2364 	struct inplace *in = (struct inplace *)op;
2365 	uint32_t *ptr = (uint32_t *)in->ptr;
2366 	int stride = in->stride / sizeof(uint32_t);
2367 	int h, w, i;
2368 
2369 	if (coverage == 0)
2370 		return;
2371 
2372 	sigtrap_assert_active();
2373 	ptr += box->y1 * stride + box->x1;
2374 
2375 	h = box->y2 - box->y1;
2376 	w = box->x2 - box->x1;
2377 	if (coverage == FAST_SAMPLES_XY) {
2378 		if ((w | h) == 1) {
2379 			*ptr = in->color;
2380 		} else {
2381 			if (w < 16) {
2382 				do {
2383 					for (i = 0; i < w; i++)
2384 						ptr[i] = in->color;
2385 					ptr += stride;
2386 				} while (--h);
2387 			} else {
2388 				pixman_fill(ptr, stride, 32,
2389 					    0, 0, w, h, in->color);
2390 			}
2391 		}
2392 	} else {
2393 		coverage = coverage * 256 / FAST_SAMPLES_XY;
2394 		coverage -= coverage >> 8;
2395 
2396 		if ((w | h) == 1) {
2397 			*ptr = lerp8x4(in->color, coverage, *ptr);
2398 		} else if (w == 1) {
2399 			do {
2400 				*ptr = lerp8x4(in->color, coverage, *ptr);
2401 				ptr += stride;
2402 			} while (--h);
2403 		} else {
2404 			do {
2405 				for (i = 0; i < w; i++)
2406 					ptr[i] = lerp8x4(in->color, coverage, ptr[i]);
2407 				ptr += stride;
2408 			} while (--h);
2409 		}
2410 	}
2411 }
2412 
2413 struct pixman_inplace {
2414 	pixman_image_t *image, *source, *mask;
2415 	uint32_t color;
2416 	uint32_t *bits;
2417 	int dx, dy;
2418 	int sx, sy;
2419 	uint8_t op;
2420 };
2421 
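/* Fallback spans through pixman: a 1x1 repeating a8r8g8b8 source (or a
 * 1x1 repeating a8 mask) is updated with the coverage for each span and
 * then composited over the span's box, so arbitrary Render operators can
 * be applied per span without a dedicated blitter.
 */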
2422 static void
2423 pixmask_span_solid(struct sna *sna,
2424 		   struct sna_composite_spans_op *op,
2425 		   pixman_region16_t *clip,
2426 		   const BoxRec *box,
2427 		   int coverage)
2428 {
2429 	struct pixman_inplace *pi = (struct pixman_inplace *)op;
2430 	if (coverage != FAST_SAMPLES_XY) {
2431 		coverage = coverage * 256 / FAST_SAMPLES_XY;
2432 		coverage -= coverage >> 8;
2433 		*pi->bits = mul_4x8_8(pi->color, coverage);
2434 	} else
2435 		*pi->bits = pi->color;
2436 	pixman_image_composite(pi->op, pi->source, NULL, pi->image,
2437 			       box->x1, box->y1,
2438 			       0, 0,
2439 			       pi->dx + box->x1, pi->dy + box->y1,
2440 			       box->x2 - box->x1, box->y2 - box->y1);
2441 }
2442 
2443 static void
2444 pixmask_span(struct sna *sna,
2445 	     struct sna_composite_spans_op *op,
2446 	     pixman_region16_t *clip,
2447 	     const BoxRec *box,
2448 	     int coverage)
2449 {
2450 	struct pixman_inplace *pi = (struct pixman_inplace *)op;
2451 	pixman_image_t *mask = NULL;
2452 	if (coverage != FAST_SAMPLES_XY) {
2453 		coverage = coverage * 256 / FAST_SAMPLES_XY;
2454 		coverage -= coverage >> 8;
2455 		*pi->bits = coverage;
2456 		mask = pi->mask;
2457 	}
2458 	pixman_image_composite(pi->op, pi->source, mask, pi->image,
2459 			       pi->sx + box->x1, pi->sy + box->y1,
2460 			       0, 0,
2461 			       pi->dx + box->x1, pi->dy + box->y1,
2462 			       box->x2 - box->x1, box->y2 - box->y1);
2463 }
2464 
2465 struct inplace_x8r8g8b8_thread {
2466 	xTrapezoid *traps;
2467 	PicturePtr dst, src;
2468 	BoxRec extents;
2469 	int dx, dy;
2470 	int ntrap;
2471 	bool lerp, is_solid;
2472 	uint32_t color;
2473 	int16_t src_x, src_y;
2474 	uint8_t op;
2475 };
2476 
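/* Per-band worker for in-place x8r8g8b8/a8r8g8b8 rendering: solid SRC is
 * lerped directly into the pixmap (tor_blt_lerp32), other solid operators
 * go through pixman with a 1x1 repeating source, and everything else uses
 * a full pixman composite with a 1x1 coverage mask.
 */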
2477 static void inplace_x8r8g8b8_thread(void *arg)
2478 {
2479 	struct inplace_x8r8g8b8_thread *thread = arg;
2480 	struct tor tor;
2481 	span_func_t span;
2482 	struct clipped_span clipped;
2483 	RegionPtr clip;
2484 	int y1, y2, n;
2485 
2486 	if (!tor_init(&tor, &thread->extents, 2*thread->ntrap))
2487 		return;
2488 
2489 	y1 = thread->extents.y1 - thread->dst->pDrawable->y;
2490 	y2 = thread->extents.y2 - thread->dst->pDrawable->y;
2491 	for (n = 0; n < thread->ntrap; n++) {
2492 		if (pixman_fixed_to_int(thread->traps[n].top) >= y2 ||
2493 		    pixman_fixed_to_int(thread->traps[n].bottom) < y1)
2494 			continue;
2495 
2496 		tor_add_trapezoid(&tor, &thread->traps[n], thread->dx, thread->dy);
2497 	}
2498 
2499 	clip = thread->dst->pCompositeClip;
2500 	if (thread->lerp) {
2501 		struct inplace inplace;
2502 		int16_t dst_x, dst_y;
2503 		PixmapPtr pixmap;
2504 
2505 		pixmap = get_drawable_pixmap(thread->dst->pDrawable);
2506 
2507 		inplace.ptr = pixmap->devPrivate.ptr;
2508 		if (get_drawable_deltas(thread->dst->pDrawable, pixmap, &dst_x, &dst_y))
2509 			inplace.ptr += dst_y * pixmap->devKind + dst_x * 4;
2510 		inplace.stride = pixmap->devKind;
2511 		inplace.color = thread->color;
2512 
2513 		span = clipped_span(&clipped, tor_blt_lerp32, clip);
2514 
2515 		tor_render(NULL, &tor,
2516 			   (void*)&inplace, (void*)&clipped,
2517 			   span, false);
2518 	} else if (thread->is_solid) {
2519 		struct pixman_inplace pi;
2520 
2521 		pi.image = image_from_pict(thread->dst, false, &pi.dx, &pi.dy);
2522 		pi.op = thread->op;
2523 		pi.color = thread->color;
2524 
2525 		pi.bits = (uint32_t *)&pi.sx;
2526 		pi.source = pixman_image_create_bits(PIXMAN_a8r8g8b8,
2527 						     1, 1, pi.bits, 0);
2528 		pixman_image_set_repeat(pi.source, PIXMAN_REPEAT_NORMAL);
2529 
2530 		span = clipped_span(&clipped, pixmask_span_solid, clip);
2531 
2532 		tor_render(NULL, &tor,
2533 			   (void*)&pi, (void *)&clipped,
2534 			   span, false);
2535 
2536 		pixman_image_unref(pi.source);
2537 		pixman_image_unref(pi.image);
2538 	} else {
2539 		struct pixman_inplace pi;
2540 		int16_t x0, y0;
2541 
2542 		trapezoid_origin(&thread->traps[0].left, &x0, &y0);
2543 
2544 		pi.image = image_from_pict(thread->dst, false, &pi.dx, &pi.dy);
2545 		pi.source = image_from_pict(thread->src, false, &pi.sx, &pi.sy);
2546 		pi.sx += thread->src_x - x0;
2547 		pi.sy += thread->src_y - y0;
2548 		pi.mask = pixman_image_create_bits(PIXMAN_a8, 1, 1, NULL, 0);
2549 		pixman_image_set_repeat(pi.mask, PIXMAN_REPEAT_NORMAL);
2550 		pi.bits = pixman_image_get_data(pi.mask);
2551 		pi.op = thread->op;
2552 
2553 		span = clipped_span(&clipped, pixmask_span, clip);
2554 
2555 		tor_render(NULL, &tor,
2556 			   (void*)&pi, (void *)&clipped,
2557 			   span, false);
2558 
2559 		pixman_image_unref(pi.mask);
2560 		pixman_image_unref(pi.source);
2561 		pixman_image_unref(pi.image);
2562 	}
2563 
2564 	tor_fini(&tor);
2565 }
2566 
2567 static bool
2568 trapezoid_span_inplace__x8r8g8b8(CARD8 op,
2569 				 PicturePtr dst,
2570 				 PicturePtr src, int16_t src_x, int16_t src_y,
2571 				 PictFormatPtr maskFormat,
2572 				 int ntrap, xTrapezoid *traps)
2573 {
2574 	uint32_t color;
2575 	bool lerp, is_solid;
2576 	RegionRec region;
2577 	int dx, dy;
2578 	int num_threads, n;
2579 
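	/* Reduce the operator where possible: an opaque solid OVER, or any
	 * OVER onto a known-clear destination, behaves exactly like SRC and
	 * can then be written with a simple per-pixel lerp against the
	 * destination.
	 */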
2580 	lerp = false;
2581 	is_solid = sna_picture_is_solid(src, &color);
2582 	if (is_solid) {
2583 		if (op == PictOpOver && (color >> 24) == 0xff)
2584 			op = PictOpSrc;
2585 		if (op == PictOpOver && sna_drawable_is_clear(dst->pDrawable))
2586 			op = PictOpSrc;
2587 		lerp = op == PictOpSrc;
2588 	}
2589 	if (!lerp) {
2590 		switch (op) {
2591 		case PictOpOver:
2592 		case PictOpAdd:
2593 		case PictOpOutReverse:
2594 			break;
2595 		case PictOpSrc:
2596 			if (!sna_drawable_is_clear(dst->pDrawable))
2597 				return false;
2598 			break;
2599 		default:
2600 			return false;
2601 		}
2602 	}
2603 
2604 	if (maskFormat == NULL && ntrap > 1) {
2605 		DBG(("%s: individual rasterisation requested\n",
2606 		     __FUNCTION__));
2607 		do {
2608 			/* XXX unwind errors? */
2609 			if (!trapezoid_span_inplace__x8r8g8b8(op, dst,
2610 							      src, src_x, src_y,
2611 							      NULL, 1, traps++))
2612 				return false;
2613 		} while (--ntrap);
2614 		return true;
2615 	}
2616 
2617 	if (!trapezoids_bounds(ntrap, traps, &region.extents))
2618 		return true;
2619 
2620 	DBG(("%s: extents (%d, %d), (%d, %d)\n",
2621 	     __FUNCTION__,
2622 	     region.extents.x1, region.extents.y1,
2623 	     region.extents.x2, region.extents.y2));
2624 
2625 	if (!sna_compute_composite_extents(&region.extents,
2626 					   src, NULL, dst,
2627 					   src_x, src_y,
2628 					   0, 0,
2629 					   region.extents.x1, region.extents.y1,
2630 					   region.extents.x2 - region.extents.x1,
2631 					   region.extents.y2 - region.extents.y1))
2632 		return true;
2633 
2634 	DBG(("%s: clipped extents (%d, %d), (%d, %d)\n",
2635 	     __FUNCTION__,
2636 	     region.extents.x1, region.extents.y1,
2637 	     region.extents.x2, region.extents.y2));
2638 
2639 	region.data = NULL;
2640 	if (!sna_drawable_move_region_to_cpu(dst->pDrawable, &region,
2641 					    MOVE_WRITE | MOVE_READ))
2642 		return true;
2643 
2644 	if (!is_solid && src->pDrawable) {
2645 		if (!sna_drawable_move_to_cpu(src->pDrawable,
2646 					      MOVE_READ))
2647 			return true;
2648 
2649 		if (src->alphaMap &&
2650 		    !sna_drawable_move_to_cpu(src->alphaMap->pDrawable,
2651 					      MOVE_READ))
2652 			return true;
2653 	}
2654 
2655 	dx = dst->pDrawable->x * FAST_SAMPLES_X;
2656 	dy = dst->pDrawable->y * FAST_SAMPLES_Y;
2657 
2658 	num_threads = sna_use_threads(4*(region.extents.x2 - region.extents.x1),
2659 				      region.extents.y2 - region.extents.y1,
2660 				      16);
2661 
2662 	DBG(("%s: %dx%d, format=%x, op=%d, lerp?=%d, num_threads=%d\n",
2663 	     __FUNCTION__,
2664 	     region.extents.x2 - region.extents.x1,
2665 	     region.extents.y2 - region.extents.y1,
2666 	     dst->format, op, lerp, num_threads));
2667 
2668 	if (num_threads == 1) {
2669 		struct tor tor;
2670 		span_func_t span;
2671 		struct clipped_span clipped;
2672 
2673 		if (!tor_init(&tor, &region.extents, 2*ntrap))
2674 			return true;
2675 
2676 		for (n = 0; n < ntrap; n++) {
2677 			if (pixman_fixed_to_int(traps[n].top) >= region.extents.y2 - dst->pDrawable->y ||
2678 			    pixman_fixed_to_int(traps[n].bottom) < region.extents.y1 - dst->pDrawable->y)
2679 				continue;
2680 
2681 			tor_add_trapezoid(&tor, &traps[n], dx, dy);
2682 		}
2683 
2684 		if (lerp) {
2685 			struct inplace inplace;
2686 			PixmapPtr pixmap;
2687 			int16_t dst_x, dst_y;
2688 
2689 			pixmap = get_drawable_pixmap(dst->pDrawable);
2690 
2691 			inplace.ptr = pixmap->devPrivate.ptr;
2692 			if (get_drawable_deltas(dst->pDrawable, pixmap, &dst_x, &dst_y))
2693 				inplace.ptr += dst_y * pixmap->devKind + dst_x * 4;
2694 			inplace.stride = pixmap->devKind;
2695 			inplace.color = color;
2696 
2697 			span = clipped_span(&clipped, tor_blt_lerp32, dst->pCompositeClip);
2698 
2699 			DBG(("%s: render inplace op=%d, color=%08x\n",
2700 			     __FUNCTION__, op, color));
2701 
2702 			if (sigtrap_get() == 0) {
2703 				tor_render(NULL, &tor,
2704 					   (void*)&inplace, (void*)&clipped,
2705 					   span, false);
2706 				sigtrap_put();
2707 			}
2708 		} else if (is_solid) {
2709 			struct pixman_inplace pi;
2710 
2711 			pi.image = image_from_pict(dst, false, &pi.dx, &pi.dy);
2712 			pi.op = op;
2713 			pi.color = color;
2714 
2715 			pi.bits = (uint32_t *)&pi.sx;
2716 			pi.source = pixman_image_create_bits(PIXMAN_a8r8g8b8,
2717 							     1, 1, pi.bits, 0);
2718 			pixman_image_set_repeat(pi.source, PIXMAN_REPEAT_NORMAL);
2719 
2720 			span = clipped_span(&clipped, pixmask_span_solid, dst->pCompositeClip);
2721 
2722 			if (sigtrap_get() == 0) {
2723 				tor_render(NULL, &tor,
2724 					   (void*)&pi, (void*)&clipped,
2725 					   span, false);
2726 				sigtrap_put();
2727 			}
2728 
2729 			pixman_image_unref(pi.source);
2730 			pixman_image_unref(pi.image);
2731 		} else {
2732 			struct pixman_inplace pi;
2733 			int16_t x0, y0;
2734 
2735 			trapezoid_origin(&traps[0].left, &x0, &y0);
2736 
2737 			pi.image = image_from_pict(dst, false, &pi.dx, &pi.dy);
2738 			pi.source = image_from_pict(src, false, &pi.sx, &pi.sy);
2739 			pi.sx += src_x - x0;
2740 			pi.sy += src_y - y0;
2741 			pi.mask = pixman_image_create_bits(PIXMAN_a8, 1, 1, NULL, 0);
2742 			pixman_image_set_repeat(pi.mask, PIXMAN_REPEAT_NORMAL);
2743 			pi.bits = pixman_image_get_data(pi.mask);
2744 			pi.op = op;
2745 
2746 			span = clipped_span(&clipped, pixmask_span, dst->pCompositeClip);
2747 
2748 			if (sigtrap_get() == 0) {
2749 				tor_render(NULL, &tor,
2750 					   (void*)&pi, (void*)&clipped,
2751 					   span, false);
2752 				sigtrap_put();
2753 			}
2754 
2755 			pixman_image_unref(pi.mask);
2756 			pixman_image_unref(pi.source);
2757 			pixman_image_unref(pi.image);
2758 		}
2759 
2760 		tor_fini(&tor);
2761 	} else {
2762 		struct inplace_x8r8g8b8_thread threads[num_threads];
2763 		int y, h;
2764 
2765 		DBG(("%s: using %d threads for inplace compositing %dx%d\n",
2766 		     __FUNCTION__, num_threads,
2767 		     region.extents.x2 - region.extents.x1,
2768 		     region.extents.y2 - region.extents.y1));
2769 
2770 		threads[0].traps = traps;
2771 		threads[0].ntrap = ntrap;
2772 		threads[0].extents = region.extents;
2773 		threads[0].lerp = lerp;
2774 		threads[0].is_solid = is_solid;
2775 		threads[0].color = color;
2776 		threads[0].dx = dx;
2777 		threads[0].dy = dy;
2778 		threads[0].dst = dst;
2779 		threads[0].src = src;
2780 		threads[0].op = op;
2781 		threads[0].src_x = src_x;
2782 		threads[0].src_y = src_y;
2783 
2784 		y = region.extents.y1;
2785 		h = region.extents.y2 - region.extents.y1;
2786 		h = (h + num_threads - 1) / num_threads;
2787 		num_threads -= (num_threads-1) * h >= region.extents.y2 - region.extents.y1;
2788 
2789 		if (sigtrap_get() == 0) {
2790 			for (n = 1; n < num_threads; n++) {
2791 				threads[n] = threads[0];
2792 				threads[n].extents.y1 = y;
2793 				threads[n].extents.y2 = y += h;
2794 
2795 				sna_threads_run(n, inplace_x8r8g8b8_thread, &threads[n]);
2796 			}
2797 
2798 			assert(y < threads[0].extents.y2);
2799 			threads[0].extents.y1 = y;
2800 			inplace_x8r8g8b8_thread(&threads[0]);
2801 
2802 			sna_threads_wait();
2803 			sigtrap_put();
2804 		} else
2805 			sna_threads_kill(); /* leaks thread allocations */
2806 	}
2807 
2808 	return true;
2809 }
2810 
2811 struct inplace_thread {
2812 	xTrapezoid *traps;
2813 	span_func_t span;
2814 	struct inplace inplace;
2815 	struct clipped_span clipped;
2816 	BoxRec extents;
2817 	int dx, dy;
2818 	int draw_x, draw_y;
2819 	bool unbounded;
2820 	int ntrap;
2821 };
2822 
2823 static void inplace_thread(void *arg)
2824 {
2825 	struct inplace_thread *thread = arg;
2826 	struct tor tor;
2827 	int n;
2828 
2829 	if (!tor_init(&tor, &thread->extents, 2*thread->ntrap))
2830 		return;
2831 
2832 	for (n = 0; n < thread->ntrap; n++) {
2833 		if (pixman_fixed_to_int(thread->traps[n].top) >= thread->extents.y2 - thread->draw_y ||
2834 		    pixman_fixed_to_int(thread->traps[n].bottom) < thread->extents.y1 - thread->draw_y)
2835 			continue;
2836 
2837 		tor_add_trapezoid(&tor, &thread->traps[n], thread->dx, thread->dy);
2838 	}
2839 
2840 	tor_render(NULL, &tor,
2841 		   (void*)&thread->inplace, (void*)&thread->clipped,
2842 		   thread->span, thread->unbounded);
2843 
2844 	tor_fini(&tor);
2845 }
2846 
2847 bool
2848 imprecise_trapezoid_span_inplace(struct sna *sna,
2849 				 CARD8 op, PicturePtr src, PicturePtr dst,
2850 				 PictFormatPtr maskFormat, unsigned flags,
2851 				 INT16 src_x, INT16 src_y,
2852 				 int ntrap, xTrapezoid *traps,
2853 				 bool fallback)
2854 {
2855 	struct inplace inplace;
2856 	struct clipped_span clipped;
2857 	span_func_t span;
2858 	PixmapPtr pixmap;
2859 	struct sna_pixmap *priv;
2860 	RegionRec region;
2861 	uint32_t color;
2862 	bool unbounded;
2863 	int16_t dst_x, dst_y;
2864 	int dx, dy;
2865 	int num_threads, n;
2866 
2867 	if (NO_IMPRECISE)
2868 		return false;
2869 
2870 	if (dst->format == PICT_a8r8g8b8 || dst->format == PICT_x8r8g8b8)
2871 		return trapezoid_span_inplace__x8r8g8b8(op, dst,
2872 							src, src_x, src_y,
2873 							maskFormat,
2874 							ntrap, traps);
2875 
2876 	if (!sna_picture_is_solid(src, &color)) {
2877 		DBG(("%s: fallback -- can not perform operation in place, requires solid source\n",
2878 		     __FUNCTION__));
2879 		return false;
2880 	}
2881 
2882 	if (dst->format != PICT_a8) {
2883 		DBG(("%s: fallback -- can not perform operation in place, format=%x\n",
2884 		     __FUNCTION__, dst->format));
2885 		return false;
2886 	}
2887 
2888 	pixmap = get_drawable_pixmap(dst->pDrawable);
2889 
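	/* For an a8 destination, exploit a known clear colour to simplify the
	 * operator: ADD onto zero and IN against 0xff degenerate to SRC, and
	 * IN against zero is a no-op.  SRC and IN must also clear pixels with
	 * zero coverage, hence they are treated as unbounded.
	 */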
2890 	unbounded = false;
2891 	priv = sna_pixmap(pixmap);
2892 	if (priv) {
2893 		switch (op) {
2894 		case PictOpAdd:
2895 			if (priv->clear && priv->clear_color == 0) {
2896 				unbounded = true;
2897 				op = PictOpSrc;
2898 			}
2899 			if ((color >> 24) == 0)
2900 				return true;
2901 			break;
2902 		case PictOpIn:
2903 			if (priv->clear && priv->clear_color == 0)
2904 				return true;
2905 			if (priv->clear && priv->clear_color == 0xff)
2906 				op = PictOpSrc;
2907 			unbounded = true;
2908 			break;
2909 		case PictOpSrc:
2910 			unbounded = true;
2911 			break;
2912 		default:
2913 			DBG(("%s: fallback -- can not perform op [%d] in place\n",
2914 			     __FUNCTION__, op));
2915 			return false;
2916 		}
2917 	} else {
2918 		switch (op) {
2919 		case PictOpAdd:
2920 			if ((color >> 24) == 0)
2921 				return true;
2922 			break;
2923 		case PictOpIn:
2924 		case PictOpSrc:
2925 			unbounded = true;
2926 			break;
2927 		default:
2928 			DBG(("%s: fallback -- can not perform op [%d] in place\n",
2929 			     __FUNCTION__, op));
2930 			return false;
2931 		}
2932 	}
2933 
2934 	DBG(("%s: format=%x, op=%d, color=%x\n",
2935 	     __FUNCTION__, dst->format, op, color));
2936 
2937 	if (maskFormat == NULL && ntrap > 1) {
2938 		DBG(("%s: individual rasterisation requested\n",
2939 		     __FUNCTION__));
2940 		do {
2941 			/* XXX unwind errors? */
2942 			if (!imprecise_trapezoid_span_inplace(sna, op, src, dst, NULL, flags,
2943 							      src_x, src_y, 1, traps++,
2944 							      fallback))
2945 				return false;
2946 		} while (--ntrap);
2947 		return true;
2948 	}
2949 
2950 	if (!trapezoids_bounds(ntrap, traps, &region.extents))
2951 		return true;
2952 
2953 	DBG(("%s: extents (%d, %d), (%d, %d)\n",
2954 	     __FUNCTION__,
2955 	     region.extents.x1, region.extents.y1,
2956 	     region.extents.x2, region.extents.y2));
2957 
2958 	if (!sna_compute_composite_extents(&region.extents,
2959 					   NULL, NULL, dst,
2960 					   0, 0,
2961 					   0, 0,
2962 					   region.extents.x1, region.extents.y1,
2963 					   region.extents.x2 - region.extents.x1,
2964 					   region.extents.y2 - region.extents.y1))
2965 		return true;
2966 
2967 	DBG(("%s: clipped extents (%d, %d), (%d, %d)\n",
2968 	     __FUNCTION__,
2969 	     region.extents.x1, region.extents.y1,
2970 	     region.extents.x2, region.extents.y2));
2971 
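	/* Pick the span writer for the remaining a8 operators: SRC overwrites
	 * with the coverage, IN multiplies the existing alpha by it, and ADD
	 * accumulates with saturation.
	 */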
2972 	if (op == PictOpSrc) {
2973 		span = tor_blt_src;
2974 	} else if (op == PictOpIn) {
2975 		span = tor_blt_in;
2976 	} else {
2977 		assert(op == PictOpAdd);
2978 		span = tor_blt_add;
2979 	}
2980 
2981 	DBG(("%s: move-to-cpu\n", __FUNCTION__));
2982 	region.data = NULL;
2983 	if (!sna_drawable_move_region_to_cpu(dst->pDrawable, &region,
2984 					     op == PictOpSrc ? MOVE_WRITE | MOVE_INPLACE_HINT : MOVE_WRITE | MOVE_READ))
2985 		return true;
2986 
2987 	dx = dst->pDrawable->x * FAST_SAMPLES_X;
2988 	dy = dst->pDrawable->y * FAST_SAMPLES_Y;
2989 
2990 	inplace.ptr = pixmap->devPrivate.ptr;
2991 	if (get_drawable_deltas(dst->pDrawable, pixmap, &dst_x, &dst_y))
2992 		inplace.ptr += dst_y * pixmap->devKind + dst_x;
2993 	inplace.stride = pixmap->devKind;
2994 	inplace.opacity = color >> 24;
2995 
2996 	span = clipped_span(&clipped, span, dst->pCompositeClip);
2997 
2998 	num_threads = 1;
2999 	if ((flags & COMPOSITE_SPANS_RECTILINEAR) == 0)
3000 		num_threads = sna_use_threads(region.extents.x2 - region.extents.x1,
3001 					      region.extents.y2 - region.extents.y1,
3002 					      16);
3003 	if (num_threads == 1) {
3004 		struct tor tor;
3005 
3006 		if (!tor_init(&tor, &region.extents, 2*ntrap))
3007 			return true;
3008 
3009 		for (n = 0; n < ntrap; n++) {
3010 			if (pixman_fixed_to_int(traps[n].top) >= region.extents.y2 - dst->pDrawable->y ||
3011 			    pixman_fixed_to_int(traps[n].bottom) < region.extents.y1 - dst->pDrawable->y)
3012 				continue;
3013 
3014 			tor_add_trapezoid(&tor, &traps[n], dx, dy);
3015 		}
3016 
3017 		if (sigtrap_get() == 0) {
3018 			tor_render(NULL, &tor,
3019 				   (void*)&inplace, (void *)&clipped,
3020 				   span, unbounded);
3021 			sigtrap_put();
3022 		}
3023 
3024 		tor_fini(&tor);
3025 	} else {
3026 		struct inplace_thread threads[num_threads];
3027 		int y, h;
3028 
3029 		DBG(("%s: using %d threads for inplace compositing %dx%d\n",
3030 		     __FUNCTION__, num_threads,
3031 		     region.extents.x2 - region.extents.x1,
3032 		     region.extents.y2 - region.extents.y1));
3033 
3034 		threads[0].traps = traps;
3035 		threads[0].ntrap = ntrap;
3036 		threads[0].inplace = inplace;
3037 		threads[0].clipped = clipped;
3038 		threads[0].extents = region.extents;
3039 		threads[0].span = span;
3040 		threads[0].unbounded = unbounded;
3041 		threads[0].dx = dx;
3042 		threads[0].dy = dy;
3043 		threads[0].draw_x = dst->pDrawable->x;
3044 		threads[0].draw_y = dst->pDrawable->y;
3045 
3046 		y = region.extents.y1;
3047 		h = region.extents.y2 - region.extents.y1;
3048 		h = (h + num_threads - 1) / num_threads;
3049 		num_threads -= (num_threads-1) * h >= region.extents.y2 - region.extents.y1;
3050 
3051 		if (sigtrap_get() == 0) {
3052 			for (n = 1; n < num_threads; n++) {
3053 				threads[n] = threads[0];
3054 				threads[n].extents.y1 = y;
3055 				threads[n].extents.y2 = y += h;
3056 
3057 				sna_threads_run(n, inplace_thread, &threads[n]);
3058 			}
3059 
3060 			assert(y < threads[0].extents.y2);
3061 			threads[0].extents.y1 = y;
3062 			inplace_thread(&threads[0]);
3063 
3064 			sna_threads_wait();
3065 			sigtrap_put();
3066 		} else
3067 			sna_threads_kill(); /* leaks thread allocations */
3068 	}
3069 
3070 	return true;
3071 }
3072 
3073 bool
3074 imprecise_trapezoid_span_fallback(CARD8 op, PicturePtr src, PicturePtr dst,
3075 				  PictFormatPtr maskFormat, unsigned flags,
3076 				  INT16 src_x, INT16 src_y,
3077 				  int ntrap, xTrapezoid *traps)
3078 {
3079 	struct tor tor;
3080 	ScreenPtr screen = dst->pDrawable->pScreen;
3081 	PixmapPtr scratch;
3082 	PicturePtr mask;
3083 	BoxRec extents;
3084 	int16_t dst_x, dst_y;
3085 	int dx, dy;
3086 	int error, n;
3087 
3088 	if (NO_IMPRECISE)
3089 		return false;
3090 
3091 	if (maskFormat == NULL && ntrap > 1) {
3092 		DBG(("%s: individual rasterisation requested\n",
3093 		     __FUNCTION__));
3094 		do {
3095 			/* XXX unwind errors? */
3096 			if (!imprecise_trapezoid_span_fallback(op, src, dst, NULL, flags,
3097 							       src_x, src_y, 1, traps++))
3098 				return false;
3099 		} while (--ntrap);
3100 		return true;
3101 	}
3102 
3103 	if (!trapezoids_bounds(ntrap, traps, &extents))
3104 		return true;
3105 
3106 	DBG(("%s: ntraps=%d, extents (%d, %d), (%d, %d)\n",
3107 	     __FUNCTION__, ntrap, extents.x1, extents.y1, extents.x2, extents.y2));
3108 
3109 	if (!sna_compute_composite_extents(&extents,
3110 					   src, NULL, dst,
3111 					   src_x, src_y,
3112 					   0, 0,
3113 					   extents.x1, extents.y1,
3114 					   extents.x2 - extents.x1,
3115 					   extents.y2 - extents.y1))
3116 		return true;
3117 
3118 	DBG(("%s: extents (%d, %d), (%d, %d)\n",
3119 	     __FUNCTION__, extents.x1, extents.y1, extents.x2, extents.y2));
3120 
3121 	extents.y2 -= extents.y1;
3122 	extents.x2 -= extents.x1;
3123 	extents.x1 -= dst->pDrawable->x;
3124 	extents.y1 -= dst->pDrawable->y;
3125 	dst_x = extents.x1;
3126 	dst_y = extents.y1;
3127 	dx = -extents.x1 * FAST_SAMPLES_X;
3128 	dy = -extents.y1 * FAST_SAMPLES_Y;
3129 	extents.x1 = extents.y1 = 0;
3130 
3131 	DBG(("%s: mask (%dx%d), dx=(%d, %d)\n",
3132 	     __FUNCTION__, extents.x2, extents.y2, dx, dy));
3133 	scratch = sna_pixmap_create_unattached(screen,
3134 					       extents.x2, extents.y2, 8);
3135 	if (!scratch)
3136 		return true;
3137 
3138 	DBG(("%s: created buffer %p, stride %d\n",
3139 	     __FUNCTION__, scratch->devPrivate.ptr, scratch->devKind));
3140 
3141 	if (!tor_init(&tor, &extents, 2*ntrap)) {
3142 		sna_pixmap_destroy(scratch);
3143 		return true;
3144 	}
3145 
3146 	for (n = 0; n < ntrap; n++) {
3147 		if (pixman_fixed_to_int(traps[n].top) - dst_y >= extents.y2 ||
3148 		    pixman_fixed_to_int(traps[n].bottom) - dst_y < 0)
3149 			continue;
3150 
3151 		tor_add_trapezoid(&tor, &traps[n], dx, dy);
3152 	}
3153 
3154 	if (extents.x2 <= TOR_INPLACE_SIZE) {
3155 		tor_inplace(&tor, scratch, is_mono(dst, maskFormat), NULL);
3156 	} else {
3157 		tor_render(NULL, &tor,
3158 			   scratch->devPrivate.ptr,
3159 			   (void *)(intptr_t)scratch->devKind,
3160 			   is_mono(dst, maskFormat) ? tor_blt_mask_mono : tor_blt_mask,
3161 			   true);
3162 	}
3163 	tor_fini(&tor);
3164 
3165 	mask = CreatePicture(0, &scratch->drawable,
3166 			     PictureMatchFormat(screen, 8, PICT_a8),
3167 			     0, 0, serverClient, &error);
3168 	if (mask) {
3169 		RegionRec region;
3170 		int16_t x0, y0;
3171 
3172 		region.extents.x1 = dst_x + dst->pDrawable->x;
3173 		region.extents.y1 = dst_y + dst->pDrawable->y;
3174 		region.extents.x2 = region.extents.x1 + extents.x2;
3175 		region.extents.y2 = region.extents.y1 + extents.y2;
3176 		region.data = NULL;
3177 
3178 		trapezoid_origin(&traps[0].left, &x0, &y0);
3179 
3180 		DBG(("%s: fbComposite()\n", __FUNCTION__));
3181 		sna_composite_fb(op, src, mask, dst, &region,
3182 				 src_x + dst_x - x0, src_y + dst_y - y0,
3183 				 0, 0,
3184 				 dst_x, dst_y,
3185 				 extents.x2, extents.y2);
3186 
3187 		FreePicture(mask, 0);
3188 	}
3189 	sna_pixmap_destroy(scratch);
3190 
3191 	return true;
3192 }
3193 
3194 bool
3195 imprecise_trap_span_converter(struct sna *sna,
3196 			      PicturePtr dst,
3197 			      INT16 src_x, INT16 src_y,
3198 			      int ntrap, xTrap *trap)
3199 {
3200 	struct sna_composite_spans_op tmp;
3201 	struct tor tor;
3202 	BoxRec extents;
3203 	pixman_region16_t *clip;
3204 	int dx, dy, n;
3205 
3206 	if (dst->pDrawable->depth < 8)
3207 		return false;
3208 
3209 	if (!sna->render.check_composite_spans(sna, PictOpAdd, sna->render.white_picture, dst,
3210 					       dst->pCompositeClip->extents.x2 - dst->pCompositeClip->extents.x1,
3211 					       dst->pCompositeClip->extents.y2 - dst->pCompositeClip->extents.y1,
3212 					       0)) {
3213 		DBG(("%s: fallback -- composite spans not supported\n",
3214 		     __FUNCTION__));
3215 		return false;
3216 	}
3217 
3218 	clip = dst->pCompositeClip;
3219 	extents = *RegionExtents(clip);
3220 	dx = dst->pDrawable->x;
3221 	dy = dst->pDrawable->y;
3222 
3223 	DBG(("%s: after clip -- extents (%d, %d), (%d, %d), delta=(%d, %d)\n",
3224 	     __FUNCTION__,
3225 	     extents.x1, extents.y1,
3226 	     extents.x2, extents.y2,
3227 	     dx, dy));
3228 
3229 	memset(&tmp, 0, sizeof(tmp));
3230 	if (!sna->render.composite_spans(sna, PictOpAdd, sna->render.white_picture, dst,
3231 					 0, 0,
3232 					 extents.x1,  extents.y1,
3233 					 extents.x2 - extents.x1,
3234 					 extents.y2 - extents.y1,
3235 					 0,
3236 					 &tmp)) {
3237 		DBG(("%s: fallback -- composite spans render op not supported\n",
3238 		     __FUNCTION__));
3239 		return false;
3240 	}
3241 
3242 	dx *= FAST_SAMPLES_X;
3243 	dy *= FAST_SAMPLES_Y;
3244 	if (!tor_init(&tor, &extents, 2*ntrap))
3245 		goto skip;
3246 
3247 	for (n = 0; n < ntrap; n++) {
3248 		xPointFixed p1, p2;
3249 
3250 		if (pixman_fixed_to_int(trap[n].top.y) + dst->pDrawable->y >= extents.y2 ||
3251 		    pixman_fixed_to_int(trap[n].bot.y) + dst->pDrawable->y < extents.y1)
3252 			continue;
3253 
3254 		p1.y = trap[n].top.y;
3255 		p2.y = trap[n].bot.y;
3256 		p1.x = trap[n].top.l;
3257 		p2.x = trap[n].bot.l;
3258 		polygon_add_line(tor.polygon, &p1, &p2, dx, dy);
3259 
3260 		p1.y = trap[n].bot.y;
3261 		p2.y = trap[n].top.y;
3262 		p1.x = trap[n].top.r;
3263 		p2.x = trap[n].bot.r;
3264 		polygon_add_line(tor.polygon, &p1, &p2, dx, dy);
3265 	}
3266 
3267 	tor_render(sna, &tor, &tmp, clip,
3268 		   choose_span(&tmp, dst, NULL, clip), false);
3269 
3270 	tor_fini(&tor);
3271 skip:
3272 	tmp.done(sna, &tmp);
3273 	return true;
3274 }
3275 
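/* Track the damage created by uploading the rasterised mask: if the copy
 * covers the whole pixmap, replace all CPU damage with full GPU damage,
 * otherwise add/subtract just the affected box.
 */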
3276 static void mark_damaged(PixmapPtr pixmap, struct sna_pixmap *priv,
3277 			 BoxPtr box, int16_t x, int16_t y)
3278 {
3279 	box->x1 += x; box->x2 += x;
3280 	box->y1 += y; box->y2 += y;
3281 	if (box->x1 <= 0 && box->y1 <= 0 &&
3282 	    box->x2 >= pixmap->drawable.width &&
3283 	    box->y2 >= pixmap->drawable.height) {
3284 		sna_damage_destroy(&priv->cpu_damage);
3285 		sna_damage_all(&priv->gpu_damage, pixmap);
3286 		list_del(&priv->flush_list);
3287 	} else {
3288 		sna_damage_add_box(&priv->gpu_damage, box);
3289 		sna_damage_subtract_box(&priv->cpu_damage, box);
3290 	}
3291 }
3292 
3293 bool
3294 trap_mask_converter(struct sna *sna,
3295 		    PicturePtr picture,
3296 		    INT16 x, INT16 y,
3297 		    int ntrap, xTrap *trap)
3298 {
3299 	struct tor tor;
3300 	ScreenPtr screen = picture->pDrawable->pScreen;
3301 	PixmapPtr scratch, pixmap;
3302 	struct sna_pixmap *priv;
3303 	BoxRec extents;
3304 	span_func_t span;
3305 	int dx, dy, n;
3306 
3307 	if (NO_SCAN_CONVERTER)
3308 		return false;
3309 
3310 	pixmap = get_drawable_pixmap(picture->pDrawable);
3311 	priv = sna_pixmap_move_to_gpu(pixmap, MOVE_READ | MOVE_WRITE);
3312 	if (priv == NULL)
3313 		return false;
3314 
3315 	/* XXX strict adherence to the Render specification */
3316 	if (picture->polyMode == PolyModePrecise &&
3317 	    picture->polyEdge != PolyEdgeSharp) {
3318 		DBG(("%s: fallback -- precise rasterisation requested\n",
3319 		     __FUNCTION__));
3320 		return false;
3321 	}
3322 
3323 	extents = *RegionExtents(picture->pCompositeClip);
3324 	for (n = 0; n < ntrap; n++) {
3325 		int v;
3326 
3327 		v = x + pixman_fixed_integer_floor (MIN(trap[n].top.l, trap[n].bot.l));
3328 		if (v < extents.x1)
3329 			extents.x1 = v;
3330 
3331 		v = x + pixman_fixed_integer_ceil (MAX(trap[n].top.r, trap[n].bot.r));
3332 		if (v > extents.x2)
3333 			extents.x2 = v;
3334 
3335 		v = y + pixman_fixed_integer_floor (trap[n].top.y);
3336 		if (v < extents.y1)
3337 			extents.y1 = v;
3338 
3339 		v = y + pixman_fixed_integer_ceil (trap[n].bot.y);
3340 		if (v > extents.y2)
3341 			extents.y2 = v;
3342 	}
3343 
3344 	DBG(("%s: extents (%d, %d), (%d, %d)\n",
3345 	     __FUNCTION__, extents.x1, extents.y1, extents.x2, extents.y2));
3346 
3347 	scratch = sna_pixmap_create_upload(screen,
3348 					   extents.x2-extents.x1,
3349 					   extents.y2-extents.y1,
3350 					   8, KGEM_BUFFER_WRITE_INPLACE);
3351 	if (!scratch)
3352 		return true;
3353 
3354 	dx = picture->pDrawable->x;
3355 	dy = picture->pDrawable->y;
3356 	dx *= FAST_SAMPLES_X;
3357 	dy *= FAST_SAMPLES_Y;
3358 	if (!tor_init(&tor, &extents, 2*ntrap)) {
3359 		sna_pixmap_destroy(scratch);
3360 		return true;
3361 	}
3362 
3363 	for (n = 0; n < ntrap; n++) {
3364 		xPointFixed p1, p2;
3365 
3366 		if (pixman_fixed_to_int(trap[n].top.y) + picture->pDrawable->y >= extents.y2 ||
3367 		    pixman_fixed_to_int(trap[n].bot.y) + picture->pDrawable->y < extents.y1)
3368 			continue;
3369 
3370 		p1.y = trap[n].top.y;
3371 		p2.y = trap[n].bot.y;
3372 		p1.x = trap[n].top.l;
3373 		p2.x = trap[n].bot.l;
3374 		polygon_add_line(tor.polygon, &p1, &p2, dx, dy);
3375 
3376 		p1.y = trap[n].bot.y;
3377 		p2.y = trap[n].top.y;
3378 		p1.x = trap[n].top.r;
3379 		p2.x = trap[n].bot.r;
3380 		polygon_add_line(tor.polygon, &p1, &p2, dx, dy);
3381 	}
3382 
3383 	if (picture->polyEdge == PolyEdgeSharp)
3384 		span = tor_blt_mask_mono;
3385 	else
3386 		span = tor_blt_mask;
3387 
3388 	tor_render(NULL, &tor,
3389 		   scratch->devPrivate.ptr,
3390 		   (void *)(intptr_t)scratch->devKind,
3391 		   span, true);
3392 
3393 	tor_fini(&tor);
3394 
3395 	/* XXX clip boxes */
3396 	get_drawable_deltas(picture->pDrawable, pixmap, &x, &y);
3397 	sna = to_sna_from_screen(screen);
3398 	sna->render.copy_boxes(sna, GXcopy,
3399 			       &scratch->drawable, __sna_pixmap_get_bo(scratch), -extents.x1, -extents.y1,
3400 			       &pixmap->drawable, priv->gpu_bo, x, y,
3401 			       &extents, 1, 0);
3402 	mark_damaged(pixmap, priv, &extents, x, y);
3403 	sna_pixmap_destroy(scratch);
3404 	return true;
3405 }
3406 
3407 bool
3408 triangles_span_converter(struct sna *sna,
3409 			 CARD8 op, PicturePtr src, PicturePtr dst,
3410 			 PictFormatPtr maskFormat, INT16 src_x, INT16 src_y,
3411 			 int count, xTriangle *tri)
3412 {
3413 	struct sna_composite_spans_op tmp;
3414 	struct tor tor;
3415 	BoxRec extents;
3416 	pixman_region16_t clip;
3417 	int16_t dst_x, dst_y;
3418 	int dx, dy, n;
3419 	bool was_clear;
3420 
3421 	if (NO_SCAN_CONVERTER)
3422 		return false;
3423 
3424 	if (is_mono(dst, maskFormat))
3425 		return mono_triangles_span_converter(sna, op, src, dst,
3426 						     src_x, src_y,
3427 						     count, tri);
3428 
3429 	/* XXX strict adherence to the Render specification */
3430 	if (dst->polyMode == PolyModePrecise) {
3431 		DBG(("%s: fallback -- precise rasterisation requested\n",
3432 		     __FUNCTION__));
3433 		return false;
3434 	}
3435 
3436 	if (!sna->render.check_composite_spans(sna, op, src, dst, 0, 0, 0)) {
3437 		DBG(("%s: fallback -- composite spans not supported\n",
3438 		     __FUNCTION__));
3439 		return false;
3440 	}
3441 
3442 	dst_x = pixman_fixed_to_int(tri[0].p1.x);
3443 	dst_y = pixman_fixed_to_int(tri[0].p1.y);
3444 
3445 	miTriangleBounds(count, tri, &extents);
3446 	DBG(("%s: extents (%d, %d), (%d, %d)\n",
3447 	     __FUNCTION__, extents.x1, extents.y1, extents.x2, extents.y2));
3448 
3449 	if (extents.y1 >= extents.y2 || extents.x1 >= extents.x2)
3450 		return true;
3451 
3452 #if 0
3453 	if (extents.y2 - extents.y1 < 64 && extents.x2 - extents.x1 < 64) {
3454 		DBG(("%s: fallback -- traps extents too small %dx%d\n",
3455 		     __FUNCTION__, extents.y2 - extents.y1, extents.x2 - extents.x1));
3456 		return false;
3457 	}
3458 #endif
3459 
3460 	if (!sna_compute_composite_region(&clip,
3461 					  src, NULL, dst,
3462 					  src_x + extents.x1 - dst_x,
3463 					  src_y + extents.y1 - dst_y,
3464 					  0, 0,
3465 					  extents.x1, extents.y1,
3466 					  extents.x2 - extents.x1,
3467 					  extents.y2 - extents.y1)) {
3468 		DBG(("%s: triangles do not intersect drawable clips\n",
3469 		     __FUNCTION__));
3470 		return true;
3471 	}
3472 
3473 	if (!sna->render.check_composite_spans(sna, op, src, dst,
3474 					       clip.extents.x2 - clip.extents.x1,
3475 					       clip.extents.y2 - clip.extents.y1,
3476 					       0)) {
3477 		DBG(("%s: fallback -- composite spans not supported\n",
3478 		     __FUNCTION__));
3479 		return false;
3480 	}
3481 
3482 	extents = *RegionExtents(&clip);
3483 	dx = dst->pDrawable->x;
3484 	dy = dst->pDrawable->y;
3485 
3486 	DBG(("%s: after clip -- extents (%d, %d), (%d, %d), delta=(%d, %d) src -> (%d, %d)\n",
3487 	     __FUNCTION__,
3488 	     extents.x1, extents.y1,
3489 	     extents.x2, extents.y2,
3490 	     dx, dy,
3491 	     src_x + extents.x1 - dst_x - dx,
3492 	     src_y + extents.y1 - dst_y - dy));
3493 
3494 	was_clear = sna_drawable_is_clear(dst->pDrawable);
3495 
3496 	memset(&tmp, 0, sizeof(tmp));
3497 	if (!sna->render.composite_spans(sna, op, src, dst,
3498 					 src_x + extents.x1 - dst_x - dx,
3499 					 src_y + extents.y1 - dst_y - dy,
3500 					 extents.x1,  extents.y1,
3501 					 extents.x2 - extents.x1,
3502 					 extents.y2 - extents.y1,
3503 					 0,
3504 					 &tmp)) {
3505 		DBG(("%s: fallback -- composite spans render op not supported\n",
3506 		     __FUNCTION__));
3507 		return false;
3508 	}
3509 
3510 	dx *= FAST_SAMPLES_X;
3511 	dy *= FAST_SAMPLES_Y;
3512 	if (!tor_init(&tor, &extents, 3*count))
3513 		goto skip;
3514 
3515 	for (n = 0; n < count; n++) {
3516 		polygon_add_line(tor.polygon, &tri[n].p1, &tri[n].p2, dx, dy);
3517 		polygon_add_line(tor.polygon, &tri[n].p2, &tri[n].p3, dx, dy);
3518 		polygon_add_line(tor.polygon, &tri[n].p3, &tri[n].p1, dx, dy);
3519 	}
3520 
3521 	tor_render(sna, &tor, &tmp, &clip,
3522 		   choose_span(&tmp, dst, maskFormat, &clip),
3523 		   !was_clear && maskFormat && !operator_is_bounded(op));
3524 
3525 	tor_fini(&tor);
3526 skip:
3527 	tmp.done(sna, &tmp);
3528 
3529 	REGION_UNINIT(NULL, &clip);
3530 	return true;
3531 }
3532 
3533 bool
3534 triangles_mask_converter(CARD8 op, PicturePtr src, PicturePtr dst,
3535 			 PictFormatPtr maskFormat, INT16 src_x, INT16 src_y,
3536 			 int count, xTriangle *tri)
3537 {
3538 	struct tor tor;
3539 	void (*span)(struct sna *sna,
3540 		     struct sna_composite_spans_op *op,
3541 		     pixman_region16_t *clip,
3542 		     const BoxRec *box,
3543 		     int coverage);
3544 	ScreenPtr screen = dst->pDrawable->pScreen;
3545 	PixmapPtr scratch;
3546 	PicturePtr mask;
3547 	BoxRec extents;
3548 	int16_t dst_x, dst_y;
3549 	int dx, dy;
3550 	int error, n;
3551 
3552 	if (NO_SCAN_CONVERTER)
3553 		return false;
3554 
3555 	if (is_precise(dst, maskFormat)) {
3556 		DBG(("%s: fallback -- precise rasterisation requested\n",
3557 		     __FUNCTION__));
3558 		return false;
3559 	}
3560 
3561 	if (maskFormat == NULL && count > 1) {
3562 		DBG(("%s: fallback -- individual rasterisation requested\n",
3563 		     __FUNCTION__));
3564 		return false;
3565 	}
3566 
3567 	miTriangleBounds(count, tri, &extents);
3568 	DBG(("%s: extents (%d, %d), (%d, %d)\n",
3569 	     __FUNCTION__, extents.x1, extents.y1, extents.x2, extents.y2));
3570 
3571 	if (extents.y1 >= extents.y2 || extents.x1 >= extents.x2)
3572 		return true;
3573 
3574 	if (!sna_compute_composite_extents(&extents,
3575 					   src, NULL, dst,
3576 					   src_x, src_y,
3577 					   0, 0,
3578 					   extents.x1, extents.y1,
3579 					   extents.x2 - extents.x1,
3580 					   extents.y2 - extents.y1))
3581 		return true;
3582 
3583 	DBG(("%s: extents (%d, %d), (%d, %d)\n",
3584 	     __FUNCTION__, extents.x1, extents.y1, extents.x2, extents.y2));
3585 
3586 	extents.y2 -= extents.y1;
3587 	extents.x2 -= extents.x1;
3588 	extents.x1 -= dst->pDrawable->x;
3589 	extents.y1 -= dst->pDrawable->y;
3590 	dst_x = extents.x1;
3591 	dst_y = extents.y1;
3592 	dx = -extents.x1 * FAST_SAMPLES_X;
3593 	dy = -extents.y1 * FAST_SAMPLES_Y;
3594 	extents.x1 = extents.y1 = 0;
3595 
3596 	DBG(("%s: mask (%dx%d)\n",
3597 	     __FUNCTION__, extents.x2, extents.y2));
3598 	scratch = sna_pixmap_create_upload(screen,
3599 					   extents.x2, extents.y2, 8,
3600 					   KGEM_BUFFER_WRITE_INPLACE);
3601 	if (!scratch)
3602 		return true;
3603 
3604 	DBG(("%s: created buffer %p, stride %d\n",
3605 	     __FUNCTION__, scratch->devPrivate.ptr, scratch->devKind));
3606 
3607 	if (!tor_init(&tor, &extents, 3*count)) {
3608 		sna_pixmap_destroy(scratch);
3609 		return true;
3610 	}
3611 
3612 	for (n = 0; n < count; n++) {
3613 		polygon_add_line(tor.polygon, &tri[n].p1, &tri[n].p2, dx, dy);
3614 		polygon_add_line(tor.polygon, &tri[n].p2, &tri[n].p3, dx, dy);
3615 		polygon_add_line(tor.polygon, &tri[n].p3, &tri[n].p1, dx, dy);
3616 	}
3617 
3618 	if (maskFormat ? maskFormat->depth < 8 : dst->polyEdge == PolyEdgeSharp)
3619 		span = tor_blt_mask_mono;
3620 	else
3621 		span = tor_blt_mask;
3622 
3623 	tor_render(NULL, &tor,
3624 		   scratch->devPrivate.ptr,
3625 		   (void *)(intptr_t)scratch->devKind,
3626 		   span, true);
3627 
3628 	mask = CreatePicture(0, &scratch->drawable,
3629 			     PictureMatchFormat(screen, 8, PICT_a8),
3630 			     0, 0, serverClient, &error);
3631 	if (mask) {
3632 		CompositePicture(op, src, mask, dst,
3633 				 src_x + dst_x - pixman_fixed_to_int(tri[0].p1.x),
3634 				 src_y + dst_y - pixman_fixed_to_int(tri[0].p1.y),
3635 				 0, 0,
3636 				 dst_x, dst_y,
3637 				 extents.x2, extents.y2);
3638 		FreePicture(mask, 0);
3639 	}
3640 	tor_fini(&tor);
3641 	sna_pixmap_destroy(scratch);
3642 
3643 	return true;
3644 }
3645 
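/* Per-thread state for rasterising a triangle strip in horizontal bands:
 * every worker walks the whole strip, but its tor extents are restricted to
 * its own band so it only emits spans for that slice of the destination.
 */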
3646 struct tristrip_thread {
3647 	struct sna *sna;
3648 	const struct sna_composite_spans_op *op;
3649 	const xPointFixed *points;
3650 	RegionPtr clip;
3651 	span_func_t span;
3652 	BoxRec extents;
3653 	int dx, dy, draw_y;
3654 	int count;
3655 	bool unbounded;
3656 };
3657 
3658 static void
3659 tristrip_thread(void *arg)
3660 {
3661 	struct tristrip_thread *thread = arg;
3662 	struct span_thread_boxes boxes;
3663 	struct tor tor;
3664 	int n, cw, ccw;
3665 
3666 	if (!tor_init(&tor, &thread->extents, 2*thread->count))
3667 		return;
3668 
3669 	span_thread_boxes_init(&boxes, thread->op, thread->clip);
3670 
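	/* Convert the strip's outer boundary into polygon edges: vertices
	 * alternate between the two sides of the strip (cw/ccw), each new
	 * vertex adds one boundary edge, and a final edge closes the outline.
	 */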
3671 	cw = 0; ccw = 1;
3672 	polygon_add_line(tor.polygon,
3673 			 &thread->points[ccw], &thread->points[cw],
3674 			 thread->dx, thread->dy);
3675 	n = 2;
3676 	do {
3677 		polygon_add_line(tor.polygon,
3678 				 &thread->points[cw], &thread->points[n],
3679 				 thread->dx, thread->dy);
3680 		cw = n;
3681 		if (++n == thread->count)
3682 			break;
3683 
3684 		polygon_add_line(tor.polygon,
3685 				 &thread->points[n], &thread->points[ccw],
3686 				 thread->dx, thread->dy);
3687 		ccw = n;
3688 		if (++n == thread->count)
3689 			break;
3690 	} while (1);
3691 	polygon_add_line(tor.polygon,
3692 			 &thread->points[cw], &thread->points[ccw],
3693 			 thread->dx, thread->dy);
3694 	assert(tor.polygon->num_edges <= 2*thread->count);
3695 
3696 	tor_render(thread->sna, &tor,
3697 		   (struct sna_composite_spans_op *)&boxes, thread->clip,
3698 		   thread->span, thread->unbounded);
3699 
3700 	tor_fini(&tor);
3701 
3702 	if (boxes.num_boxes) {
3703 		DBG(("%s: flushing %d boxes\n", __FUNCTION__, boxes.num_boxes));
3704 		assert(boxes.num_boxes <= SPAN_THREAD_MAX_BOXES);
3705 		thread->op->thread_boxes(thread->sna, thread->op,
3706 					 boxes.boxes, boxes.num_boxes);
3707 	}
3708 }
3709 
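/* Composite a triangle strip using the imprecise (FAST_SAMPLES) scan
 * converter and the backend's composite-spans interface, splitting the work
 * into horizontal bands across threads when that looks profitable.  Returns
 * false to request a fallback, true once the operation has been handled.
 */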
3710 bool
3711 imprecise_tristrip_span_converter(struct sna *sna,
3712 				  CARD8 op, PicturePtr src, PicturePtr dst,
3713 				  PictFormatPtr maskFormat, INT16 src_x, INT16 src_y,
3714 				  int count, xPointFixed *points)
3715 {
3716 	struct sna_composite_spans_op tmp;
3717 	BoxRec extents;
3718 	pixman_region16_t clip;
3719 	int16_t dst_x, dst_y;
3720 	int dx, dy, num_threads;
3721 	bool was_clear;
3722 
3723 	if (!sna->render.check_composite_spans(sna, op, src, dst, 0, 0, 0)) {
3724 		DBG(("%s: fallback -- composite spans not supported\n",
3725 		     __FUNCTION__));
3726 		return false;
3727 	}
3728 
3729 	dst_x = pixman_fixed_to_int(points[0].x);
3730 	dst_y = pixman_fixed_to_int(points[0].y);
3731 
3732 	miPointFixedBounds(count, points, &extents);
3733 	DBG(("%s: extents (%d, %d), (%d, %d)\n",
3734 	     __FUNCTION__, extents.x1, extents.y1, extents.x2, extents.y2));
3735 
3736 	if (extents.y1 >= extents.y2 || extents.x1 >= extents.x2)
3737 		return true;
3738 
3739 #if 0
3740 	if (extents.y2 - extents.y1 < 64 && extents.x2 - extents.x1 < 64) {
3741 		DBG(("%s: fallback -- traps extents too small %dx%d\n",
3742 		     __FUNCTION__, extents.y2 - extents.y1, extents.x2 - extents.x1));
3743 		return false;
3744 	}
3745 #endif
3746 
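	/* Clip the operation against the source, destination and their clip
	 * lists; an empty region means nothing would be drawn.
	 */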
3747 	if (!sna_compute_composite_region(&clip,
3748 					  src, NULL, dst,
3749 					  src_x + extents.x1 - dst_x,
3750 					  src_y + extents.y1 - dst_y,
3751 					  0, 0,
3752 					  extents.x1, extents.y1,
3753 					  extents.x2 - extents.x1,
3754 					  extents.y2 - extents.y1)) {
3755 		DBG(("%s: triangles do not intersect drawable clips\n",
3756 		     __FUNCTION__));
3757 		return true;
3758 	}
3759 
3760 	if (!sna->render.check_composite_spans(sna, op, src, dst,
3761 					       clip.extents.x2 - clip.extents.x1,
3762 					       clip.extents.y2 - clip.extents.y1,
3763 					       0)) {
3764 		DBG(("%s: fallback -- composite spans not supported\n",
3765 		     __FUNCTION__));
3766 		return false;
3767 	}
3768 
3769 	extents = *RegionExtents(&clip);
3770 	dx = dst->pDrawable->x;
3771 	dy = dst->pDrawable->y;
3772 
3773 	DBG(("%s: after clip -- extents (%d, %d), (%d, %d), delta=(%d, %d) src -> (%d, %d)\n",
3774 	     __FUNCTION__,
3775 	     extents.x1, extents.y1,
3776 	     extents.x2, extents.y2,
3777 	     dx, dy,
3778 	     src_x + extents.x1 - dst_x - dx,
3779 	     src_y + extents.y1 - dst_y - dy));
3780 
3781 	was_clear = sna_drawable_is_clear(dst->pDrawable);
3782 
3783 	memset(&tmp, 0, sizeof(tmp));
3784 	if (!sna->render.composite_spans(sna, op, src, dst,
3785 					 src_x + extents.x1 - dst_x - dx,
3786 					 src_y + extents.y1 - dst_y - dy,
3787 					 extents.x1,  extents.y1,
3788 					 extents.x2 - extents.x1,
3789 					 extents.y2 - extents.y1,
3790 					 0,
3791 					 &tmp)) {
3792 		DBG(("%s: fallback -- composite spans render op not supported\n",
3793 		     __FUNCTION__));
3794 		return false;
3795 	}
3796 
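	/* Convert the drawable origin into the scan converter's subpixel
	 * sample grid.
	 */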
3797 	dx *= FAST_SAMPLES_X;
3798 	dy *= FAST_SAMPLES_Y;
3799 
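	/* Only use the threaded span path when the backend supports batched
	 * thread_boxes emission and the area is large enough; the final
	 * argument to sna_use_threads() is presumably a minimum per-thread
	 * workload.
	 */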
3800 	num_threads = 1;
3801 	if (!NO_GPU_THREADS &&
3802 	    tmp.thread_boxes &&
3803 	    thread_choose_span(&tmp, dst, maskFormat, &clip))
3804 		num_threads = sna_use_threads(extents.x2 - extents.x1,
3805 					      extents.y2 - extents.y1,
3806 					      16);
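	/* Single-threaded path: build the strip outline once and render it
	 * directly with the composite-spans op.
	 */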
3807 	if (num_threads == 1) {
3808 		struct tor tor;
3809 		int cw, ccw, n;
3810 
3811 		if (!tor_init(&tor, &extents, 2*count))
3812 			goto skip;
3813 
3814 		cw = 0; ccw = 1;
3815 		polygon_add_line(tor.polygon,
3816 				 &points[ccw], &points[cw],
3817 				 dx, dy);
3818 		n = 2;
3819 		do {
3820 			polygon_add_line(tor.polygon,
3821 					 &points[cw], &points[n],
3822 					 dx, dy);
3823 			cw = n;
3824 			if (++n == count)
3825 				break;
3826 
3827 			polygon_add_line(tor.polygon,
3828 					 &points[n], &points[ccw],
3829 					 dx, dy);
3830 			ccw = n;
3831 			if (++n == count)
3832 				break;
3833 		} while (1);
3834 		polygon_add_line(tor.polygon,
3835 				 &points[cw], &points[ccw],
3836 				 dx, dy);
3837 		assert(tor.polygon->num_edges <= 2*count);
3838 
3839 		tor_render(sna, &tor, &tmp, &clip,
3840 			   choose_span(&tmp, dst, maskFormat, &clip),
3841 			   !was_clear && maskFormat && !operator_is_bounded(op));
3842 
3843 		tor_fini(&tor);
3844 	} else {
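		/* Threaded path: threads[0] is filled in as a template, the
		 * clip extents are split into bands, and the calling thread
		 * keeps the last band for itself.
		 */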
3845 		struct tristrip_thread threads[num_threads];
3846 		int y, h, n;
3847 
3848 		DBG(("%s: using %d threads for tristrip compositing %dx%d\n",
3849 		     __FUNCTION__, num_threads,
3850 		     clip.extents.x2 - clip.extents.x1,
3851 		     clip.extents.y2 - clip.extents.y1));
3852 
3853 		threads[0].sna = sna;
3854 		threads[0].op = &tmp;
3855 		threads[0].points = points;
3856 		threads[0].count = count;
3857 		threads[0].extents = clip.extents;
3858 		threads[0].clip = &clip;
3859 		threads[0].dx = dx;
3860 		threads[0].dy = dy;
3861 		threads[0].draw_y = dst->pDrawable->y;
3862 		threads[0].unbounded = !was_clear && maskFormat && !operator_is_bounded(op);
3863 		threads[0].span = thread_choose_span(&tmp, dst, maskFormat, &clip);
3864 
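		/* Split the clip extents into bands of height h (rounded up)
		 * and drop the final thread if the earlier bands already
		 * cover the whole height.
		 */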
3865 		y = clip.extents.y1;
3866 		h = clip.extents.y2 - clip.extents.y1;
3867 		h = (h + num_threads - 1) / num_threads;
3868 		num_threads -= (num_threads-1) * h >= clip.extents.y2 - clip.extents.y1;
3869 
3870 		for (n = 1; n < num_threads; n++) {
3871 			threads[n] = threads[0];
3872 			threads[n].extents.y1 = y;
3873 			threads[n].extents.y2 = y += h;
3874 
3875 			sna_threads_run(n, tristrip_thread, &threads[n]);
3876 		}
3877 
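		/* The calling thread rasterises the remaining band itself and
		 * then waits for the workers to finish.
		 */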
3878 		assert(y < threads[0].extents.y2);
3879 		threads[0].extents.y1 = y;
3880 		tristrip_thread(&threads[0]);
3881 
3882 		sna_threads_wait();
3883 	}
3884 skip:
3885 	tmp.done(sna, &tmp);
3886 
3887 	REGION_UNINIT(NULL, &clip);
3888 	return true;
3889 }
3890