1 /*
2  * Copyright (c) 2007  David Turner
3  * Copyright (c) 2008  M Joonas Pihlaja
4  * Copyright (c) 2011 Intel Corporation
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice (including the next
14  * paragraph) shall be included in all copies or substantial portions of the
15  * Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23  * SOFTWARE.
24  *
25  * Authors:
26  *    Chris Wilson <chris@chris-wilson.co.uk>
27  *
28  */
29 
30 #ifdef HAVE_CONFIG_H
31 #include "config.h"
32 #endif
33 
34 #include "sna.h"
35 #include "sna_render.h"
36 #include "sna_render_inline.h"
37 #include "sna_trapezoids.h"
38 #include "fb/fbpict.h"
39 
40 #include <mipict.h>
41 
42 #undef FAST_SAMPLES_X
43 #undef FAST_SAMPLES_Y
44 
45 /* TODO: Emit unantialiased and MSAA triangles. */
46 
#ifndef MAX
/* Larger of two values.  Each argument may be evaluated more than once. */
#define MAX(a, b) ((a) >= (b) ? (a) : (b))
#endif

#ifndef MIN
/* Smaller of two values.  Each argument may be evaluated more than once. */
#define MIN(a, b) ((a) <= (b) ? (a) : (b))
#endif
54 
/* Split the grid coordinate `coord` into a whole part and a fractional part
 * with respect to `scale` samples per unit.  C division truncates towards
 * zero, so a negative remainder is folded back: for a positive scale the
 * result satisfies coord == whole*scale + frac with 0 <= frac < scale.
 * The arguments are evaluated more than once. */
#define _GRID_TO_INT_FRAC(coord, whole, frac, scale) do { \
	(whole) = (coord) / (scale); \
	(frac) = (coord) % (scale); \
	if ((frac) < 0) { \
		(frac) += (scale); \
		--(whole); \
	} \
} while (0)
63 
/* Coverage value of a completely covered pixel.  Areas are accumulated in
 * doubled units (see the 2*fx terms in the cell_list_add_*span functions),
 * hence the factor of two. */
#define GRID_AREA (2*SAMPLES_X*SAMPLES_Y)
65 
/* Convert a fixed-point x coordinate (16 fractional bits) into sample-grid
 * units, SAMPLES_X per unit, rounding to the nearest grid column.  The
 * multiplication is done in 64 bits so that it cannot overflow. */
static inline int pixman_fixed_to_grid_x(pixman_fixed_t v)
{
	const int64_t scaled = (int64_t)v * SAMPLES_X;

	return (scaled + (1<<15)) >> 16;
}
70 
/* Convert a fixed-point y coordinate (16 fractional bits) into sample-grid
 * units, SAMPLES_Y per unit, rounding to the nearest grid row.  The
 * multiplication is done in 64 bits so that it cannot overflow. */
static inline int pixman_fixed_to_grid_y(pixman_fixed_t v)
{
	const int64_t scaled = (int64_t)v * SAMPLES_Y;

	return (scaled + (1<<15)) >> 16;
}
75 
/* Callback used to emit one span.  `box` is the rectangle to draw and
 * `coverage` its accumulated area in GRID_AREA units (the tor_blt_span*
 * implementations convert it with AREA_TO_FLOAT).  `clip` is only consulted
 * by the clipped variants. */
typedef void (*span_func_t)(struct sna *sna,
			    struct sna_composite_spans_op *op,
			    pixman_region16_t *clip,
			    const BoxRec *box,
			    int coverage);
81 
#if HAS_DEBUG_FULL
/* Debug-only sanity check: abort the server with FatalError() if `box` is
 * not fully contained in the pixmap's [0, width] x [0, height] extents.
 * `function` names the caller for the error message. */
static void _assert_pixmap_contains_box(PixmapPtr pixmap, BoxPtr box, const char *function)
{
	if (box->x1 >= 0 && box->y1 >= 0 &&
	    box->x2 <= pixmap->drawable.width &&
	    box->y2 <= pixmap->drawable.height)
		return;

	FatalError("%s: damage box is beyond the pixmap: box=(%d, %d), (%d, %d), pixmap=(%d, %d)\n",
		   function,
		   box->x1, box->y1, box->x2, box->y2,
		   pixmap->drawable.width,
		   pixmap->drawable.height);
}
#define assert_pixmap_contains_box(p, b) _assert_pixmap_contains_box(p, b, __FUNCTION__)
#else
/* Compiled out when full debugging is disabled. */
#define assert_pixmap_contains_box(p, b)
#endif
100 
/* Record `region` as damaged on the operation's destination.
 *
 * Does nothing when the operation tracks no damage.  Otherwise the region
 * is translated IN PLACE by the destination offset (op->dst.x, op->dst.y)
 * before being added, so the caller's region is modified. */
static void apply_damage(struct sna_composite_op *op, RegionPtr region)
{
	DBG(("%s: damage=%p, region=%dx[(%d, %d), (%d, %d)]\n",
	     __FUNCTION__, op->damage,
	     region_num_rects(region),
	     region->extents.x1, region->extents.y1,
	     region->extents.x2, region->extents.y2));

	if (op->damage != NULL) {
		RegionTranslate(region, op->dst.x, op->dst.y);

		assert_pixmap_contains_box(op->dst.pixmap, RegionExtents(region));
		sna_damage_add(op->damage, region);
	}
}
117 
_apply_damage_box(struct sna_composite_op * op,const BoxRec * box)118 static void _apply_damage_box(struct sna_composite_op *op, const BoxRec *box)
119 {
120 	BoxRec r;
121 
122 	r.x1 = box->x1 + op->dst.x;
123 	r.x2 = box->x2 + op->dst.x;
124 	r.y1 = box->y1 + op->dst.y;
125 	r.y2 = box->y2 + op->dst.y;
126 
127 	assert_pixmap_contains_box(op->dst.pixmap, &r);
128 	sna_damage_add_box(op->damage, &r);
129 }
130 
apply_damage_box(struct sna_composite_op * op,const BoxRec * box)131 inline static void apply_damage_box(struct sna_composite_op *op, const BoxRec *box)
132 {
133 	if (op->damage)
134 		_apply_damage_box(op, box);
135 }
136 
/* Split a sample-grid x coordinate into pixel column `i` and subsample
 * offset `f` within that column. */
#define SAMPLES_X_TO_INT_FRAC(x, i, f) \
	_GRID_TO_INT_FRAC(x, i, f, SAMPLES_X)

/* Convert an accumulated area into a fraction of GRID_AREA (1.0 for a
 * value equal to GRID_AREA). */
#define AREA_TO_FLOAT(c)  ((c) / (float)GRID_AREA)
/* Halve `c`, rounding up: (c + 1) >> 1. */
#define TO_ALPHA(c) (((c)+1) >> 1)
142 
/* A value held as an integer quotient plus a remainder.  For edges the
 * remainder is expressed relative to the edge's `dy` denominator (see
 * edge_advance()). */
struct quorem {
	int64_t quo;
	int64_t rem;
};
147 
/* One polygon edge, oriented downwards, as tracked by the scan converter. */
struct edge {
	/* Links for the y-bucket lists and the active list. */
	struct edge *next, *prev;

	/* Winding contribution of the edge: +1 or -1. */
	int dir;

	/* Number of sample rows the edge still has to cover. */
	int height_left;

	/* Current x position in sample-grid units, rounded from `x`. */
	int cell;
	/* Exact current x as quotient/remainder with denominator `dy`.
	 * Left at zero for vertical edges, which use `cell` only. */
	struct quorem x;

	/* Advance of the current x when moving down a subsample line. */
	struct quorem dxdy;
	/* Denominator for the remainders above: the scaled y2-y1 of the edge
	 * after orienting it downwards.  Zero marks an edge that is vertical
	 * on the sample grid. */
	int64_t dy;

	/* The clipped y of the top of the edge. */
	int ytop;
};
167 
/* Number of subsample rows per y-bucket. Must be SAMPLES_Y. */
#define EDGE_Y_BUCKET_HEIGHT SAMPLES_Y
/* Index of the y-bucket holding sample row `y`, for a polygon whose first
 * sample row is `ymin`. */
#define EDGE_Y_BUCKET_INDEX(y, ymin) (((y) - (ymin))/EDGE_Y_BUCKET_HEIGHT)
171 
172 /* A collection of sorted and vertically clipped edges of the polygon.
173  * Edges are moved from the polygon to an active list while scan
174  * converting. */
struct polygon {
	/* The vertical clip extents. */
	int ymin, ymax;

	/* Array of edges all starting in the same bucket.	An edge is put
	 * into bucket EDGE_Y_BUCKET_INDEX(edge->ytop, polygon->ymin) when
	 * it is added to the polygon.  polygon_init() stores a (void *)-1
	 * sentinel in the slot after the last bucket. */
	struct edge **y_buckets;
	/* Inline storage used for y_buckets when few enough buckets are
	 * needed; otherwise y_buckets is heap allocated. */
	struct edge *y_buckets_embedded[64];

	/* Inline storage used for `edges` when few enough edges are
	 * requested; otherwise `edges` is heap allocated. */
	struct edge edges_embedded[32];
	/* Edge storage; the first num_edges entries are in use. */
	struct edge *edges;
	int num_edges;
};
189 
190 /* A cell records the effect on pixel coverage of polygon edges
191  * passing through a pixel.  It contains two accumulators of pixel
192  * coverage.
193  *
194  * Consider the effects of a polygon edge on the coverage of a pixel
195  * it intersects and that of the following one.  The coverage of the
196  * following pixel is the height of the edge multiplied by the width
197  * of the pixel, and the coverage of the pixel itself is the area of
198  * the trapezoid formed by the edge and the right side of the pixel.
199  *
200  * +-----------------------+-----------------------+
201  * |                       |                       |
202  * |                       |                       |
203  * |_______________________|_______________________|
204  * |   \...................|.......................|\
205  * |    \..................|.......................| |
206  * |     \.................|.......................| |
207  * |      \....covered.....|.......................| |
208  * |       \....area.......|.......................| } covered height
209  * |        \..............|.......................| |
210  * |uncovered\.............|.......................| |
211  * |  area    \............|.......................| |
212  * |___________\...........|.......................|/
213  * |                       |                       |
214  * |                       |                       |
215  * |                       |                       |
216  * +-----------------------+-----------------------+
217  *
218  * Since the coverage of the following pixel will always be a multiple
219  * of the width of the pixel, we can store the height of the covered
220  * area instead.  The coverage of the pixel itself is the total
221  * coverage minus the area of the uncovered area to the left of the
222  * edge.  As it's faster to compute the uncovered area we only store
223  * that and subtract it from the total coverage later when forming
224  * spans to blit.
225  *
226  * The heights and areas are signed, with left edges of the polygon
227  * having positive sign and right edges having negative sign.  When
228  * two edges intersect they swap their left/rightness so their
229  * contribution above and below the intersection point must be
230  * computed separately. */
struct cell {
	/* Next cell in ascending x order. */
	struct cell *next;
	/* Pixel column this cell describes. */
	int x;
	/* Signed accumulator of the area left of the edges crossing this
	 * pixel, in doubled subsample units. */
	int16_t uncovered_area;
	/* Signed accumulator of the covered height, in sample rows. */
	int16_t covered_height;
};
237 
238 /* A cell list represents the scan line sparsely as cells ordered by
239  * ascending x.  It is geared towards scanning the cells in order
240  * using an internal cursor. */
struct cell_list {
	/* Last cell returned by cell_list_find(); searches resume here. */
	struct cell *cursor;

	/* Sentinel cells bracketing the list: head has x == INT_MIN and
	 * tail has x == INT_MAX.  head.next points to the left-most cell
	 * in the scan line. */
	struct cell head, tail;

	/* Horizontal range of pixel columns that get real cells: [x1, x2).
	 * Lookups outside it are redirected to head or tail. */
	int16_t x1, x2;
	/* Number of cells handed out from `cells`, and its capacity. */
	int16_t count, size;
	/* Cell storage: either `embedded` or a heap allocation. */
	struct cell *cells;
	struct cell embedded[256];
};
252 
253 /* The active list contains edges in the current scan line ordered by
254  * the x-coordinate of the intercept of the edge and the scan line. */
struct active_list {
	/* Sentinel edges bracketing the list (cell == INT_MIN / INT_MAX).
	 * head.next is the leftmost edge on the current scan line. */
	struct edge head, tail;
};
259 
/* State of one scan conversion: the edges to convert, the edges crossing
 * the current row and the coverage accumulated for that row.  The
 * one-element arrays let the members be passed around as pointers. */
struct tor {
    struct polygon	polygon[1];
    struct active_list	active[1];
    struct cell_list	coverages[1];

    /* Copy of the box passed to tor_init(). */
    BoxRec extents;
};
267 
268 /* Rewinds the cell list's cursor to the beginning.  After rewinding
269  * we're good to cell_list_find() the cell any x coordinate. */
270 inline static void
cell_list_rewind(struct cell_list * cells)271 cell_list_rewind(struct cell_list *cells)
272 {
273 	cells->cursor = &cells->head;
274 }
275 
276 static bool
cell_list_init(struct cell_list * cells,int x1,int x2)277 cell_list_init(struct cell_list *cells, int x1, int x2)
278 {
279 	cells->tail.next = NULL;
280 	cells->tail.x = INT_MAX;
281 	cells->head.x = INT_MIN;
282 	cells->head.next = &cells->tail;
283 	cells->head.covered_height = 0;
284 	cell_list_rewind(cells);
285 	cells->count = 0;
286 	cells->x1 = x1;
287 	cells->x2 = x2;
288 	cells->size = x2 - x1 + 1;
289 	cells->cells = cells->embedded;
290 	if (cells->size > ARRAY_SIZE(cells->embedded))
291 		cells->cells = malloc(cells->size * sizeof(struct cell));
292 	return cells->cells != NULL;
293 }
294 
295 static void
cell_list_fini(struct cell_list * cells)296 cell_list_fini(struct cell_list *cells)
297 {
298 	if (cells->cells != cells->embedded)
299 		free(cells->cells);
300 }
301 
302 inline static void
cell_list_reset(struct cell_list * cells)303 cell_list_reset(struct cell_list *cells)
304 {
305 	cell_list_rewind(cells);
306 	cells->head.next = &cells->tail;
307 	cells->head.covered_height = 0;
308 	cells->count = 0;
309 }
310 
311 inline static struct cell *
cell_list_alloc(struct cell_list * cells,struct cell * tail,int x)312 cell_list_alloc(struct cell_list *cells,
313 		struct cell *tail,
314 		int x)
315 {
316 	struct cell *cell;
317 
318 	assert(cells->count < cells->size);
319 	cell = cells->cells + cells->count++;
320 	cell->next = tail->next;
321 	tail->next = cell;
322 
323 	cell->x = x;
324 	cell->covered_height = 0;
325 	cell->uncovered_area = 0;
326 	return cell;
327 }
328 
329 /* Find a cell at the given x-coordinate.  Returns %NULL if a new cell
330  * needed to be allocated but couldn't be.  Cells must be found with
331  * non-decreasing x-coordinate until the cell list is rewound using
332  * cell_list_rewind(). Ownership of the returned cell is retained by
333  * the cell list. */
334 inline static struct cell *
cell_list_find(struct cell_list * cells,int x)335 cell_list_find(struct cell_list *cells, int x)
336 {
337 	struct cell *tail;
338 
339 	if (x >= cells->x2)
340 		return &cells->tail;
341 
342 	if (x < cells->x1)
343 		return &cells->head;
344 
345 	tail = cells->cursor;
346 	if (tail->x == x)
347 		return tail;
348 
349 	do {
350 		if (tail->next->x > x)
351 			break;
352 
353 		tail = tail->next;
354 		if (tail->next->x > x)
355 			break;
356 
357 		tail = tail->next;
358 		if (tail->next->x > x)
359 			break;
360 
361 		tail = tail->next;
362 	} while (1);
363 
364 	if (tail->x != x)
365 		tail = cell_list_alloc(cells, tail, x);
366 
367 	return cells->cursor = tail;
368 }
369 
370 /* Add a subpixel span covering [x1, x2) to the coverage cells. */
371 inline static void
cell_list_add_subspan(struct cell_list * cells,int x1,int x2)372 cell_list_add_subspan(struct cell_list *cells, int x1, int x2)
373 {
374 	struct cell *cell;
375 	int ix1, fx1;
376 	int ix2, fx2;
377 
378 	if (x1 == x2)
379 		return;
380 
381 	SAMPLES_X_TO_INT_FRAC(x1, ix1, fx1);
382 	SAMPLES_X_TO_INT_FRAC(x2, ix2, fx2);
383 
384 	__DBG(("%s: x1=%d (%d+%d), x2=%d (%d+%d)\n", __FUNCTION__,
385 	       x1, ix1, fx1, x2, ix2, fx2));
386 
387 	cell = cell_list_find(cells, ix1);
388 	if (ix1 != ix2) {
389 		cell->uncovered_area += 2*fx1;
390 		++cell->covered_height;
391 
392 		cell = cell_list_find(cells, ix2);
393 		cell->uncovered_area -= 2*fx2;
394 		--cell->covered_height;
395 	} else
396 		cell->uncovered_area += 2*(fx1-fx2);
397 }
398 
399 inline static void
cell_list_add_span(struct cell_list * cells,int x1,int x2)400 cell_list_add_span(struct cell_list *cells, int x1, int x2)
401 {
402 	struct cell *cell;
403 	int ix1, fx1;
404 	int ix2, fx2;
405 
406 	SAMPLES_X_TO_INT_FRAC(x1, ix1, fx1);
407 	SAMPLES_X_TO_INT_FRAC(x2, ix2, fx2);
408 
409 	__DBG(("%s: x1=%d (%d+%d), x2=%d (%d+%d)\n", __FUNCTION__,
410 	       x1, ix1, fx1, x2, ix2, fx2));
411 
412 	cell = cell_list_find(cells, ix1);
413 	if (ix1 != ix2) {
414 		cell->uncovered_area += 2*fx1*SAMPLES_Y;
415 		cell->covered_height += SAMPLES_Y;
416 
417 		cell = cell_list_find(cells, ix2);
418 		cell->uncovered_area -= 2*fx2*SAMPLES_Y;
419 		cell->covered_height -= SAMPLES_Y;
420 	} else
421 		cell->uncovered_area += 2*(fx1-fx2)*SAMPLES_Y;
422 }
423 
424 static void
polygon_fini(struct polygon * polygon)425 polygon_fini(struct polygon *polygon)
426 {
427 	if (polygon->y_buckets != polygon->y_buckets_embedded)
428 		free(polygon->y_buckets);
429 
430 	if (polygon->edges != polygon->edges_embedded)
431 		free(polygon->edges);
432 }
433 
434 static bool
polygon_init(struct polygon * polygon,int num_edges,int ymin,int ymax)435 polygon_init(struct polygon *polygon, int num_edges, int ymin, int ymax)
436 {
437 	unsigned num_buckets = EDGE_Y_BUCKET_INDEX(ymax-1, ymin) + 1;
438 
439 	if (unlikely(ymax - ymin > 0x7FFFFFFFU - EDGE_Y_BUCKET_HEIGHT))
440 		return false;
441 
442 	polygon->edges = polygon->edges_embedded;
443 	polygon->y_buckets = polygon->y_buckets_embedded;
444 
445 	polygon->num_edges = 0;
446 	if (num_edges > (int)ARRAY_SIZE(polygon->edges_embedded)) {
447 		polygon->edges = malloc(sizeof(struct edge)*num_edges);
448 		if (unlikely(NULL == polygon->edges))
449 			goto bail_no_mem;
450 	}
451 
452 	if (num_buckets >= ARRAY_SIZE(polygon->y_buckets_embedded)) {
453 		polygon->y_buckets = malloc((1+num_buckets)*sizeof(struct edge *));
454 		if (unlikely(NULL == polygon->y_buckets))
455 			goto bail_no_mem;
456 	}
457 	memset(polygon->y_buckets, 0, num_buckets * sizeof(struct edge *));
458 	polygon->y_buckets[num_buckets] = (void *)-1;
459 
460 	polygon->ymin = ymin;
461 	polygon->ymax = ymax;
462 	return true;
463 
464 bail_no_mem:
465 	polygon_fini(polygon);
466 	return false;
467 }
468 
469 static void
_polygon_insert_edge_into_its_y_bucket(struct polygon * polygon,struct edge * e)470 _polygon_insert_edge_into_its_y_bucket(struct polygon *polygon, struct edge *e)
471 {
472 	unsigned ix = EDGE_Y_BUCKET_INDEX(e->ytop, polygon->ymin);
473 	struct edge **ptail = &polygon->y_buckets[ix];
474 	assert(e->ytop < polygon->ymax);
475 	e->next = *ptail;
476 	*ptail = e;
477 }
478 
edge_to_cell(struct edge * e)479 static inline int edge_to_cell(struct edge *e)
480 {
481 	int x = e->x.quo;
482 	if (e->x.rem > e->dy/2)
483 		x++;
484 	__DBG(("%s: %lld.%lld -> %d\n",
485 	       __FUNCTION__, e->x.quo, e->x.rem, x));
486 	return x;
487 }
488 
edge_advance(struct edge * e)489 static inline int edge_advance(struct edge *e)
490 {
491 	__DBG(("%s: %lld.%lld + %lld.%lld\n",
492 	       __FUNCTION__, e->x.quo, e->x.rem, e->dxdy.quo, e->dxdy.rem));
493 
494 	e->x.quo += e->dxdy.quo;
495 	e->x.rem += e->dxdy.rem;
496 	if (e->x.rem < 0) {
497 		e->x.quo--;
498 		e->x.rem += e->dy;
499 	} else if (e->x.rem >= e->dy) {
500 		e->x.quo++;
501 		e->x.rem -= e->dy;
502 	}
503 	assert(e->x.rem >= 0 && e->x.rem < e->dy);
504 	return edge_to_cell(e);
505 }
506 
/* Add one side of the trapezoid `t` to the polygon.
 *
 * `edge` is the line carrying the side and `dir` its winding contribution
 * (tor_add_trapezoid() passes +1 for the left and -1 for the right side).
 * The vertical extent comes from t->top/t->bottom, translated by `dy` and
 * clipped to [polygon->ymin, polygon->ymax]; `dx` translates the edge
 * horizontally.  Both offsets are in sample-grid units.
 *
 * The edge is built in the next free slot of polygon->edges and only
 * committed (bucketed and counted) if it covers at least one sample row.
 * No capacity check is made here: the caller is assumed to have passed a
 * sufficient num_edges to polygon_init(). */
inline static void
polygon_add_edge(struct polygon *polygon,
		 const xTrapezoid *t,
		 const xLineFixed *edge,
		 int dir, int dx, int dy)
{
	struct edge *e = &polygon->edges[polygon->num_edges];
	const int ymin = polygon->ymin;
	const int ymax = polygon->ymax;
	int ytop, ybot;

	assert(t->bottom > t->top);
	assert(edge->p2.y > edge->p1.y);

	ytop = pixman_fixed_to_grid_y(t->top) + dy;
	if (ytop < ymin)
		ytop = ymin;

	ybot = pixman_fixed_to_grid_y(t->bottom) + dy;
	if (ybot > ymax)
		ybot = ymax;

	__DBG(("%s: dx=(%d, %d), y=[%d, %d] +%d, -%d\n",
	       __FUNCTION__, dx, dy, ytop, ybot,
	       ((int64_t)(ytop - dy)<<16) / SAMPLES_Y - edge->p1.y,
	       ((int64_t)(ybot - dy)<<16) / SAMPLES_Y - edge->p2.y));

	e->ytop = ytop;
	e->height_left = ybot - ytop;
	/* Nothing left after clipping: leave the slot uncommitted. */
	if (e->height_left <= 0)
		return;

	if (pixman_fixed_to_grid_x(edge->p1.x) ==
	    pixman_fixed_to_grid_x(edge->p2.x)) {
		/* Vertical on the sample grid: x never changes, and dy == 0
		 * tells the row walkers not to advance the edge. */
		e->cell = pixman_fixed_to_grid_x(edge->p1.x) + dx;
		e->x.quo = e->x.rem = 0;
		e->dxdy.quo = e->dxdy.rem = 0;
		e->dy = 0;
	} else {
		int64_t Ey, Ex, tmp;

		__DBG(("%s: add diagonal edge (%d, %d) -> (%d, %d) [(%d, %d)]\n",

		       __FUNCTION__,
		       edge->p1.x, edge->p1.y,
		       edge->p2.x, edge->p2.y,
		       edge->p2.x - edge->p1.x,
		       edge->p2.y - edge->p1.y));

		/* Ey is the common denominator of all remainders; with it,
		 * Ex*(2<<16)/Ey is the change in grid x per sample row. */
		Ex = ((int64_t)edge->p2.x - edge->p1.x) * SAMPLES_X;
		Ey = ((int64_t)edge->p2.y - edge->p1.y) * SAMPLES_Y * (2 << 16);
		assert(Ey > 0);
		e->dxdy.quo = Ex * (2 << 16) / Ey;
		e->dxdy.rem = Ex * (2 << 16) % Ey;

		/* x offset from p1.x at the vertical centre of the first
		 * sample row: twice the distance from p1.y to that centre,
		 * scaled by the slope. */
		tmp = (int64_t)(2*(ytop - dy) + 1) << 16;
		tmp -= (int64_t)edge->p1.y * SAMPLES_Y*2;
		tmp *= Ex;
		e->x.quo = tmp / Ey;
		e->x.rem = tmp % Ey;

		/* Add p1.x itself: whole grid units (and the translation) go
		 * into the quotient, the 16-bit fraction is rescaled onto the
		 * Ey denominator and added to the remainder. */
		tmp = (int64_t)edge->p1.x * SAMPLES_X;
		e->x.quo += (tmp >> 16) + dx;
		tmp &= (1 << 16) - 1;
		if (tmp) {
			if (Ey < INT64_MAX >> 16)
				tmp = (tmp * Ey) / (1 << 16);
			else /* Handle overflow by losing precision */
				tmp = tmp * (Ey / (1 << 16));
			e->x.rem += tmp;
		}

		/* Bring the remainder back into [0, Ey). */
		if (e->x.rem < 0) {
			e->x.quo--;
			e->x.rem += Ey;
		} else if (e->x.rem >= Ey) {
			e->x.quo++;
			e->x.rem -= Ey;
		}
		assert(e->x.rem >= 0 && e->x.rem < Ey);

		e->dy = Ey;
		e->cell = edge_to_cell(e);

		__DBG(("%s: x=%lld.%lld + %lld.%lld %lld -> cell=%d\n",
		       __FUNCTION__,
		       (long long)e->x.quo,
		       (long long)e->x.rem,
		       (long long)e->dxdy.quo,
		       (long long)e->dxdy.rem,
		       (long long)Ey, e->cell));
	}

	e->dir = dir;

	_polygon_insert_edge_into_its_y_bucket(polygon, e);
	polygon->num_edges++;
}
605 
606 inline static void
polygon_add_line(struct polygon * polygon,const xPointFixed * p1,const xPointFixed * p2,int dx,int dy)607 polygon_add_line(struct polygon *polygon,
608 		 const xPointFixed *p1,
609 		 const xPointFixed *p2,
610 		 int dx, int dy)
611 {
612 	struct edge *e = &polygon->edges[polygon->num_edges];
613 	int ytop, ybot;
614 
615 	if (p1->y == p2->y)
616 		return;
617 
618 	__DBG(("%s: line=(%d, %d), (%d, %d)\n",
619 	       __FUNCTION__, (int)p1->x, (int)p1->y, (int)p2->x, (int)p2->y));
620 
621 	e->dir = 1;
622 	if (p2->y < p1->y) {
623 		const xPointFixed *t;
624 
625 		e->dir = -1;
626 
627 		t = p1;
628 		p1 = p2;
629 		p2 = t;
630 	}
631 
632 	ytop = pixman_fixed_to_grid_y(p1->y) + dy;
633 	if (ytop < polygon->ymin)
634 		ytop = polygon->ymin;
635 
636 	ybot = pixman_fixed_to_grid_y(p2->y) + dy;
637 	if (ybot > polygon->ymax)
638 		ybot = polygon->ymax;
639 
640 	if (ybot <= ytop)
641 		return;
642 
643 	e->ytop = ytop;
644 	e->height_left = ybot - ytop;
645 	if (e->height_left <= 0)
646 		return;
647 
648 	__DBG(("%s: edge height=%d\n", __FUNCTION__, e->dir * e->height_left));
649 
650 	if (pixman_fixed_to_grid_x(p1->x) == pixman_fixed_to_grid_x(p2->x)) {
651 		e->cell = pixman_fixed_to_grid_x(p1->x);
652 		e->x.quo = e->x.rem = 0;
653 		e->dxdy.quo = e->dxdy.rem = 0;
654 		e->dy = 0;
655 	} else {
656 		int64_t Ey, Ex, tmp;
657 
658 		__DBG(("%s: add diagonal line (%d, %d) -> (%d, %d) [(%d, %d)]\n",
659 
660 		       __FUNCTION__,
661 		       p1->x, p1->y,
662 		       p2->x, p2->y,
663 		       p2->x - p1->x,
664 		       p2->y - p1->y));
665 
666 		Ex = ((int64_t)p2->x - p1->x) * SAMPLES_X;
667 		Ey = ((int64_t)p2->y - p1->y) * SAMPLES_Y * (2 << 16);
668 		e->dxdy.quo = Ex * (2 << 16) / Ey;
669 		e->dxdy.rem = Ex * (2 << 16) % Ey;
670 
671 		tmp = (int64_t)(2*(ytop - dy) + 1) << 16;
672 		tmp -= (int64_t)p1->y * SAMPLES_Y*2;
673 		tmp *= Ex;
674 		e->x.quo = tmp / Ey;
675 		e->x.rem = tmp % Ey;
676 
677 		tmp = (int64_t)p1->x * SAMPLES_X;
678 		e->x.quo += (tmp >> 16) + dx;
679 		e->x.rem += ((tmp & ((1 << 16) - 1)) * Ey) / (1 << 16);
680 
681 		if (e->x.rem < 0) {
682 			e->x.quo--;
683 			e->x.rem += Ey;
684 		} else if (e->x.rem >= Ey) {
685 			e->x.quo++;
686 			e->x.rem -= Ey;
687 		}
688 		assert(e->x.rem >= 0 && e->x.rem < Ey);
689 
690 		e->dy = Ey;
691 		e->cell = edge_to_cell(e);
692 
693 		__DBG(("%s: x=%lld.%lld + %lld.%lld %lld -> cell=%d\n",
694 		       __FUNCTION__,
695 		       (long long)e->x.quo,
696 		       (long long)e->x.rem,
697 		       (long long)e->dxdy.quo,
698 		       (long long)e->dxdy.rem,
699 		       (long long)Ey, e->cell));
700 	}
701 
702 	if (polygon->num_edges > 0) {
703 		struct edge *prev = &polygon->edges[polygon->num_edges-1];
704 		/* detect degenerate triangles inserted into tristrips */
705 		if (e->dir == -prev->dir &&
706 		    e->ytop == prev->ytop &&
707 		    e->height_left == prev->height_left &&
708 		    e->cell == prev->cell &&
709 		    e->x.quo == prev->x.quo &&
710 		    e->x.rem == prev->x.rem &&
711 		    e->dxdy.quo == prev->dxdy.quo &&
712 		    e->dxdy.rem == prev->dxdy.rem) {
713 			unsigned ix = EDGE_Y_BUCKET_INDEX(e->ytop,
714 							  polygon->ymin);
715 			polygon->y_buckets[ix] = prev->next;
716 			polygon->num_edges--;
717 			return;
718 		}
719 	}
720 
721 	_polygon_insert_edge_into_its_y_bucket(polygon, e);
722 	polygon->num_edges++;
723 }
724 
725 static void
active_list_reset(struct active_list * active)726 active_list_reset(struct active_list *active)
727 {
728 	active->head.height_left = INT_MAX;
729 	active->head.x.quo = INT_MIN;
730 	active->head.cell = INT_MIN;
731 	active->head.dy = 0;
732 	active->head.prev = NULL;
733 	active->head.next = &active->tail;
734 	active->tail.prev = &active->head;
735 	active->tail.next = NULL;
736 	active->tail.x.quo = INT_MAX;
737 	active->tail.cell = INT_MAX;
738 	active->tail.height_left = INT_MAX;
739 	active->tail.dy = 0;
740 }
741 
/* Merge two lists of edges, each already sorted by ascending cell, into
 * one sorted list and return its head.
 *
 * head_a must not be NULL; head_b may be, in which case head_a is returned
 * unchanged.  Both the next and the prev links are rewritten while
 * merging; the prev link of the resulting head is taken from head_a. */
static struct edge *
merge_sorted_edges(struct edge *head_a, struct edge *head_b)
{
	struct edge *head, **next, *prev;
	int32_t x;

	if (head_b == NULL)
		return head_a;

	prev = head_a->prev;
	next = &head;
	if (head_a->cell <= head_b->cell) {
		head = head_a;
	} else {
		head = head_b;
		head_b->prev = prev;
		/* The merged list begins with a run from b: enter the loop
		 * at its second half. */
		goto start_with_b;
	}

	do {
		/* Consume the run of a that sorts no later than the head of
		 * b, then splice b in behind it. */
		x = head_b->cell;
		while (head_a != NULL && head_a->cell <= x) {
			prev = head_a;
			next = &head_a->next;
			head_a = head_a->next;
		}

		head_b->prev = prev;
		*next = head_b;
		if (head_a == NULL)
			return head;

start_with_b:
		/* The same with the roles of a and b exchanged. */
		x = head_a->cell;
		while (head_b != NULL && head_b->cell <= x) {
			prev = head_b;
			next = &head_b->next;
			head_b = head_b->next;
		}

		head_a->prev = prev;
		*next = head_a;
		if (head_b == NULL)
			return head;
	} while (1);
}
788 
/* Sort (part of) a list of edges by ascending cell, using a merge sort.
 *
 * list:     the list to sort; must not be NULL.
 * level:    recursion limit.  At most the first 2^(level+1) elements of
 *           the list are sorted.
 * head_out: receives the head of the sorted list.
 *
 * Returns the head of the elements that were not processed, or NULL if
 * the sorted list contains every element of the input. */
static struct edge *
sort_edges(struct edge  *list,
	   unsigned int  level,
	   struct edge **head_out)
{
	struct edge *head_other, *remaining;
	unsigned int i;

	head_other = list->next;
	if (head_other == NULL) {
		/* A single element is already sorted. */
		*head_out = list;
		return NULL;
	}

	/* Sort the first two elements by hand. */
	remaining = head_other->next;
	if (list->cell <= head_other->cell) {
		*head_out = list;
		head_other->next = NULL;
	} else {
		*head_out = head_other;
		head_other->prev = list->prev;
		head_other->next = list;
		list->prev = head_other;
		list->next = NULL;
	}

	/* Repeatedly sort a chunk of the remainder and merge it in; each
	 * round handles a chunk as long as everything sorted so far. */
	for (i = 0; i < level && remaining; i++) {
		remaining = sort_edges(remaining, i, &head_other);
		*head_out = merge_sorted_edges(*head_out, head_other);
	}

	return remaining;
}
822 
/* Remove pairs of neighbouring edges that cancel each other: opposite
 * direction but identical remaining height, position and slope.
 *
 * `edges` must not be NULL.  Returns the new head of the list, which is
 * NULL if every edge was removed. */
static struct edge *filter(struct edge *edges)
{
	struct edge *e;

	e = edges;
	while (e->next) {
		struct edge *n = e->next;
		if (e->dir == -n->dir &&
		    e->height_left == n->height_left &&
		    e->cell == n->cell &&
		    e->x.quo == n->x.quo &&
		    e->x.rem == n->x.rem &&
		    e->dxdy.quo == n->dxdy.quo &&
		    e->dxdy.rem == n->dxdy.rem) {
			/* Unlink both e and n. */
			if (e->prev)
				e->prev->next = n->next;
			else
				edges = n->next;
			if (n->next)
				n->next->prev = e->prev;
			else
				break; /* the pair was the end of the list */

			e = n->next;
		} else
			e = n;
	}

	return edges;
}
853 
/* Sort the `unsorted` edges by cell, drop pairs that cancel each other
 * out, and merge what is left into the sorted list `head`.  Returns the
 * head of the merged list. */
static struct edge *
merge_unsorted_edges(struct edge *head, struct edge *unsorted)
{
	struct edge *sorted;

	sort_edges(unsorted, UINT_MAX, &sorted);
	sorted = filter(sorted);

	return merge_sorted_edges(head, sorted);
}
860 
861 /* Test if the edges on the active list can be safely advanced by a
862  * full row without intersections or any edges ending. */
863 inline static int
can_full_step(struct active_list * active)864 can_full_step(struct active_list *active)
865 {
866 	const struct edge *e;
867 	int min_height = INT_MAX;
868 
869 	assert(active->head.next != &active->tail);
870 	for (e = active->head.next; &active->tail != e; e = e->next) {
871 		assert(e->height_left > 0);
872 
873 		if (e->dy != 0)
874 			return 0;
875 
876 		if (e->height_left < min_height) {
877 			min_height = e->height_left;
878 			if (min_height < SAMPLES_Y)
879 				return 0;
880 		}
881 	}
882 
883 	return min_height;
884 }
885 
886 inline static void
merge_edges(struct active_list * active,struct edge * edges)887 merge_edges(struct active_list *active, struct edge *edges)
888 {
889 	active->head.next = merge_unsorted_edges(active->head.next, edges);
890 }
891 
892 inline static void
fill_buckets(struct active_list * active,struct edge * edge,int ymin,struct edge ** buckets)893 fill_buckets(struct active_list *active,
894 	     struct edge *edge,
895 	     int ymin,
896 	     struct edge **buckets)
897 {
898 	while (edge) {
899 		struct edge *next = edge->next;
900 		struct edge **b = &buckets[edge->ytop - ymin];
901 		if (*b)
902 			(*b)->prev = edge;
903 		edge->next = *b;
904 		edge->prev = NULL;
905 		*b = edge;
906 		edge = next;
907 	}
908 }
909 
/* Process a single sample row using the non-zero winding rule.
 *
 * Walks the active edges from left to right, accumulating the winding
 * number, and adds a sub-span to `coverages` each time the winding returns
 * to zero.  In the same pass every edge is stepped down one sample row:
 * finished edges are unlinked, diagonal edges are advanced, and an edge
 * that moved to the left of an earlier one is re-inserted at its sorted
 * position. */
inline static void
nonzero_subrow(struct active_list *active, struct cell_list *coverages)
{
	struct edge *edge = active->head.next;
	int prev_x = INT_MIN;
	int winding = 0, xstart = edge->cell;

	cell_list_rewind(coverages);

	while (&active->tail != edge) {
		struct edge *next = edge->next;

		winding += edge->dir;
		/* Close the span when leaving the polygon, unless the next
		 * edge starts at the very same position and so would only
		 * re-open it immediately. */
		if (0 == winding && edge->next->cell != edge->cell) {
			cell_list_add_subspan(coverages, xstart, edge->cell);
			xstart = edge->next->cell;
		}

		assert(edge->height_left > 0);
		if (--edge->height_left) {
			if (edge->dy)
				edge->cell = edge_advance(edge);

			if (edge->cell < prev_x) {
				/* The edge crossed a neighbour: unlink it and
				 * walk backwards to its sorted position.  The
				 * head sentinel (INT_MIN) stops the walk. */
				struct edge *pos = edge->prev;
				pos->next = next;
				next->prev = pos;
				do {
					pos = pos->prev;
				} while (edge->cell < pos->cell);
				pos->next->prev = edge;
				edge->next = pos->next;
				edge->prev = pos;
				pos->next = edge;
			} else
				prev_x = edge->cell;
		} else {
			/* The edge ends on this row: drop it from the list. */
			edge->prev->next = next;
			next->prev = edge->prev;
		}

		edge = next;
	}
}
954 
/* Process a whole pixel row (SAMPLES_Y sample rows) at once using the
 * non-zero winding rule.
 *
 * Edges are paired up from left to right: each span runs from an edge to
 * the next edge at which the winding number returns to zero, and is added
 * to `coverages` at full row height.  Every edge visited has SAMPLES_Y
 * rows taken off its height and is unlinked once it reaches zero.  Edge
 * positions are not advanced, so this presumably relies on the caller
 * having checked can_full_step() first -- confirm against the caller. */
static void
nonzero_row(struct active_list *active, struct cell_list *coverages)
{
	struct edge *left = active->head.next;

	while (&active->tail != left) {
		struct edge *right;
		int winding = left->dir;

		left->height_left -= SAMPLES_Y;
		assert(left->height_left >= 0);
		if (!left->height_left) {
			left->prev->next = left->next;
			left->next->prev = left->prev;
		}

		/* Unlinking leaves the edge's own links intact, so the walk
		 * can continue through an edge that was just removed. */
		right = left->next;
		do {
			right->height_left -= SAMPLES_Y;
			assert(right->height_left >= 0);
			if (!right->height_left) {
				right->prev->next = right->next;
				right->next->prev = right->prev;
			}

			winding += right->dir;
			if (0 == winding)
				break;

			right = right->next;
		} while (1);

		cell_list_add_span(coverages, left->cell, right->cell);
		left = right->next;
	}
}
991 
992 static void
tor_fini(struct tor * converter)993 tor_fini(struct tor *converter)
994 {
995 	polygon_fini(converter->polygon);
996 	cell_list_fini(converter->coverages);
997 }
998 
999 static bool
tor_init(struct tor * converter,const BoxRec * box,int num_edges)1000 tor_init(struct tor *converter, const BoxRec *box, int num_edges)
1001 {
1002 	__DBG(("%s: (%d, %d),(%d, %d) x (%d, %d), num_edges=%d\n",
1003 	       __FUNCTION__,
1004 	       box->x1, box->y1, box->x2, box->y2,
1005 	       SAMPLES_X, SAMPLES_Y,
1006 	       num_edges));
1007 
1008 	converter->extents = *box;
1009 
1010 	if (!cell_list_init(converter->coverages, box->x1, box->x2))
1011 		return false;
1012 
1013 	active_list_reset(converter->active);
1014 	if (!polygon_init(converter->polygon, num_edges,
1015 			  (int)box->y1 * SAMPLES_Y, (int)box->y2 * SAMPLES_Y)) {
1016 		cell_list_fini(converter->coverages);
1017 		return false;
1018 	}
1019 
1020 	return true;
1021 }
1022 
1023 static void
tor_add_trapezoid(struct tor * tor,const xTrapezoid * t,int dx,int dy)1024 tor_add_trapezoid(struct tor *tor, const xTrapezoid *t, int dx, int dy)
1025 {
1026 	if (!xTrapezoidValid(t)) {
1027 		__DBG(("%s: skipping invalid trapezoid: top=%d, bottom=%d, left=(%d, %d), (%d, %d), right=(%d, %d), (%d, %d)\n",
1028 		       __FUNCTION__,
1029 		       t->top, t->bottom,
1030 		       t->left.p1.x, t->left.p1.y,
1031 		       t->left.p2.x, t->left.p2.y,
1032 		       t->right.p1.x, t->right.p1.y,
1033 		       t->right.p2.x, t->right.p2.y));
1034 		return;
1035 	}
1036 	polygon_add_edge(tor->polygon, t, &t->left, 1, dx, dy);
1037 	polygon_add_edge(tor->polygon, t, &t->right, -1, dx, dy);
1038 }
1039 
1040 static void
step_edges(struct active_list * active,int count)1041 step_edges(struct active_list *active, int count)
1042 {
1043 	struct edge *edge;
1044 
1045 	count *= SAMPLES_Y;
1046 	for (edge = active->head.next; edge != &active->tail; edge = edge->next) {
1047 		edge->height_left -= count;
1048 		assert(edge->height_left >= 0);
1049 		if (!edge->height_left) {
1050 			edge->prev->next = edge->next;
1051 			edge->next->prev = edge->prev;
1052 		}
1053 	}
1054 }
1055 
1056 static void
tor_blt_span(struct sna * sna,struct sna_composite_spans_op * op,pixman_region16_t * clip,const BoxRec * box,int coverage)1057 tor_blt_span(struct sna *sna,
1058 	     struct sna_composite_spans_op *op,
1059 	     pixman_region16_t *clip,
1060 	     const BoxRec *box,
1061 	     int coverage)
1062 {
1063 	__DBG(("%s: %d -> %d @ %d\n", __FUNCTION__, box->x1, box->x2, coverage));
1064 
1065 	op->box(sna, op, box, AREA_TO_FLOAT(coverage));
1066 	apply_damage_box(&op->base, box);
1067 }
1068 
1069 static void
tor_blt_span__no_damage(struct sna * sna,struct sna_composite_spans_op * op,pixman_region16_t * clip,const BoxRec * box,int coverage)1070 tor_blt_span__no_damage(struct sna *sna,
1071 			struct sna_composite_spans_op *op,
1072 			pixman_region16_t *clip,
1073 			const BoxRec *box,
1074 			int coverage)
1075 {
1076 	__DBG(("%s: %d -> %d @ %d\n", __FUNCTION__, box->x1, box->x2, coverage));
1077 
1078 	op->box(sna, op, box, AREA_TO_FLOAT(coverage));
1079 }
1080 
1081 static void
tor_blt_span_clipped(struct sna * sna,struct sna_composite_spans_op * op,pixman_region16_t * clip,const BoxRec * box,int coverage)1082 tor_blt_span_clipped(struct sna *sna,
1083 		     struct sna_composite_spans_op *op,
1084 		     pixman_region16_t *clip,
1085 		     const BoxRec *box,
1086 		     int coverage)
1087 {
1088 	pixman_region16_t region;
1089 	float opacity;
1090 
1091 	opacity = AREA_TO_FLOAT(coverage);
1092 	__DBG(("%s: %d -> %d @ %f\n", __FUNCTION__, box->x1, box->x2, opacity));
1093 
1094 	pixman_region_init_rects(&region, box, 1);
1095 	RegionIntersect(&region, &region, clip);
1096 	if (region_num_rects(&region)) {
1097 		op->boxes(sna, op,
1098 			  region_rects(&region),
1099 			  region_num_rects(&region),
1100 			  opacity);
1101 		apply_damage(&op->base, &region);
1102 	}
1103 	pixman_region_fini(&region);
1104 }
1105 
1106 static void
tor_blt(struct sna * sna,struct tor * converter,struct sna_composite_spans_op * op,pixman_region16_t * clip,void (* span)(struct sna * sna,struct sna_composite_spans_op * op,pixman_region16_t * clip,const BoxRec * box,int coverage),int y,int height,int unbounded)1107 tor_blt(struct sna *sna,
1108 	struct tor *converter,
1109 	struct sna_composite_spans_op *op,
1110 	pixman_region16_t *clip,
1111 	void (*span)(struct sna *sna,
1112 		     struct sna_composite_spans_op *op,
1113 		     pixman_region16_t *clip,
1114 		     const BoxRec *box,
1115 		     int coverage),
1116 	int y, int height,
1117 	int unbounded)
1118 {
1119 	struct cell_list *cells = converter->coverages;
1120 	struct cell *cell;
1121 	BoxRec box;
1122 	int cover;
1123 
1124 	box.y1 = y + converter->extents.y1;
1125 	box.y2 = box.y1 + height;
1126 	assert(box.y2 <= converter->extents.y2);
1127 	box.x1 = converter->extents.x1;
1128 
1129 	/* Form the spans from the coverages and areas. */
1130 	cover = cells->head.covered_height*SAMPLES_X*2;
1131 	assert(cover >= 0);
1132 	for (cell = cells->head.next; cell != &cells->tail; cell = cell->next) {
1133 		int x = cell->x;
1134 
1135 		assert(x >= converter->extents.x1);
1136 		assert(x < converter->extents.x2);
1137 		__DBG(("%s: cell=(%d, %d, %d), cover=%d\n", __FUNCTION__,
1138 		       cell->x, cell->covered_height, cell->uncovered_area,
1139 		       cover));
1140 
1141 		if (cell->covered_height || cell->uncovered_area) {
1142 			box.x2 = x;
1143 			if (box.x2 > box.x1 && (unbounded || cover)) {
1144 				__DBG(("%s: end span (%d, %d)x(%d, %d) @ %d\n", __FUNCTION__,
1145 				       box.x1, box.y1,
1146 				       box.x2 - box.x1,
1147 				       box.y2 - box.y1,
1148 				       cover));
1149 				span(sna, op, clip, &box, cover);
1150 			}
1151 			box.x1 = box.x2;
1152 			cover += cell->covered_height*SAMPLES_X*2;
1153 		}
1154 
1155 		if (cell->uncovered_area) {
1156 			int area = cover - cell->uncovered_area;
1157 			box.x2 = x + 1;
1158 			if (unbounded || area) {
1159 				__DBG(("%s: new span (%d, %d)x(%d, %d) @ %d\n", __FUNCTION__,
1160 				       box.x1, box.y1,
1161 				       box.x2 - box.x1,
1162 				       box.y2 - box.y1,
1163 				       area));
1164 				span(sna, op, clip, &box, area);
1165 			}
1166 			box.x1 = box.x2;
1167 		}
1168 	}
1169 
1170 	box.x2 = converter->extents.x2;
1171 	if (box.x2 > box.x1 && (unbounded || cover)) {
1172 		__DBG(("%s: span (%d, %d)x(%d, %d) @ %d\n", __FUNCTION__,
1173 		       box.x1, box.y1,
1174 		       box.x2 - box.x1,
1175 		       box.y2 - box.y1,
1176 		       cover));
1177 		span(sna, op, clip, &box, cover);
1178 	}
1179 }
1180 
/*
 * Rasterise the polygon held by @converter and hand the coverage of each
 * band of pixel rows to @span (via tor_blt()).
 *
 * @sna, @op and @clip are not interpreted here; they are passed through
 * to @span unchanged.  If @unbounded is non-zero, zero-coverage areas
 * are reported as well, including runs of completely empty rows.
 *
 * Rows are indexed relative to converter->extents.y1; i is the first row
 * of the current band and j is one past its last row.
 */
flatten static void
tor_render(struct sna *sna,
	   struct tor *converter,
	   struct sna_composite_spans_op *op,
	   pixman_region16_t *clip,
	   void (*span)(struct sna *sna,
			struct sna_composite_spans_op *op,
			pixman_region16_t *clip,
			const BoxRec *box,
			int coverage),
	   int unbounded)
{
	struct polygon *polygon = converter->polygon;
	struct cell_list *coverages = converter->coverages;
	struct active_list *active = converter->active;
	struct edge *buckets[SAMPLES_Y] = { 0 };
	int16_t i, j, h = converter->extents.y2 - converter->extents.y1;

	__DBG(("%s: unbounded=%d\n", __FUNCTION__, unbounded));

	/* Render each pixel row. */
	for (i = 0; i < h; i = j) {
		int do_full_step = 0;

		j = i + 1;

		/* Determine if we can ignore this row or use the full pixel
		 * stepper. */
		if (polygon->y_buckets[i] == NULL) {
			if (active->head.next == &active->tail) {
				/* No active edges and none starting on this
				 * row: skip ahead to the next row with a
				 * non-NULL bucket.
				 * NOTE(review): the scan has no explicit
				 * bound; presumably polygon_init() leaves a
				 * non-NULL entry at index h -- confirm. */
				for (; polygon->y_buckets[j] == NULL; j++)
					;
				__DBG(("%s: no new edges and no exisiting edges, skipping, %d -> %d\n",
				       __FUNCTION__, i, j));

				assert(j <= h);
				if (unbounded) {
					BoxRec box;

					/* Report the whole empty band, at the
					 * full width of the extents, with zero
					 * coverage. */
					box = converter->extents;
					box.y1 += i;
					box.y2 = converter->extents.y1 + j;

					span(sna, op, clip, &box, 0);
				}
				continue;
			}

			/* NOTE(review): the result is used both as a flag and
			 * as a budget that is reduced by SAMPLES_Y per extra
			 * row below; presumably it is the number of sub-rows
			 * for which whole-row stepping remains valid --
			 * confirm against can_full_step(). */
			do_full_step = can_full_step(active);
		}

		__DBG(("%s: y=%d, do_full_step=%d, new edges=%d\n",
		       __FUNCTION__, i, do_full_step,
		       polygon->y_buckets[i] != NULL));
		if (do_full_step) {
			/* Accumulate coverage for one whole pixel row. */
			nonzero_row(active, coverages);

			/* Extend the band over following rows that start no
			 * new edges while the budget allows it. */
			while (polygon->y_buckets[j] == NULL &&
			       do_full_step >= 2*SAMPLES_Y) {
				do_full_step -= SAMPLES_Y;
				j++;
			}
			assert(j >= i + 1 && j <= h);
			/* Consume the additional rows on the active edges. */
			if (j != i + 1)
				step_edges(active, j - (i + 1));

			__DBG(("%s: vertical edges, full step (%d, %d)\n",
			       __FUNCTION__,  i, j));
		} else {
			int suby;

			/* Distribute the edges starting on this pixel row
			 * into per-sub-row buckets. */
			fill_buckets(active, polygon->y_buckets[i], (i+converter->extents.y1)*SAMPLES_Y, buckets);

			/* Subsample this row. */
			for (suby = 0; suby < SAMPLES_Y; suby++) {
				if (buckets[suby]) {
					merge_edges(active, buckets[suby]);
					/* Leave the bucket array clean for
					 * the next row. */
					buckets[suby] = NULL;
				}

				nonzero_subrow(active, coverages);
			}
		}

		assert(j > i);
		/* Emit the spans for rows [i, j) and start afresh. */
		tor_blt(sna, converter, op, clip, span, i, j-i, unbounded);
		cell_list_reset(coverages);
	}
}
1270 
1271 static void
inplace_row(struct active_list * active,uint8_t * row,int width)1272 inplace_row(struct active_list *active, uint8_t *row, int width)
1273 {
1274 	struct edge *left = active->head.next;
1275 
1276 	while (&active->tail != left) {
1277 		struct edge *right;
1278 		int winding = left->dir;
1279 		int lfx, rfx;
1280 		int lix, rix;
1281 
1282 		left->height_left -= SAMPLES_Y;
1283 		assert(left->height_left >= 0);
1284 		if (!left->height_left) {
1285 			left->prev->next = left->next;
1286 			left->next->prev = left->prev;
1287 		}
1288 
1289 		right = left->next;
1290 		do {
1291 			right->height_left -= SAMPLES_Y;
1292 			assert(right->height_left >= 0);
1293 			if (!right->height_left) {
1294 				right->prev->next = right->next;
1295 				right->next->prev = right->prev;
1296 			}
1297 
1298 			winding += right->dir;
1299 			if (0 == winding && right->cell != right->next->cell)
1300 				break;
1301 
1302 			right = right->next;
1303 		} while (1);
1304 
1305 		if (left->cell < 0) {
1306 			lix = lfx = 0;
1307 		} else if (left->cell >= width * SAMPLES_X) {
1308 			lix = width;
1309 			lfx = 0;
1310 		} else
1311 			SAMPLES_X_TO_INT_FRAC(left->cell, lix, lfx);
1312 
1313 		if (right->cell < 0) {
1314 			rix = rfx = 0;
1315 		} else if (right->cell >= width * SAMPLES_X) {
1316 			rix = width;
1317 			rfx = 0;
1318 		} else
1319 			SAMPLES_X_TO_INT_FRAC(right->cell, rix, rfx);
1320 		if (lix == rix) {
1321 			if (rfx != lfx) {
1322 				assert(lix < width);
1323 				row[lix] += (rfx-lfx) * SAMPLES_Y;
1324 			}
1325 		} else {
1326 			assert(lix < width);
1327 			if (lfx == 0)
1328 				row[lix] = 0xff;
1329 			else
1330 				row[lix] += 255 - lfx * SAMPLES_Y;
1331 
1332 			assert(rix <= width);
1333 			if (rfx) {
1334 				assert(rix < width);
1335 				row[rix] += rfx * SAMPLES_Y;
1336 			}
1337 
1338 			if (rix > ++lix) {
1339 				uint8_t *r = row + lix;
1340 				rix -= lix;
1341 #if 0
1342 				if (rix == 1)
1343 					*row = 0xff;
1344 				else
1345 					memset(row, 0xff, rix);
1346 #else
1347 				if ((uintptr_t)r & 1 && rix) {
1348 					*r++ = 0xff;
1349 					rix--;
1350 				}
1351 				if ((uintptr_t)r & 2 && rix >= 2) {
1352 					*(uint16_t *)r = 0xffff;
1353 					r += 2;
1354 					rix -= 2;
1355 				}
1356 				if ((uintptr_t)r & 4 && rix >= 4) {
1357 					*(uint32_t *)r = 0xffffffff;
1358 					r += 4;
1359 					rix -= 4;
1360 				}
1361 				while (rix >= 8) {
1362 					*(uint64_t *)r = 0xffffffffffffffff;
1363 					r += 8;
1364 					rix -= 8;
1365 				}
1366 				if (rix & 4) {
1367 					*(uint32_t *)r = 0xffffffff;
1368 					r += 4;
1369 				}
1370 				if (rix & 2) {
1371 					*(uint16_t *)r = 0xffff;
1372 					r += 2;
1373 				}
1374 				if (rix & 1)
1375 					*r = 0xff;
1376 #endif
1377 			}
1378 		}
1379 
1380 		left = right->next;
1381 	}
1382 }
1383 
1384 inline static void
inplace_subrow(struct active_list * active,int8_t * row,int width)1385 inplace_subrow(struct active_list *active, int8_t *row, int width)
1386 {
1387 	struct edge *edge = active->head.next;
1388 	int prev_x = INT_MIN;
1389 
1390 	while (&active->tail != edge) {
1391 		struct edge *next = edge->next;
1392 		int winding = edge->dir;
1393 		int lfx, rfx;
1394 		int lix, rix;
1395 
1396 		if (edge->cell < 0) {
1397 			lix = lfx = 0;
1398 		} else if (edge->cell >= width * SAMPLES_X) {
1399 			lix = width;
1400 			lfx = 0;
1401 		} else
1402 			SAMPLES_X_TO_INT_FRAC(edge->cell, lix, lfx);
1403 
1404 		assert(edge->height_left > 0);
1405 		if (--edge->height_left) {
1406 			if (edge->dy)
1407 				edge->cell = edge_advance(edge);
1408 
1409 			if (edge->cell < prev_x) {
1410 				struct edge *pos = edge->prev;
1411 				pos->next = next;
1412 				next->prev = pos;
1413 				do {
1414 					pos = pos->prev;
1415 				} while (edge->cell < pos->cell);
1416 				pos->next->prev = edge;
1417 				edge->next = pos->next;
1418 				edge->prev = pos;
1419 				pos->next = edge;
1420 			} else
1421 				prev_x = edge->cell;
1422 		} else {
1423 			edge->prev->next = next;
1424 			next->prev = edge->prev;
1425 		}
1426 
1427 		edge = next;
1428 		do {
1429 			next = edge->next;
1430 			winding += edge->dir;
1431 			if (0 == winding && edge->cell != next->cell)
1432 				break;
1433 
1434 			assert(edge->height_left > 0);
1435 			if (--edge->height_left) {
1436 				if (edge->dy)
1437 					edge->cell = edge_advance(edge);
1438 
1439 				if (edge->cell < prev_x) {
1440 					struct edge *pos = edge->prev;
1441 					pos->next = next;
1442 					next->prev = pos;
1443 					do {
1444 						pos = pos->prev;
1445 					} while (edge->cell < pos->cell);
1446 					pos->next->prev = edge;
1447 					edge->next = pos->next;
1448 					edge->prev = pos;
1449 					pos->next = edge;
1450 				} else
1451 					prev_x = edge->cell;
1452 			} else {
1453 				edge->prev->next = next;
1454 				next->prev = edge->prev;
1455 			}
1456 
1457 			edge = next;
1458 		} while (1);
1459 
1460 		if (edge->cell < 0) {
1461 			rix = rfx = 0;
1462 		} else if (edge->cell >= width * SAMPLES_X) {
1463 			rix = width;
1464 			rfx = 0;
1465 		} else
1466 			SAMPLES_X_TO_INT_FRAC(edge->cell, rix, rfx);
1467 
1468 		assert(edge->height_left > 0);
1469 		if (--edge->height_left) {
1470 			if (edge->dy)
1471 				edge->cell = edge_advance(edge);
1472 
1473 			if (edge->cell < prev_x) {
1474 				struct edge *pos = edge->prev;
1475 				pos->next = next;
1476 				next->prev = pos;
1477 				do {
1478 					pos = pos->prev;
1479 				} while (edge->cell < pos->cell);
1480 				pos->next->prev = edge;
1481 				edge->next = pos->next;
1482 				edge->prev = pos;
1483 				pos->next = edge;
1484 			} else
1485 				prev_x = edge->cell;
1486 		} else {
1487 			edge->prev->next = next;
1488 			next->prev = edge->prev;
1489 		}
1490 
1491 		edge = next;
1492 
1493 		__DBG(("%s: left=%d.%d, right=%d.%d\n", __FUNCTION__,
1494 		       lix, lfx, rix, rfx));
1495 		if (lix == rix) {
1496 			if (rfx != lfx) {
1497 				assert(lix < width);
1498 				row[lix] += (rfx-lfx);
1499 			}
1500 		} else {
1501 			assert(lix < width);
1502 			row[lix] += SAMPLES_X - lfx;
1503 
1504 			assert(rix <= width);
1505 			if (rfx) {
1506 				assert(rix < width);
1507 				row[rix] += rfx;
1508 			}
1509 
1510 			while (++lix < rix)
1511 				row[lix] += SAMPLES_X;
1512 		}
1513 	}
1514 }
1515 
/*
 * Rasterise the polygon held by @converter straight into the 8-bit
 * pixmap @scratch, one byte per pixel.
 *
 * Rows are written starting at row converter->extents.y1 of the pixmap;
 * the converter's extents must start at x == 0.  When scratch->usage_hint
 * is non-zero each row is first built in a local buffer and then copied
 * into the pixmap, otherwise it is built in place.
 */
flatten static void
tor_inplace(struct tor *converter, PixmapPtr scratch)
{
	uint8_t buf[TOR_INPLACE_SIZE];
	int i, j, h = converter->extents.y2 - converter->extents.y1;
	struct polygon *polygon = converter->polygon;
	struct active_list *active = converter->active;
	struct edge *buckets[SAMPLES_Y] = { 0 };
	uint8_t *row = scratch->devPrivate.ptr;
	int stride = scratch->devKind;
	int width = scratch->drawable.width;

	/* NOTE(review): buf is a local array, so "buf != NULL" is always
	 * true here. */
	__DBG(("%s: buf?=%d\n", __FUNCTION__, buf != NULL));
	assert(converter->extents.x1 == 0);
	assert(scratch->drawable.depth == 8);

	row += converter->extents.y1 * stride;

	/* Render each pixel row. */
	for (i = 0; i < h; i = j) {
		int do_full_step = 0;
		/* Where this row's coverage is accumulated.
		 * NOTE(review): width bytes are written through ptr, so the
		 * staging path assumes width <= TOR_INPLACE_SIZE -- confirm
		 * against callers. */
		void *ptr = scratch->usage_hint ? buf : row;

		j = i + 1;

		/* Determine if we can ignore this row or use the full pixel
		 * stepper. */
		if (!polygon->y_buckets[i]) {
			if (active->head.next == &active->tail) {
				/* Nothing active and nothing starting: find the
				 * next row with a non-NULL bucket.
				 * NOTE(review): the scan has no explicit bound;
				 * presumably polygon_init() leaves a non-NULL
				 * entry at index h -- confirm. */
				for (; !polygon->y_buckets[j]; j++)
					;
				__DBG(("%s: no new edges and no exisiting edges, skipping, %d -> %d\n",
				       __FUNCTION__, i, j));

				/* Clear all skipped rows, padding included. */
				memset(row, 0, stride*(j-i));
				row += stride*(j-i);
				continue;
			}

			/* NOTE(review): used both as a flag and as a budget
			 * reduced by SAMPLES_Y per replicated row below;
			 * confirm the exact meaning against can_full_step(). */
			do_full_step = can_full_step(active);
		}

		__DBG(("%s: y=%d, do_full_step=%d, new edges=%d\n",
		       __FUNCTION__, i, do_full_step,
		       polygon->y_buckets[i] != NULL));
		if (do_full_step) {
			memset(ptr, 0, width);
			inplace_row(active, ptr, width);
			if (row != ptr)
				memcpy(row, ptr, width);

			/* Following rows that start no new edges get a copy
			 * of the row just computed, while the budget lasts. */
			while (polygon->y_buckets[j] == NULL &&
			       do_full_step >= 2*SAMPLES_Y) {
				do_full_step -= SAMPLES_Y;
				row += stride;
				memcpy(row, ptr, width);
				j++;
			}
			/* Consume the replicated rows on the active edges. */
			if (j != i + 1)
				step_edges(active, j - (i + 1));

			__DBG(("%s: vertical edges, full step (%d, %d)\n",
			       __FUNCTION__,  i, j));
		} else {
			int suby;

			/* Distribute the edges starting on this pixel row
			 * into per-sub-row buckets. */
			fill_buckets(active, polygon->y_buckets[i], (i+converter->extents.y1)*SAMPLES_Y, buckets);

			/* Subsample this row. */
			memset(ptr, 0, width);
			for (suby = 0; suby < SAMPLES_Y; suby++) {
				if (buckets[suby]) {
					merge_edges(active, buckets[suby]);
					/* Leave the bucket array clean for
					 * the next row. */
					buckets[suby] = NULL;
				}

				inplace_subrow(active, ptr, width);
			}
			if (row != ptr)
				memcpy(row, ptr, width);
		}

		/* Step past the last row written in this iteration. */
		row += stride;
	}
}
1601 
operator_is_bounded(uint8_t op)1602 static int operator_is_bounded(uint8_t op)
1603 {
1604 	switch (op) {
1605 	case PictOpOver:
1606 	case PictOpOutReverse:
1607 	case PictOpAdd:
1608 		return true;
1609 	default:
1610 		return false;
1611 	}
1612 }
1613 
1614 static span_func_t
choose_span(struct sna_composite_spans_op * tmp,PicturePtr dst,PictFormatPtr maskFormat,RegionPtr clip)1615 choose_span(struct sna_composite_spans_op *tmp,
1616 	    PicturePtr dst,
1617 	    PictFormatPtr maskFormat,
1618 	    RegionPtr clip)
1619 {
1620 	span_func_t span;
1621 
1622 	assert(!is_mono(dst, maskFormat));
1623 	if (clip->data)
1624 		span = tor_blt_span_clipped;
1625 	else if (tmp->base.damage == NULL)
1626 		span = tor_blt_span__no_damage;
1627 	else
1628 		span = tor_blt_span;
1629 
1630 	return span;
1631 }
1632 
/* Work description handed to one span_thread() invocation. */
struct span_thread {
	struct sna *sna;
	/* Composite op whose thread_boxes hook receives the batched boxes. */
	const struct sna_composite_spans_op *op;
	/* All trapezoids of the request; each thread culls by its own band. */
	const xTrapezoid *traps;
	RegionPtr clip;
	/* Span callback passed to tor_render(). */
	span_func_t span;
	/* Pixel rectangle this thread rasterises. */
	BoxRec extents;
	/* dx, dy are forwarded to tor_add_trapezoid(); draw_y is subtracted
	 * from extents.y1/y2 before comparing against trapezoid top/bottom. */
	int dx, dy, draw_y;
	/* Number of entries in traps. */
	int ntrap;
	/* Passed to tor_render() as its unbounded flag. */
	bool unbounded;
};
1644 
/* Number of opacity boxes that fit into 8192 bytes. */
#define SPAN_THREAD_MAX_BOXES (8192/sizeof(struct sna_opacity_box))
/*
 * Per-thread batch of boxes awaiting submission through op->thread_boxes.
 * span_thread() passes a pointer to this structure to tor_render() cast
 * to a struct sna_composite_spans_op pointer, and the span_thread_*
 * callbacks cast it back.
 */
struct span_thread_boxes {
	const struct sna_composite_spans_op *op;
	/* Remaining clip boxes: [clip_start, clip_end). */
	const BoxRec *clip_start, *clip_end;
	/* Number of valid entries in boxes. */
	int num_boxes;
	struct sna_opacity_box boxes[SPAN_THREAD_MAX_BOXES];
};
1652 
span_thread_add_box(struct sna * sna,void * data,const BoxRec * box,float alpha)1653 static void span_thread_add_box(struct sna *sna, void *data,
1654 				const BoxRec *box, float alpha)
1655 {
1656 	struct span_thread_boxes *b = data;
1657 
1658 	__DBG(("%s: adding box with alpha=%f\n", __FUNCTION__, alpha));
1659 
1660 	if (unlikely(b->num_boxes == SPAN_THREAD_MAX_BOXES)) {
1661 		DBG(("%s: flushing %d boxes\n", __FUNCTION__, b->num_boxes));
1662 		b->op->thread_boxes(sna, b->op, b->boxes, b->num_boxes);
1663 		b->num_boxes = 0;
1664 	}
1665 
1666 	b->boxes[b->num_boxes].box = *box++;
1667 	b->boxes[b->num_boxes].alpha = alpha;
1668 	b->num_boxes++;
1669 	assert(b->num_boxes <= SPAN_THREAD_MAX_BOXES);
1670 }
1671 
1672 static void
span_thread_box(struct sna * sna,struct sna_composite_spans_op * op,pixman_region16_t * clip,const BoxRec * box,int coverage)1673 span_thread_box(struct sna *sna,
1674 		struct sna_composite_spans_op *op,
1675 		pixman_region16_t *clip,
1676 		const BoxRec *box,
1677 		int coverage)
1678 {
1679 	struct span_thread_boxes *b = (struct span_thread_boxes *)op;
1680 
1681 	__DBG(("%s: %d -> %d @ %d\n", __FUNCTION__, box->x1, box->x2, coverage));
1682 	if (b->num_boxes) {
1683 		struct sna_opacity_box *bb = &b->boxes[b->num_boxes-1];
1684 		if (bb->box.x1 == box->x1 &&
1685 		    bb->box.x2 == box->x2 &&
1686 		    bb->box.y2 == box->y1 &&
1687 		    bb->alpha == AREA_TO_FLOAT(coverage)) {
1688 			bb->box.y2 = box->y2;
1689 			__DBG(("%s: contracted double row: %d -> %d\n", __func__, bb->box.y1, bb->box.y2));
1690 			return;
1691 		}
1692 	}
1693 
1694 	span_thread_add_box(sna, op, box, AREA_TO_FLOAT(coverage));
1695 }
1696 
1697 static void
span_thread_clipped_box(struct sna * sna,struct sna_composite_spans_op * op,pixman_region16_t * clip,const BoxRec * box,int coverage)1698 span_thread_clipped_box(struct sna *sna,
1699 			struct sna_composite_spans_op *op,
1700 			pixman_region16_t *clip,
1701 			const BoxRec *box,
1702 			int coverage)
1703 {
1704 	struct span_thread_boxes *b = (struct span_thread_boxes *)op;
1705 	const BoxRec *c;
1706 
1707 	__DBG(("%s: %d -> %d @ %f\n", __FUNCTION__, box->x1, box->x2,
1708 	       AREA_TO_FLOAT(coverage)));
1709 
1710 	b->clip_start =
1711 		find_clip_box_for_y(b->clip_start, b->clip_end, box->y1);
1712 
1713 	c = b->clip_start;
1714 	while (c != b->clip_end) {
1715 		BoxRec clipped;
1716 
1717 		if (box->y2 <= c->y1)
1718 			break;
1719 
1720 		clipped = *box;
1721 		if (!box_intersect(&clipped, c++))
1722 			continue;
1723 
1724 		span_thread_add_box(sna, op, &clipped, AREA_TO_FLOAT(coverage));
1725 	}
1726 }
1727 
1728 static span_func_t
thread_choose_span(struct sna_composite_spans_op * tmp,PicturePtr dst,PictFormatPtr maskFormat,RegionPtr clip)1729 thread_choose_span(struct sna_composite_spans_op *tmp,
1730 		   PicturePtr dst,
1731 		   PictFormatPtr maskFormat,
1732 		   RegionPtr clip)
1733 {
1734 	span_func_t span;
1735 
1736 	if (tmp->base.damage) {
1737 		DBG(("%s: damaged -> no thread support\n", __FUNCTION__));
1738 		return NULL;
1739 	}
1740 
1741 	assert(!is_mono(dst, maskFormat));
1742 	assert(tmp->thread_boxes);
1743 	DBG(("%s: clipped? %d x %d\n", __FUNCTION__, clip->data != NULL, region_num_rects(clip)));
1744 	if (clip->data)
1745 		span = span_thread_clipped_box;
1746 	else
1747 		span = span_thread_box;
1748 
1749 	return span;
1750 }
1751 
1752 inline static void
span_thread_boxes_init(struct span_thread_boxes * boxes,const struct sna_composite_spans_op * op,const RegionRec * clip)1753 span_thread_boxes_init(struct span_thread_boxes *boxes,
1754 		       const struct sna_composite_spans_op *op,
1755 		       const RegionRec *clip)
1756 {
1757 	boxes->op = op;
1758 	boxes->clip_start = region_rects(clip);
1759 	boxes->clip_end = boxes->clip_start + region_num_rects(clip);
1760 	boxes->num_boxes = 0;
1761 }
1762 
1763 static void
span_thread(void * arg)1764 span_thread(void *arg)
1765 {
1766 	struct span_thread *thread = arg;
1767 	struct span_thread_boxes boxes;
1768 	struct tor tor;
1769 	const xTrapezoid *t;
1770 	int n, y1, y2;
1771 
1772 	if (!tor_init(&tor, &thread->extents, 2*thread->ntrap))
1773 		return;
1774 
1775 	span_thread_boxes_init(&boxes, thread->op, thread->clip);
1776 
1777 	y1 = thread->extents.y1 - thread->draw_y;
1778 	y2 = thread->extents.y2 - thread->draw_y;
1779 	for (n = thread->ntrap, t = thread->traps; n--; t++) {
1780 		if (pixman_fixed_integer_floor(t->top) >= y2 ||
1781 		    pixman_fixed_integer_ceil(t->bottom) <= y1)
1782 			continue;
1783 
1784 		tor_add_trapezoid(&tor, t, thread->dx, thread->dy);
1785 	}
1786 
1787 	tor_render(thread->sna, &tor,
1788 		   (struct sna_composite_spans_op *)&boxes, thread->clip,
1789 		   thread->span, thread->unbounded);
1790 
1791 	tor_fini(&tor);
1792 
1793 	if (boxes.num_boxes) {
1794 		DBG(("%s: flushing %d boxes\n", __FUNCTION__, boxes.num_boxes));
1795 		assert(boxes.num_boxes <= SPAN_THREAD_MAX_BOXES);
1796 		thread->op->thread_boxes(thread->sna, thread->op,
1797 					 boxes.boxes, boxes.num_boxes);
1798 	}
1799 }
1800 
1801 bool
precise_trapezoid_span_converter(struct sna * sna,CARD8 op,PicturePtr src,PicturePtr dst,PictFormatPtr maskFormat,unsigned int flags,INT16 src_x,INT16 src_y,int ntrap,xTrapezoid * traps)1802 precise_trapezoid_span_converter(struct sna *sna,
1803 				 CARD8 op, PicturePtr src, PicturePtr dst,
1804 				 PictFormatPtr maskFormat, unsigned int flags,
1805 				 INT16 src_x, INT16 src_y,
1806 				 int ntrap, xTrapezoid *traps)
1807 {
1808 	struct sna_composite_spans_op tmp;
1809 	pixman_region16_t clip;
1810 	int16_t dst_x, dst_y;
1811 	bool was_clear;
1812 	int dx, dy, n;
1813 	int num_threads;
1814 
1815 	if (NO_PRECISE)
1816 		return false;
1817 
1818 	if (!sna->render.check_composite_spans(sna, op, src, dst, 0, 0, flags)) {
1819 		DBG(("%s: fallback -- composite spans not supported\n",
1820 		     __FUNCTION__));
1821 		return false;
1822 	}
1823 
1824 	if (!trapezoids_bounds(ntrap, traps, &clip.extents))
1825 		return true;
1826 
1827 #if 1
1828 	if (((clip.extents.y2 - clip.extents.y1) | (clip.extents.x2 - clip.extents.x1)) < 32) {
1829 		DBG(("%s: fallback -- traps extents too small %dx%d\n", __FUNCTION__,
1830 		     clip.extents.y2 - clip.extents.y1,
1831 		     clip.extents.x2 - clip.extents.x1));
1832 		return false;
1833 	}
1834 #endif
1835 
1836 	DBG(("%s: extents (%d, %d), (%d, %d)\n",
1837 	     __FUNCTION__,
1838 	     clip.extents.x1, clip.extents.y1,
1839 	     clip.extents.x2, clip.extents.y2));
1840 
1841 	trapezoid_origin(&traps[0].left, &dst_x, &dst_y);
1842 
1843 	if (!sna_compute_composite_region(&clip,
1844 					  src, NULL, dst,
1845 					  src_x + clip.extents.x1 - dst_x,
1846 					  src_y + clip.extents.y1 - dst_y,
1847 					  0, 0,
1848 					  clip.extents.x1, clip.extents.y1,
1849 					  clip.extents.x2 - clip.extents.x1,
1850 					  clip.extents.y2 - clip.extents.y1)) {
1851 		DBG(("%s: trapezoids do not intersect drawable clips\n",
1852 		     __FUNCTION__)) ;
1853 		return true;
1854 	}
1855 
1856 	if (!sna->render.check_composite_spans(sna, op, src, dst,
1857 					       clip.extents.x2 - clip.extents.x1,
1858 					       clip.extents.y2 - clip.extents.y1,
1859 					       flags)) {
1860 		DBG(("%s: fallback -- composite spans not supported\n",
1861 		     __FUNCTION__));
1862 		return false;
1863 	}
1864 
1865 	dx = dst->pDrawable->x;
1866 	dy = dst->pDrawable->y;
1867 
1868 	DBG(("%s: after clip -- extents (%d, %d), (%d, %d), delta=(%d, %d) src -> (%d, %d)\n",
1869 	     __FUNCTION__,
1870 	     clip.extents.x1, clip.extents.y1,
1871 	     clip.extents.x2, clip.extents.y2,
1872 	     dx, dy,
1873 	     src_x + clip.extents.x1 - dst_x - dx,
1874 	     src_y + clip.extents.y1 - dst_y - dy));
1875 
1876 	was_clear = sna_drawable_is_clear(dst->pDrawable);
1877 	switch (op) {
1878 	case PictOpAdd:
1879 	case PictOpOver:
1880 		if (was_clear)
1881 			op = PictOpSrc;
1882 		break;
1883 	case PictOpIn:
1884 		if (was_clear)
1885 			return true;
1886 		break;
1887 	}
1888 
1889 	if (!sna->render.composite_spans(sna, op, src, dst,
1890 					 src_x + clip.extents.x1 - dst_x - dx,
1891 					 src_y + clip.extents.y1 - dst_y - dy,
1892 					 clip.extents.x1,  clip.extents.y1,
1893 					 clip.extents.x2 - clip.extents.x1,
1894 					 clip.extents.y2 - clip.extents.y1,
1895 					 flags, memset(&tmp, 0, sizeof(tmp)))) {
1896 		DBG(("%s: fallback -- composite spans render op not supported\n",
1897 		     __FUNCTION__));
1898 		return false;
1899 	}
1900 
1901 	dx *= SAMPLES_X;
1902 	dy *= SAMPLES_Y;
1903 
1904 	num_threads = 1;
1905 	if (!NO_GPU_THREADS &&
1906 	    (flags & COMPOSITE_SPANS_RECTILINEAR) == 0 &&
1907 	    tmp.thread_boxes &&
1908 	    thread_choose_span(&tmp, dst, maskFormat, &clip))
1909 		num_threads = sna_use_threads(clip.extents.x2-clip.extents.x1,
1910 					      clip.extents.y2-clip.extents.y1,
1911 					      8);
1912 	DBG(("%s: using %d threads\n", __FUNCTION__, num_threads));
1913 	if (num_threads == 1) {
1914 		struct tor tor;
1915 
1916 		if (!tor_init(&tor, &clip.extents, 2*ntrap))
1917 			goto skip;
1918 
1919 		for (n = 0; n < ntrap; n++) {
1920 			if (pixman_fixed_integer_floor(traps[n].top) + dst->pDrawable->y >= clip.extents.y2 ||
1921 			    pixman_fixed_integer_ceil(traps[n].bottom) + dst->pDrawable->y <= clip.extents.y1)
1922 				continue;
1923 
1924 			tor_add_trapezoid(&tor, &traps[n], dx, dy);
1925 		}
1926 
1927 		tor_render(sna, &tor, &tmp, &clip,
1928 			   choose_span(&tmp, dst, maskFormat, &clip),
1929 			   !was_clear && maskFormat && !operator_is_bounded(op));
1930 
1931 		tor_fini(&tor);
1932 	} else {
1933 		struct span_thread threads[num_threads];
1934 		int y, h;
1935 
1936 		DBG(("%s: using %d threads for span compositing %dx%d\n",
1937 		     __FUNCTION__, num_threads,
1938 		     clip.extents.x2 - clip.extents.x1,
1939 		     clip.extents.y2 - clip.extents.y1));
1940 
1941 		threads[0].sna = sna;
1942 		threads[0].op = &tmp;
1943 		threads[0].traps = traps;
1944 		threads[0].ntrap = ntrap;
1945 		threads[0].extents = clip.extents;
1946 		threads[0].clip = &clip;
1947 		threads[0].dx = dx;
1948 		threads[0].dy = dy;
1949 		threads[0].draw_y = dst->pDrawable->y;
1950 		threads[0].unbounded = !was_clear && maskFormat && !operator_is_bounded(op);
1951 		threads[0].span = thread_choose_span(&tmp, dst, maskFormat, &clip);
1952 
1953 		y = clip.extents.y1;
1954 		h = clip.extents.y2 - clip.extents.y1;
1955 		h = (h + num_threads - 1) / num_threads;
1956 		num_threads -= (num_threads-1) * h >= clip.extents.y2 - clip.extents.y1;
1957 
1958 		for (n = 1; n < num_threads; n++) {
1959 			threads[n] = threads[0];
1960 			threads[n].extents.y1 = y;
1961 			threads[n].extents.y2 = y += h;
1962 
1963 			sna_threads_run(n, span_thread, &threads[n]);
1964 		}
1965 
1966 		assert(y < threads[0].extents.y2);
1967 		threads[0].extents.y1 = y;
1968 		span_thread(&threads[0]);
1969 
1970 		sna_threads_wait();
1971 	}
1972 skip:
1973 	tmp.done(sna, &tmp);
1974 
1975 	REGION_UNINIT(NULL, &clip);
1976 	return true;
1977 }
1978 
1979 static void
tor_blt_mask(struct sna * sna,struct sna_composite_spans_op * op,pixman_region16_t * clip,const BoxRec * box,int coverage)1980 tor_blt_mask(struct sna *sna,
1981 	     struct sna_composite_spans_op *op,
1982 	     pixman_region16_t *clip,
1983 	     const BoxRec *box,
1984 	     int coverage)
1985 {
1986 	uint8_t *ptr = (uint8_t *)op;
1987 	int stride = (intptr_t)clip;
1988 	int h, w;
1989 
1990 	coverage = TO_ALPHA(coverage);
1991 	ptr += box->y1 * stride + box->x1;
1992 
1993 	h = box->y2 - box->y1;
1994 	w = box->x2 - box->x1;
1995 	if ((w | h) == 1) {
1996 		*ptr = coverage;
1997 	} else if (w == 1) {
1998 		do {
1999 			*ptr = coverage;
2000 			ptr += stride;
2001 		} while (--h);
2002 	} else do {
2003 		memset(ptr, coverage, w);
2004 		ptr += stride;
2005 	} while (--h);
2006 }
2007 
/* Work description handed to one mask_thread() invocation. */
struct mask_thread {
	/* 8-bit pixmap receiving the rasterised mask. */
	PixmapPtr scratch;
	/* All trapezoids of the request; each thread culls by its own band. */
	const xTrapezoid *traps;
	/* Rectangle of the mask this thread rasterises. */
	BoxRec extents;
	/* dx, dy are forwarded to tor_add_trapezoid(); dst_y is added to
	 * extents.y1/y2 before comparing against trapezoid top/bottom. */
	int dx, dy, dst_y;
	/* Number of entries in traps. */
	int ntrap;
};
2015 
2016 static void
mask_thread(void * arg)2017 mask_thread(void *arg)
2018 {
2019 	struct mask_thread *thread = arg;
2020 	struct tor tor;
2021 	const xTrapezoid *t;
2022 	int n, y1, y2;
2023 
2024 	if (!tor_init(&tor, &thread->extents, 2*thread->ntrap))
2025 		return;
2026 
2027 	y1 = thread->extents.y1 + thread->dst_y;
2028 	y2 = thread->extents.y2 + thread->dst_y;
2029 	for (n = thread->ntrap, t = thread->traps; n--; t++) {
2030 		if (pixman_fixed_integer_floor(t->top) >= y2 ||
2031 		    pixman_fixed_integer_ceil(t->bottom) <= y1)
2032 			continue;
2033 
2034 		tor_add_trapezoid(&tor, t, thread->dx, thread->dy);
2035 	}
2036 
2037 	if (thread->extents.x2 <= TOR_INPLACE_SIZE) {
2038 		tor_inplace(&tor, thread->scratch);
2039 	} else {
2040 		tor_render(NULL, &tor,
2041 			   thread->scratch->devPrivate.ptr,
2042 			   (void *)(intptr_t)thread->scratch->devKind,
2043 			   tor_blt_mask,
2044 			   true);
2045 	}
2046 
2047 	tor_fini(&tor);
2048 }
2049 
/*
 * Rasterise the trapezoids into a temporary 8-bit scratch pixmap, wrap it
 * in an a8 picture and composite src through that mask onto dst with
 * CompositePicture().
 *
 * When maskFormat is NULL and there is more than one trapezoid, each
 * trapezoid is handled separately by recursing with ntrap == 1.
 *
 * Returns false if NO_PRECISE is set or a recursive call returns false.
 * Every other path returns true, including empty bounds and failure to
 * allocate the scratch pixmap or the rasteriser.
 */
bool
precise_trapezoid_mask_converter(CARD8 op, PicturePtr src, PicturePtr dst,
				 PictFormatPtr maskFormat, unsigned flags,
				 INT16 src_x, INT16 src_y,
				 int ntrap, xTrapezoid *traps)
{
	ScreenPtr screen = dst->pDrawable->pScreen;
	PixmapPtr scratch;
	PicturePtr mask;
	BoxRec extents;
	int num_threads;
	int16_t dst_x, dst_y;
	int dx, dy;
	int error, n;

	if (NO_PRECISE)
		return false;

	/* No mask format: rasterise and composite each trapezoid on its own. */
	if (maskFormat == NULL && ntrap > 1) {
		DBG(("%s: individual rasterisation requested\n",
		     __FUNCTION__));
		do {
			/* XXX unwind errors? */
			if (!precise_trapezoid_mask_converter(op, src, dst, NULL, flags,
							      src_x, src_y, 1, traps++))
				return false;
		} while (--ntrap);
		return true;
	}

	if (!trapezoids_bounds(ntrap, traps, &extents))
		return true;

	DBG(("%s: ntraps=%d, extents (%d, %d), (%d, %d)\n",
	     __FUNCTION__, ntrap, extents.x1, extents.y1, extents.x2, extents.y2));

	if (!sna_compute_composite_extents(&extents,
					   src, NULL, dst,
					   src_x, src_y,
					   0, 0,
					   extents.x1, extents.y1,
					   extents.x2 - extents.x1,
					   extents.y2 - extents.y1))
		return true;

	DBG(("%s: extents (%d, %d), (%d, %d)\n",
	     __FUNCTION__, extents.x1, extents.y1, extents.x2, extents.y2));

	/*
	 * Turn extents into a (0, 0)-(width, height) box describing the mask.
	 * dst_x/dst_y remember the drawable-relative position of the mask and
	 * dx/dy are the matching negative offsets, scaled by SAMPLES_X/Y, that
	 * are handed to tor_add_trapezoid().
	 */
	extents.y2 -= extents.y1;
	extents.x2 -= extents.x1;
	extents.x1 -= dst->pDrawable->x;
	extents.y1 -= dst->pDrawable->y;
	dst_x = extents.x1;
	dst_y = extents.y1;
	dx = -extents.x1 * SAMPLES_X;
	dy = -extents.y1 * SAMPLES_Y;
	extents.x1 = extents.y1 = 0;

	DBG(("%s: mask (%dx%d), dx=(%d, %d)\n",
	     __FUNCTION__, extents.x2, extents.y2, dx, dy));
	scratch = sna_pixmap_create_upload(screen,
					   extents.x2, extents.y2, 8,
					   KGEM_BUFFER_WRITE_INPLACE);
	if (!scratch)
		return true;

	DBG(("%s: created buffer %p, stride %d\n",
	     __FUNCTION__, scratch->devPrivate.ptr, scratch->devKind));

	/* Threads are only considered for non-rectilinear input. */
	num_threads = 1;
	if (!NO_GPU_THREADS &&
	    (flags & COMPOSITE_SPANS_RECTILINEAR) == 0)
		num_threads = sna_use_threads(extents.x2 - extents.x1,
					      extents.y2 - extents.y1,
					      4);
	if (num_threads == 1) {
		struct tor tor;

		if (!tor_init(&tor, &extents, 2*ntrap)) {
			sna_pixmap_destroy(scratch);
			return true;
		}

		/* Skip trapezoids lying entirely above or below the mask. */
		for (n = 0; n < ntrap; n++) {
			if (pixman_fixed_to_int(traps[n].top) - dst_y >= extents.y2 ||
			    pixman_fixed_to_int(traps[n].bottom) - dst_y < 0)
				continue;

			tor_add_trapezoid(&tor, &traps[n], dx, dy);
		}

		if (extents.x2 <= TOR_INPLACE_SIZE) {
			tor_inplace(&tor, scratch);
		} else {
			tor_render(NULL, &tor,
				   scratch->devPrivate.ptr,
				   (void *)(intptr_t)scratch->devKind,
				   tor_blt_mask,
				   true);
		}
		tor_fini(&tor);
	} else {
		struct mask_thread threads[num_threads];
		int y, h;

		DBG(("%s: using %d threads for mask compositing %dx%d\n",
		     __FUNCTION__, num_threads,
		     extents.x2 - extents.x1,
		     extents.y2 - extents.y1));

		/* threads[0] is the template copied into every other slot. */
		threads[0].scratch = scratch;
		threads[0].traps = traps;
		threads[0].ntrap = ntrap;
		threads[0].extents = extents;
		threads[0].dx = dx;
		threads[0].dy = dy;
		threads[0].dst_y = dst_y;

		/*
		 * Split the height into bands of h rows (rounded up). If the
		 * first num_threads-1 bands already cover the whole height,
		 * use one thread fewer so that no band is empty.
		 */
		y = extents.y1;
		h = extents.y2 - extents.y1;
		h = (h + num_threads - 1) / num_threads;
		num_threads -= (num_threads-1) * h >= extents.y2 - extents.y1;

		for (n = 1; n < num_threads; n++) {
			threads[n] = threads[0];
			threads[n].extents.y1 = y;
			threads[n].extents.y2 = y += h;

			sna_threads_run(n, mask_thread, &threads[n]);
		}

		/* The remaining rows are rasterised by the caller itself. */
		assert(y < threads[0].extents.y2);
		threads[0].extents.y1 = y;
		mask_thread(&threads[0]);

		sna_threads_wait();
	}

	mask = CreatePicture(0, &scratch->drawable,
			     PictureMatchFormat(screen, 8, PICT_a8),
			     0, 0, serverClient, &error);
	if (mask) {
		int16_t x0, y0;

		/* The source offset is taken relative to the first trapezoid's origin. */
		trapezoid_origin(&traps[0].left, &x0, &y0);

		CompositePicture(op, src, mask, dst,
				 src_x + dst_x - x0,
				 src_y + dst_y - y0,
				 0, 0,
				 dst_x, dst_y,
				 extents.x2, extents.y2);
		FreePicture(mask, 0);
	}
	sna_pixmap_destroy(scratch);

	return true;
}
2208 
/*
 * Destination description used by the in-place span writers (tor_blt_src,
 * tor_blt_in, tor_blt_add, tor_blt_lerp32), which receive it cast to a
 * struct sna_composite_spans_op pointer.
 */
struct inplace {
	uint8_t *ptr;	/* base address that box coordinates are offset from */
	uint32_t stride;	/* byte distance between consecutive rows */
	union {
		uint8_t opacity;	/* alpha scale applied by the 8-bit writers */
		uint32_t color;	/* 32-bit pixel value used by tor_blt_lerp32 */
	};
};
2217 
coverage_opacity(int coverage,uint8_t opacity)2218 static force_inline uint8_t coverage_opacity(int coverage, uint8_t opacity)
2219 {
2220 	coverage = TO_ALPHA(coverage);
2221 	return opacity == 255 ? coverage : mul_8_8(coverage, opacity);
2222 }
2223 
/*
 * State for tor_blt_clipped(): the underlying span writer and the range of
 * clip boxes that spans are intersected with.
 */
struct clipped_span {
	span_func_t span;	/* writer called for every clipped piece */
	const BoxRec *clip_start, *clip_end;	/* clip boxes [clip_start, clip_end); clip_start is updated by tor_blt_clipped() */
};
2228 
2229 static void
tor_blt_clipped(struct sna * sna,struct sna_composite_spans_op * op,pixman_region16_t * clip,const BoxRec * box,int coverage)2230 tor_blt_clipped(struct sna *sna,
2231 		struct sna_composite_spans_op *op,
2232 		pixman_region16_t *clip,
2233 		const BoxRec *box,
2234 		int coverage)
2235 {
2236 	struct clipped_span *cs = (struct clipped_span *)clip;
2237 	const BoxRec *c;
2238 
2239 	cs->clip_start =
2240 		find_clip_box_for_y(cs->clip_start, cs->clip_end, box->y1);
2241 
2242 	c = cs->clip_start;
2243 	while (c != cs->clip_end) {
2244 		BoxRec clipped;
2245 
2246 		if (box->y2 <= c->y1)
2247 			break;
2248 
2249 		clipped = *box;
2250 		if (!box_intersect(&clipped, c++))
2251 			continue;
2252 
2253 		cs->span(sna, op, NULL, &clipped, coverage);
2254 	}
2255 }
2256 
2257 inline static span_func_t
clipped_span(struct clipped_span * cs,span_func_t span,const RegionRec * clip)2258 clipped_span(struct clipped_span *cs,
2259 	     span_func_t span,
2260 	     const RegionRec *clip)
2261 {
2262 	if (clip->data) {
2263 		cs->span = span;
2264 		region_get_boxes(clip, &cs->clip_start, &cs->clip_end);
2265 		span = tor_blt_clipped;
2266 	}
2267 	return span;
2268 }
2269 
_tor_blt_src(struct inplace * in,const BoxRec * box,uint8_t v)2270 static void _tor_blt_src(struct inplace *in, const BoxRec *box, uint8_t v)
2271 {
2272 	uint8_t *ptr = in->ptr;
2273 	int h, w;
2274 
2275 	ptr += box->y1 * in->stride + box->x1;
2276 
2277 	h = box->y2 - box->y1;
2278 	w = box->x2 - box->x1;
2279 	if ((w | h) == 1) {
2280 		*ptr = v;
2281 	} else if (w == 1) {
2282 		do {
2283 			*ptr = v;
2284 			ptr += in->stride;
2285 		} while (--h);
2286 	} else do {
2287 		memset(ptr, v, w);
2288 		ptr += in->stride;
2289 	} while (--h);
2290 }
2291 
2292 static void
tor_blt_src(struct sna * sna,struct sna_composite_spans_op * op,pixman_region16_t * clip,const BoxRec * box,int coverage)2293 tor_blt_src(struct sna *sna,
2294 	    struct sna_composite_spans_op *op,
2295 	    pixman_region16_t *clip,
2296 	    const BoxRec *box,
2297 	    int coverage)
2298 {
2299 	struct inplace *in = (struct inplace *)op;
2300 
2301 	_tor_blt_src(in, box, coverage_opacity(coverage, in->opacity));
2302 }
2303 
2304 static void
tor_blt_in(struct sna * sna,struct sna_composite_spans_op * op,pixman_region16_t * clip,const BoxRec * box,int coverage)2305 tor_blt_in(struct sna *sna,
2306 	   struct sna_composite_spans_op *op,
2307 	   pixman_region16_t *clip,
2308 	   const BoxRec *box,
2309 	   int coverage)
2310 {
2311 	struct inplace *in = (struct inplace *)op;
2312 	uint8_t *ptr = in->ptr;
2313 	int h, w, i;
2314 
2315 	if (coverage == 0 || in->opacity == 0) {
2316 		_tor_blt_src(in, box, 0);
2317 		return;
2318 	}
2319 
2320 	coverage = coverage_opacity(coverage, in->opacity);
2321 	if (coverage == 0xff)
2322 		return;
2323 
2324 	ptr += box->y1 * in->stride + box->x1;
2325 
2326 	h = box->y2 - box->y1;
2327 	w = box->x2 - box->x1;
2328 	do {
2329 		for (i = 0; i < w; i++)
2330 			ptr[i] = mul_8_8(ptr[i], coverage);
2331 		ptr += in->stride;
2332 	} while (--h);
2333 }
2334 
2335 static void
tor_blt_add(struct sna * sna,struct sna_composite_spans_op * op,pixman_region16_t * clip,const BoxRec * box,int coverage)2336 tor_blt_add(struct sna *sna,
2337 	    struct sna_composite_spans_op *op,
2338 	    pixman_region16_t *clip,
2339 	    const BoxRec *box,
2340 	    int coverage)
2341 {
2342 	struct inplace *in = (struct inplace *)op;
2343 	uint8_t *ptr = in->ptr;
2344 	int h, w, v, i;
2345 
2346 	if (coverage == 0)
2347 		return;
2348 
2349 	coverage = coverage_opacity(coverage, in->opacity);
2350 	if (coverage == 0xff) {
2351 		_tor_blt_src(in, box, 0xff);
2352 		return;
2353 	}
2354 
2355 	ptr += box->y1 * in->stride + box->x1;
2356 
2357 	h = box->y2 - box->y1;
2358 	w = box->x2 - box->x1;
2359 	if ((w | h) == 1) {
2360 		v = coverage + *ptr;
2361 		*ptr = v >= 255 ? 255 : v;
2362 	} else {
2363 		do {
2364 			for (i = 0; i < w; i++) {
2365 				v = coverage + ptr[i];
2366 				ptr[i] = v >= 255 ? 255 : v;
2367 			}
2368 			ptr += in->stride;
2369 		} while (--h);
2370 	}
2371 }
2372 
2373 static void
tor_blt_lerp32(struct sna * sna,struct sna_composite_spans_op * op,pixman_region16_t * clip,const BoxRec * box,int coverage)2374 tor_blt_lerp32(struct sna *sna,
2375 	       struct sna_composite_spans_op *op,
2376 	       pixman_region16_t *clip,
2377 	       const BoxRec *box,
2378 	       int coverage)
2379 {
2380 	struct inplace *in = (struct inplace *)op;
2381 	uint32_t *ptr = (uint32_t *)in->ptr;
2382 	int stride = in->stride / sizeof(uint32_t);
2383 	int h, w, i;
2384 
2385 	if (coverage == 0)
2386 		return;
2387 
2388 	sigtrap_assert_active();
2389 	ptr += box->y1 * stride + box->x1;
2390 
2391 	h = box->y2 - box->y1;
2392 	w = box->x2 - box->x1;
2393 	if (coverage == GRID_AREA) {
2394 		if ((w | h) == 1) {
2395 			*ptr = in->color;
2396 		} else {
2397 			if (w < 16) {
2398 				do {
2399 					for (i = 0; i < w; i++)
2400 						ptr[i] = in->color;
2401 					ptr += stride;
2402 				} while (--h);
2403 			} else {
2404 				pixman_fill(ptr, stride, 32,
2405 					    0, 0, w, h, in->color);
2406 			}
2407 		}
2408 	} else {
2409 		coverage = TO_ALPHA(coverage);
2410 		if ((w | h) == 1) {
2411 			*ptr = lerp8x4(in->color, coverage, *ptr);
2412 		} else if (w == 1) {
2413 			do {
2414 				*ptr = lerp8x4(in->color, coverage, *ptr);
2415 				ptr += stride;
2416 			} while (--h);
2417 		} else{
2418 			do {
2419 				for (i = 0; i < w; i++)
2420 					ptr[i] = lerp8x4(in->color, coverage, ptr[i]);
2421 				ptr += stride;
2422 			} while (--h);
2423 		}
2424 	}
2425 }
2426 
/*
 * State for the pixman-based span writers pixmask_span_solid() and
 * pixmask_span(), which receive it cast to a struct sna_composite_spans_op
 * pointer.
 */
struct pixman_inplace {
	pixman_image_t *image, *source, *mask;	/* destination image, source image, and the mask image used by pixmask_span() */
	uint32_t color;	/* solid colour used by pixmask_span_solid() */
	uint32_t *bits;	/* pixel rewritten before every composite: the source pixel (solid) or the mask pixel */
	int dx, dy;	/* added to box coordinates to form destination coordinates */
	int sx, sy;	/* added to box coordinates to form source coordinates in pixmask_span(); the solid setup code points bits at sx instead */
	uint8_t op;	/* operator passed to pixman_image_composite() */
};
2435 
2436 static void
pixmask_span_solid(struct sna * sna,struct sna_composite_spans_op * op,pixman_region16_t * clip,const BoxRec * box,int coverage)2437 pixmask_span_solid(struct sna *sna,
2438 		   struct sna_composite_spans_op *op,
2439 		   pixman_region16_t *clip,
2440 		   const BoxRec *box,
2441 		   int coverage)
2442 {
2443 	struct pixman_inplace *pi = (struct pixman_inplace *)op;
2444 	if (coverage != GRID_AREA)
2445 		*pi->bits = mul_4x8_8(pi->color, TO_ALPHA(coverage));
2446 	else
2447 		*pi->bits = pi->color;
2448 	pixman_image_composite(pi->op, pi->source, NULL, pi->image,
2449 			       box->x1, box->y1,
2450 			       0, 0,
2451 			       pi->dx + box->x1, pi->dy + box->y1,
2452 			       box->x2 - box->x1, box->y2 - box->y1);
2453 }
2454 
2455 static void
pixmask_span(struct sna * sna,struct sna_composite_spans_op * op,pixman_region16_t * clip,const BoxRec * box,int coverage)2456 pixmask_span(struct sna *sna,
2457 	     struct sna_composite_spans_op *op,
2458 	     pixman_region16_t *clip,
2459 	     const BoxRec *box,
2460 	     int coverage)
2461 {
2462 	struct pixman_inplace *pi = (struct pixman_inplace *)op;
2463 	pixman_image_t *mask = NULL;
2464 	if (coverage != GRID_AREA) {
2465 		*pi->bits = TO_ALPHA(coverage);
2466 		mask = pi->mask;
2467 	}
2468 	pixman_image_composite(pi->op, pi->source, mask, pi->image,
2469 			       pi->sx + box->x1, pi->sy + box->y1,
2470 			       0, 0,
2471 			       pi->dx + box->x1, pi->dy + box->y1,
2472 			       box->x2 - box->x1, box->y2 - box->y1);
2473 }
2474 
/*
 * Work item for one band of a threaded in-place rasterisation onto a
 * 32-bit destination; consumed by inplace_x8r8g8b8_thread().
 */
struct inplace_x8r8g8b8_thread {
	xTrapezoid *traps;	/* trapezoids to rasterise, ntrap entries */
	PicturePtr dst, src;	/* destination and source pictures */
	BoxRec extents;	/* band handled by this work item */
	int dx, dy;	/* offsets passed to tor_add_trapezoid() */
	int ntrap;	/* number of entries in traps */
	bool lerp, is_solid;	/* select the span writer: lerp -> tor_blt_lerp32, else is_solid -> pixmask_span_solid, else pixmask_span */
	uint32_t color;	/* solid source colour (lerp and is_solid paths) */
	int16_t src_x, src_y;	/* source origin (non-solid path) */
	uint8_t op;	/* operator used by the pixman paths */
};
2486 
inplace_x8r8g8b8_thread(void * arg)2487 static void inplace_x8r8g8b8_thread(void *arg)
2488 {
2489 	struct inplace_x8r8g8b8_thread *thread = arg;
2490 	struct tor tor;
2491 	span_func_t span;
2492 	struct clipped_span clipped;
2493 	RegionPtr clip;
2494 	int y1, y2, n;
2495 
2496 	if (!tor_init(&tor, &thread->extents, 2*thread->ntrap))
2497 		return;
2498 
2499 	y1 = thread->extents.y1 - thread->dst->pDrawable->y;
2500 	y2 = thread->extents.y2 - thread->dst->pDrawable->y;
2501 	for (n = 0; n < thread->ntrap; n++) {
2502 		if (pixman_fixed_to_int(thread->traps[n].top) >= y2 ||
2503 		    pixman_fixed_to_int(thread->traps[n].bottom) < y1)
2504 			continue;
2505 
2506 		tor_add_trapezoid(&tor, &thread->traps[n], thread->dx, thread->dy);
2507 	}
2508 
2509 	clip = thread->dst->pCompositeClip;
2510 	if (thread->lerp) {
2511 		struct inplace inplace;
2512 		int16_t dst_x, dst_y;
2513 		PixmapPtr pixmap;
2514 
2515 		pixmap = get_drawable_pixmap(thread->dst->pDrawable);
2516 
2517 		inplace.ptr = pixmap->devPrivate.ptr;
2518 		if (get_drawable_deltas(thread->dst->pDrawable, pixmap, &dst_x, &dst_y))
2519 			inplace.ptr += dst_y * pixmap->devKind + dst_x * 4;
2520 		inplace.stride = pixmap->devKind;
2521 		inplace.color = thread->color;
2522 
2523 		span = clipped_span(&clipped, tor_blt_lerp32, clip);
2524 
2525 		tor_render(NULL, &tor,
2526 			   (void*)&inplace, (void *)&clipped,
2527 			   span, false);
2528 	} else if (thread->is_solid) {
2529 		struct pixman_inplace pi;
2530 
2531 		pi.image = image_from_pict(thread->dst, false, &pi.dx, &pi.dy);
2532 		pi.op = thread->op;
2533 		pi.color = thread->color;
2534 
2535 		pi.bits = (uint32_t *)&pi.sx;
2536 		pi.source = pixman_image_create_bits(PIXMAN_a8r8g8b8,
2537 						     1, 1, pi.bits, 0);
2538 		pixman_image_set_repeat(pi.source, PIXMAN_REPEAT_NORMAL);
2539 
2540 		span = clipped_span(&clipped, pixmask_span_solid, clip);
2541 
2542 		tor_render(NULL, &tor, (void*)&pi, clip, span, false);
2543 
2544 		pixman_image_unref(pi.source);
2545 		pixman_image_unref(pi.image);
2546 	} else {
2547 		struct pixman_inplace pi;
2548 		int16_t x0, y0;
2549 
2550 		trapezoid_origin(&thread->traps[0].left, &x0, &y0);
2551 
2552 		pi.image = image_from_pict(thread->dst, false, &pi.dx, &pi.dy);
2553 		pi.source = image_from_pict(thread->src, false, &pi.sx, &pi.sy);
2554 		pi.sx += thread->src_x - x0;
2555 		pi.sy += thread->src_y - y0;
2556 		pi.mask = pixman_image_create_bits(PIXMAN_a8, 1, 1, NULL, 0);
2557 		pixman_image_set_repeat(pi.mask, PIXMAN_REPEAT_NORMAL);
2558 		pi.bits = pixman_image_get_data(pi.mask);
2559 		pi.op = thread->op;
2560 
2561 		span = clipped_span(&clipped, pixmask_span, clip);
2562 
2563 		tor_render(NULL, &tor,
2564 			   (void*)&pi, (void *)&clipped,
2565 			   span, false);
2566 
2567 		pixman_image_unref(pi.mask);
2568 		pixman_image_unref(pi.source);
2569 		pixman_image_unref(pi.image);
2570 	}
2571 
2572 	tor_fini(&tor);
2573 }
2574 
/*
 * Rasterise trapezoids in place onto a 32-bit destination picture.
 *
 * A solid source whose operator reduces to PictOpSrc (opaque colour, or
 * PictOpOver onto a clear drawable) takes the "lerp" path and blends the
 * colour directly. Otherwise only PictOpOver, PictOpAdd, PictOpOutReverse
 * and PictOpSrc onto a clear drawable are accepted, and spans are
 * composited through pixman.
 *
 * When maskFormat is NULL and there is more than one trapezoid, each one
 * is handled separately by recursing with ntrap == 1.
 *
 * Returns false if the operator is not supported here (or a recursive call
 * returns false). Returns true on every other path, including those where
 * nothing was drawn because the extents were empty or a resource could not
 * be obtained.
 */
static bool
trapezoid_span_inplace__x8r8g8b8(CARD8 op,
				 PicturePtr dst,
				 PicturePtr src, int16_t src_x, int16_t src_y,
				 PictFormatPtr maskFormat, unsigned flags,
				 int ntrap, xTrapezoid *traps)
{
	uint32_t color;
	bool lerp, is_solid;
	RegionRec region;
	int dx, dy;
	int num_threads, n;

	lerp = false;
	is_solid = sna_picture_is_solid(src, &color);
	if (is_solid) {
		/* Over with an opaque colour, or onto a clear target, is just Src. */
		if (op == PictOpOver && (color >> 24) == 0xff)
			op = PictOpSrc;
		if (op == PictOpOver && sna_drawable_is_clear(dst->pDrawable))
			op = PictOpSrc;
		lerp = op == PictOpSrc;
	}
	if (!lerp) {
		switch (op) {
		case PictOpOver:
		case PictOpAdd:
		case PictOpOutReverse:
			break;
		case PictOpSrc:
			if (!sna_drawable_is_clear(dst->pDrawable))
				return false;
			break;
		default:
			return false;
		}
	}

	if (maskFormat == NULL && ntrap > 1) {
		DBG(("%s: individual rasterisation requested\n",
		     __FUNCTION__));
		do {
			/* XXX unwind errors? */
			if (!trapezoid_span_inplace__x8r8g8b8(op, dst,
							      src, src_x, src_y,
							      NULL, flags,
							      1, traps++))
				return false;
		} while (--ntrap);
		return true;
	}

	if (!trapezoids_bounds(ntrap, traps, &region.extents))
		return true;

	DBG(("%s: extents (%d, %d), (%d, %d)\n",
	     __FUNCTION__,
	     region.extents.x1, region.extents.y1,
	     region.extents.x2, region.extents.y2));

	if (!sna_compute_composite_extents(&region.extents,
					   src, NULL, dst,
					   src_x, src_y,
					   0, 0,
					   region.extents.x1, region.extents.y1,
					   region.extents.x2 - region.extents.x1,
					   region.extents.y2 - region.extents.y1))
		return true;

	DBG(("%s: clipped extents (%d, %d), (%d, %d)\n",
	     __FUNCTION__,
	     region.extents.x1, region.extents.y1,
	     region.extents.x2, region.extents.y2));

	/* The destination is both read and written by the span writers. */
	region.data = NULL;
	if (!sna_drawable_move_region_to_cpu(dst->pDrawable, &region,
					    MOVE_WRITE | MOVE_READ))
		return true;

	/* A non-solid, drawable-backed source (and its alpha map) is read by pixman. */
	if (!is_solid && src->pDrawable) {
		if (!sna_drawable_move_to_cpu(src->pDrawable,
					      MOVE_READ))
			return true;

		if (src->alphaMap &&
		    !sna_drawable_move_to_cpu(src->alphaMap->pDrawable,
					      MOVE_READ))
			return true;
	}

	/* Drawable origin scaled by SAMPLES_X/Y, passed to tor_add_trapezoid(). */
	dx = dst->pDrawable->x * SAMPLES_X;
	dy = dst->pDrawable->y * SAMPLES_Y;

	/* Threads are only considered for solid sources and non-rectilinear input. */
	num_threads = 1;
	if (!NO_GPU_THREADS &&
	    (flags & COMPOSITE_SPANS_RECTILINEAR) == 0 &&
	    (lerp || is_solid))
		num_threads = sna_use_threads(4*(region.extents.x2 - region.extents.x1),
					      region.extents.y2 - region.extents.y1,
					      4);

	DBG(("%s: %dx%d, format=%x, op=%d, lerp?=%d, num_threads=%d\n",
	     __FUNCTION__,
	     region.extents.x2 - region.extents.x1,
	     region.extents.y2 - region.extents.y1,
	     dst->format, op, lerp, num_threads));

	if (num_threads == 1) {
		struct tor tor;
		span_func_t span;
		struct clipped_span clipped;

		if (!tor_init(&tor, &region.extents, 2*ntrap))
			return true;

		/* Skip trapezoids entirely above or below the clipped extents. */
		for (n = 0; n < ntrap; n++) {
			if (pixman_fixed_to_int(traps[n].top) >= region.extents.y2 - dst->pDrawable->y ||
			    pixman_fixed_to_int(traps[n].bottom) < region.extents.y1 - dst->pDrawable->y)
				continue;

			tor_add_trapezoid(&tor, &traps[n], dx, dy);
		}

		if (lerp) {
			struct inplace inplace;
			PixmapPtr pixmap;
			int16_t dst_x, dst_y;

			pixmap = get_drawable_pixmap(dst->pDrawable);

			/* Point at the drawable's origin inside the backing pixmap (4 bytes per pixel). */
			inplace.ptr = pixmap->devPrivate.ptr;
			if (get_drawable_deltas(dst->pDrawable, pixmap, &dst_x, &dst_y))
				inplace.ptr += dst_y * pixmap->devKind + dst_x * 4;
			inplace.stride = pixmap->devKind;
			inplace.color = color;

			span = clipped_span(&clipped, tor_blt_lerp32, dst->pCompositeClip);
			DBG(("%s: render inplace op=%d, color=%08x\n",
			     __FUNCTION__, op, color));

			/* NOTE(review): sigtrap_get()/sigtrap_put() presumably guard against faults while writing to the mapping -- confirm. */
			if (sigtrap_get() == 0) {
				tor_render(NULL, &tor,
					   (void*)&inplace, (void*)&clipped,
					   span, false);
				sigtrap_put();
			}
		} else if (is_solid) {
			struct pixman_inplace pi;

			pi.image = image_from_pict(dst, false, &pi.dx, &pi.dy);
			pi.op = op;
			pi.color = color;

			/* pi.sx is unused on this path; reuse it as the 1x1 source pixel. */
			pi.bits = (uint32_t *)&pi.sx;
			pi.source = pixman_image_create_bits(PIXMAN_a8r8g8b8,
							     1, 1, pi.bits, 0);
			pixman_image_set_repeat(pi.source, PIXMAN_REPEAT_NORMAL);

			span = clipped_span(&clipped, pixmask_span_solid, dst->pCompositeClip);
			if (sigtrap_get() == 0) {
				tor_render(NULL, &tor,
					   (void*)&pi, (void*)&clipped,
					    span, false);
				sigtrap_put();
			}

			pixman_image_unref(pi.source);
			pixman_image_unref(pi.image);
		} else {
			struct pixman_inplace pi;
			int16_t x0, y0;

			/* The source offset is taken relative to the first trapezoid's origin. */
			trapezoid_origin(&traps[0].left, &x0, &y0);

			pi.image = image_from_pict(dst, false, &pi.dx, &pi.dy);
			pi.source = image_from_pict(src, false, &pi.sx, &pi.sy);
			pi.sx += src_x - x0;
			pi.sy += src_y - y0;
			/* 1x1 repeating a8 mask whose single pixel carries the span coverage. */
			pi.mask = pixman_image_create_bits(PIXMAN_a8, 1, 1, NULL, 0);
			pixman_image_set_repeat(pi.mask, PIXMAN_REPEAT_NORMAL);
			pi.bits = pixman_image_get_data(pi.mask);
			pi.op = op;

			span = clipped_span(&clipped, pixmask_span, dst->pCompositeClip);
			if (sigtrap_get() == 0) {
				tor_render(NULL, &tor,
					   (void*)&pi, (void *)&clipped,
					   span, false);
				sigtrap_put();
			}

			pixman_image_unref(pi.mask);
			pixman_image_unref(pi.source);
			pixman_image_unref(pi.image);
		}

		tor_fini(&tor);
	} else {
		struct inplace_x8r8g8b8_thread threads[num_threads];
		int y, h;

		DBG(("%s: using %d threads for inplace compositing %dx%d\n",
		     __FUNCTION__, num_threads,
		     region.extents.x2 - region.extents.x1,
		     region.extents.y2 - region.extents.y1));

		/* threads[0] is the template copied into every other slot. */
		threads[0].traps = traps;
		threads[0].ntrap = ntrap;
		threads[0].extents = region.extents;
		threads[0].lerp = lerp;
		threads[0].is_solid = is_solid;
		threads[0].color = color;
		threads[0].dx = dx;
		threads[0].dy = dy;
		threads[0].dst = dst;
		threads[0].src = src;
		threads[0].op = op;
		threads[0].src_x = src_x;
		threads[0].src_y = src_y;

		/*
		 * Split the height into bands of h rows (rounded up). If the
		 * first num_threads-1 bands already cover the whole height,
		 * use one thread fewer so that no band is empty.
		 */
		y = region.extents.y1;
		h = region.extents.y2 - region.extents.y1;
		h = (h + num_threads - 1) / num_threads;
		num_threads -= (num_threads-1) * h >= region.extents.y2 - region.extents.y1;

		if (sigtrap_get() == 0) {
			for (n = 1; n < num_threads; n++) {
				threads[n] = threads[0];
				threads[n].extents.y1 = y;
				threads[n].extents.y2 = y += h;

				sna_threads_run(n, inplace_x8r8g8b8_thread, &threads[n]);
			}

			/* The remaining rows are rasterised by the caller itself. */
			assert(y < threads[0].extents.y2);
			threads[0].extents.y1 = y;
			inplace_x8r8g8b8_thread(&threads[0]);

			sna_threads_wait();
			sigtrap_put();
		} else
			sna_threads_kill(); /* leaks thread allocations */
	}

	return true;
}
2820 
/*
 * Work item for one band of a threaded in-place rasterisation onto an
 * 8-bit destination; consumed by inplace_thread().
 */
struct inplace_thread {
	xTrapezoid *traps;	/* trapezoids to rasterise, ntrap entries */
	span_func_t span;	/* span writer handed to tor_render() */
	struct inplace inplace;	/* destination description passed to the span writer */
	struct clipped_span clipped;	/* per-item clip state passed as tor_render()'s clip argument */
	BoxRec extents;	/* band handled by this work item */
	int dx, dy;	/* offsets passed to tor_add_trapezoid() */
	int draw_x, draw_y;	/* drawable origin; draw_y is subtracted from extents when culling trapezoids */
	bool unbounded;	/* forwarded to tor_render() as its last argument */
	int ntrap;	/* number of entries in traps */
};
2832 
inplace_thread(void * arg)2833 static void inplace_thread(void *arg)
2834 {
2835 	struct inplace_thread *thread = arg;
2836 	struct tor tor;
2837 	int n;
2838 
2839 	if (!tor_init(&tor, &thread->extents, 2*thread->ntrap))
2840 		return;
2841 
2842 	for (n = 0; n < thread->ntrap; n++) {
2843 		if (pixman_fixed_to_int(thread->traps[n].top) >= thread->extents.y2 - thread->draw_y ||
2844 		    pixman_fixed_to_int(thread->traps[n].bottom) < thread->extents.y1 - thread->draw_y)
2845 			continue;
2846 
2847 		tor_add_trapezoid(&tor, &thread->traps[n], thread->dx, thread->dy);
2848 	}
2849 
2850 	tor_render(NULL, &tor,
2851 		   (void*)&thread->inplace, (void*)&thread->clipped,
2852 		   thread->span, thread->unbounded);
2853 
2854 	tor_fini(&tor);
2855 }
2856 
/*
 * Rasterise trapezoids directly into the destination's pixels.
 *
 * 32-bit destinations (PICT_a8r8g8b8 / PICT_x8r8g8b8) are delegated to
 * trapezoid_span_inplace__x8r8g8b8(). Otherwise the destination must be
 * PICT_a8 and the source solid; the supported operators are PictOpAdd,
 * PictOpIn and PictOpSrc, with the source alpha used as opacity.
 *
 * When maskFormat is NULL and there is more than one trapezoid, each one
 * is handled separately by recursing with ntrap == 1. The fallback
 * argument is only forwarded to those recursive calls.
 *
 * Returns false if NO_PRECISE is set or the source, format or operator is
 * unsupported (or a recursive call returns false). Returns true on every
 * other path, including those where nothing needed to be, or could be,
 * drawn.
 */
bool
precise_trapezoid_span_inplace(struct sna *sna,
			       CARD8 op, PicturePtr src, PicturePtr dst,
			       PictFormatPtr maskFormat, unsigned flags,
			       INT16 src_x, INT16 src_y,
			       int ntrap, xTrapezoid *traps,
			       bool fallback)
{
	struct inplace inplace;
	struct clipped_span clipped;
	span_func_t span;
	PixmapPtr pixmap;
	struct sna_pixmap *priv;
	RegionRec region;
	uint32_t color;
	bool unbounded;
	int16_t dst_x, dst_y;
	int dx, dy;
	int num_threads, n;

	if (NO_PRECISE)
		return false;

	if (dst->format == PICT_a8r8g8b8 || dst->format == PICT_x8r8g8b8)
		return trapezoid_span_inplace__x8r8g8b8(op, dst,
							src, src_x, src_y,
							maskFormat, flags,
							ntrap, traps);

	if (!sna_picture_is_solid(src, &color)) {
		DBG(("%s: fallback -- can not perform operation in place, requires solid source\n",
		     __FUNCTION__));
		return false;
	}

	if (dst->format != PICT_a8) {
		DBG(("%s: fallback -- can not perform operation in place, format=%x\n",
		     __FUNCTION__, dst->format));
		return false;
	}

	pixmap = get_drawable_pixmap(dst->pDrawable);

	/*
	 * Choose the operator and whether rendering is unbounded. When the
	 * pixmap is known to be clear, some operators simplify or become
	 * no-ops.
	 */
	unbounded = false;
	priv = sna_pixmap(pixmap);
	if (priv) {
		switch (op) {
		case PictOpAdd:
			/* Adding onto a zero-cleared target is the same as Src. */
			if (priv->clear && priv->clear_color == 0) {
				unbounded = true;
				op = PictOpSrc;
			}
			/* A fully transparent colour contributes nothing. */
			if ((color >> 24) == 0)
				return true;
			break;
		case PictOpIn:
			/* In on a zero-cleared target leaves it zero. */
			if (priv->clear && priv->clear_color == 0)
				return true;
			/* In on a target cleared to 0xff is the same as Src. */
			if (priv->clear && priv->clear_color == 0xff)
				op = PictOpSrc;
			unbounded = true;
			break;
		case PictOpSrc:
			unbounded = true;
			break;
		default:
			DBG(("%s: fallback -- can not perform op [%d] in place\n",
			     __FUNCTION__, op));
			return false;
		}
	} else {
		switch (op) {
		case PictOpAdd:
			if ((color >> 24) == 0)
				return true;
			break;
		case PictOpIn:
		case PictOpSrc:
			unbounded = true;
			break;
		default:
			DBG(("%s: fallback -- can not perform op [%d] in place\n",
			     __FUNCTION__, op));
			return false;
		}
	}

	DBG(("%s: format=%x, op=%d, color=%x\n",
	     __FUNCTION__, dst->format, op, color));

	if (maskFormat == NULL && ntrap > 1) {
		DBG(("%s: individual rasterisation requested\n",
		     __FUNCTION__));
		do {
			/* XXX unwind errors? */
			if (!precise_trapezoid_span_inplace(sna, op, src, dst, NULL, flags,
							    src_x, src_y, 1, traps++,
							    fallback))
				return false;
		} while (--ntrap);
		return true;
	}

	if (!trapezoids_bounds(ntrap, traps, &region.extents))
		return true;

	DBG(("%s: extents (%d, %d), (%d, %d)\n",
	     __FUNCTION__,
	     region.extents.x1, region.extents.y1,
	     region.extents.x2, region.extents.y2));

	if (!sna_compute_composite_extents(&region.extents,
					   NULL, NULL, dst,
					   0, 0,
					   0, 0,
					   region.extents.x1, region.extents.y1,
					   region.extents.x2 - region.extents.x1,
					   region.extents.y2 - region.extents.y1))
		return true;

	DBG(("%s: clipped extents (%d, %d), (%d, %d) [complex clip? %d]\n",
	     __FUNCTION__,
	     region.extents.x1, region.extents.y1,
	     region.extents.x2, region.extents.y2,
	     dst->pCompositeClip->data != NULL));

	/* Select the 8-bit span writer matching the (possibly rewritten) operator. */
	if (op == PictOpSrc) {
		span = tor_blt_src;
	} else if (op == PictOpIn) {
		span = tor_blt_in;
	} else {
		assert(op == PictOpAdd);
		span = tor_blt_add;
	}

	/* Src overwrites the destination, so it does not request MOVE_READ. */
	DBG(("%s: move-to-cpu(dst)\n", __FUNCTION__));
	region.data = NULL;
	if (!sna_drawable_move_region_to_cpu(dst->pDrawable, &region,
					     op == PictOpSrc ? MOVE_WRITE | MOVE_INPLACE_HINT : MOVE_WRITE | MOVE_READ))
		return true;

	/* Drawable origin scaled by SAMPLES_X/Y, passed to tor_add_trapezoid(). */
	dx = dst->pDrawable->x * SAMPLES_X;
	dy = dst->pDrawable->y * SAMPLES_Y;

	/* Point at the drawable's origin inside the backing pixmap (1 byte per pixel). */
	inplace.ptr = pixmap->devPrivate.ptr;
	if (get_drawable_deltas(dst->pDrawable, pixmap, &dst_x, &dst_y))
		inplace.ptr += dst_y * pixmap->devKind + dst_x;
	inplace.stride = pixmap->devKind;
	inplace.opacity = color >> 24;

	span = clipped_span(&clipped, span, dst->pCompositeClip);

	/* Threads are only considered for non-rectilinear input. */
	num_threads = 1;
	if (!NO_GPU_THREADS &&
	    (flags & COMPOSITE_SPANS_RECTILINEAR) == 0)
		num_threads = sna_use_threads(region.extents.x2 - region.extents.x1,
					      region.extents.y2 - region.extents.y1,
					      4);
	if (num_threads == 1) {
		struct tor tor;

		if (!tor_init(&tor, &region.extents, 2*ntrap))
			return true;

		/* Skip trapezoids entirely above or below the clipped extents. */
		for (n = 0; n < ntrap; n++) {

			if (pixman_fixed_to_int(traps[n].top) >= region.extents.y2 - dst->pDrawable->y ||
			    pixman_fixed_to_int(traps[n].bottom) < region.extents.y1 - dst->pDrawable->y)
				continue;

			tor_add_trapezoid(&tor, &traps[n], dx, dy);
		}

		/* NOTE(review): sigtrap_get()/sigtrap_put() presumably guard against faults while writing to the mapping -- confirm. */
		if (sigtrap_get() == 0) {
			tor_render(NULL, &tor,
				   (void*)&inplace, (void *)&clipped,
				   span, unbounded);
			sigtrap_put();
		}

		tor_fini(&tor);
	} else {
		struct inplace_thread threads[num_threads];
		int y, h;

		DBG(("%s: using %d threads for inplace compositing %dx%d\n",
		     __FUNCTION__, num_threads,
		     region.extents.x2 - region.extents.x1,
		     region.extents.y2 - region.extents.y1));

		/* threads[0] is the template copied into every other slot. */
		threads[0].traps = traps;
		threads[0].ntrap = ntrap;
		threads[0].inplace = inplace;
		threads[0].extents = region.extents;
		threads[0].clipped = clipped;
		threads[0].span = span;
		threads[0].unbounded = unbounded;
		threads[0].dx = dx;
		threads[0].dy = dy;
		threads[0].draw_x = dst->pDrawable->x;
		threads[0].draw_y = dst->pDrawable->y;

		/*
		 * Split the height into bands of h rows (rounded up). If the
		 * first num_threads-1 bands already cover the whole height,
		 * use one thread fewer so that no band is empty.
		 */
		y = region.extents.y1;
		h = region.extents.y2 - region.extents.y1;
		h = (h + num_threads - 1) / num_threads;
		num_threads -= (num_threads-1) * h >= region.extents.y2 - region.extents.y1;

		if (sigtrap_get() == 0) {
			for (n = 1; n < num_threads; n++) {
				threads[n] = threads[0];
				threads[n].extents.y1 = y;
				threads[n].extents.y2 = y += h;

				sna_threads_run(n, inplace_thread, &threads[n]);
			}

			/* The remaining rows are rasterised by the caller itself. */
			assert(y < threads[0].extents.y2);
			threads[0].extents.y1 = y;
			inplace_thread(&threads[0]);

			sna_threads_wait();
			sigtrap_put();
		} else
			sna_threads_kill(); /* leaks thread allocations */
	}

	return true;
}
3085 
3086 bool
precise_trapezoid_span_fallback(CARD8 op,PicturePtr src,PicturePtr dst,PictFormatPtr maskFormat,unsigned flags,INT16 src_x,INT16 src_y,int ntrap,xTrapezoid * traps)3087 precise_trapezoid_span_fallback(CARD8 op, PicturePtr src, PicturePtr dst,
3088 				PictFormatPtr maskFormat, unsigned flags,
3089 				INT16 src_x, INT16 src_y,
3090 				int ntrap, xTrapezoid *traps)
3091 {
3092 	ScreenPtr screen = dst->pDrawable->pScreen;
3093 	PixmapPtr scratch;
3094 	PicturePtr mask;
3095 	BoxRec extents;
3096 	int16_t dst_x, dst_y;
3097 	int dx, dy, num_threads;
3098 	int error, n;
3099 
3100 	if (NO_PRECISE)
3101 		return false;
3102 
3103 	if (maskFormat == NULL && ntrap > 1) {
3104 		DBG(("%s: individual rasterisation requested\n",
3105 		     __FUNCTION__));
3106 		do {
3107 			/* XXX unwind errors? */
3108 			if (!precise_trapezoid_span_fallback(op, src, dst, NULL, flags,
3109 							     src_x, src_y, 1, traps++))
3110 				return false;
3111 		} while (--ntrap);
3112 		return true;
3113 	}
3114 
3115 	if (!trapezoids_bounds(ntrap, traps, &extents))
3116 		return true;
3117 
3118 	DBG(("%s: ntraps=%d, extents (%d, %d), (%d, %d)\n",
3119 	     __FUNCTION__, ntrap, extents.x1, extents.y1, extents.x2, extents.y2));
3120 
3121 	if (!sna_compute_composite_extents(&extents,
3122 					   src, NULL, dst,
3123 					   src_x, src_y,
3124 					   0, 0,
3125 					   extents.x1, extents.y1,
3126 					   extents.x2 - extents.x1,
3127 					   extents.y2 - extents.y1))
3128 		return true;
3129 
3130 	DBG(("%s: extents (%d, %d), (%d, %d)\n",
3131 	     __FUNCTION__, extents.x1, extents.y1, extents.x2, extents.y2));
3132 
3133 	extents.y2 -= extents.y1;
3134 	extents.x2 -= extents.x1;
3135 	extents.x1 -= dst->pDrawable->x;
3136 	extents.y1 -= dst->pDrawable->y;
3137 	dst_x = extents.x1;
3138 	dst_y = extents.y1;
3139 	dx = -extents.x1 * SAMPLES_X;
3140 	dy = -extents.y1 * SAMPLES_Y;
3141 	extents.x1 = extents.y1 = 0;
3142 
3143 	DBG(("%s: mask (%dx%d), dx=(%d, %d)\n",
3144 	     __FUNCTION__, extents.x2, extents.y2, dx, dy));
3145 	scratch = sna_pixmap_create_unattached(screen,
3146 					       extents.x2, extents.y2, 8);
3147 	if (!scratch)
3148 		return true;
3149 
3150 	DBG(("%s: created buffer %p, stride %d\n",
3151 	     __FUNCTION__, scratch->devPrivate.ptr, scratch->devKind));
3152 
3153 	num_threads = 1;
3154 	if (!NO_GPU_THREADS &&
3155 	    (flags & COMPOSITE_SPANS_RECTILINEAR) == 0)
3156 		num_threads = sna_use_threads(extents.x2 - extents.x1,
3157 					      extents.y2 - extents.y1,
3158 					      4);
3159 	if (num_threads == 1) {
3160 		struct tor tor;
3161 
3162 		if (!tor_init(&tor, &extents, 2*ntrap)) {
3163 			sna_pixmap_destroy(scratch);
3164 			return true;
3165 		}
3166 
3167 		for (n = 0; n < ntrap; n++) {
3168 			if (pixman_fixed_to_int(traps[n].top) - dst_y >= extents.y2 ||
3169 			    pixman_fixed_to_int(traps[n].bottom) - dst_y < 0)
3170 				continue;
3171 
3172 			tor_add_trapezoid(&tor, &traps[n], dx, dy);
3173 		}
3174 
3175 		if (extents.x2 <= TOR_INPLACE_SIZE) {
3176 			tor_inplace(&tor, scratch);
3177 		} else {
3178 			tor_render(NULL, &tor,
3179 				   scratch->devPrivate.ptr,
3180 				   (void *)(intptr_t)scratch->devKind,
3181 				   tor_blt_mask,
3182 				   true);
3183 		}
3184 		tor_fini(&tor);
3185 	} else {
3186 		struct mask_thread threads[num_threads];
3187 		int y, h;
3188 
3189 		DBG(("%s: using %d threads for mask compositing %dx%d\n",
3190 		     __FUNCTION__, num_threads,
3191 		     extents.x2 - extents.x1,
3192 		     extents.y2 - extents.y1));
3193 
3194 		threads[0].scratch = scratch;
3195 		threads[0].traps = traps;
3196 		threads[0].ntrap = ntrap;
3197 		threads[0].extents = extents;
3198 		threads[0].dx = dx;
3199 		threads[0].dy = dy;
3200 		threads[0].dst_y = dst_y;
3201 
3202 		y = extents.y1;
3203 		h = extents.y2 - extents.y1;
3204 		h = (h + num_threads - 1) / num_threads;
3205 		num_threads -= (num_threads-1) * h >= extents.y2 - extents.y1;
3206 
3207 		for (n = 1; n < num_threads; n++) {
3208 			threads[n] = threads[0];
3209 			threads[n].extents.y1 = y;
3210 			threads[n].extents.y2 = y += h;
3211 
3212 			sna_threads_run(n, mask_thread, &threads[n]);
3213 		}
3214 
3215 		assert(y < threads[0].extents.y2);
3216 		threads[0].extents.y1 = y;
3217 		mask_thread(&threads[0]);
3218 
3219 		sna_threads_wait();
3220 	}
3221 
3222 	mask = CreatePicture(0, &scratch->drawable,
3223 			     PictureMatchFormat(screen, 8, PICT_a8),
3224 			     0, 0, serverClient, &error);
3225 	if (mask) {
3226 		RegionRec region;
3227 		int16_t x0, y0;
3228 
3229 		region.extents.x1 = dst_x + dst->pDrawable->x;
3230 		region.extents.y1 = dst_y + dst->pDrawable->y;
3231 		region.extents.x2 = region.extents.x1 + extents.x2;
3232 		region.extents.y2 = region.extents.y1 + extents.y2;
3233 		region.data = NULL;
3234 
3235 		trapezoid_origin(&traps[0].left, &x0, &y0);
3236 
3237 		DBG(("%s: fbComposite()\n", __FUNCTION__));
3238 		sna_composite_fb(op, src, mask, dst, &region,
3239 				 src_x + dst_x - x0, src_y + dst_y - y0,
3240 				 0, 0,
3241 				 dst_x, dst_y,
3242 				 extents.x2, extents.y2);
3243 
3244 		FreePicture(mask, 0);
3245 	}
3246 	sna_pixmap_destroy(scratch);
3247 
3248 	return true;
3249 }
3250 
3251 struct tristrip_thread {
3252 	struct sna *sna;
3253 	const struct sna_composite_spans_op *op;
3254 	const xPointFixed *points;
3255 	RegionPtr clip;
3256 	span_func_t span;
3257 	BoxRec extents;
3258 	int dx, dy, draw_y;
3259 	int count;
3260 	bool unbounded;
3261 };
3262 
3263 static void
tristrip_thread(void * arg)3264 tristrip_thread(void *arg)
3265 {
3266 	struct tristrip_thread *thread = arg;
3267 	struct span_thread_boxes boxes;
3268 	struct tor tor;
3269 	int n, cw, ccw;
3270 
3271 	if (!tor_init(&tor, &thread->extents, 2*thread->count))
3272 		return;
3273 
3274 	span_thread_boxes_init(&boxes, thread->op, thread->clip);
3275 
3276 	cw = 0; ccw = 1;
3277 	polygon_add_line(tor.polygon,
3278 			 &thread->points[ccw], &thread->points[cw],
3279 			 thread->dx, thread->dy);
3280 	n = 2;
3281 	do {
3282 		polygon_add_line(tor.polygon,
3283 				 &thread->points[cw], &thread->points[n],
3284 				 thread->dx, thread->dy);
3285 		cw = n;
3286 		if (++n == thread->count)
3287 			break;
3288 
3289 		polygon_add_line(tor.polygon,
3290 				 &thread->points[n], &thread->points[ccw],
3291 				 thread->dx, thread->dy);
3292 		ccw = n;
3293 		if (++n == thread->count)
3294 			break;
3295 	} while (1);
3296 	polygon_add_line(tor.polygon,
3297 			 &thread->points[cw], &thread->points[ccw],
3298 			 thread->dx, thread->dy);
3299 	assert(tor.polygon->num_edges <= 2*thread->count);
3300 
3301 	tor_render(thread->sna, &tor,
3302 		   (struct sna_composite_spans_op *)&boxes, thread->clip,
3303 		   thread->span, thread->unbounded);
3304 
3305 	tor_fini(&tor);
3306 
3307 	if (boxes.num_boxes) {
3308 		DBG(("%s: flushing %d boxes\n", __FUNCTION__, boxes.num_boxes));
3309 		assert(boxes.num_boxes <= SPAN_THREAD_MAX_BOXES);
3310 		thread->op->thread_boxes(thread->sna, thread->op,
3311 					 boxes.boxes, boxes.num_boxes);
3312 	}
3313 }
3314 
3315 bool
precise_tristrip_span_converter(struct sna * sna,CARD8 op,PicturePtr src,PicturePtr dst,PictFormatPtr maskFormat,INT16 src_x,INT16 src_y,int count,xPointFixed * points)3316 precise_tristrip_span_converter(struct sna *sna,
3317 				CARD8 op, PicturePtr src, PicturePtr dst,
3318 				PictFormatPtr maskFormat, INT16 src_x, INT16 src_y,
3319 				int count, xPointFixed *points)
3320 {
3321 	struct sna_composite_spans_op tmp;
3322 	BoxRec extents;
3323 	pixman_region16_t clip;
3324 	int16_t dst_x, dst_y;
3325 	int dx, dy, num_threads;
3326 	bool was_clear;
3327 
3328 	if (!sna->render.check_composite_spans(sna, op, src, dst, 0, 0, 0)) {
3329 		DBG(("%s: fallback -- composite spans not supported\n",
3330 		     __FUNCTION__));
3331 		return false;
3332 	}
3333 
3334 	dst_x = pixman_fixed_to_int(points[0].x);
3335 	dst_y = pixman_fixed_to_int(points[0].y);
3336 
3337 	miPointFixedBounds(count, points, &extents);
3338 	DBG(("%s: extents (%d, %d), (%d, %d)\n",
3339 	     __FUNCTION__, extents.x1, extents.y1, extents.x2, extents.y2));
3340 
3341 	if (extents.y1 >= extents.y2 || extents.x1 >= extents.x2)
3342 		return true;
3343 
3344 #if 0
3345 	if (extents.y2 - extents.y1 < 64 && extents.x2 - extents.x1 < 64) {
3346 		DBG(("%s: fallback -- traps extents too small %dx%d\n",
3347 		     __FUNCTION__, extents.y2 - extents.y1, extents.x2 - extents.x1));
3348 		return false;
3349 	}
3350 #endif
3351 
3352 	if (!sna_compute_composite_region(&clip,
3353 					  src, NULL, dst,
3354 					  src_x + extents.x1 - dst_x,
3355 					  src_y + extents.y1 - dst_y,
3356 					  0, 0,
3357 					  extents.x1, extents.y1,
3358 					  extents.x2 - extents.x1,
3359 					  extents.y2 - extents.y1)) {
3360 		DBG(("%s: triangles do not intersect drawable clips\n",
3361 		     __FUNCTION__)) ;
3362 		return true;
3363 	}
3364 
3365 	if (!sna->render.check_composite_spans(sna, op, src, dst,
3366 					       clip.extents.x2 - clip.extents.x1,
3367 					       clip.extents.y2 - clip.extents.y1,
3368 					       0)) {
3369 		DBG(("%s: fallback -- composite spans not supported\n",
3370 		     __FUNCTION__));
3371 		return false;
3372 	}
3373 
3374 	extents = *RegionExtents(&clip);
3375 	dx = dst->pDrawable->x;
3376 	dy = dst->pDrawable->y;
3377 
3378 	DBG(("%s: after clip -- extents (%d, %d), (%d, %d), delta=(%d, %d) src -> (%d, %d)\n",
3379 	     __FUNCTION__,
3380 	     extents.x1, extents.y1,
3381 	     extents.x2, extents.y2,
3382 	     dx, dy,
3383 	     src_x + extents.x1 - dst_x - dx,
3384 	     src_y + extents.y1 - dst_y - dy));
3385 
3386 	was_clear = sna_drawable_is_clear(dst->pDrawable);
3387 
3388 	memset(&tmp, 0, sizeof(tmp));
3389 	if (!sna->render.composite_spans(sna, op, src, dst,
3390 					 src_x + extents.x1 - dst_x - dx,
3391 					 src_y + extents.y1 - dst_y - dy,
3392 					 extents.x1,  extents.y1,
3393 					 extents.x2 - extents.x1,
3394 					 extents.y2 - extents.y1,
3395 					 0,
3396 					 &tmp)) {
3397 		DBG(("%s: fallback -- composite spans render op not supported\n",
3398 		     __FUNCTION__));
3399 		return false;
3400 	}
3401 
3402 	dx *= SAMPLES_X;
3403 	dy *= SAMPLES_Y;
3404 
3405 	num_threads = 1;
3406 	if (!NO_GPU_THREADS &&
3407 	    tmp.thread_boxes &&
3408 	    thread_choose_span(&tmp, dst, maskFormat, &clip))
3409 		num_threads = sna_use_threads(extents.x2 - extents.x1,
3410 					      extents.y2 - extents.y1,
3411 					      16);
3412 	if (num_threads == 1) {
3413 		struct tor tor;
3414 		int cw, ccw, n;
3415 
3416 		if (!tor_init(&tor, &extents, 2*count))
3417 			goto skip;
3418 
3419 		cw = 0; ccw = 1;
3420 		polygon_add_line(tor.polygon,
3421 				 &points[ccw], &points[cw],
3422 				 dx, dy);
3423 		n = 2;
3424 		do {
3425 			polygon_add_line(tor.polygon,
3426 					 &points[cw], &points[n],
3427 					 dx, dy);
3428 			cw = n;
3429 			if (++n == count)
3430 				break;
3431 
3432 			polygon_add_line(tor.polygon,
3433 					 &points[n], &points[ccw],
3434 					 dx, dy);
3435 			ccw = n;
3436 			if (++n == count)
3437 				break;
3438 		} while (1);
3439 		polygon_add_line(tor.polygon,
3440 				 &points[cw], &points[ccw],
3441 				 dx, dy);
3442 		assert(tor.polygon->num_edges <= 2*count);
3443 
3444 		tor_render(sna, &tor, &tmp, &clip,
3445 			   choose_span(&tmp, dst, maskFormat, &clip),
3446 			   !was_clear && maskFormat && !operator_is_bounded(op));
3447 
3448 		tor_fini(&tor);
3449 	} else {
3450 		struct tristrip_thread threads[num_threads];
3451 		int y, h, n;
3452 
3453 		DBG(("%s: using %d threads for tristrip compositing %dx%d\n",
3454 		     __FUNCTION__, num_threads,
3455 		     clip.extents.x2 - clip.extents.x1,
3456 		     clip.extents.y2 - clip.extents.y1));
3457 
3458 		threads[0].sna = sna;
3459 		threads[0].op = &tmp;
3460 		threads[0].points = points;
3461 		threads[0].count = count;
3462 		threads[0].extents = clip.extents;
3463 		threads[0].clip = &clip;
3464 		threads[0].dx = dx;
3465 		threads[0].dy = dy;
3466 		threads[0].draw_y = dst->pDrawable->y;
3467 		threads[0].unbounded = !was_clear && maskFormat && !operator_is_bounded(op);
3468 		threads[0].span = thread_choose_span(&tmp, dst, maskFormat, &clip);
3469 
3470 		y = clip.extents.y1;
3471 		h = clip.extents.y2 - clip.extents.y1;
3472 		h = (h + num_threads - 1) / num_threads;
3473 		num_threads -= (num_threads-1) * h >= clip.extents.y2 - clip.extents.y1;
3474 
3475 		for (n = 1; n < num_threads; n++) {
3476 			threads[n] = threads[0];
3477 			threads[n].extents.y1 = y;
3478 			threads[n].extents.y2 = y += h;
3479 
3480 			sna_threads_run(n, tristrip_thread, &threads[n]);
3481 		}
3482 
3483 		assert(y < threads[0].extents.y2);
3484 		threads[0].extents.y1 = y;
3485 		tristrip_thread(&threads[0]);
3486 
3487 		sna_threads_wait();
3488 	}
3489 skip:
3490 	tmp.done(sna, &tmp);
3491 
3492 	REGION_UNINIT(NULL, &clip);
3493 	return true;
3494 }
3495 
3496 bool
precise_trap_span_converter(struct sna * sna,PicturePtr dst,INT16 src_x,INT16 src_y,int ntrap,xTrap * trap)3497 precise_trap_span_converter(struct sna *sna,
3498 			    PicturePtr dst,
3499 			    INT16 src_x, INT16 src_y,
3500 			    int ntrap, xTrap *trap)
3501 {
3502 	struct sna_composite_spans_op tmp;
3503 	struct tor tor;
3504 	BoxRec extents;
3505 	pixman_region16_t *clip;
3506 	int dx, dy, n;
3507 
3508 	if (dst->pDrawable->depth < 8)
3509 		return false;
3510 
3511 	if (!sna->render.check_composite_spans(sna, PictOpAdd, sna->render.white_picture, dst,
3512 					       dst->pCompositeClip->extents.x2 - dst->pCompositeClip->extents.x1,
3513 					       dst->pCompositeClip->extents.y2 - dst->pCompositeClip->extents.y1,
3514 					       0)) {
3515 		DBG(("%s: fallback -- composite spans not supported\n",
3516 		     __FUNCTION__));
3517 		return false;
3518 	}
3519 
3520 	clip = dst->pCompositeClip;
3521 	extents = *RegionExtents(clip);
3522 	dx = dst->pDrawable->x;
3523 	dy = dst->pDrawable->y;
3524 
3525 	DBG(("%s: after clip -- extents (%d, %d), (%d, %d), delta=(%d, %d)\n",
3526 	     __FUNCTION__,
3527 	     extents.x1, extents.y1,
3528 	     extents.x2, extents.y2,
3529 	     dx, dy));
3530 
3531 	memset(&tmp, 0, sizeof(tmp));
3532 	if (!sna->render.composite_spans(sna, PictOpAdd, sna->render.white_picture, dst,
3533 					 0, 0,
3534 					 extents.x1,  extents.y1,
3535 					 extents.x2 - extents.x1,
3536 					 extents.y2 - extents.y1,
3537 					 0,
3538 					 &tmp)) {
3539 		DBG(("%s: fallback -- composite spans render op not supported\n",
3540 		     __FUNCTION__));
3541 		return false;
3542 	}
3543 
3544 	dx *= SAMPLES_X;
3545 	dy *= SAMPLES_Y;
3546 	if (!tor_init(&tor, &extents, 2*ntrap))
3547 		goto skip;
3548 
3549 	for (n = 0; n < ntrap; n++) {
3550 		xPointFixed p1, p2;
3551 
3552 		if (pixman_fixed_to_int(trap[n].top.y) + dst->pDrawable->y >= extents.y2 ||
3553 		    pixman_fixed_to_int(trap[n].bot.y) + dst->pDrawable->y < extents.y1)
3554 			continue;
3555 
3556 		p1.y = trap[n].top.y;
3557 		p2.y = trap[n].bot.y;
3558 		p1.x = trap[n].top.l;
3559 		p2.x = trap[n].bot.l;
3560 		polygon_add_line(tor.polygon, &p1, &p2, dx, dy);
3561 
3562 		p1.y = trap[n].bot.y;
3563 		p2.y = trap[n].top.y;
3564 		p1.x = trap[n].top.r;
3565 		p2.x = trap[n].bot.r;
3566 		polygon_add_line(tor.polygon, &p1, &p2, dx, dy);
3567 	}
3568 
3569 	tor_render(sna, &tor, &tmp, clip,
3570 		   choose_span(&tmp, dst, NULL, clip), false);
3571 
3572 	tor_fini(&tor);
3573 skip:
3574 	tmp.done(sna, &tmp);
3575 	return true;
3576 }
3577