/* cairo - a vector graphics library with display and print output
 *
 * Copyright © 2009 Kristian Høgsberg
 * Copyright © 2009 Chris Wilson
 * Copyright © 2009 Intel Corporation
 *
 * This library is free software; you can redistribute it and/or
 * modify it either under the terms of the GNU Lesser General Public
 * License version 2.1 as published by the Free Software Foundation
 * (the "LGPL") or, at your option, under the terms of the Mozilla
 * Public License Version 1.1 (the "MPL"). If you do not alter this
 * notice, a recipient may use your version of this file under either
 * the MPL or the LGPL.
 *
 * You should have received a copy of the LGPL along with this library
 * in the file COPYING-LGPL-2.1; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Suite 500, Boston, MA 02110-1335, USA
 * You should have received a copy of the MPL along with this library
 * in the file COPYING-MPL-1.1
 *
 * The contents of this file are subject to the Mozilla Public License
 * Version 1.1 (the "License"); you may not use this file except in
 * compliance with the License. You may obtain a copy of the License at
 * http://www.mozilla.org/MPL/
 *
 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY
 * OF ANY KIND, either express or implied. See the LGPL or the MPL for
 * the specific language governing rights and limitations.
 *
 * The Original Code is the cairo graphics library.
 *
 * Contributor(s):
 *	Chris Wilson <chris@chris-wilson.co.uk>
 *	Kristian Høgsberg <krh@bitplanet.net>
 */

#include "cairoint.h"

#include "cairo-error-private.h"
#include "cairo-drm-i965-private.h"
#include "cairo-surface-subsurface-private.h"
#include "cairo-surface-snapshot-private.h"

#include "cairo-drm-intel-brw-eu.h"

/* Theory of shaders:
 *
 * 3 types of rectangular inputs:
 *  (a) standard composite: x,y, use source, mask matrices to compute texcoords
 *  (b) spans: x,y, alpha, use source matrix
 *  (c) glyphs: x,y, s,t, use source matrix
 *
 * 5 types of pixel shaders:
 *  (a) Solid colour
 *  (b) Linear gradient (via 1D texture, with precomputed tex)
 *  (c) Radial gradient (per-pixel s computation, 1D texture)
 *  (d) Spans (mask only): apply opacity
 *  (e) Texture (includes glyphs).
 *
 *  Clip masks are limited to 2D textures only.
 */

/* XXX dual source blending for LERP + ComponentAlpha!!! */

#define BRW_GRF_BLOCKS(nreg)    ((nreg + 15) / 16 - 1)

#define SF_KERNEL_NUM_GRF  1
#define SF_MAX_THREADS	   24

#define PS_MAX_THREADS_CTG 50
#define PS_MAX_THREADS_BRW 32

#define URB_CS_ENTRY_SIZE     3 /* We need 4 matrices + 2 sources */
#define URB_CS_ENTRIES	      4 /* 4x sets of CONSTANT_BUFFER */

#define URB_VS_ENTRY_SIZE     1
#define URB_VS_ENTRIES	      8

#define URB_GS_ENTRY_SIZE     0
#define URB_GS_ENTRIES	      0

#define URB_CLIP_ENTRY_SIZE   0
#define URB_CLIP_ENTRIES      0

#define URB_SF_ENTRY_SIZE     1
#define URB_SF_ENTRIES	      (SF_MAX_THREADS + 1)

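/* Flush the render cache via PIPE_CONTROL and clear the pending batch
 * write domains of every bo on the device's flush list, presumably so
 * that subsequent pipelined reads from those buffers observe the
 * just-rendered data.
 */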
static void
i965_pipelined_flush (i965_device_t *device)
{
    intel_bo_t *bo, *next;

    if (device->batch.used == 0)
	return;

    OUT_BATCH (BRW_PIPE_CONTROL |
	       BRW_PIPE_CONTROL_NOWRITE |
	       BRW_PIPE_CONTROL_WC_FLUSH |
	       2);
    OUT_BATCH(0);   /* Destination address */
    OUT_BATCH(0);   /* Immediate data low DW */
    OUT_BATCH(0);   /* Immediate data high DW */

    cairo_list_foreach_entry_safe (bo, next, intel_bo_t, &device->flush, link) {
	bo->batch_write_domain = 0;
	cairo_list_init (&bo->link);
    }
    cairo_list_init (&device->flush);
}

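/* A solid pattern becomes a single vec4 of push constants. Note that
 * the colour components are premultiplied by alpha, matching cairo's
 * internal premultiplied-alpha pixel formats.
 */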
static cairo_status_t
i965_shader_acquire_solid (i965_shader_t *shader,
			   union i965_shader_channel *src,
			   const cairo_solid_pattern_t *solid,
			   const cairo_rectangle_int_t *extents)
{
    src->type.fragment = FS_CONSTANT;
    src->type.vertex = VS_NONE;
    src->type.pattern = PATTERN_SOLID;

    src->base.content = _cairo_color_get_content (&solid->color);
    src->base.constants[0] = solid->color.red   * solid->color.alpha;
    src->base.constants[1] = solid->color.green * solid->color.alpha;
    src->base.constants[2] = solid->color.blue  * solid->color.alpha;
    src->base.constants[3] = solid->color.alpha;
    src->base.constants_size = 4;

    return CAIRO_STATUS_SUCCESS;
}

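/* The gradient colour ramp is pre-rendered into a 1D texture, so all
 * the fragment shader needs is a matrix whose first row projects a
 * point onto the gradient vector: with d = pd2 - pd1 scaled by 1/|d|²,
 * the texcoord u = d·(p - pd1) is 0 at pd1 and 1 at pd2.
 */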
static cairo_status_t
i965_shader_acquire_linear (i965_shader_t *shader,
			    union i965_shader_channel *src,
			    const cairo_linear_pattern_t *linear,
			    const cairo_rectangle_int_t *extents)
{
    intel_buffer_t buffer;
    cairo_status_t status;
    double x0, y0, sf;
    double dx, dy, offset;

    status = intel_gradient_render (&i965_device (shader->target)->intel,
				    &linear->base, &buffer);
    if (unlikely (status))
	return status;

    src->type.vertex = VS_NONE;
    src->type.pattern = PATTERN_LINEAR;
    src->type.fragment = FS_LINEAR;
    src->base.bo = buffer.bo;
    src->base.content = CAIRO_CONTENT_COLOR_ALPHA;
    src->base.format = buffer.format;
    src->base.width  = buffer.width;
    src->base.height = buffer.height;
    src->base.stride = buffer.stride;
    src->base.filter = i965_filter (CAIRO_FILTER_BILINEAR);
    src->base.extend = i965_extend (linear->base.base.extend);

    dx = linear->pd2.x - linear->pd1.x;
    dy = linear->pd2.y - linear->pd1.y;
    sf = 1. / (dx * dx + dy * dy);
    dx *= sf;
    dy *= sf;

    x0 = linear->pd1.x;
    y0 = linear->pd1.y;
    offset = dx*x0 + dy*y0;

    if (_cairo_matrix_is_identity (&linear->base.base.matrix)) {
	src->base.matrix.xx = dx;
	src->base.matrix.xy = dy;
	src->base.matrix.x0 = -offset;
    } else {
	cairo_matrix_t m;

	cairo_matrix_init (&m, dx, 0, dy, 0, -offset, 0);
	cairo_matrix_multiply (&src->base.matrix, &linear->base.base.matrix, &m);
    }
    src->base.matrix.yx = 0.;
    src->base.matrix.yy = 1.;
    src->base.matrix.y0 = 0.;

    return CAIRO_STATUS_SUCCESS;
}

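/* As for the linear case the colour ramp lives in a 1D texture, but
 * the radial texcoord cannot be expressed as an affine transform: the
 * push constants set up below feed a per-pixel quadratic solve in the
 * fragment shader (see emit_wm_load_radial). The cheaper
 * concentric-circle path is currently disabled (FALSE &&), presumably
 * because, as the XXX notes, dr == 0 is only meaningful with PAD.
 */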
static cairo_status_t
i965_shader_acquire_radial (i965_shader_t *shader,
			    union i965_shader_channel *src,
			    const cairo_radial_pattern_t *radial,
			    const cairo_rectangle_int_t *extents)
{
    intel_buffer_t buffer;
    cairo_status_t status;
    double dx, dy, dr, r1;

    status = intel_gradient_render (&i965_device (shader->target)->intel,
				    &radial->base, &buffer);
    if (unlikely (status))
	return status;

    src->type.vertex = VS_NONE;
    src->type.pattern = PATTERN_RADIAL;
    src->type.fragment = FS_RADIAL;
    src->base.bo = buffer.bo;
    src->base.content = CAIRO_CONTENT_COLOR_ALPHA;
    src->base.format = buffer.format;
    src->base.width  = buffer.width;
    src->base.height = buffer.height;
    src->base.stride = buffer.stride;
    src->base.filter = i965_filter (CAIRO_FILTER_BILINEAR);
    src->base.extend = i965_extend (radial->base.base.extend);

    dx = radial->cd2.center.x - radial->cd1.center.x;
    dy = radial->cd2.center.y - radial->cd1.center.y;
    dr = radial->cd2.radius   - radial->cd1.radius;

    r1 = radial->cd1.radius;

    if (FALSE && (radial->cd2.center.x == radial->cd1.center.x &&
		  radial->cd2.center.y == radial->cd1.center.y))
    {
	/* XXX dr == 0, meaningless with anything other than PAD */
	src->base.constants[0] = radial->cd1.center.x / dr;
	src->base.constants[1] = radial->cd1.center.y / dr;
	src->base.constants[2] = 1. / dr;
	src->base.constants[3] = -r1 / dr;

	src->base.constants_size = 4;
	src->base.mode = RADIAL_ONE;
    } else {
	src->base.constants[0] = -radial->cd1.center.x;
	src->base.constants[1] = -radial->cd1.center.y;
	src->base.constants[2] = r1;
	src->base.constants[3] = -4 * (dx*dx + dy*dy - dr*dr);

	src->base.constants[4] = -2 * dx;
	src->base.constants[5] = -2 * dy;
	src->base.constants[6] = -2 * r1 * dr;
	src->base.constants[7] = 1 / (2 * (dx*dx + dy*dy - dr*dr));

	src->base.constants_size = 8;
	src->base.mode = RADIAL_TWO;
    }

    return CAIRO_STATUS_SUCCESS;
}

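/* Upload an image surface into a fresh GPU bo and attach the result
 * to the image as a snapshot, so that repeated uses of the same image
 * hit the snapshot cache instead of re-uploading.
 */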
static cairo_status_t
i965_surface_clone (i965_device_t *device,
		    cairo_image_surface_t *image,
		    i965_surface_t **clone_out)
{
    i965_surface_t *clone;
    cairo_status_t status;

    clone = (i965_surface_t *)
	i965_surface_create_internal (&device->intel.base,
				      image->base.content,
				      image->width,
				      image->height,
				      I965_TILING_DEFAULT,
				      FALSE);
    if (unlikely (clone->intel.drm.base.status))
	return clone->intel.drm.base.status;

    status = intel_bo_put_image (&device->intel,
				 to_intel_bo (clone->intel.drm.bo),
				 image,
				 0, 0,
				 image->width, image->height,
				 0, 0);

    if (unlikely (status)) {
	cairo_surface_destroy (&clone->intel.drm.base);
	return status;
    }

    status = intel_snapshot_cache_insert (&device->intel, &clone->intel);
    if (unlikely (status)) {
	cairo_surface_destroy (&clone->intel.drm.base);
	return status;
    }

    _cairo_surface_attach_snapshot (&image->base,
				    &clone->intel.drm.base,
				    intel_surface_detach_snapshot);

    *clone_out = clone;
    return CAIRO_STATUS_SUCCESS;
}

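/* As above, but upload only the interesting rectangle of an oversized
 * image. The clone's origin is the extents' origin, so the caller has
 * to compensate with a (-x, -y) translation when building the sample
 * matrix; the partial clone is not inserted into the snapshot cache.
 */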
static cairo_status_t
i965_surface_clone_subimage (i965_device_t *device,
			     cairo_image_surface_t *image,
			     const cairo_rectangle_int_t *extents,
			     i965_surface_t **clone_out)
{
    i965_surface_t *clone;
    cairo_status_t status;

    clone = (i965_surface_t *)
	i965_surface_create_internal (&device->intel.base,
				      image->base.content,
				      extents->width,
				      extents->height,
				      I965_TILING_DEFAULT,
				      FALSE);
    if (unlikely (clone->intel.drm.base.status))
	return clone->intel.drm.base.status;

    status = intel_bo_put_image (to_intel_device (clone->intel.drm.base.device),
				 to_intel_bo (clone->intel.drm.bo),
				 image,
				 extents->x, extents->y,
				 extents->width, extents->height,
				 0, 0);
    if (unlikely (status)) {
	cairo_surface_destroy (&clone->intel.drm.base);
	return status;
    }

    *clone_out = clone;
    return CAIRO_STATUS_SUCCESS;
}

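/* For a 1x1 effective source we can skip sampling altogether: fetch
 * the single pixel (rendering it to ARGB32 first if the image is in
 * another format) and fold it into FS_CONSTANT push constants, just
 * like a solid pattern.
 */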
static cairo_status_t
i965_shader_acquire_solid_surface (i965_shader_t *shader,
				   union i965_shader_channel *src,
				   cairo_surface_t *surface,
				   const cairo_rectangle_int_t *extents)
{
    cairo_image_surface_t *image;
    void *image_extra;
    cairo_status_t status;
    uint32_t argb;

    status = _cairo_surface_acquire_source_image (surface, &image, &image_extra);
    if (unlikely (status))
	return status;

    if (image->format != CAIRO_FORMAT_ARGB32) {
	cairo_surface_t *pixel;
	cairo_surface_pattern_t pattern;

	/* extract the pixel as argb32 */
	pixel = cairo_image_surface_create (CAIRO_FORMAT_ARGB32, 1, 1);
	_cairo_pattern_init_for_surface (&pattern, &image->base);
	cairo_matrix_init_translate (&pattern.base.matrix, extents->x, extents->y);
	pattern.base.filter = CAIRO_FILTER_NEAREST;
	status = _cairo_surface_paint (pixel, CAIRO_OPERATOR_SOURCE, &pattern.base, NULL);
	_cairo_pattern_fini (&pattern.base);

	if (unlikely (status)) {
	    _cairo_surface_release_source_image (surface, image, image_extra);
	    cairo_surface_destroy (pixel);
	    return status;
	}

	argb = *(uint32_t *) ((cairo_image_surface_t *) pixel)->data;
	cairo_surface_destroy (pixel);
    } else {
	argb = ((uint32_t *) (image->data + extents->y * image->stride))[extents->x];
    }

    _cairo_surface_release_source_image (surface, image, image_extra);

    if (argb >> 24 == 0)
	argb = 0;

    src->base.constants[0] = ((argb >> 16) & 0xff) / 255.;
    src->base.constants[1] = ((argb >>  8) & 0xff) / 255.;
    src->base.constants[2] = ((argb >>  0) & 0xff) / 255.;
    src->base.constants[3] = ((argb >> 24) & 0xff) / 255.;
    src->base.constants_size = 4;

    src->base.content  = CAIRO_CONTENT_COLOR_ALPHA;
    if (CAIRO_ALPHA_IS_OPAQUE(src->base.constants[3]))
	src->base.content &= ~CAIRO_CONTENT_ALPHA;
    src->type.fragment = FS_CONSTANT;
    src->type.vertex   = VS_NONE;
    src->type.pattern  = PATTERN_SOLID;

    return CAIRO_STATUS_SUCCESS;
}

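/* Resolve a surface pattern into something the sampler can read:
 *  - a DRM surface on the same device is used directly, flushing any
 *    CPU fallback and pending GPU writes first;
 *  - using the render target itself (or a subsurface of it) as the
 *    source forces a copy into a fresh bo;
 *  - anything else is pulled through the snapshot cache, or cloned
 *    from its CPU image.
 * Finally the pattern matrix is adjusted to map device coordinates to
 * normalised texture coordinates.
 */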
static cairo_status_t
i965_shader_acquire_surface (i965_shader_t *shader,
			     union i965_shader_channel *src,
			     const cairo_surface_pattern_t *pattern,
			     const cairo_rectangle_int_t *extents)
{
    cairo_surface_t *surface, *drm;
    cairo_matrix_t m;
    cairo_status_t status;
    int src_x = 0, src_y = 0;

    assert (src->type.fragment == FS_NONE);
    drm = surface = pattern->surface;

    if (surface->type == CAIRO_SURFACE_TYPE_DRM) {
	if (surface->backend->type == CAIRO_SURFACE_TYPE_SUBSURFACE) {
	    drm = ((cairo_surface_subsurface_t *) surface)->target;
	} else if (surface->backend->type == CAIRO_INTERNAL_SURFACE_TYPE_SNAPSHOT) {
	    drm = ((cairo_surface_snapshot_t *) surface)->target;
	}
    }

    src->type.pattern = PATTERN_SURFACE;
    src->surface.surface = NULL;
    if (drm->type == CAIRO_SURFACE_TYPE_DRM) {
	i965_surface_t *s = (i965_surface_t *) drm;

	if (surface->backend->type == CAIRO_SURFACE_TYPE_SUBSURFACE) {
	    if (s->intel.drm.base.device == shader->target->intel.drm.base.device) {
		cairo_surface_subsurface_t *sub = (cairo_surface_subsurface_t *) surface;
		if (s != shader->target) {
		    int x;

		    if (s->intel.drm.fallback != NULL) {
			status = intel_surface_flush (s, 0);
			if (unlikely (status))
			    return status;
		    }

		    if (to_intel_bo (s->intel.drm.bo)->batch_write_domain)
			i965_pipelined_flush (i965_device (s));

		    src->type.fragment = FS_SURFACE;

		    src->base.bo = to_intel_bo (s->intel.drm.bo);
		    src->base.format = s->intel.drm.format;
		    src->base.content = s->intel.drm.base.content;
		    src->base.width = sub->extents.width;
		    src->base.height = sub->extents.height;
		    src->base.stride = s->intel.drm.stride;

		    x = sub->extents.x;
		    if (s->intel.drm.format != CAIRO_FORMAT_A8)
			x *= 4;

		    /* XXX tiling restrictions upon offset? */
		    //src->base.offset[0] = s->offset + sub->extents.y * s->intel.drm.stride + x;
		} else {
		    i965_surface_t *clone;
		    cairo_surface_pattern_t pattern;

		    clone = (i965_surface_t *)
			i965_surface_create_internal ((cairo_drm_device_t *) s->intel.drm.base.device,
						      s->intel.drm.base.content,
						      sub->extents.width,
						      sub->extents.height,
						      I965_TILING_DEFAULT,
						      TRUE);
		    if (unlikely (clone->intel.drm.base.status))
			return clone->intel.drm.base.status;

		    _cairo_pattern_init_for_surface (&pattern, &s->intel.drm.base);
		    pattern.base.filter = CAIRO_FILTER_NEAREST;
		    cairo_matrix_init_translate (&pattern.base.matrix,
						 sub->extents.x, sub->extents.y);

		    status = _cairo_surface_paint (&clone->intel.drm.base,
						   CAIRO_OPERATOR_SOURCE,
						   &pattern.base,
						   NULL);

		    _cairo_pattern_fini (&pattern.base);

		    if (unlikely (status)) {
			cairo_surface_destroy (&clone->intel.drm.base);
			return status;
		    }

		    i965_pipelined_flush (i965_device (s));
		    src->type.fragment = FS_SURFACE;

		    src->base.bo = to_intel_bo (clone->intel.drm.bo);
		    src->base.format = clone->intel.drm.format;
		    src->base.content = clone->intel.drm.base.content;
		    src->base.width = clone->intel.drm.width;
		    src->base.height = clone->intel.drm.height;
		    src->base.stride = clone->intel.drm.stride;

		    src->surface.surface = &clone->intel.drm.base;
		}

		src_x = sub->extents.x;
		src_y = sub->extents.y;
	    }
	} else {
	    if (s->intel.drm.base.device == shader->target->intel.drm.base.device) {
		if (s != shader->target) {
		    if (s->intel.drm.fallback != NULL) {
			status = intel_surface_flush (s, 0);
			if (unlikely (status))
			    return status;
		    }

		    if (to_intel_bo (s->intel.drm.bo)->batch_write_domain)
			i965_pipelined_flush (i965_device (s));

		    src->type.fragment = FS_SURFACE;

		    src->base.bo = to_intel_bo (s->intel.drm.bo);
		    src->base.format = s->intel.drm.format;
		    src->base.content = s->intel.drm.base.content;
		    src->base.width = s->intel.drm.width;
		    src->base.height = s->intel.drm.height;
		    src->base.stride = s->intel.drm.stride;
		} else {
		    i965_surface_t *clone;
		    cairo_surface_pattern_t pattern;

		    clone = (i965_surface_t *)
			i965_surface_create_internal ((cairo_drm_device_t *) s->intel.drm.base.device,
						      s->intel.drm.base.content,
						      s->intel.drm.width,
						      s->intel.drm.height,
						      I965_TILING_DEFAULT,
						      TRUE);
		    if (unlikely (clone->intel.drm.base.status))
			return clone->intel.drm.base.status;

		    _cairo_pattern_init_for_surface (&pattern, &s->intel.drm.base);
		    pattern.base.filter = CAIRO_FILTER_NEAREST;
		    status = _cairo_surface_paint (&clone->intel.drm.base,
						   CAIRO_OPERATOR_SOURCE,
						   &pattern.base,
						   NULL);

		    _cairo_pattern_fini (&pattern.base);

		    if (unlikely (status)) {
			cairo_surface_destroy (&clone->intel.drm.base);
			return status;
		    }

		    i965_pipelined_flush (i965_device (s));
		    src->type.fragment = FS_SURFACE;

		    src->base.bo = to_intel_bo (clone->intel.drm.bo);
		    src->base.format = clone->intel.drm.format;
		    src->base.content = clone->intel.drm.base.content;
		    src->base.width = clone->intel.drm.width;
		    src->base.height = clone->intel.drm.height;
		    src->base.stride = clone->intel.drm.stride;

		    src->surface.surface = &clone->intel.drm.base;
		}
	    }
	}
    }

    if (src->type.fragment == FS_NONE) {
	i965_surface_t *s;

	if (extents->width == 1 && extents->height == 1) {
	    return i965_shader_acquire_solid_surface (shader, src,
						      surface, extents);
	}

	s = (i965_surface_t *)
	    _cairo_surface_has_snapshot (surface,
					 shader->target->intel.drm.base.backend);
	if (s != NULL) {
	    i965_device_t *device = i965_device (shader->target);
	    intel_bo_t *bo = to_intel_bo (s->intel.drm.bo);

	    if (bo->purgeable &&
		! intel_bo_madvise (&device->intel, bo, I915_MADV_WILLNEED))
	    {
		_cairo_surface_detach_snapshot (&s->intel.drm.base);
		s = NULL;
	    }

	    if (s != NULL)
		cairo_surface_reference (&s->intel.drm.base);
	}

	if (s == NULL) {
	    cairo_image_surface_t *image;
	    void *image_extra;
	    cairo_status_t status;

	    status = _cairo_surface_acquire_source_image (surface, &image, &image_extra);
	    if (unlikely (status))
		return status;

	    if (image->width < 8192 && image->height < 8192) {
		status = i965_surface_clone (i965_device (shader->target), image, &s);
	    } else {
		status = i965_surface_clone_subimage (i965_device (shader->target),
						      image, extents, &s);
		src_x = -extents->x;
		src_y = -extents->y;
	    }

	    _cairo_surface_release_source_image (surface, image, image_extra);

	    if (unlikely (status))
		return status;

	    /* XXX? */
	    //intel_bo_mark_purgeable (to_intel_bo (s->intel.drm.bo), TRUE);
	}

	src->type.fragment = FS_SURFACE;

	src->base.bo = to_intel_bo (s->intel.drm.bo);
	src->base.content = s->intel.drm.base.content;
	src->base.format = s->intel.drm.format;
	src->base.width  = s->intel.drm.width;
	src->base.height = s->intel.drm.height;
	src->base.stride = s->intel.drm.stride;

	src->surface.surface = &s->intel.drm.base;

	drm = &s->intel.drm.base;
    }

    /* XXX transform nx1 or 1xn surfaces to 1D? */

    src->type.vertex = VS_NONE;

    src->base.extend = i965_extend (pattern->base.extend);
    if (pattern->base.extend == CAIRO_EXTEND_NONE &&
	extents->x >= 0 && extents->y >= 0 &&
	extents->x + extents->width  <= src->base.width &&
	extents->y + extents->height <= src->base.height)
    {
	/* Convert a wholly contained NONE to a REFLECT as the contiguous sampler
	 * cannot handle CLAMP_BORDER textures.
	 */
	src->base.extend = i965_extend (CAIRO_EXTEND_REFLECT);
	/* XXX also need to check |u,v| < 3 */
    }

    src->base.filter = i965_filter (pattern->base.filter);
    if (_cairo_matrix_is_pixel_exact (&pattern->base.matrix))
	src->base.filter = i965_filter (CAIRO_FILTER_NEAREST);

    /* tweak the src matrix to map from dst to texture coordinates */
    src->base.matrix = pattern->base.matrix;
    if (src_x | src_y)
	cairo_matrix_translate (&src->base.matrix, src_x, src_y);
    cairo_matrix_init_scale (&m, 1. / src->base.width, 1. / src->base.height);
    cairo_matrix_multiply (&src->base.matrix, &src->base.matrix, &m);

    return CAIRO_STATUS_SUCCESS;
}

cairo_status_t
i965_shader_acquire_pattern (i965_shader_t *shader,
			     union i965_shader_channel *src,
			     const cairo_pattern_t *pattern,
			     const cairo_rectangle_int_t *extents)
{
    switch (pattern->type) {
    case CAIRO_PATTERN_TYPE_SOLID:
	return i965_shader_acquire_solid (shader, src,
					  (cairo_solid_pattern_t *) pattern,
					  extents);

    case CAIRO_PATTERN_TYPE_LINEAR:
	return i965_shader_acquire_linear (shader, src,
					   (cairo_linear_pattern_t *) pattern,
					   extents);

    case CAIRO_PATTERN_TYPE_RADIAL:
	return i965_shader_acquire_radial (shader, src,
					   (cairo_radial_pattern_t *) pattern,
					   extents);

    case CAIRO_PATTERN_TYPE_SURFACE:
	return i965_shader_acquire_surface (shader, src,
					    (cairo_surface_pattern_t *) pattern,
					    extents);

    default:
	ASSERT_NOT_REACHED;
	return CAIRO_STATUS_SUCCESS;
    }
}

static void
i965_shader_channel_init (union i965_shader_channel *channel)
{
    channel->type.vertex = VS_NONE;
    channel->type.fragment = FS_NONE;
    channel->type.pattern = PATTERN_NONE;

    channel->base.mode = 0;
    channel->base.bo = NULL;
    channel->base.filter = i965_filter (CAIRO_FILTER_NEAREST);
    channel->base.extend = i965_extend (CAIRO_EXTEND_NONE);
    channel->base.has_component_alpha = 0;
    channel->base.constants_size = 0;
}

void
i965_shader_init (i965_shader_t *shader,
		  i965_surface_t *dst,
		  cairo_operator_t op)
{
    shader->committed = FALSE;
    shader->device = i965_device (dst);
    shader->target = dst;
    shader->op = op;
    shader->constants_size = 0;

    shader->need_combine = FALSE;

    i965_shader_channel_init (&shader->source);
    i965_shader_channel_init (&shader->mask);
    i965_shader_channel_init (&shader->clip);
    i965_shader_channel_init (&shader->dst);
}

void
i965_shader_fini (i965_shader_t *shader)
{
    if (shader->source.type.pattern == PATTERN_SURFACE)
	cairo_surface_destroy (shader->source.surface.surface);
    if (shader->mask.type.pattern == PATTERN_SURFACE)
	cairo_surface_destroy (shader->mask.surface.surface);
    if (shader->clip.type.pattern == PATTERN_SURFACE)
	cairo_surface_destroy (shader->clip.surface.surface);
    if (shader->dst.type.pattern == PATTERN_SURFACE)
	cairo_surface_destroy (shader->dst.surface.surface);
}

void
i965_shader_set_clip (i965_shader_t *shader,
		      cairo_clip_t *clip)
{
    cairo_surface_t *clip_surface;
    int clip_x, clip_y;
    union i965_shader_channel *channel;
    i965_surface_t *s;

    clip_surface = _cairo_clip_get_surface (clip, &shader->target->intel.drm.base, &clip_x, &clip_y);
    assert (clip_surface->status == CAIRO_STATUS_SUCCESS);
    assert (clip_surface->type == CAIRO_SURFACE_TYPE_DRM);
    s = (i965_surface_t *) clip_surface;

    if (to_intel_bo (s->intel.drm.bo)->batch_write_domain)
	i965_pipelined_flush (i965_device (s));

    channel = &shader->clip;
    channel->type.pattern = PATTERN_BASE;
    channel->type.vertex  = VS_NONE;
    channel->type.fragment = FS_SURFACE;

    channel->base.bo = to_intel_bo (s->intel.drm.bo);
    channel->base.content = CAIRO_CONTENT_ALPHA;
    channel->base.format = CAIRO_FORMAT_A8;
    channel->base.width  = s->intel.drm.width;
    channel->base.height = s->intel.drm.height;
    channel->base.stride = s->intel.drm.stride;

    channel->base.extend = i965_extend (CAIRO_EXTEND_NONE);
    channel->base.filter = i965_filter (CAIRO_FILTER_NEAREST);

    cairo_matrix_init_scale (&shader->clip.base.matrix,
			     1. / s->intel.drm.width,
			     1. / s->intel.drm.height);

    cairo_matrix_translate (&shader->clip.base.matrix,
			    -clip_x, -clip_y);
}

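/* Conservatively estimate whether the buffers this shader would add
 * to the batch still fit: sum the sizes of any bos not already queued
 * for execution and compare against the available GTT space.
 */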
static cairo_bool_t
i965_shader_check_aperture (i965_shader_t *shader,
			    i965_device_t *device)
{
    uint32_t size = device->exec.gtt_size;

    if (shader->target != device->target) {
	const intel_bo_t *bo = to_intel_bo (shader->target->intel.drm.bo);
	if (bo->exec == NULL)
	    size += bo->base.size;
    }

    if (shader->source.base.bo != NULL && shader->source.base.bo != device->source) {
	const intel_bo_t *bo = shader->source.base.bo;
	if (bo->exec == NULL)
	    size += bo->base.size;
    }

    if (shader->mask.base.bo != NULL && shader->mask.base.bo != device->mask) {
	const intel_bo_t *bo = shader->mask.base.bo;
	if (bo->exec == NULL)
	    size += bo->base.size;
    }

    if (shader->clip.base.bo != NULL && shader->clip.base.bo != device->clip) {
	const intel_bo_t *bo = shader->clip.base.bo;
	if (bo->exec == NULL)
	    size += bo->base.size;
    }

    return size <= device->intel.gtt_avail_size;
}

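/* For operators that the fixed-function blender cannot express (the
 * extended blend modes, or an unbounded operator combined with a clip
 * surface) we fall back to combining in the fragment shader: steal
 * the target's bo to use as a texture, and redirect rendering into a
 * fresh bo that is swapped back into the target surface.
 */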
static cairo_status_t
i965_shader_setup_dst (i965_shader_t *shader)
{
    union i965_shader_channel *channel;
    i965_surface_t *s, *clone;

    /* We need to blend manually if we have a clip surface and an unbounded op,
     * or an extended blend mode.
     */
    if (shader->need_combine ||
	(shader->op < CAIRO_OPERATOR_SATURATE &&
	 (shader->clip.type.fragment == FS_NONE ||
	  _cairo_operator_bounded_by_mask (shader->op))))
    {
	return CAIRO_STATUS_SUCCESS;
    }

    shader->need_combine = TRUE;

    s = shader->target;

    /* we need to allocate a new render target and use the original as a source */
    clone = (i965_surface_t *)
	i965_surface_create_internal ((cairo_drm_device_t *) s->intel.drm.base.device,
				      s->intel.drm.base.content,
				      s->intel.drm.width,
				      s->intel.drm.height,
				      I965_TILING_DEFAULT,
				      TRUE);
    if (unlikely (clone->intel.drm.base.status))
	return clone->intel.drm.base.status;

    if (to_intel_bo (s->intel.drm.bo)->batch_write_domain)
	i965_pipelined_flush (i965_device (s));

    channel = &shader->dst;

    channel->type.vertex = VS_NONE;
    channel->type.fragment = FS_SURFACE;
    channel->type.pattern = PATTERN_SURFACE;

    /* swap buffer objects */
    channel->base.bo = to_intel_bo (s->intel.drm.bo);
    s->intel.drm.bo = ((cairo_drm_surface_t *) clone)->bo;
    ((cairo_drm_surface_t *) clone)->bo = &channel->base.bo->base;

    channel->base.content = s->intel.drm.base.content;
    channel->base.format  = s->intel.drm.format;
    channel->base.width   = s->intel.drm.width;
    channel->base.height  = s->intel.drm.height;
    channel->base.stride  = s->intel.drm.stride;

    channel->base.filter = i965_filter (CAIRO_FILTER_NEAREST);
    channel->base.extend = i965_extend (CAIRO_EXTEND_NONE);

    cairo_matrix_init_scale (&channel->base.matrix,
			     1. / s->intel.drm.width,
			     1. / s->intel.drm.height);

    channel->surface.surface = &clone->intel.drm.base;

    s->intel.drm.base.content = clone->intel.drm.base.content;
    s->intel.drm.format = clone->intel.drm.format;
    assert (s->intel.drm.width == clone->intel.drm.width);
    assert (s->intel.drm.height == clone->intel.drm.height);
    s->intel.drm.stride = clone->intel.drm.stride;

    return CAIRO_STATUS_SUCCESS;
}

static inline void
constant_add_float (i965_shader_t *shader, float v)
{
    shader->constants[shader->constants_size++] = v;
}

static inline void
i965_shader_copy_channel_constants (i965_shader_t *shader,
				    const union i965_shader_channel *channel)
{
    if (channel->base.constants_size) {
	assert (shader->constants_size + channel->base.constants_size < ARRAY_LENGTH (shader->constants));

	memcpy (shader->constants + shader->constants_size,
		channel->base.constants,
		sizeof (float) * channel->base.constants_size);
	shader->constants_size += channel->base.constants_size;
    }
}

static void
i965_shader_setup_channel_constants (i965_shader_t *shader,
				     const union i965_shader_channel *channel)
{
    switch (channel->type.fragment) {
    case FS_NONE:
    case FS_CONSTANT:
	/* no plane equations */
	break;

    case FS_LINEAR:
	constant_add_float (shader, channel->base.matrix.xx);
	constant_add_float (shader, channel->base.matrix.xy);
	constant_add_float (shader, 0);
	constant_add_float (shader, channel->base.matrix.x0);
	break;

    case FS_RADIAL:
    case FS_SURFACE:
	constant_add_float (shader, channel->base.matrix.xx);
	constant_add_float (shader, channel->base.matrix.xy);
	constant_add_float (shader, 0);
	constant_add_float (shader, channel->base.matrix.x0);

	constant_add_float (shader, channel->base.matrix.yx);
	constant_add_float (shader, channel->base.matrix.yy);
	constant_add_float (shader, 0);
	constant_add_float (shader, channel->base.matrix.y0);
	break;

    case FS_SPANS:
    case FS_GLYPHS:
	/* use pue from SF */
	break;
    }

    i965_shader_copy_channel_constants (shader, channel);
}

static void
i965_shader_setup_constants (i965_shader_t *shader)
{
    i965_shader_setup_channel_constants (shader, &shader->source);
    i965_shader_setup_channel_constants (shader, &shader->mask);
    i965_shader_setup_channel_constants (shader, &shader->clip);
    i965_shader_setup_channel_constants (shader, &shader->dst);
    assert (shader->constants_size < ARRAY_LENGTH (shader->constants));
}

/*
 * Highest-valued BLENDFACTOR used in i965_blend_op.
 *
 * This leaves out BRW_BLENDFACTOR_INV_DST_COLOR,
 * BRW_BLENDFACTOR_INV_CONST_{COLOR,ALPHA},
 * BRW_BLENDFACTOR_INV_SRC1_{COLOR,ALPHA}
 */
#define BRW_BLENDFACTOR_COUNT (BRW_BLENDFACTOR_INV_DST_ALPHA + 1)

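/* Map a cairo operator onto fixed-function blend factors, i.e.
 * dst' = src·src_blend + dst·dst_blend. For example OVER uses
 * (ONE, INV_SRC_ALPHA), giving dst' = src + (1 - α_src)·dst, the
 * premultiplied-alpha Porter-Duff over.
 */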
static void
i965_shader_get_blend_cntl (const i965_shader_t *shader,
			    uint32_t *sblend, uint32_t *dblend)
{
    static const struct blendinfo {
	cairo_bool_t dst_alpha;
	cairo_bool_t src_alpha;
	uint32_t src_blend;
	uint32_t dst_blend;
    } i965_blend_op[] = {
	/* CAIRO_OPERATOR_CLEAR: treated as SOURCE with a transparent source */
	{0, 0, BRW_BLENDFACTOR_ONE,           BRW_BLENDFACTOR_ZERO},
	/* CAIRO_OPERATOR_SOURCE */
	{0, 0, BRW_BLENDFACTOR_ONE,           BRW_BLENDFACTOR_ZERO},
	/* CAIRO_OPERATOR_OVER */
	{0, 1, BRW_BLENDFACTOR_ONE,           BRW_BLENDFACTOR_INV_SRC_ALPHA},
	/* CAIRO_OPERATOR_IN */
	{1, 0, BRW_BLENDFACTOR_DST_ALPHA,     BRW_BLENDFACTOR_ZERO},
	/* CAIRO_OPERATOR_OUT */
	{1, 0, BRW_BLENDFACTOR_INV_DST_ALPHA, BRW_BLENDFACTOR_ZERO},
	/* CAIRO_OPERATOR_ATOP */
	{1, 1, BRW_BLENDFACTOR_DST_ALPHA,     BRW_BLENDFACTOR_INV_SRC_ALPHA},

	/* CAIRO_OPERATOR_DEST */
	{0, 0, BRW_BLENDFACTOR_ZERO,          BRW_BLENDFACTOR_ONE},
	/* CAIRO_OPERATOR_DEST_OVER */
	{1, 0, BRW_BLENDFACTOR_INV_DST_ALPHA, BRW_BLENDFACTOR_ONE},
	/* CAIRO_OPERATOR_DEST_IN */
	{0, 1, BRW_BLENDFACTOR_ZERO,          BRW_BLENDFACTOR_SRC_ALPHA},
	/* CAIRO_OPERATOR_DEST_OUT */
	{0, 1, BRW_BLENDFACTOR_ZERO,          BRW_BLENDFACTOR_INV_SRC_ALPHA},
	/* CAIRO_OPERATOR_DEST_ATOP */
	{1, 1, BRW_BLENDFACTOR_INV_DST_ALPHA, BRW_BLENDFACTOR_SRC_ALPHA},
	/* CAIRO_OPERATOR_XOR */
	{1, 1, BRW_BLENDFACTOR_INV_DST_ALPHA, BRW_BLENDFACTOR_INV_SRC_ALPHA},
	/* CAIRO_OPERATOR_ADD */
	{0, 0, BRW_BLENDFACTOR_ONE,           BRW_BLENDFACTOR_ONE},
    };
    const struct blendinfo *op = &i965_blend_op[shader->op];

    *sblend = op->src_blend;
    *dblend = op->dst_blend;

    /* If there's no dst alpha channel, adjust the blend op so that we'll treat
     * it as always 1.
     */
    if (shader->target->intel.drm.base.content == CAIRO_CONTENT_COLOR &&
	op->dst_alpha)
    {
	if (*sblend == BRW_BLENDFACTOR_DST_ALPHA)
	    *sblend = BRW_BLENDFACTOR_ONE;
	else if (*sblend == BRW_BLENDFACTOR_INV_DST_ALPHA)
	    *sblend = BRW_BLENDFACTOR_ZERO;
    }
}

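/* The WM thread receives the upper-left pixel coordinate of each 2x2
 * subspan; the vector-float immediates below fan that out into
 * per-pixel coordinates: (0,1,0,1) supplies the X offsets and
 * (0,0,1,1) the Y offsets within each subspan.
 */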
static void
emit_wm_subpans_to_pixels (struct brw_compile *compile,
			   int tmp)
{
    /* Inputs:
     * R1.5 x/y of upper-left pixel of subspan 3
     * R1.4 x/y of upper-left pixel of subspan 2
     * R1.3 x/y of upper-left pixel of subspan 1
     * R1.2 x/y of upper-left pixel of subspan 0
     *
     * Outputs:
     * M1,2: u
     * M3,4: v
     *
     * upper left, upper right, lower left, lower right.
     */

    /* compute pixel locations for each subspan */
    brw_set_compression_control (compile, BRW_COMPRESSION_NONE);
    brw_ADD (compile,
	     brw_vec8_grf (tmp),
	     brw_reg (BRW_GENERAL_REGISTER_FILE, 1, 4,
		      BRW_REGISTER_TYPE_UW,
		      BRW_VERTICAL_STRIDE_2,
		      BRW_WIDTH_4,
		      BRW_HORIZONTAL_STRIDE_0,
		      BRW_SWIZZLE_NOOP,
		      WRITEMASK_XYZW),
	     brw_imm_vf4 (VF_ZERO, VF_ONE, VF_ZERO, VF_ONE));
    brw_ADD (compile,
	     brw_vec8_grf (tmp+1),
	     brw_reg (BRW_GENERAL_REGISTER_FILE, 1, 8,
		      BRW_REGISTER_TYPE_UW,
		      BRW_VERTICAL_STRIDE_2,
		      BRW_WIDTH_4,
		      BRW_HORIZONTAL_STRIDE_0,
		      BRW_SWIZZLE_NOOP,
		      WRITEMASK_XYZW),
	     brw_imm_vf4 (VF_ZERO, VF_ONE, VF_ZERO, VF_ONE));
    brw_ADD (compile,
	     brw_vec8_grf (tmp+2),
	     brw_reg (BRW_GENERAL_REGISTER_FILE, 1, 5,
		      BRW_REGISTER_TYPE_UW,
		      BRW_VERTICAL_STRIDE_2,
		      BRW_WIDTH_4,
		      BRW_HORIZONTAL_STRIDE_0,
		      BRW_SWIZZLE_NOOP,
		      WRITEMASK_XYZW),
	     brw_imm_vf4 (VF_ZERO, VF_ZERO, VF_ONE, VF_ONE));
    brw_ADD (compile,
	     brw_vec8_grf (tmp+3),
	     brw_reg (BRW_GENERAL_REGISTER_FILE, 1, 9,
		      BRW_REGISTER_TYPE_UW,
		      BRW_VERTICAL_STRIDE_2,
		      BRW_WIDTH_4,
		      BRW_HORIZONTAL_STRIDE_0,
		      BRW_SWIZZLE_NOOP,
		      WRITEMASK_XYZW),
	     brw_imm_vf4 (VF_ZERO, VF_ZERO, VF_ONE, VF_ONE));
    brw_set_compression_control (compile, BRW_COMPRESSION_COMPRESSED);
}

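/* Evaluate the channel's affine matrix over the pixel grid with
 * LINE+MAC: LINE forms m.xx·x + m.x0 in the accumulator and MAC adds
 * m.xy·y, writing u straight into the sampler message registers; the
 * second pair does the same for v using the matrix's second row.
 */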
static void
emit_wm_affine (struct brw_compile *compile,
		int tmp, int reg, int msg)
{
    emit_wm_subpans_to_pixels (compile, tmp);

    brw_LINE (compile,
	      brw_null_reg (),
	      brw_vec1_grf (reg, 0),
	      brw_vec8_grf (tmp));
    brw_MAC (compile,
	     brw_message_reg (msg + 1),
	     brw_vec1_grf (reg, 1),
	     brw_vec8_grf (tmp+2));

    brw_LINE (compile,
	      brw_null_reg (),
	      brw_vec1_grf (reg, 4),
	      brw_vec8_grf (tmp));
    brw_MAC (compile,
	     brw_message_reg (msg + 3),
	     brw_vec1_grf (reg, 5),
	     brw_vec8_grf (tmp+2));
}

static void
emit_wm_glyph (struct brw_compile *compile,
	       int tmp, int vue, int msg)
{
    emit_wm_subpans_to_pixels (compile, tmp);

    brw_MUL (compile,
	     brw_null_reg (),
	     brw_vec8_grf (tmp),
	     brw_imm_f (1./1024));
    brw_ADD (compile,
	     brw_message_reg (msg + 1),
	     brw_acc_reg (),
	     brw_vec1_grf (vue, 0));

    brw_MUL (compile,
	     brw_null_reg (),
	     brw_vec8_grf (tmp + 2),
	     brw_imm_f (1./1024));
    brw_ADD (compile,
	     brw_message_reg (msg + 3),
	     brw_acc_reg (),
	     brw_vec1_grf (vue, 1));
}

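/* Broadcast the four solid-colour floats from the constant URB entry:
 * each channel is a width-1, stride-0 region, so every pixel in the
 * SIMD16 group reads the same scalar.
 */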
static void
emit_wm_load_constant (struct brw_compile *compile,
		       int reg,
		       struct brw_reg *result)
{
    int n;

    for (n = 0; n < 4; n++) {
	result[n] = result[n+4] = brw_reg (BRW_GENERAL_REGISTER_FILE, reg, n,
					   BRW_REGISTER_TYPE_F,
					   BRW_VERTICAL_STRIDE_0,
					   BRW_WIDTH_1,
					   BRW_HORIZONTAL_STRIDE_0,
					   BRW_SWIZZLE_XXXX,
					   WRITEMASK_XYZW);
    }
}

static void
emit_wm_load_opacity (struct brw_compile *compile,
		      int reg,
		      struct brw_reg *result)
{
    result[0] = result[1] = result[2] = result[3] =
	result[4] = result[5] = result[6] = result[7] =
	brw_reg (BRW_GENERAL_REGISTER_FILE, reg, 0,
		 BRW_REGISTER_TYPE_F,
		 BRW_VERTICAL_STRIDE_0,
		 BRW_WIDTH_1,
		 BRW_HORIZONTAL_STRIDE_1,
		 BRW_SWIZZLE_XXXX,
		 WRITEMASK_XYZW);
}

static void
emit_wm_load_linear (struct brw_compile *compile,
		     int tmp, int reg, int msg)
{
    emit_wm_subpans_to_pixels (compile, tmp);

    brw_LINE (compile,
	      brw_null_reg(),
	      brw_vec1_grf (reg, 0),
	      brw_vec8_grf (tmp));
    brw_MAC (compile,
	     brw_message_reg(msg + 1),
	     brw_vec1_grf (reg, 1),
	     brw_vec8_grf (tmp + 2));
}

static void
emit_wm_load_radial (struct brw_compile *compile,
		     int reg, int msg)

{
    struct brw_reg c1x = brw_vec1_grf (reg, 0);
    struct brw_reg c1y = brw_vec1_grf (reg, 1);
    struct brw_reg minus_r_sq = brw_vec1_grf (reg, 3);
    struct brw_reg cdx = brw_vec1_grf (reg, 4);
    struct brw_reg cdy = brw_vec1_grf (reg, 5);
    struct brw_reg neg_4a = brw_vec1_grf (reg + 1, 0);
    struct brw_reg inv_2a = brw_vec1_grf (reg + 1, 1);

    struct brw_reg tmp_x = brw_uw16_grf (30, 0);
    struct brw_reg tmp_y = brw_uw16_grf (28, 0);
    struct brw_reg det = brw_vec8_grf (22);
    struct brw_reg b = brw_vec8_grf (20);
    struct brw_reg c = brw_vec8_grf (18);
    struct brw_reg pdx = brw_vec8_grf (16);
    struct brw_reg pdy = brw_vec8_grf (14);
    struct brw_reg t = brw_message_reg (msg + 1);

    /* cdx = (c₂x - c₁x)
     * cdy = (c₂y - c₁y)
     *  dr =  r₂-r₁
     * pdx =  px - c₁x
     * pdy =  py - c₁y
     *
     * A = cdx² + cdy² - dr²
     * B = -2·(pdx·cdx + pdy·cdy + r₁·dr)
     * C = pdx² + pdy² - r₁²
     *
     * t = (-B ± √(B² - 4·A·C)) / (2·A)
     */

    brw_ADD (compile, pdx, vec8 (tmp_x), negate (c1x));
    brw_ADD (compile, pdy, vec8 (tmp_y), negate (c1y));

    brw_LINE (compile, brw_null_reg (), cdx, pdx);
    brw_MAC (compile, b, cdy, pdy);

    brw_MUL (compile, brw_null_reg (), pdx, pdx);
    brw_MAC (compile, c, pdy, pdy);
    brw_ADD (compile, c, c, minus_r_sq);

    brw_MUL (compile, brw_null_reg (), b, b);
    brw_MAC (compile, det, neg_4a, c);

    /* XXX use rsqrt like i915?, it's faster and we need to mac anyway */
    brw_math (compile,
	      det,
	      BRW_MATH_FUNCTION_SQRT,
	      BRW_MATH_SATURATE_NONE,
	      2,
	      det,
	      BRW_MATH_DATA_VECTOR,
	      BRW_MATH_PRECISION_FULL);

    /* XXX cmp, +- */

    brw_ADD (compile, det, negate (det), negate (b));
    brw_ADD (compile, det, det, negate (b));
    brw_MUL (compile, t, det, inv_2a);
}

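/* Issue a SIMD16 sample message. For an alpha-only source the 0x7000
 * channel mask written into the message header appears to disable the
 * colour channels, so two response GRFs suffice and the result is
 * replicated across all four output channels; a full-colour surface
 * returns all four channels in eight GRFs.
 */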
static int
emit_wm_sample (struct brw_compile *compile,
		union i965_shader_channel *channel,
		int sampler,
		int msg_base, int msg_len,
		int dst,
		struct brw_reg *result)
{
    int response_len, mask;

    if (channel->base.content == CAIRO_CONTENT_ALPHA) {
	mask = 0x7000;
	response_len = 2;
	result[0] = result[1] = result[2] = result[3] = brw_vec8_grf (dst);
	result[4] = result[5] = result[6] = result[7] = brw_vec8_grf (dst + 1);
    } else {
	mask = 0;
	response_len = 8;
	result[0] = brw_vec8_grf (dst + 0);
	result[1] = brw_vec8_grf (dst + 2);
	result[2] = brw_vec8_grf (dst + 4);
	result[3] = brw_vec8_grf (dst + 6);
	result[4] = brw_vec8_grf (dst + 1);
	result[5] = brw_vec8_grf (dst + 3);
	result[6] = brw_vec8_grf (dst + 5);
	result[7] = brw_vec8_grf (dst + 7);
    }

    brw_set_compression_control (compile, BRW_COMPRESSION_NONE);

    brw_set_mask_control (compile, BRW_MASK_DISABLE);
    brw_MOV (compile,
	     get_element_ud (brw_vec8_grf (0), 2),
	     brw_imm_ud (mask));
    brw_set_mask_control (compile, BRW_MASK_ENABLE);

    brw_SAMPLE (compile,
		brw_uw16_grf (dst, 0),
		msg_base,
		brw_uw8_grf (0, 0),
		sampler + 1, /* binding table */
		sampler,
		WRITEMASK_XYZW,
		BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE,
		response_len,
		msg_len,
		0 /* eot */);

    brw_set_compression_control (compile, BRW_COMPRESSION_COMPRESSED);

    return response_len;
}

#define MAX_MSG_REGISTER 16

static void
emit_wm_load_channel (struct brw_compile *compile,
		      union i965_shader_channel *channel,
		      int *vue,
		      int *cue,
		      int *msg,
		      int *sampler,
		      int *grf,
		      struct brw_reg *result)
{
    switch (channel->type.fragment) {
    case FS_NONE:
	break;

    case FS_CONSTANT:
	emit_wm_load_constant (compile, *cue, result);
	*cue += 1;
	break;

    case FS_RADIAL:
	emit_wm_load_radial (compile, *cue, *msg);
	*cue += 2;

	if (*msg + 3 > MAX_MSG_REGISTER)
	    *msg = 1;

	*grf += emit_wm_sample (compile, channel, *sampler, *msg, 3, *grf, result);
	*sampler += 1;
	*msg += 3;
	break;

    case FS_LINEAR:
	emit_wm_load_linear (compile, *grf, *cue, *msg);
	*cue += 1;

	if (*msg + 3 > MAX_MSG_REGISTER)
	    *msg = 1;

	*grf += emit_wm_sample (compile, channel, *sampler, *msg, 3, *grf, result);
	*sampler += 1;
	*msg += 3;
	break;

    case FS_SURFACE:
	emit_wm_affine (compile, *grf, *cue, *msg);
	*cue += 2;

	if (*msg + 5 > MAX_MSG_REGISTER)
	    *msg = 1;

	*grf += emit_wm_sample (compile, channel, *sampler, *msg, 5, *grf, result);
	*sampler += 1;
	*msg += 5;
	break;

    case FS_SPANS:
	emit_wm_load_opacity (compile, *vue, result);
	*vue += 1;
	break;

    case FS_GLYPHS:
	emit_wm_glyph (compile, *grf, *vue, *msg);
	*vue += 1;

	if (*msg + 5 > MAX_MSG_REGISTER)
	    *msg = 1;

	*grf += emit_wm_sample (compile, channel, *sampler, *msg, 5, *grf, result);
	*sampler += 1;
	*msg += 5;
	break;
    }
}

static unsigned long
i965_wm_kernel_hash (const i965_shader_t *shader)
{
    unsigned long hash;

    hash =
	(shader->source.type.fragment & 0xff) |
	(shader->mask.type.fragment & 0xff) << 8 |
	(shader->clip.type.fragment & 0xff) << 16;
    if (shader->need_combine)
	hash |= (1 + shader->op) << 24;

    return hash;
}

static void
i965_wm_kernel_init (struct i965_wm_kernel *key,
		     const i965_shader_t *shader)
{
    key->entry.hash = i965_wm_kernel_hash (shader);
}

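/* Per-channel constant URB footprint in 128-bit units, indexed by the
 * fragment type (assuming the enum order NONE, CONSTANT, LINEAR,
 * RADIAL, SURFACE, SPANS, GLYPHS): a constant colour is one vec4, a
 * linear plane equation one, radial takes its matrix plus eight
 * constants, a surface its two matrix rows; spans and glyphs carry
 * their data in the vertex URB instead.
 */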
static uint32_t
i965_shader_const_urb_length (i965_shader_t *shader)
{
    const int lengths[] = { 0, 1, 1, 4, 2, 0, 0 };
    int count = 0; /* 128-bit/16-byte increments */

    count += lengths[shader->source.type.fragment];
    count += lengths[shader->mask.type.fragment];
    count += lengths[shader->clip.type.fragment];
    count += lengths[shader->dst.type.fragment];

    return (count + 1) / 2; /* 256-bit/32-byte increments */
}

static uint32_t
i965_shader_pue_length (i965_shader_t *shader)
{
    return 1 + (shader->mask.type.vertex != VS_NONE);
}

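/* Assemble (or fetch from the kernel cache) the pixel shader for this
 * shader combination. A plain solid colour on a tiled target takes a
 * fast path: a single replicated-data render target write. Otherwise
 * each channel is loaded and sampled in turn, the results multiplied
 * together, and the final colour written out with an EOT fb write.
 */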
1405 static uint32_t
create_wm_kernel(i965_device_t * device,i965_shader_t * shader,int * num_reg)1406 create_wm_kernel (i965_device_t *device,
1407 		  i965_shader_t *shader,
1408 		  int *num_reg)
1409 {
1410     struct brw_compile compile;
1411     struct brw_reg source[8], mask[8], clip[8], dst[8];
1412     const uint32_t *program;
1413     uint32_t size;
1414     int msg, cue, vue, grf, sampler;
1415     int i;
1416 
1417     struct i965_wm_kernel key, *cache;
1418     cairo_status_t status;
1419     uint32_t offset;
1420 
1421     i965_wm_kernel_init (&key, shader);
1422     cache = _cairo_hash_table_lookup (device->wm_kernels, &key.entry);
1423     if (cache != NULL)
1424 	return cache->offset;
1425 
1426     brw_compile_init (&compile, device->is_g4x);
1427 
1428     if (key.entry.hash == FS_CONSTANT &&
1429 	to_intel_bo (shader->target->intel.drm.bo)->tiling)
1430     {
1431 	struct brw_instruction *insn;
1432 
1433 	assert (i965_shader_const_urb_length (shader) == 1);
1434 	brw_MOV (&compile, brw_message4_reg (2), brw_vec4_grf (2, 0));
1435 	grf = 3;
1436 
1437 	brw_push_insn_state (&compile);
1438 	brw_set_mask_control (&compile, BRW_MASK_DISABLE); /* ? */
1439 	brw_MOV (&compile,
1440 		 retype (brw_message_reg (1), BRW_REGISTER_TYPE_UD),
1441 		 retype (brw_vec8_grf (1), BRW_REGISTER_TYPE_UD));
1442 	brw_pop_insn_state (&compile);
1443 
1444 	insn = brw_next_instruction (&compile, BRW_OPCODE_SEND);
1445 	insn->header.predicate_control = 0;
1446 	insn->header.compression_control = BRW_COMPRESSION_NONE;
1447 	insn->header.destreg__conditonalmod = 0;
1448 
1449 	brw_instruction_set_destination (insn,
1450 					 retype (vec16 (brw_acc_reg ()),
1451 						 BRW_REGISTER_TYPE_UW));
1452 
1453 	brw_instruction_set_source0 (insn,
1454 				     retype (brw_vec8_grf (0),
1455 					     BRW_REGISTER_TYPE_UW));
1456 
1457 	brw_instruction_set_dp_write_message (insn,
1458 					      0,
1459 					      BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED, /* msg_control */
1460 					      BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE, /* msg_type */
1461 					      3,
1462 					      1,	/* pixel scoreboard */
1463 					      0,
1464 					      TRUE);
1465     }
1466     else
1467     {
1468 	msg = 1;
1469 	cue = 2;
1470 	vue = cue + i965_shader_const_urb_length (shader);
1471 	grf = vue + i965_shader_pue_length (shader);
1472 	sampler = 0;
1473 
1474 	brw_set_compression_control (&compile, BRW_COMPRESSION_COMPRESSED);
1475 	emit_wm_load_channel (&compile, &shader->source,
1476 			      &vue, &cue, &msg, &sampler, &grf,
1477 			      source);
1478 	emit_wm_load_channel (&compile, &shader->mask,
1479 			      &vue, &cue, &msg, &sampler, &grf,
1480 			      mask);
1481 	emit_wm_load_channel (&compile, &shader->clip,
1482 			      &vue, &cue, &msg, &sampler, &grf,
1483 			      clip);
1484 	emit_wm_load_channel (&compile, &shader->dst,
1485 			      &vue, &cue, &msg, &sampler, &grf,
1486 			      dst);
1487 	brw_set_compression_control (&compile, BRW_COMPRESSION_NONE);
1488 
1489 	if (shader->need_combine) {
1490 	    if (shader->mask.type.fragment != FS_NONE &&
1491 		shader->clip.type.fragment != FS_NONE)
1492 	    {
1493 		for (i = 0; i < 8; i++)
1494 		    brw_MUL (&compile, mask[i], mask[i], clip[i]);
1495 	    }
1496 
1497 	    /* XXX LERP ! */
1498 	    for (i = 0; i < 8; i++)
1499 		brw_MOV (&compile, brw_message_reg (2 + i), source[i]);
1500 	} else {
1501 	    if (shader->mask.type.fragment != FS_NONE) {
1502 		if (shader->clip.type.fragment != FS_NONE) {
1503 		    for (i = 0; i < 8; i++)
1504 			brw_MUL (&compile, mask[i], mask[i], clip[i]);
1505 		}
1506 
1507 		for (i = 0; i < 8; i++)
1508 		    brw_MUL (&compile, brw_message_reg (2 + i), source[i], mask[i]);
1509 	    } else {
1510 		if (shader->clip.type.fragment != FS_NONE) {
1511 		    for (i = 0; i < 8; i++)
1512 			brw_MUL (&compile, brw_message_reg (2 + i), source[i], clip[i]);
1513 		} else {
1514 		    for (i = 0; i < 8; i++)
1515 			brw_MOV (&compile, brw_message_reg (2 + i), source[i]);
1516 		}
1517 	    }
1518 	}
1519 
1520 	brw_push_insn_state (&compile);
1521 	brw_set_mask_control (&compile, BRW_MASK_DISABLE); /* ? */
1522 	brw_MOV (&compile,
1523 		 retype (brw_message_reg (1), BRW_REGISTER_TYPE_UD),
1524 		 retype (brw_vec8_grf (1), BRW_REGISTER_TYPE_UD));
1525 	brw_pop_insn_state (&compile);
1526 
1527 	brw_fb_WRITE (&compile,
1528 		      retype (vec16 (brw_acc_reg ()), BRW_REGISTER_TYPE_UW),
1529 		      0,		/* base reg */
1530 		      retype (brw_vec8_grf (0), BRW_REGISTER_TYPE_UW),
1531 		      0,		/* binding table index */
1532 		      2 + 8,	/* msg length */
1533 		      0,		/* response length */
1534 		      TRUE);	/* EOT */
1535     }
1536 
1537     program = brw_get_program (&compile, &size);
1538     *num_reg = grf;
1539 
1540     i965_stream_align (&device->general, 64);
1541     offset = i965_stream_emit (&device->general, program, size);
1542 
1543     cache = _cairo_freelist_alloc (&device->wm_kernel_freelist);
1544     if (likely (cache != NULL)) {
1545 	i965_wm_kernel_init (cache, shader);
1546 	cache->offset = offset;
1547 	status = _cairo_hash_table_insert (device->wm_kernels, &cache->entry);
1548 	if (unlikely (status))
1549 	    _cairo_freelist_free (&device->wm_kernel_freelist, cache);
1550     }
1551 
1552     return offset;
1553 }
1554 
static uint32_t
create_sf_kernel (i965_device_t *device,
		  i965_shader_t *shader)
{
    struct brw_compile compile;
    const uint32_t *program;
    uint32_t size;
    int msg_len;

    brw_compile_init (&compile, device->is_g4x);

    switch (shader->mask.type.vertex) {
    default:
    case VS_NONE:
	/* use curb plane eq in WM */
	msg_len = 1;
	break;

    case VS_SPANS:
	/* just a constant opacity */
	brw_MOV (&compile,
		 brw_message4_reg (1),
		 brw_vec4_grf (3, 0));
	msg_len = 2;
	break;

    case VS_GLYPHS:
	/* an offset+sf into the glyph cache */
	brw_MOV (&compile,
		 brw_acc_reg (),
		 brw_vec2_grf (3, 0));
	brw_MAC (&compile,
		 brw_message4_reg (1),
		 negate (brw_vec2_grf (1, 4)),
		 brw_imm_f (1./1024));
	msg_len = 2;
	break;
    }

    brw_urb_WRITE (&compile,
		   brw_null_reg (),
		   0,
		   brw_vec8_grf (0), /* r0, will be copied to m0 */
		   0,	/* allocate */
		   1,	/* used */
		   msg_len,
		   0,	/* response len */
		   1,	/* eot */
		   1,	/* writes complete */
		   0,	/* offset */
		   BRW_URB_SWIZZLE_NONE);

    program = brw_get_program (&compile, &size);

    i965_stream_align (&device->general, 64);
    return i965_stream_emit (&device->general, program, size);
}

static uint32_t
i965_sf_kernel (const i965_shader_t *shader)
{
    return shader->mask.type.vertex;
}

static void
i965_sf_state_init (struct i965_sf_state *key,
		    const i965_shader_t *shader)
{
    key->entry.hash = i965_sf_kernel (shader);
}

cairo_bool_t
i965_sf_state_equal (const void *A, const void *B)
{
    const cairo_hash_entry_t *a = A, *b = B;
    return a->hash == b->hash;
}

/*
 * Sets up the SF state pointing at an SF kernel.
 *
 * The SF kernel does coord interp: for each attribute,
 * calculate dA/dx and dA/dy.  Hand these interpolation coefficients
 * back to SF which then hands pixels off to WM.
 */
static uint32_t
gen4_create_sf_state (i965_device_t *device,
		      i965_shader_t *shader)
{
    struct brw_sf_unit_state *state;
    struct i965_sf_state key, *cache;
    cairo_status_t status;
    uint32_t offset;

    i965_sf_state_init (&key, shader);
    if (i965_sf_state_equal (&key, &device->sf_state))
	return device->sf_state.offset;

    cache = _cairo_hash_table_lookup (device->sf_states, &key.entry);
    if (cache != NULL) {
	offset = cache->offset;
	goto DONE;
    }

    offset = create_sf_kernel (device, shader);

    state = i965_stream_alloc (&device->general, 32, sizeof (*state));
    memset (state, 0, sizeof (*state));

    state->thread0.grf_reg_count = BRW_GRF_BLOCKS (3);
    assert ((offset & 63) == 0);
    state->thread0.kernel_start_pointer = offset >> 6;
    state->sf1.single_program_flow = 1;
    state->thread3.urb_entry_read_length = 1; /* 1 URB per vertex */
    state->thread3.urb_entry_read_offset = 1;
    state->thread3.dispatch_grf_start_reg = 3;
    state->thread4.max_threads = SF_MAX_THREADS - 1;
    state->thread4.urb_entry_allocation_size = URB_SF_ENTRY_SIZE - 1;
    state->thread4.nr_urb_entries = URB_SF_ENTRIES;
    state->sf6.dest_org_vbias = 0x8;
    state->sf6.dest_org_hbias = 0x8;

    offset = i965_stream_offsetof (&device->general, state);

    cache = _cairo_freelist_alloc (&device->sf_freelist);
    if (likely (cache != NULL)) {
	i965_sf_state_init (cache, shader);
	cache->offset = offset;
	status = _cairo_hash_table_insert (device->sf_states, &cache->entry);
	if (unlikely (status))
	    _cairo_freelist_free (&device->sf_freelist, cache);
    }

  DONE:
    i965_sf_state_init (&device->sf_state, shader);
    device->sf_state.offset = offset;

    return offset;
}

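/* Pack the (filter, extend) pair of each active channel into a single
 * hash.  Each channel contributes an 8-bit field: filter in the low
 * nibble, extend in the next, so the four possible channels fit
 * exactly into 32 bits with no collisions between channels.
 */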
static unsigned long
i965_shader_sampler_hash (const i965_shader_t *shader)
{
    unsigned long hash = 0;
    unsigned int offset = 0;

    if (shader->source.base.bo != NULL) {
	hash |= (shader->source.base.filter << offset) |
	        (shader->source.base.extend << (offset + 4));
	offset += 8;
    }

    if (shader->mask.base.bo != NULL) {
	hash |= (shader->mask.base.filter << offset) |
	        (shader->mask.base.extend << (offset + 4));
	offset += 8;
    }

    if (shader->clip.base.bo != NULL) {
	hash |= (shader->clip.base.filter << offset) |
	        (shader->clip.base.extend << (offset + 4));
	offset += 8;
    }

    if (shader->dst.base.bo != NULL) {
	hash |= (shader->dst.base.filter << offset) |
	        (shader->dst.base.extend << (offset + 4));
	offset += 8;
    }

    return hash;
}

static void
i965_sampler_init (struct i965_sampler *key,
		   const i965_shader_t *shader)
{
    key->entry.hash = i965_shader_sampler_hash (shader);
}

static void
emit_sampler_channel (i965_device_t *device,
		      const union i965_shader_channel *channel,
		      uint32_t border_color)
{
    struct brw_sampler_state *state;

    state = i965_stream_alloc (&device->general, 0, sizeof (*state));
    memset (state, 0, sizeof (*state));

    state->ss0.lod_preclamp = 1; /* GL mode */

    state->ss0.border_color_mode = BRW_BORDER_COLOR_MODE_LEGACY;

    state->ss0.min_filter = channel->base.filter;
    state->ss0.mag_filter = channel->base.filter;

    state->ss1.r_wrap_mode = channel->base.extend;
    state->ss1.s_wrap_mode = channel->base.extend;
    state->ss1.t_wrap_mode = channel->base.extend;

    assert ((border_color & 31) == 0);
    state->ss2.border_color_pointer = border_color >> 5;
}

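/* Emit one SAMPLER_STATE entry per active channel, all sharing a single
 * transparent-black legacy border color.  The border color is emitted
 * lazily on first use; thereafter the table is looked up in (and added
 * to) the device sampler cache keyed by the hash above.
 */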
static uint32_t
emit_sampler_state_table (i965_device_t *device,
			  i965_shader_t *shader)
{
    struct i965_sampler key, *cache;
    cairo_status_t status;
    uint32_t offset;

    if (device->border_color_offset == (uint32_t) -1) {
	struct brw_sampler_legacy_border_color *border_color;

	border_color = i965_stream_alloc (&device->general, 32,
					  sizeof (*border_color));
	border_color->color[0] = 0; /* R */
	border_color->color[1] = 0; /* G */
	border_color->color[2] = 0; /* B */
	border_color->color[3] = 0; /* A */

	device->border_color_offset = i965_stream_offsetof (&device->general,
							    border_color);
    } else {
	i965_sampler_init (&key, shader);
	cache = _cairo_hash_table_lookup (device->samplers, &key.entry);
	if (cache != NULL)
	    return cache->offset;
    }

    i965_stream_align (&device->general, 32);
    offset = device->general.used;
    if (shader->source.base.bo != NULL) {
	emit_sampler_channel (device,
			      &shader->source,
			      device->border_color_offset);
    }
    if (shader->mask.base.bo != NULL) {
	emit_sampler_channel (device,
			      &shader->mask,
			      device->border_color_offset);
    }
    if (shader->clip.base.bo != NULL) {
	emit_sampler_channel (device,
			      &shader->clip,
			      device->border_color_offset);
    }
    if (shader->dst.base.bo != NULL) {
	emit_sampler_channel (device,
			      &shader->dst,
			      device->border_color_offset);
    }

    cache = _cairo_freelist_alloc (&device->sampler_freelist);
    if (likely (cache != NULL)) {
	i965_sampler_init (cache, shader);
	cache->offset = offset;
	status = _cairo_hash_table_insert (device->samplers, &cache->entry);
	if (unlikely (status))
	    _cairo_freelist_free (&device->sampler_freelist, cache);
    }

    return offset;
}

static void
i965_cc_state_init (struct i965_cc_state *key,
		    const i965_shader_t *shader)
{
    uint32_t src_blend, dst_blend;

    if (shader->need_combine)
	src_blend = dst_blend = 0;
    else
	i965_shader_get_blend_cntl (shader, &src_blend, &dst_blend);

    key->entry.hash = src_blend | ((dst_blend & 0xffff) << 16);
}

cairo_bool_t
i965_cc_state_equal (const void *A, const void *B)
{
    const cairo_hash_entry_t *a = A, *b = B;
    return a->hash == b->hash;
}

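/* Emit (or reuse) a colour-calculator unit state for this shader's
 * blend function.  Blending is only disabled when the shader has to
 * combine source and destination itself (need_combine); otherwise the
 * colour and independent-alpha paths use the same ADD function and
 * blend factors, clamped on both sides of the blend.
 */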
static uint32_t
cc_state_emit (i965_device_t *device, i965_shader_t *shader)
{
    struct brw_cc_unit_state *state;
    struct i965_cc_state key, *cache;
    cairo_status_t status;
    uint32_t src_blend, dst_blend;
    uint32_t offset;

    i965_cc_state_init (&key, shader);
    if (i965_cc_state_equal (&key, &device->cc_state))
	return device->cc_state.offset;

    cache = _cairo_hash_table_lookup (device->cc_states, &key.entry);
    if (cache != NULL) {
	offset = cache->offset;
	goto DONE;
    }

    if (shader->need_combine)
	src_blend = dst_blend = 0;
    else
	i965_shader_get_blend_cntl (shader, &src_blend, &dst_blend);

    state = i965_stream_alloc (&device->general, 64, sizeof (*state));
    memset (state, 0, sizeof (*state));

    /* XXX Note errata, need to flush render cache when blend_enable 0 -> 1 */
    /* XXX 2 source blend */
    state->cc3.blend_enable = ! shader->need_combine;
    state->cc5.ia_blend_function = BRW_BLENDFUNCTION_ADD;
    state->cc5.ia_src_blend_factor  = src_blend;
    state->cc5.ia_dest_blend_factor = dst_blend;
    state->cc6.blend_function = BRW_BLENDFUNCTION_ADD;
    state->cc6.clamp_post_alpha_blend = 1;
    state->cc6.clamp_pre_alpha_blend  = 1;
    state->cc6.src_blend_factor  = src_blend;
    state->cc6.dest_blend_factor = dst_blend;

    offset = i965_stream_offsetof (&device->general, state);

    cache = _cairo_freelist_alloc (&device->cc_freelist);
    if (likely (cache != NULL)) {
	i965_cc_state_init (cache, shader);
	cache->offset = offset;
	status = _cairo_hash_table_insert (device->cc_states, &cache->entry);
	if (unlikely (status))
	    _cairo_freelist_free (&device->cc_freelist, cache);
    }

  DONE:
    i965_cc_state_init (&device->cc_state, shader);
    device->cc_state.offset = offset;

    return offset;
}

static void
i965_wm_state_init (struct i965_wm_state *key,
		    const i965_shader_t *shader)
{
    key->kernel = i965_wm_kernel_hash (shader);
    key->sampler = i965_shader_sampler_hash (shader);

    key->entry.hash = key->kernel ^ ((key->sampler << 16) | (key->sampler >> 16));
}

cairo_bool_t
i965_wm_state_equal (const void *A, const void *B)
{
    const struct i965_wm_state *a = A, *b = B;

    if (a->entry.hash != b->entry.hash)
	return FALSE;

    return a->kernel == b->kernel && a->sampler == b->sampler;
}

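/* Count the surfaces the WM will reference: the render target always
 * occupies slot 0, followed by one slot per texturing channel (the
 * source unless it folded into constants, any sampled mask, and the
 * clip and destination surfaces when present).
 */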
static int
i965_shader_binding_table_count (i965_shader_t *shader)
{
    int count;

    count = 1;
    if (shader->source.type.fragment != FS_CONSTANT)
	count++;
    switch (shader->mask.type.fragment) {
    case FS_NONE:
    case FS_CONSTANT:
    case FS_SPANS:
	break;
    case FS_LINEAR:
    case FS_RADIAL:
    case FS_SURFACE:
    case FS_GLYPHS:
	count++;
    }
    if (shader->clip.type.fragment == FS_SURFACE)
	count++;
    if (shader->dst.type.fragment == FS_SURFACE)
	count++;

    return count;
}

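/* Build the WM unit state: compile (or find in cache) the pixel-shader
 * kernel and the sampler table, then point a brw_wm_unit_state at both.
 * The combined state is cached on the (kernel, sampler) pair, so a
 * shader repeating the previous combination costs only a hash probe.
 */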
static uint32_t
gen4_create_wm_state (i965_device_t *device,
		      i965_shader_t *shader)
{
    struct brw_wm_unit_state *state;
    uint32_t sampler;
    uint32_t kernel;

    struct i965_wm_state key, *cache;
    cairo_status_t status;
    int num_reg;

    i965_wm_state_init (&key, shader);
    if (i965_wm_state_equal (&key, &device->wm_state))
	return device->wm_state.offset;

    cache = _cairo_hash_table_lookup (device->wm_states, &key.entry);
    if (cache != NULL) {
	device->wm_state = *cache;
	return cache->offset;
    }

    kernel = create_wm_kernel (device, shader, &num_reg);
    sampler = emit_sampler_state_table (device, shader);

    state = i965_stream_alloc (&device->general, 32, sizeof (*state));
    memset (state, 0, sizeof (*state));
    state->thread0.grf_reg_count = BRW_GRF_BLOCKS (num_reg);
    assert ((kernel & 63) == 0);
    state->thread0.kernel_start_pointer = kernel >> 6;

    state->thread3.dispatch_grf_start_reg = 2;

    state->wm4.sampler_count = 1; /* 1-4 samplers used */
    assert ((sampler & 31) == 0);
    state->wm4.sampler_state_pointer = sampler >> 5;
    if (device->is_g4x)
	state->wm5.max_threads = PS_MAX_THREADS_CTG - 1;
    else
	state->wm5.max_threads = PS_MAX_THREADS_BRW - 1;
    state->wm5.thread_dispatch_enable = 1;

    if (device->is_g4x) {
	/* XXX contiguous 32 pixel dispatch */
    }
    state->wm5.enable_16_pix = 1;
    /* 8 pixel dispatch and friends */
    //state->wm5.early_depth_test = 1;

    state->thread1.binding_table_entry_count = i965_shader_binding_table_count (shader);
    state->thread3.urb_entry_read_length = i965_shader_pue_length (shader);
    state->thread3.const_urb_entry_read_length = i965_shader_const_urb_length (shader);

    key.offset = i965_stream_offsetof (&device->general, state);

    cache = _cairo_freelist_alloc (&device->wm_state_freelist);
    if (likely (cache != NULL)) {
	*cache = key;
	status = _cairo_hash_table_insert (device->wm_states, &cache->entry);
	if (unlikely (status))
	    _cairo_freelist_free (&device->wm_state_freelist, cache);
    }

    device->wm_state = key;
    return key.offset;
}

static uint32_t
vs_unit_state_emit (i965_device_t *device)
{
    if (device->vs_offset == (uint32_t) -1) {
	struct brw_vs_unit_state *state;

	/* Set up the vertex shader to be disabled (passthrough) */
	state = i965_stream_alloc (&device->general, 32, sizeof (*state));
	memset (state, 0, sizeof (*state));

	state->thread4.nr_urb_entries = URB_VS_ENTRIES;
	state->thread4.urb_entry_allocation_size = URB_VS_ENTRY_SIZE - 1;
	state->vs6.vert_cache_disable = 1;

	device->vs_offset = i965_stream_offsetof (&device->general, state);
    }

    return device->vs_offset;
}

static uint32_t
i965_get_card_format (cairo_format_t format)
{
    switch (format) {
    case CAIRO_FORMAT_ARGB32:
	return BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
    case CAIRO_FORMAT_RGB24:
	return BRW_SURFACEFORMAT_B8G8R8X8_UNORM;
    case CAIRO_FORMAT_RGB16_565:
	return BRW_SURFACEFORMAT_B5G6R5_UNORM;
    case CAIRO_FORMAT_A8:
	return BRW_SURFACEFORMAT_A8_UNORM;
    case CAIRO_FORMAT_A1:
    case CAIRO_FORMAT_INVALID:
    default:
	ASSERT_NOT_REACHED;
	return 0;
    }
}

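/* Render-target formats differ from the sampling formats above in one
 * case: RGB24 is written as B8G8R8A8 rather than B8G8R8X8, presumably
 * because the X8 variant is not a valid render-target format on this
 * generation; the undefined alpha byte is simply ignored on readback.
 */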
static uint32_t
i965_get_dest_format (cairo_format_t format)
{
    switch (format) {
    case CAIRO_FORMAT_ARGB32:
    case CAIRO_FORMAT_RGB24:
        return BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
    case CAIRO_FORMAT_RGB16_565:
        return BRW_SURFACEFORMAT_B5G6R5_UNORM;
    case CAIRO_FORMAT_A8:
        return BRW_SURFACEFORMAT_A8_UNORM;
    case CAIRO_FORMAT_A1:
    case CAIRO_FORMAT_INVALID:
    default:
	ASSERT_NOT_REACHED;
	return 0;
    }
}

/* XXX silly inline due to compiler bug... */
static inline void
i965_stream_add_pending_relocation (i965_stream_t *stream,
				    uint32_t target_offset,
				    uint32_t read_domains,
				    uint32_t write_domain,
				    uint32_t delta)
{
    int n;

    n = stream->num_pending_relocations++;
    assert (n < stream->max_pending_relocations);

    stream->pending_relocations[n].offset = target_offset;
    stream->pending_relocations[n].read_domains = read_domains;
    stream->pending_relocations[n].write_domain = write_domain;
    stream->pending_relocations[n].delta = delta;
}

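/* Emit a SURFACE_STATE describing @bo into the surface stream and
 * record a relocation for its base address.  Targets are bound for
 * render reads and writes, textures for sampler reads only; the bo's
 * tiling mode is propagated into ss3 so the sampler and render cache
 * address the buffer correctly.
 */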
static uint32_t
emit_surface_state (i965_device_t *device,
		    cairo_bool_t is_target,
		    intel_bo_t *bo,
		    cairo_format_t format,
		    int width, int height, int stride,
		    int type)
{
    struct brw_surface_state *state;
    uint32_t write_domain, read_domains;
    uint32_t offset;

    state = i965_stream_alloc (&device->surface, 32, sizeof (*state));
    memset (state, 0, sizeof (*state));

    state->ss0.surface_type = type;
    if (is_target)
	state->ss0.surface_format = i965_get_dest_format (format);
    else
	state->ss0.surface_format = i965_get_card_format (format);

    state->ss0.data_return_format = BRW_SURFACERETURNFORMAT_FLOAT32;
    state->ss0.color_blend = 1;
    if (is_target && device->is_g4x)
	state->ss0.render_cache_read_mode = 1;

    state->ss1.base_addr = bo->offset;

    state->ss2.height = height - 1;
    state->ss2.width  = width  - 1;
    state->ss3.pitch  = stride - 1;
    state->ss3.tile_walk = bo->tiling == I915_TILING_Y;
    state->ss3.tiled_surface = bo->tiling != I915_TILING_NONE;

    if (is_target) {
	read_domains = I915_GEM_DOMAIN_RENDER;
	write_domain = I915_GEM_DOMAIN_RENDER;
    } else {
	read_domains = I915_GEM_DOMAIN_SAMPLER;
	write_domain = 0;
    }

    offset = i965_stream_offsetof (&device->surface, state);
    i965_emit_relocation (device, &device->surface,
			  bo, 0,
			  read_domains, write_domain,
			  offset + offsetof (struct brw_surface_state, ss1.base_addr));
    return offset;
}

static uint32_t
emit_surface_state_for_shader (i965_device_t *device,
			       const union i965_shader_channel *channel)
{
    int type = BRW_SURFACE_2D;

    assert (channel->type.fragment != FS_NONE);
    assert (channel->type.fragment != FS_CONSTANT);

    if (channel->type.fragment != FS_SURFACE)
	type = BRW_SURFACE_1D;

    return emit_surface_state (device, FALSE,
			       channel->base.bo,
			       channel->base.format,
			       channel->base.width,
			       channel->base.height,
			       channel->base.stride,
			       type);
}

cairo_bool_t
i965_wm_binding_equal (const void *A,
		       const void *B)
{
    const struct i965_wm_binding *a = A, *b = B;

    if (a->entry.hash != b->entry.hash)
	return FALSE;

    if (a->size != b->size)
	return FALSE;

    return memcmp (a->table, b->table, sizeof (uint32_t) * a->size) == 0;
}

static void
i965_wm_binding_init (struct i965_wm_binding *state,
		      const uint32_t *table,
		      int size)
{
    int n;

    state->entry.hash = size;
    state->size = size;

    for (n = 0; n < size; n++) {
	unsigned int s = (8 * n) & 31;

	state->table[n] = table[n];
	/* rotate left by 8*n bits; mask the shift so it stays defined
	 * when 8*n is 0 or reaches the register width */
	state->entry.hash ^= (table[n] << s) |
	                     (s ? table[n] >> (32 - s) : 0);
    }
}

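/* Assemble the binding table for the current shader: slot 0 is the
 * render target, followed by one surface per texturing channel.
 * Surface states are reused within a stream generation (tracked via
 * the serial stamped in opaque0/stream), and if the finished table
 * matches the previous or a cached one, the freshly written copy is
 * discarded by rewinding the stream.
 */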
static uint32_t
emit_binding_table (i965_device_t *device,
		    i965_shader_t *shader)
{
    intel_bo_t *bo;
    struct i965_wm_binding key, *cache;
    uint32_t *table;
    int n = 0;

    table = i965_stream_alloc (&device->surface, 32, 5 * sizeof (uint32_t));
    if (shader->target->stream != device->surface.serial) {
	shader->target->stream = device->surface.serial;
	shader->target->offset = emit_surface_state (device,
						     TRUE,
						     to_intel_bo (shader->target->intel.drm.bo),
						     shader->target->intel.drm.format,
						     shader->target->intel.drm.width,
						     shader->target->intel.drm.height,
						     shader->target->intel.drm.stride,
						     BRW_SURFACE_2D);
    }
    table[n++] = shader->target->offset;

    bo = shader->source.base.bo;
    if (bo != NULL) {
	if (bo->opaque0 != device->surface.serial) {
	    bo->opaque0 = device->surface.serial;
	    bo->opaque1 = emit_surface_state_for_shader (device, &shader->source);
	}
	table[n++] = bo->opaque1;
    }

    bo = shader->mask.base.bo;
    if (bo != NULL) {
	if (bo->opaque0 != device->surface.serial) {
	    bo->opaque0 = device->surface.serial;
	    bo->opaque1 = emit_surface_state_for_shader (device, &shader->mask);
	}
	table[n++] = bo->opaque1;
    }

    bo = shader->clip.base.bo;
    if (bo != NULL) {
	if (bo->opaque0 != device->surface.serial) {
	    bo->opaque0 = device->surface.serial;
	    bo->opaque1 = emit_surface_state_for_shader (device, &shader->clip);
	}
	table[n++] = bo->opaque1;
    }

    bo = shader->dst.base.bo;
    if (bo != NULL) {
	if (bo->opaque0 != device->surface.serial) {
	    bo->opaque0 = device->surface.serial;
	    bo->opaque1 = emit_surface_state_for_shader (device, &shader->dst);
	}
	table[n++] = bo->opaque1;
    }

    i965_wm_binding_init (&key, table, n);
    key.offset = i965_stream_offsetof (&device->surface, table);

    if (i965_wm_binding_equal (&key, &device->wm_binding)) {
	device->surface.used = key.offset;
	return device->wm_binding.offset;
    }

    cache = _cairo_hash_table_lookup (device->wm_bindings, &key.entry);
    if (cache != NULL) {
	device->surface.used = key.offset;
	key.offset = cache->offset;
    }

    device->wm_binding = key;
    return key.offset;
}

static void
i965_emit_invariants (i965_device_t *device)
{
    OUT_BATCH (BRW_CS_URB_STATE | 0);
    OUT_BATCH (((URB_CS_ENTRY_SIZE - 1) << 4) | (URB_CS_ENTRIES << 0));
}

static void
i965_emit_urb_fences (i965_device_t *device)
{
    int urb_vs_start, urb_vs_size;
    int urb_gs_start, urb_gs_size;
    int urb_clip_start, urb_clip_size;
    int urb_sf_start, urb_sf_size;
    int urb_cs_start, urb_cs_size;

    if (device->have_urb_fences)
	return;

    /* URB fence: partition the URB between the VS, GS, CLIP, SF and CS
     * units in that order, each fence recording the end of its region. */
    urb_vs_start = 0;
    urb_vs_size = URB_VS_ENTRIES * URB_VS_ENTRY_SIZE;
    urb_gs_start = urb_vs_start + urb_vs_size;
    urb_gs_size = URB_GS_ENTRIES * URB_GS_ENTRY_SIZE;
    urb_clip_start = urb_gs_start + urb_gs_size;
    urb_clip_size = URB_CLIP_ENTRIES * URB_CLIP_ENTRY_SIZE;
    urb_sf_start = urb_clip_start + urb_clip_size;
    urb_sf_size = URB_SF_ENTRIES * URB_SF_ENTRY_SIZE;
    urb_cs_start = urb_sf_start + urb_sf_size;
    urb_cs_size = URB_CS_ENTRIES * URB_CS_ENTRY_SIZE;

    /* erratum: URB_FENCE must not cross a 64-byte cache-line */
    while ((device->batch.used & 63) > 64 - 12)
	OUT_BATCH (MI_NOOP);
    OUT_BATCH (BRW_URB_FENCE |
	       UF0_CS_REALLOC |
	       UF0_SF_REALLOC |
	       UF0_CLIP_REALLOC |
	       UF0_GS_REALLOC |
	       UF0_VS_REALLOC |
	       1);
    OUT_BATCH (((urb_clip_start + urb_clip_size) << UF1_CLIP_FENCE_SHIFT) |
	       ((urb_gs_start + urb_gs_size) << UF1_GS_FENCE_SHIFT) |
	       ((urb_vs_start + urb_vs_size) << UF1_VS_FENCE_SHIFT));
    OUT_BATCH (((urb_cs_start + urb_cs_size) << UF2_CS_FENCE_SHIFT) |
	       ((urb_sf_start + urb_sf_size) << UF2_SF_FENCE_SHIFT));

    device->have_urb_fences = TRUE;
    device->constants_size = 0;
}

static void
i965_emit_base (i965_device_t *device)
{
    OUT_BATCH (BRW_STATE_BASE_ADDRESS | 4);
    if (likely (device->general.num_pending_relocations == 0)) {
	i965_stream_add_pending_relocation (&device->general,
					    device->batch.used,
					    I915_GEM_DOMAIN_INSTRUCTION, 0,
					    BASE_ADDRESS_MODIFY);
    }
    OUT_BATCH (0); /* pending relocation */

    if (likely (device->surface.num_pending_relocations == 0)) {
	i965_stream_add_pending_relocation (&device->surface,
					    device->batch.used,
					    I915_GEM_DOMAIN_INSTRUCTION, 0,
					    BASE_ADDRESS_MODIFY);
    }
    OUT_BATCH (0); /* pending relocation */

    OUT_BATCH (0 | BASE_ADDRESS_MODIFY);
    /* general state max addr, disabled */
    OUT_BATCH (0x10000000 | BASE_ADDRESS_MODIFY);
    /* media object state max addr, disabled */
    OUT_BATCH (0x10000000 | BASE_ADDRESS_MODIFY);
}

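/* Describe the vertex layout to the fixed-function fetcher.  Every
 * vertex starts with an x,y float pair (expanded to x,y,0,1); spans
 * append a single float of per-span alpha, glyphs a pair of 16-bit
 * float texcoords.  The layout is only re-emitted when it differs
 * from the one currently programmed.
 */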
static void
i965_emit_vertex_element (i965_device_t *device,
			  i965_shader_t *shader)
{
    uint32_t offset;
    uint32_t type;
    int nelem;

    type = 0;
    nelem = 1;
    if (shader->mask.type.vertex == VS_SPANS ||
	shader->mask.type.vertex == VS_GLYPHS)
    {
	type = shader->mask.type.vertex;
	nelem++;
    }

    if (type == device->vertex_type)
	return;
    device->vertex_type = type;

    offset = 0;

    OUT_BATCH (BRW_3DSTATE_VERTEX_ELEMENTS | ((2 * nelem) - 1));
    OUT_BATCH ((0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
	       VE0_VALID |
	       (BRW_SURFACEFORMAT_R32G32_FLOAT	<< VE0_FORMAT_SHIFT) |
	       (offset				<< VE0_OFFSET_SHIFT));
    OUT_BATCH ((BRW_VFCOMPONENT_STORE_SRC	<< VE1_VFCOMPONENT_0_SHIFT) |
	       (BRW_VFCOMPONENT_STORE_SRC	<< VE1_VFCOMPONENT_1_SHIFT) |
	       (BRW_VFCOMPONENT_STORE_0		<< VE1_VFCOMPONENT_2_SHIFT) |
	       (BRW_VFCOMPONENT_STORE_1_FLT	<< VE1_VFCOMPONENT_3_SHIFT) |
	       (4 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));
    offset += 8;

    assert (shader->source.type.vertex == VS_NONE);
    switch (shader->mask.type.vertex) {
    default:
    case VS_NONE:
	break;

    case VS_SPANS:
	OUT_BATCH ((0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
		   VE0_VALID |
		   (BRW_SURFACEFORMAT_R32_FLOAT << VE0_FORMAT_SHIFT) |
		   (offset			<< VE0_OFFSET_SHIFT));
	OUT_BATCH ((BRW_VFCOMPONENT_STORE_SRC	<< VE1_VFCOMPONENT_0_SHIFT) |
		   (BRW_VFCOMPONENT_NOSTORE	<< VE1_VFCOMPONENT_1_SHIFT) |
		   (BRW_VFCOMPONENT_NOSTORE	<< VE1_VFCOMPONENT_2_SHIFT) |
		   (BRW_VFCOMPONENT_NOSTORE	<< VE1_VFCOMPONENT_3_SHIFT) |
		   (8 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));

	offset += 4;
	break;

    case VS_GLYPHS:
	OUT_BATCH ((0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
		   VE0_VALID |
		   (BRW_SURFACEFORMAT_R16G16_FLOAT << VE0_FORMAT_SHIFT) |
		   (offset			<< VE0_OFFSET_SHIFT));
	OUT_BATCH ((BRW_VFCOMPONENT_STORE_SRC	<< VE1_VFCOMPONENT_0_SHIFT) |
		   (BRW_VFCOMPONENT_STORE_SRC	<< VE1_VFCOMPONENT_1_SHIFT) |
		   (BRW_VFCOMPONENT_NOSTORE	<< VE1_VFCOMPONENT_2_SHIFT) |
		   (BRW_VFCOMPONENT_NOSTORE	<< VE1_VFCOMPONENT_3_SHIFT) |
		   (8 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));

	offset += 4;
	break;
    }
    assert (shader->clip.type.vertex == VS_NONE);
    assert (shader->dst.type.vertex == VS_NONE);

    device->vertex_size = offset;
    i965_stream_align (&device->vertex, device->vertex_size);
    device->vertex.committed = device->vertex.used;

    device->rectangle_size = 3 * offset;
}

static cairo_bool_t
i965_shader_needs_surface_update (const i965_shader_t *shader,
				  const i965_device_t *device)
{
    return device->target != shader->target || shader->target->stream == 0 ||
	(shader->source.base.bo != NULL && device->source != shader->source.base.bo) ||
	(shader->mask.base.bo != NULL && device->mask != shader->mask.base.bo) ||
	(shader->clip.base.bo != NULL && device->clip != shader->clip.base.bo);
}

static cairo_bool_t
i965_shader_needs_constants_update (const i965_shader_t *shader,
				    const i965_device_t *device)
{
    if (shader->constants_size == 0)
	return FALSE;

    if (device->constants_size != shader->constants_size)
	return TRUE;

    return memcmp (device->constants,
		   shader->constants,
		   sizeof (float) * shader->constants_size);
}

static cairo_bool_t
i965_shader_needs_state_update (const i965_shader_t *shader,
				const i965_device_t *device)
{
    union {
	struct i965_sf_state sf;
	struct i965_wm_state wm;
	struct i965_cc_state cc;
    } state;

    i965_sf_state_init (&state.sf, shader);
    if (! i965_sf_state_equal (&state.sf, &device->sf_state))
	return TRUE;

    i965_wm_state_init (&state.wm, shader);
    if (! i965_wm_state_equal (&state.wm, &device->wm_state))
	return TRUE;

    i965_cc_state_init (&state.cc, shader);
    if (! i965_cc_state_equal (&state.cc, &device->cc_state))
	return TRUE;

    return FALSE;
}

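/* Flush the accumulated shader state into the batch: rebind surfaces
 * and the drawing rectangle, repoint the pipelined unit states (and
 * hence refence the URB), upload fresh constants, and describe the
 * vertex layout.  Each block is guarded so an unchanged piece of
 * state costs nothing.
 */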
static void
i965_emit_composite (i965_device_t *device,
		     i965_shader_t *shader)
{
    uint32_t draw_rectangle;

    if (i965_shader_needs_surface_update (shader, device)) {
	uint32_t offset;

	offset = emit_binding_table (device, shader);

	/* Only the PS uses the binding table */
	OUT_BATCH (BRW_3DSTATE_BINDING_TABLE_POINTERS | 4);
	OUT_BATCH (0); /* vs */
	OUT_BATCH (0); /* gs */
	OUT_BATCH (0); /* clip */
	OUT_BATCH (0); /* sf */
	OUT_BATCH (offset);

	device->target = shader->target;
	device->source = shader->source.base.bo;
	device->mask = shader->mask.base.bo;
	device->clip = shader->clip.base.bo;
    }

    /* The drawing rectangle clipping is always on.  Set it to values that
     * shouldn't do any clipping.
     */
    draw_rectangle = DRAW_YMAX (shader->target->intel.drm.height) |
	             DRAW_XMAX (shader->target->intel.drm.width);
    if (draw_rectangle != device->draw_rectangle) {
	OUT_BATCH (BRW_3DSTATE_DRAWING_RECTANGLE | 2);
	OUT_BATCH (0x00000000);	/* ymin, xmin */
	OUT_BATCH (draw_rectangle);
	OUT_BATCH (0x00000000);	/* yorigin, xorigin */
	device->draw_rectangle = draw_rectangle;
    }

    /* skip the depth buffer */
    /* skip the polygon stipple */
    /* skip the polygon stipple offset */
    /* skip the line stipple */

    /* Set the pointers to the 3d pipeline state */
    if (i965_shader_needs_state_update (shader, device)) {
	OUT_BATCH (BRW_3DSTATE_PIPELINED_POINTERS | 5);
	OUT_BATCH (vs_unit_state_emit (device));
	OUT_BATCH (BRW_GS_DISABLE);
	OUT_BATCH (BRW_CLIP_DISABLE);
	OUT_BATCH (gen4_create_sf_state (device, shader));
	OUT_BATCH (gen4_create_wm_state (device, shader));
	OUT_BATCH (cc_state_emit (device, shader));

	/* Once the units are initialized, we need to set up the fences */
	i965_emit_urb_fences (device);
    }

    if (i965_shader_needs_constants_update (shader, device)) {
	uint32_t size = (sizeof (float) * shader->constants_size + 63) & -64;

	/* XXX reuse clear/black/white in the constant hash table! */

	/* XXX CONSTANT_BUFFER Address Offset Disable? INSTPM? */

	assert (size <= 64 * URB_CS_ENTRY_SIZE);
	assert (((sizeof (float) * shader->constants_size + 31) & -32) == 32 * i965_shader_const_urb_length (shader));

	device->constants = i965_stream_alloc (&device->surface, 64, size);
	memcpy (device->constants, shader->constants, size);
	device->constants_size = shader->constants_size;

	OUT_BATCH (BRW_CONSTANT_BUFFER | (1 << 8));
	OUT_BATCH (i965_stream_offsetof (&device->surface, device->constants) + size / 64 - 1);
    }

    i965_emit_vertex_element (device, shader);
}

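/* Emit a 3DPRIMITIVE for any vertices queued since the last flush.
 * The vertex buffer packet is only re-emitted when the vertex stride
 * changes; otherwise the new rectangles are drawn straight out of the
 * existing buffer by start offset and count.
 */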
void
i965_flush_vertices (i965_device_t *device)
{
    int vertex_count, vertex_start;

    if (device->vertex.used == device->vertex.committed)
	return;

    assert (device->vertex.used > device->vertex.committed);

    vertex_start = device->vertex.committed / device->vertex_size;
    vertex_count =
	(device->vertex.used - device->vertex.committed) / device->vertex_size;

    assert (vertex_count);

    if (device->vertex_size != device->last_vertex_size) {
	i965_stream_add_pending_relocation (&device->vertex,
					    device->batch.used + 8,
					    I915_GEM_DOMAIN_VERTEX, 0,
					    0);

	OUT_BATCH (BRW_3DSTATE_VERTEX_BUFFERS | 3);
	OUT_BATCH ((0 << VB0_BUFFER_INDEX_SHIFT) |
		   VB0_VERTEXDATA |
		   (device->vertex_size << VB0_BUFFER_PITCH_SHIFT));
	OUT_BATCH (0); /* pending relocation */
	OUT_BATCH (0);
	OUT_BATCH (0);
	device->last_vertex_size = device->vertex_size;
    }

    OUT_BATCH (BRW_3DPRIMITIVE |
	       BRW_3DPRIMITIVE_VERTEX_SEQUENTIAL |
	       (_3DPRIM_RECTLIST << BRW_3DPRIMITIVE_TOPOLOGY_SHIFT) |
	       (0 << 9) |
	       4);
    OUT_BATCH (vertex_count);  /* vertex count per instance */
    OUT_BATCH (vertex_start);  /* start vertex offset */
    OUT_BATCH (1); /* single instance */
    OUT_BATCH (0);
    OUT_BATCH (0);

    device->vertex.committed = device->vertex.used;
}

void
i965_finish_vertices (i965_device_t *device)
{
    cairo_status_t status;

    i965_flush_vertices (device);

    i965_stream_commit (device, &device->vertex);

    if (! i965_shader_check_aperture (device->shader, device)) {
	status = i965_device_flush (device);
	if (unlikely (status))
	    longjmp (device->shader->unwind, status);

	status = i965_shader_commit (device->shader, device);
	assert (status == CAIRO_STATUS_SUCCESS);
    }

    device->last_vertex_size = 0;
}

static cairo_bool_t
i965_shader_needs_update (const i965_shader_t *shader,
			  const i965_device_t *device)
{
    if (i965_shader_needs_surface_update (shader, device))
	return TRUE;

    if (i965_shader_needs_constants_update (shader, device))
	return TRUE;

    return i965_shader_needs_state_update (shader, device);
}

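/* OVER with a source that carries no alpha blends with factors
 * (1, 1-alpha) where alpha is identically 1, which is exactly SOURCE;
 * reducing the operator lets the CC unit skip blending entirely.  The
 * kernel-hash test presumably restricts this to shaders without mask
 * or clip channels.
 */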
static void
i965_shader_reduce (i965_shader_t *shader,
		    const i965_device_t *device)
{
    if (shader->op == CAIRO_OPERATOR_OVER &&
	(i965_wm_kernel_hash (shader) & ~0xff) == 0 &&
	(shader->source.base.content & CAIRO_CONTENT_ALPHA) == 0)
    {
	shader->op = CAIRO_OPERATOR_SOURCE;
    }
}

cairo_status_t
i965_shader_commit (i965_shader_t *shader,
		    i965_device_t *device)
{
    cairo_status_t status;

    if (! shader->committed) {
	device->shader = shader;

	status = i965_shader_setup_dst (shader);
	if (unlikely (status))
	    return status;

	i965_shader_setup_constants (shader);
	i965_shader_reduce (shader, device);

	if ((status = setjmp (shader->unwind)))
	    return status;

	shader->committed = TRUE;
    }

    if (! i965_shader_needs_update (shader, device))
	return CAIRO_STATUS_SUCCESS;

    /* XXX too many guesstimates about likely maximum sizes */
recheck:
    if (device->batch.used + 128 > device->batch.size ||
	! i965_shader_check_aperture (shader, device))
    {
	status = i965_device_flush (device);
	if (unlikely (status))
	    longjmp (shader->unwind, status);
    }

    i965_flush_vertices (device);

    if (unlikely (device->surface.used + 128 > device->surface.size ||
		  device->surface.num_relocations + 4 > device->surface.max_relocations))
    {
	i965_stream_commit (device, &device->surface);
	goto recheck;
    }

    if (unlikely (device->general.used + 512 > device->general.size)) {
	i965_stream_commit (device, &device->general);
	i965_general_state_reset (device);
	goto recheck;
    }

    if (unlikely (device->batch.used == 0))
	i965_emit_invariants (device);

    if (unlikely (device->surface.num_pending_relocations == 0 ||
		  device->general.num_pending_relocations == 0))
    {
	i965_emit_base (device);
    }

    i965_emit_composite (device, shader);

    return CAIRO_STATUS_SUCCESS;
}

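/* Replay a vertex buffer once per clip rectangle, using the hardware
 * drawing rectangle as the scissor.  Small, single-bo runs are copied
 * into the open vertex stream so the rectangles can be drawn without
 * switching vertex buffers; large or chained vbos are bound directly
 * and drawn in full for each clip rectangle.
 */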
void
i965_clipped_vertices (i965_device_t *device,
		       struct i965_vbo *vbo,
		       cairo_region_t *clip_region)
{
    int i, num_rectangles, size;
    cairo_status_t status;

    if (vbo->count == 0)
	return;

    num_rectangles = cairo_region_num_rectangles (clip_region);
    assert (num_rectangles);

    if (vbo->next ||
	vbo->count * device->vertex_size + device->vertex.used > device->vertex.size)
    {
	i965_finish_vertices (device);

	size = device->rectangle_size;
	do {
	    for (i = 0; i < num_rectangles; i++) {
		cairo_rectangle_int_t rect;

		cairo_region_get_rectangle (clip_region, i, &rect);

		if (unlikely (device->vertex.used + size > device->vertex.size ||
			      device->batch.used + 64 > device->batch.size ||
			      ! i965_shader_check_aperture (device->shader, device)))
		{
		    status = i965_device_flush (device);
		    if (unlikely (status))
			longjmp (device->shader->unwind, status);

		    status = i965_shader_commit (device->shader, device);
		    assert (status == CAIRO_STATUS_SUCCESS);
		}

		i965_emit_relocation (device, &device->batch,
				      vbo->bo, 0,
				      I915_GEM_DOMAIN_VERTEX, 0,
				      device->batch.used + 8);

		OUT_BATCH (BRW_3DSTATE_VERTEX_BUFFERS | 3);
		OUT_BATCH ((0 << VB0_BUFFER_INDEX_SHIFT) |
			   VB0_VERTEXDATA |
			   (device->vertex_size << VB0_BUFFER_PITCH_SHIFT));
		OUT_BATCH (vbo->bo->offset);
		OUT_BATCH (0);
		OUT_BATCH (0);

		/* XXX scissor? */
		OUT_BATCH (BRW_3DSTATE_DRAWING_RECTANGLE | 2);
		OUT_BATCH (DRAW_YMIN (rect.y) | DRAW_XMIN (rect.x));
		OUT_BATCH (DRAW_YMAX (rect.y + rect.height) |
			   DRAW_XMAX (rect.x + rect.width));
		OUT_BATCH (0x00000000);	/* yorigin, xorigin */

		OUT_BATCH (BRW_3DPRIMITIVE |
			   BRW_3DPRIMITIVE_VERTEX_SEQUENTIAL |
			   (_3DPRIM_RECTLIST << BRW_3DPRIMITIVE_TOPOLOGY_SHIFT) |
			   (0 << 9) |
			   4);
		OUT_BATCH (vbo->count);  /* vertex count per instance */
		OUT_BATCH (0);  /* start vertex offset */
		OUT_BATCH (1); /* single instance */
		OUT_BATCH (0);
		OUT_BATCH (0);
	    }
	} while ((vbo = vbo->next) != NULL);
	assert (device->last_vertex_size == 0);
    } else {
	int vertex_start, vertex_count;
	void *ptr;

	vertex_start = device->vertex.committed / device->vertex_size;
	vertex_count = vbo->count;

	size = vertex_count * device->vertex_size;
	ptr = intel_bo_map (&device->intel, vbo->bo);
	memcpy (device->vertex.data + device->vertex.used, ptr, size);
	device->vertex.committed = device->vertex.used += size;

	for (i = 0; i < num_rectangles; i++) {
	    cairo_rectangle_int_t rect;

	    cairo_region_get_rectangle (clip_region, i, &rect);

	    /* XXX scissor? */
	    OUT_BATCH (BRW_3DSTATE_DRAWING_RECTANGLE | 2);
	    OUT_BATCH (DRAW_YMIN (rect.y) | DRAW_XMIN (rect.x));
	    OUT_BATCH (DRAW_YMAX (rect.y + rect.height) |
		       DRAW_XMAX (rect.x + rect.width));
	    OUT_BATCH (0x00000000);	/* yorigin, xorigin */

	    OUT_BATCH (BRW_3DPRIMITIVE |
		       BRW_3DPRIMITIVE_VERTEX_SEQUENTIAL |
		       (_3DPRIM_RECTLIST << BRW_3DPRIMITIVE_TOPOLOGY_SHIFT) |
		       (0 << 9) |
		       4);
	    OUT_BATCH (vertex_count);  /* vertex count per instance */
	    OUT_BATCH (vertex_start);  /* start vertex offset */
	    OUT_BATCH (1); /* single instance */
	    OUT_BATCH (0);
	    OUT_BATCH (0);
	}
    }

    device->draw_rectangle = 0;
}