1 /**************************************************************************
2  *
3  * Copyright 2010-2021 VMware, Inc.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the
8  * "Software"), to deal in the Software without restriction, including
9  * without limitation the rights to use, copy, modify, merge, publish,
10  * distribute, sub license, and/or sell copies of the Software, and to
11  * permit persons to whom the Software is furnished to do so, subject to
12  * the following conditions:
13  *
14  * The above copyright notice and this permission notice (including the
15  * next paragraph) shall be included in all copies or substantial portions
16  * of the Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21  * IN NO EVENT SHALL THE AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR
22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25  *
26  **************************************************************************/
27 
28 /**
29  * Setup/binning code for screen-aligned quads.
30  */
31 
32 #include "util/u_math.h"
33 #include "util/u_memory.h"
34 #include "lp_perf.h"
35 #include "lp_setup_context.h"
36 #include "lp_rast.h"
37 #include "lp_state_fs.h"
38 #include "lp_state_setup.h"
39 
40 
/* Number of float channels per shader input; used to size the
 * a0/dadx/dady arrays in lp_setup_alloc_rectangle().
 */
#define NUM_CHANNELS 4

/* NOTE(review): not referenced in the visible part of this file;
 * presumably a sentinel for a not-yet-decided blit state -- confirm.
 */
#define UNDETERMINED_BLIT  -1
44 
45 
46 static inline int
subpixel_snap(float a)47 subpixel_snap(float a)
48 {
49    return util_iround(FIXED_ONE * a);
50 }
51 
52 
53 static inline float
fixed_to_float(int a)54 fixed_to_float(int a)
55 {
56    return a * (1.0f / FIXED_ONE);
57 }
58 
59 
60 /**
61  * Alloc space for a new rectangle plus the input.a0/dadx/dady arrays
62  * immediately after it.
63  * The memory is allocated from the per-scene pool, not per-tile.
64  * \param size  returns number of bytes allocated
65  * \param nr_inputs  number of fragment shader inputs
66  * \return pointer to rectangle space
67  */
68 struct lp_rast_rectangle *
lp_setup_alloc_rectangle(struct lp_scene * scene,unsigned nr_inputs)69 lp_setup_alloc_rectangle(struct lp_scene *scene, unsigned nr_inputs)
70 {
71    unsigned input_array_sz = NUM_CHANNELS * (nr_inputs + 1) * sizeof(float);
72    struct lp_rast_rectangle *rect;
73    unsigned bytes;
74 
75    bytes = sizeof(*rect) + (3 * input_array_sz);
76 
77    rect = lp_scene_alloc_aligned( scene, bytes, 16 );
78    if (rect == NULL)
79       return NULL;
80 
81    rect->inputs.stride = input_array_sz;
82 
83    return rect;
84 }
85 
86 
87 /**
88  * The rectangle covers the whole tile- shade whole tile.
89  * XXX no rectangle/triangle dependencies in this file - share it with
90  * the same code in lp_setup_tri.c
91  * \param tx, ty  the tile position in tiles, not pixels
92  */
93 boolean
lp_setup_whole_tile(struct lp_setup_context * setup,const struct lp_rast_shader_inputs * inputs,int tx,int ty)94 lp_setup_whole_tile(struct lp_setup_context *setup,
95                     const struct lp_rast_shader_inputs *inputs,
96                     int tx, int ty)
97 {
98    struct lp_scene *scene = setup->scene;
99 
100    LP_COUNT(nr_fully_covered_64);
101 
102    /* if variant is opaque and scissor doesn't effect the tile */
103    if (inputs->opaque) {
104       /* Several things prevent this optimization from working:
105        * - For layered rendering we can't determine if this covers the same layer
106        * as previous rendering (or in case of clears those actually always cover
107        * all layers so optimization is impossible). Need to use fb_max_layer and
108        * not setup->layer_slot to determine this since even if there's currently
109        * no slot assigned previous rendering could have used one.
110        * - If there were any Begin/End query commands in the scene then those
111        * would get removed which would be very wrong. Furthermore, if queries
112        * were just active we also can't do the optimization since to get
113        * accurate query results we unfortunately need to execute the rendering
114        * commands.
115        */
116       if (!scene->fb.zsbuf && scene->fb_max_layer == 0 && !scene->had_queries) {
117          /*
118           * All previous rendering will be overwritten so reset the bin.
119           */
120          lp_scene_bin_reset( scene, tx, ty );
121       }
122 
123       if (inputs->is_blit) {
124          LP_COUNT(nr_blit_64);
125          return lp_scene_bin_cmd_with_state( scene, tx, ty,
126                                              setup->fs.stored,
127                                              LP_RAST_OP_BLIT,
128                                              lp_rast_arg_inputs(inputs) );
129       }
130       else {
131          LP_COUNT(nr_shade_opaque_64);
132          return lp_scene_bin_cmd_with_state( scene, tx, ty,
133                                              setup->fs.stored,
134                                              LP_RAST_OP_SHADE_TILE_OPAQUE,
135                                              lp_rast_arg_inputs(inputs) );
136       }
137    }
138    else {
139       LP_COUNT(nr_shade_64);
140       return lp_scene_bin_cmd_with_state( scene, tx, ty,
141                                           setup->fs.stored,
142                                           LP_RAST_OP_SHADE_TILE,
143                                           lp_rast_arg_inputs(inputs) );
144    }
145 }
146 
147 
148 boolean
lp_setup_is_blit(const struct lp_setup_context * setup,const struct lp_rast_shader_inputs * inputs)149 lp_setup_is_blit(const struct lp_setup_context *setup,
150                  const struct lp_rast_shader_inputs *inputs)
151 {
152    const struct lp_fragment_shader_variant *variant =
153       setup->fs.current.variant;
154 
155    if (variant->blit) {
156       /*
157        * Detect blits.
158        */
159       const struct lp_jit_texture *texture =
160          &setup->fs.current.jit_context.textures[0];
161       float dsdx, dsdy, dtdx, dtdy;
162 
163       /* XXX: dadx vs dady confusion below?
164        */
165       dsdx = GET_DADX(inputs)[1][0]*texture->width;
166       dsdy = GET_DADX(inputs)[1][1]*texture->width;
167       dtdx = GET_DADY(inputs)[1][0]*texture->height;
168       dtdy = GET_DADY(inputs)[1][1]*texture->height;
169 
170       /*
171        * We don't need to check s0/t0 tolerances
172        * as we establish as pre-condition that there is no
173        * texture filtering.
174        */
175 
176       ASSERTED struct lp_sampler_static_state *samp0 = lp_fs_variant_key_sampler_idx(&variant->key, 0);
177       assert(samp0);
178       assert(samp0->sampler_state.min_img_filter == PIPE_TEX_FILTER_NEAREST);
179       assert(samp0->sampler_state.mag_img_filter == PIPE_TEX_FILTER_NEAREST);
180 
181       /*
182        * Check for 1:1 match of texels to dest pixels
183        */
184 
185       if (util_is_approx(dsdx, 1.0f, 1.0f/LP_MAX_WIDTH) &&
186           util_is_approx(dsdy, 0.0f, 1.0f/LP_MAX_HEIGHT) &&
187           util_is_approx(dtdx, 0.0f, 1.0f/LP_MAX_WIDTH) &&
188           util_is_approx(dtdy, 1.0f, 1.0f/LP_MAX_HEIGHT)) {
189          return true;
190       }
191       else {
192 #if 0
193          debug_printf("dsdx = %f\n", dsdx);
194          debug_printf("dsdy = %f\n", dsdy);
195          debug_printf("dtdx = %f\n", dtdx);
196          debug_printf("dtdy = %f\n", dtdy);
197          debug_printf("\n");
198 #endif
199          return FALSE;
200       }
201    }
202 
203    return FALSE;
204 }
205 
206 
207 static inline void
partial(struct lp_setup_context * setup,const struct lp_rast_rectangle * rect,unsigned ix,unsigned iy,unsigned mask)208 partial(struct lp_setup_context *setup,
209         const struct lp_rast_rectangle *rect,
210         unsigned ix, unsigned iy,
211         unsigned mask)
212 {
213    if (mask == 0) {
214       assert(rect->box.x0 <= ix * TILE_SIZE);
215       assert(rect->box.y0 <= iy * TILE_SIZE);
216       assert(rect->box.x1 >= (ix+1) * TILE_SIZE - 1);
217       assert(rect->box.y1 >= (iy+1) * TILE_SIZE - 1);
218 
219       lp_setup_whole_tile(setup, &rect->inputs, ix, iy);
220    }
221    else {
222       LP_COUNT(nr_partially_covered_64);
223       lp_scene_bin_cmd_with_state( setup->scene,
224                                    ix, iy,
225                                    setup->fs.stored,
226                                    LP_RAST_OP_RECTANGLE,
227                                    lp_rast_arg_rectangle(rect) );
228    }
229 }
230 
231 
232 /**
233  * Setup/bin a screen-aligned rect.
234  * We need three corner vertices in order to correctly setup
235  * interpolated parameters.  We *could* get away with just the
236  * diagonal vertices but it'd cause ugliness elsewhere.
237  *
238  *   + -------v0
239  *   |        |
240  *  v2 ------ v1
241  *
242  * By an unfortunate mixup between GL and D3D coordinate spaces, half
243  * of this file talks about clockwise rectangles (which were CCW in GL
244  * coordinate space), while the other half prefers to work with D3D
245  * CCW rectangles.
246  */
247 static boolean
try_rect_cw(struct lp_setup_context * setup,const float (* v0)[4],const float (* v1)[4],const float (* v2)[4],boolean frontfacing)248 try_rect_cw(struct lp_setup_context *setup,
249             const float (*v0)[4],
250             const float (*v1)[4],
251             const float (*v2)[4],
252             boolean frontfacing)
253 {
254    const struct lp_fragment_shader_variant *variant =
255       setup->fs.current.variant;
256    const struct lp_setup_variant_key *key = &setup->setup.variant->key;
257    struct lp_scene *scene = setup->scene;
258    struct lp_rast_rectangle *rect;
259    boolean cw;
260    struct u_rect bbox;
261    unsigned viewport_index = 0;
262    unsigned layer = 0;
263    const float (*pv)[4];
264 
265    /* x/y positions in fixed point */
266    int x0 = subpixel_snap(v0[0][0] - setup->pixel_offset);
267    int x1 = subpixel_snap(v1[0][0] - setup->pixel_offset);
268    int x2 = subpixel_snap(v2[0][0] - setup->pixel_offset);
269    int y0 = subpixel_snap(v0[0][1] - setup->pixel_offset);
270    int y1 = subpixel_snap(v1[0][1] - setup->pixel_offset);
271    int y2 = subpixel_snap(v2[0][1] - setup->pixel_offset);
272 
273    LP_COUNT(nr_rects);
274 
275    /* Cull clockwise rects without overflowing.
276     */
277    cw = (x2 < x1) ^ (y0 < y2);
278    if (cw) {
279       LP_COUNT(nr_culled_rects);
280       return TRUE;
281    }
282 
283    if (setup->flatshade_first) {
284       pv = v0;
285    }
286    else {
287       pv = v2;
288    }
289    if (setup->viewport_index_slot > 0) {
290       unsigned *udata = (unsigned*)pv[setup->viewport_index_slot];
291       viewport_index = lp_clamp_viewport_idx(*udata);
292    }
293    if (setup->layer_slot > 0) {
294       layer = *(unsigned*)pv[setup->layer_slot];
295       layer = MIN2(layer, scene->fb_max_layer);
296    }
297 
298    /* Bounding rectangle (in pixels) */
299    {
300       /* Yes this is necessary to accurately calculate bounding boxes
301        * with the two fill-conventions we support.  GL (normally) ends
302        * up needing a bottom-left fill convention, which requires
303        * slightly different rounding.
304        */
305       int adj = (setup->bottom_edge_rule != 0) ? 1 : 0;
306 
307       bbox.x0 = (MIN3(x0, x1, x2) + (FIXED_ONE-1)) >> FIXED_ORDER;
308       bbox.x1 = (MAX3(x0, x1, x2) + (FIXED_ONE-1)) >> FIXED_ORDER;
309       bbox.y0 = (MIN3(y0, y1, y2) + (FIXED_ONE-1) + adj) >> FIXED_ORDER;
310       bbox.y1 = (MAX3(y0, y1, y2) + (FIXED_ONE-1) + adj) >> FIXED_ORDER;
311 
312       /* Inclusive coordinates:
313        */
314       bbox.x1--;
315       bbox.y1--;
316    }
317 
318    if (!u_rect_test_intersection(&setup->draw_regions[viewport_index], &bbox)) {
319       if (0) debug_printf("no intersection\n");
320       LP_COUNT(nr_culled_rects);
321       return TRUE;
322    }
323 
324    u_rect_find_intersection(&setup->draw_regions[viewport_index], &bbox);
325 
326    rect = lp_setup_alloc_rectangle(scene, key->num_inputs);
327    if (!rect)
328       return FALSE;
329 
330 #ifdef DEBUG
331    rect->v[0][0] = v0[0][0];
332    rect->v[0][1] = v0[0][1];
333    rect->v[1][0] = v1[0][0];
334    rect->v[1][1] = v1[0][1];
335 #endif
336 
337    rect->box.x0 = bbox.x0;
338    rect->box.x1 = bbox.x1;
339    rect->box.y0 = bbox.y0;
340    rect->box.y1 = bbox.y1;
341 
342    /* Setup parameter interpolants:
343     */
344    setup->setup.variant->jit_function( v0,
345 				       v1,
346 				       v2,
347 				       frontfacing,
348 				       GET_A0(&rect->inputs),
349 				       GET_DADX(&rect->inputs),
350 				       GET_DADY(&rect->inputs),
351                                        &setup->setup.variant->key );
352 
353    rect->inputs.frontfacing = frontfacing;
354    rect->inputs.disable = FALSE;
355    rect->inputs.is_blit = lp_setup_is_blit(setup, &rect->inputs);
356    rect->inputs.opaque = variant->opaque;
357    rect->inputs.layer = layer;
358    rect->inputs.viewport_index = viewport_index;
359    rect->inputs.view_index = setup->view_index;
360 
361    return lp_setup_bin_rectangle(setup, rect);
362 }
363 
364 
365 boolean
lp_setup_bin_rectangle(struct lp_setup_context * setup,struct lp_rast_rectangle * rect)366 lp_setup_bin_rectangle(struct lp_setup_context *setup,
367                        struct lp_rast_rectangle *rect)
368 {
369    struct lp_scene *scene = setup->scene;
370    unsigned ix0, iy0, ix1, iy1;
371    unsigned i, j;
372    unsigned left_mask = 0;
373    unsigned right_mask = 0;
374    unsigned top_mask = 0;
375    unsigned bottom_mask = 0;
376 
377    /*
378     * All fields of 'rect' are now set.  The remaining code here is
379     * concerned with binning.
380     */
381 
382    /* Convert to inclusive tile coordinates:
383     */
384    ix0 = rect->box.x0 / TILE_SIZE;
385    iy0 = rect->box.y0 / TILE_SIZE;
386    ix1 = rect->box.x1 / TILE_SIZE;
387    iy1 = rect->box.y1 / TILE_SIZE;
388 
389    /*
390     * Clamp to framebuffer size
391     */
392    assert(ix0 == MAX2(ix0, 0));
393    assert(iy0 == MAX2(iy0, 0));
394    assert(ix1 == MIN2(ix1, scene->tiles_x - 1));
395    assert(iy1 == MIN2(iy1, scene->tiles_y - 1));
396 
397    if (ix0 * TILE_SIZE != rect->box.x0)
398       left_mask = RECT_PLANE_LEFT;
399 
400    if (ix1 * TILE_SIZE + TILE_SIZE - 1 != rect->box.x1)
401       right_mask  = RECT_PLANE_RIGHT;
402 
403    if (iy0 * TILE_SIZE != rect->box.y0)
404       top_mask    = RECT_PLANE_TOP;
405 
406    if (iy1 * TILE_SIZE + TILE_SIZE - 1 != rect->box.y1)
407       bottom_mask = RECT_PLANE_BOTTOM;
408 
409    /* Determine which tile(s) intersect the rectangle's bounding box
410     */
411    if (iy0 == iy1 && ix0 == ix1) {
412       partial(setup, rect, ix0, iy0,
413               (left_mask | right_mask | top_mask | bottom_mask));
414    }
415    else if (ix0 == ix1) {
416       unsigned mask = left_mask | right_mask;
417       partial(setup, rect, ix0, iy0, mask | top_mask);
418       for (i = iy0 + 1; i < iy1; i++)
419          partial(setup, rect, ix0, i, mask);
420       partial(setup, rect, ix0, iy1, mask | bottom_mask);
421    }
422    else if (iy0 == iy1) {
423       unsigned mask = top_mask | bottom_mask;
424       partial(setup, rect, ix0, iy0, mask | left_mask);
425       for (i = ix0 + 1; i < ix1; i++)
426          partial(setup, rect, i, iy0, mask);
427       partial(setup, rect, ix1, iy0, mask | right_mask);
428    }
429    else {
430       partial(setup, rect, ix0, iy0, left_mask  | top_mask);
431       partial(setup, rect, ix0, iy1, left_mask  | bottom_mask);
432       partial(setup, rect, ix1, iy0, right_mask | top_mask);
433       partial(setup, rect, ix1, iy1, right_mask | bottom_mask);
434 
435       /* Top/Bottom fringes
436        */
437       for (i = ix0 + 1; i < ix1; i++) {
438          partial(setup, rect, i, iy0, top_mask);
439          partial(setup, rect, i, iy1, bottom_mask);
440       }
441 
442       /* Left/Right fringes
443        */
444       for (i = iy0 + 1; i < iy1; i++) {
445          partial(setup, rect, ix0, i, left_mask);
446          partial(setup, rect, ix1, i, right_mask);
447       }
448 
449       /* Full interior tiles
450        */
451       for (j = iy0 + 1; j < iy1; j++) {
452          for (i = ix0 + 1; i < ix1; i++) {
453             lp_setup_whole_tile(setup, &rect->inputs, i, j);
454          }
455       }
456    }
457 
458    /* Catch any out-of-memory which occurred during binning.  Do this
459     * once here rather than checking all the return values throughout.
460     */
461    if (lp_scene_is_oom(scene)) {
462       /* Disable rasterization of this partially-binned rectangle.
463        * We'll flush this scene and re-bin the entire rectangle:
464        */
465       rect->inputs.disable = TRUE;
466       return FALSE;
467    }
468 
469    return TRUE;
470 }
471 
472 
473 void
lp_rect_cw(struct lp_setup_context * setup,const float (* v0)[4],const float (* v1)[4],const float (* v2)[4],boolean frontfacing)474 lp_rect_cw(struct lp_setup_context *setup,
475          const float (*v0)[4],
476          const float (*v1)[4],
477          const float (*v2)[4],
478          boolean frontfacing)
479 {
480    if (!try_rect_cw(setup, v0, v1, v2, frontfacing)) {
481       if (!lp_setup_flush_and_restart(setup))
482          return;
483 
484       if (!try_rect_cw(setup, v0, v1, v2, frontfacing))
485          return;
486    }
487 }
488 
489 
/**
 * Take the six vertices for two triangles and try to determine if they
 * form a screen-aligned quad/rectangle.  If so, draw the rect directly
 * through lp_rect_cw().
 *
 * For the pair to be accepted:
 *   - the triangles must share an edge, i.e. two vertices of each have
 *     exactly equal x/y/z/w positions (see SAME_POS),
 *   - the four resulting corners must be axis aligned, and
 *   - every input channel enabled in the setup variant key must change
 *     by the same amount along opposite edges.
 *
 * \param setup     setup context; supplies the setup variant key
 * \param v0,v1,v2  vertices of the first triangle
 * \param v3,v4,v5  vertices of the second triangle
 * \param front     facing flag forwarded to lp_rect_cw()
 * \return TRUE if the pair was handed to lp_rect_cw().  FALSE if it was
 *         not recognised as a rectangle; nothing is drawn here in that
 *         case (presumably the caller then draws two regular triangles
 *         -- that fallback is not part of this function).
 */
static boolean
do_rect_ccw(struct lp_setup_context *setup,
            const float (*v0)[4],
            const float (*v1)[4],
            const float (*v2)[4],
            const float (*v3)[4],
            const float (*v4)[4],
            const float (*v5)[4],
            boolean front)
{
   const float (*rv0)[4], (*rv1)[4], (*rv2)[4], (*rv3)[4];  /* rect verts */

   /* Exact (not approximate) comparison of all four position components. */
#define SAME_POS(A, B)   (A[0][0] == B[0][0] && \
                          A[0][1] == B[0][1] && \
                          A[0][2] == B[0][2] && \
                          A[0][3] == B[0][3])

   /* Only need to consider CCW orientations.  There are nine ways
    * that two counter-clockwise triangles can join up:
    */
   if (SAME_POS(v0, v3)) {
      if (SAME_POS(v2, v4)) {
         /*
          *    v5   v4/v2
          *     +-----+
          *     |   / |
          *     |  /  |
          *     | /   |
          *     +-----+
          *   v3/v0   v1
          */
         rv0 = v5;
         rv1 = v0;
         rv2 = v1;
         rv3 = v2;
      }
      else if (SAME_POS(v1, v5)) {
         /*
          *    v4   v3/v0
          *     +-----+
          *     |   / |
          *     |  /  |
          *     | /   |
          *     +-----+
          *   v5/v1   v2
          */
         rv0 = v4;
         rv1 = v1;
         rv2 = v2;
         rv3 = v0;
      }
      else {
         goto emit_triangles;
      }
   }
   else if (SAME_POS(v0, v5)) {
      if (SAME_POS(v2, v3)) {
         /*
          *    v4   v3/v2
          *     +-----+
          *     |   / |
          *     |  /  |
          *     | /   |
          *     +-----+
          *   v5/v0   v1
          */
         rv0 = v4;
         rv1 = v0;
         rv2 = v1;
         rv3 = v2;
      }
      else if (SAME_POS(v1, v4)) {
         /*
          *    v3   v5/v0
          *     +-----+
          *     |   / |
          *     |  /  |
          *     | /   |
          *     +-----+
          *   v4/v1   v2
          */
         rv0 = v3;
         rv1 = v1;
         rv2 = v2;
         rv3 = v0;
      }
      else {
         goto emit_triangles;
      }
   }
   else if (SAME_POS(v0, v4)) {
      if (SAME_POS(v2, v5)) {
         /*
          *    v3   v5/v2
          *     +-----+
          *     |   / |
          *     |  /  |
          *     | /   |
          *     +-----+
          *   v4/v0   v1
          */
         rv0 = v3;
         rv1 = v0;
         rv2 = v1;
         rv3 = v2;
      }
      else if (SAME_POS(v1, v3)) {
         /*
          *    v5   v4/v0
          *     +-----+
          *     |   / |
          *     |  /  |
          *     | /   |
          *     +-----+
          *   v3/v1   v2
          */
         rv0 = v5;
         rv1 = v1;
         rv2 = v2;
         rv3 = v0;
      }
      else {
         goto emit_triangles;
      }
   }
   else if (SAME_POS(v2, v3)) {
      if (SAME_POS(v1, v4)) {
         /*
          *    v5   v4/v1
          *     +-----+
          *     |   / |
          *     |  /  |
          *     | /   |
          *     +-----+
          *   v3/v2   v0
          */
         rv0 = v5;
         rv1 = v2;
         rv2 = v0;
         rv3 = v1;
      }
      else {
         goto emit_triangles;
      }
   }
   else if (SAME_POS(v2, v5)) {
      if (SAME_POS(v1, v3)) {
         /*
          *    v4   v3/v1
          *     +-----+
          *     |   / |
          *     |  /  |
          *     | /   |
          *     +-----+
          *   v5/v2   v0
          */
         rv0 = v4;
         rv1 = v2;
         rv2 = v0;
         rv3 = v1;
      }
      else {
         goto emit_triangles;
      }
   }
   else if (SAME_POS(v2, v4)) {
      if (SAME_POS(v1, v5)) {
         /*
          *    v3   v5/v1
          *     +-----+
          *     |   / |
          *     |  /  |
          *     | /   |
          *     +-----+
          *   v4/v2   v0
          */
         rv0 = v3;
         rv1 = v2;
         rv2 = v0;
         rv3 = v1;
      }
      else {
         goto emit_triangles;
      }
   }
   else {
      /* No shared edge found. */
      goto emit_triangles;
   }


#define SAME_X(A, B)   (A[0][0] == B[0][0])
#define SAME_Y(A, B)   (A[0][1] == B[0][1])

   /* The vertices are now counter clockwise, as such:
    *
    *  rv0 -------rv3
    *    |        |
    *  rv1 ------ rv2
    *
    * To render as a rectangle,
    *   * The X values should be the same at v0, v1 and v2, v3.
    *   * The Y values should be the same at v0, v3 and v1, v2.
    */
   if (SAME_Y(rv0, rv1)) {
      /* rv0-rv1 is a horizontal edge: rotate the four corners by one
       * position so that rv0-rv1 becomes the candidate vertical edge
       * tested below.
       */
      const float (*tmp)[4];
      tmp = rv0;
      rv0 = rv1;
      rv1 = rv2;
      rv2 = rv3;
      rv3 = tmp;
   }

   if (SAME_X(rv0, rv1) && SAME_X(rv2, rv3) &&
       SAME_Y(rv0, rv3) && SAME_Y(rv1, rv2)) {

      const struct lp_setup_variant_key *key = &setup->setup.variant->key;
      const unsigned n = key->num_inputs;
      unsigned i, j;

      /* We have a rectangle.  Check that the other attributes are
       * coplanar.
       */
      for (i = 0; i < n; i++) {
         for (j = 0; j < 4; j++) {
            /* Only channels enabled in the input's usage mask matter. */
            if (key->inputs[i].usage_mask & (1<<j)) {
               unsigned k = key->inputs[i].src_index;
               float dxdx1, dxdx2, dxdy1, dxdy2;
               /* Differences along the two horizontal edges ... */
               dxdx1 = rv0[k][j] - rv3[k][j];
               dxdx2 = rv1[k][j] - rv2[k][j];
               /* ... and along the two vertical edges. */
               dxdy1 = rv0[k][j] - rv1[k][j];
               dxdy2 = rv3[k][j] - rv2[k][j];
               if (dxdx1 != dxdx2 ||
                   dxdy1 != dxdy2) {
                  goto emit_triangles;
               }
            }
         }
      }

      /* Note we're changing to clockwise here.  Fix this by reworking
       * lp_rect_cw to expect/operate on ccw rects.  Note that
       * function was previously misnamed.
       */
      lp_rect_cw(setup, rv0, rv2, rv1, front);
      return TRUE;
   }
   else {
      /* setup->quad(setup, rv0, rv1, rv2, rv3); */
   }

   /* Not a rectangle: report it so the caller can handle the pair. */
emit_triangles:
   return FALSE;
}
748 
749 
/** Orientation of a triangle as classified by winding(). */
enum winding {
   WINDING_NONE = 0,
   WINDING_CCW,
   WINDING_CW
};


/**
 * Classify the orientation of a triangle from the x/y position of its
 * three vertices (element [0] of each vertex).
 *
 * \return WINDING_CCW for a negative determinant, WINDING_CW for a
 *         positive one and WINDING_NONE otherwise (zero area, or a
 *         determinant that compares neither way, e.g. NaN)
 */
static inline enum winding
winding(const float (*v0)[4],
        const float (*v1)[4],
        const float (*v2)[4])
{
   const float *p0 = v0[0];
   const float *p1 = v1[0];
   const float *p2 = v2[0];

   /* edge vectors e = v0 - v2, f = v1 - v2 */
   const float ex = p0[0] - p2[0];
   const float ey = p0[1] - p2[1];
   const float fx = p1[0] - p2[0];
   const float fy = p1[1] - p2[1];

   /* z component of cross(e, f) */
   const float cross_z = ex * fy - ey * fx;

   if (cross_z > 0.0f)
      return WINDING_CW;

   if (cross_z < 0.0f)
      return WINDING_CCW;

   return WINDING_NONE;
}
778 
779 
780 static boolean
setup_rect_cw(struct lp_setup_context * setup,const float (* v0)[4],const float (* v1)[4],const float (* v2)[4],const float (* v3)[4],const float (* v4)[4],const float (* v5)[4])781 setup_rect_cw(struct lp_setup_context *setup,
782               const float (*v0)[4],
783               const float (*v1)[4],
784               const float (*v2)[4],
785               const float (*v3)[4],
786               const float (*v4)[4],
787               const float (*v5)[4])
788 {
789    enum winding winding0 = winding(v0, v1, v2);
790    enum winding winding1 = winding(v3, v4, v5);
791 
792    if (winding0 == WINDING_CW &&
793        winding1 == WINDING_CW) {
794       return do_rect_ccw(setup, v0, v2, v1, v3, v5, v4, !setup->ccw_is_frontface);
795    } else if (winding0 == WINDING_CW) {
796       setup->triangle(setup, v0, v1, v2);
797       return TRUE;
798    } else if (winding1 == WINDING_CW) {
799       setup->triangle(setup, v3, v4, v5);
800       return TRUE;
801    } else {
802       return TRUE;
803    }
804 }
805 
806 
807 static boolean
setup_rect_ccw(struct lp_setup_context * setup,const float (* v0)[4],const float (* v1)[4],const float (* v2)[4],const float (* v3)[4],const float (* v4)[4],const float (* v5)[4])808 setup_rect_ccw(struct lp_setup_context *setup,
809                const float (*v0)[4],
810                const float (*v1)[4],
811                const float (*v2)[4],
812                const float (*v3)[4],
813                const float (*v4)[4],
814                const float (*v5)[4])
815 {
816    enum winding winding0 = winding(v0, v1, v2);
817    enum winding winding1 = winding(v3, v4, v5);
818 
819    if (winding0 == WINDING_CCW &&
820        winding1 == WINDING_CCW) {
821       return do_rect_ccw(setup, v0, v1, v2, v3, v4, v5, setup->ccw_is_frontface);
822    } else if (winding0 == WINDING_CCW) {
823       setup->triangle(setup, v0, v1, v2);
824       return TRUE;
825    } else if (winding1 == WINDING_CCW) {
826       return FALSE;
827       setup->triangle(setup, v3, v4, v5);
828       return TRUE;
829    } else {
830       return TRUE;
831    }
832 }
833 
834 
835 static boolean
setup_rect_noop(struct lp_setup_context * setup,const float (* v0)[4],const float (* v1)[4],const float (* v2)[4],const float (* v3)[4],const float (* v4)[4],const float (* v5)[4])836 setup_rect_noop(struct lp_setup_context *setup,
837                 const float (*v0)[4],
838                 const float (*v1)[4],
839                 const float (*v2)[4],
840                 const float (*v3)[4],
841                 const float (*v4)[4],
842                 const float (*v5)[4])
843 {
844    return TRUE;
845 }
846 
847 
848 static boolean
setup_rect_both(struct lp_setup_context * setup,const float (* v0)[4],const float (* v1)[4],const float (* v2)[4],const float (* v3)[4],const float (* v4)[4],const float (* v5)[4])849 setup_rect_both(struct lp_setup_context *setup,
850                 const float (*v0)[4],
851                 const float (*v1)[4],
852                 const float (*v2)[4],
853                 const float (*v3)[4],
854                 const float (*v4)[4],
855                 const float (*v5)[4])
856 {
857    enum winding winding0 = winding(v0, v1, v2);
858    enum winding winding1 = winding(v3, v4, v5);
859 
860    if (winding0 != winding1) {
861       /* If we knew that the "front" parameter wasn't going to be
862        * referenced, could rearrange one of the two triangles such
863        * that they were both CCW.  Aero actually does send mixed
864        * CW/CCW rectangles under some circumstances, but we catch them
865        * explicitly.
866        */
867       return FALSE;
868    }
869    else if (winding0 == WINDING_CCW) {
870       return do_rect_ccw(setup, v0, v1, v2, v3, v4, v5, setup->ccw_is_frontface);
871    }
872    else if (winding0 == WINDING_CW) {
873       return do_rect_ccw(setup, v0, v2, v1, v3, v5, v4, !setup->ccw_is_frontface);
874    } else {
875       return TRUE;
876    }
877 }
878 
879 
880 void
lp_setup_choose_rect(struct lp_setup_context * setup)881 lp_setup_choose_rect( struct lp_setup_context *setup )
882 {
883    if (setup->rasterizer_discard) {
884       setup->rect = setup_rect_noop;
885       return;
886    }
887 
888    switch (setup->cullmode) {
889    case PIPE_FACE_NONE:
890       setup->rect = setup_rect_both;
891       break;
892    case PIPE_FACE_BACK:
893       setup->rect = setup->ccw_is_frontface ? setup_rect_ccw : setup_rect_cw;
894       break;
895    case PIPE_FACE_FRONT:
896       setup->rect = setup->ccw_is_frontface ? setup_rect_cw : setup_rect_ccw;
897       break;
898    default:
899       setup->rect = setup_rect_noop;
900       break;
901    }
902 }
903