1 /*
2  * Copyright © 2006 - 2017 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "brw_compiler.h"
25 #include "brw_eu.h"
26 
27 #include "dev/intel_debug.h"
28 
29 struct brw_sf_compile {
30    struct brw_codegen func;
31    struct brw_sf_prog_key key;
32    struct brw_sf_prog_data prog_data;
33 
34    struct brw_reg pv;
35    struct brw_reg det;
36    struct brw_reg dx0;
37    struct brw_reg dx2;
38    struct brw_reg dy0;
39    struct brw_reg dy2;
40 
41    /* z and 1/w passed in seperately:
42     */
43    struct brw_reg z[3];
44    struct brw_reg inv_w[3];
45 
46    /* The vertices:
47     */
48    struct brw_reg vert[3];
49 
50     /* Temporaries, allocated after last vertex reg.
51     */
52    struct brw_reg inv_det;
53    struct brw_reg a1_sub_a0;
54    struct brw_reg a2_sub_a0;
55    struct brw_reg tmp;
56 
57    struct brw_reg m1Cx;
58    struct brw_reg m2Cy;
59    struct brw_reg m3C0;
60 
61    GLuint nr_verts;
62    GLuint nr_attr_regs;
63    GLuint nr_setup_regs;
64    int urb_entry_read_offset;
65 
66    /** The last known value of the f0.0 flag register. */
67    unsigned flag_value;
68 
69    struct brw_vue_map vue_map;
70 };
71 
72 /**
73  * Determine the vue slot corresponding to the given half of the given register.
74  */
vert_reg_to_vue_slot(struct brw_sf_compile * c,GLuint reg,int half)75 static inline int vert_reg_to_vue_slot(struct brw_sf_compile *c, GLuint reg,
76                                        int half)
77 {
78    return (reg + c->urb_entry_read_offset) * 2 + half;
79 }
80 
81 /**
82  * Determine the varying corresponding to the given half of the given
83  * register.  half=0 means the first half of a register, half=1 means the
84  * second half.
85  */
vert_reg_to_varying(struct brw_sf_compile * c,GLuint reg,int half)86 static inline int vert_reg_to_varying(struct brw_sf_compile *c, GLuint reg,
87                                       int half)
88 {
89    int vue_slot = vert_reg_to_vue_slot(c, reg, half);
90    return c->vue_map.slot_to_varying[vue_slot];
91 }
92 
93 /**
94  * Determine the register corresponding to the given vue slot
95  */
get_vue_slot(struct brw_sf_compile * c,struct brw_reg vert,int vue_slot)96 static struct brw_reg get_vue_slot(struct brw_sf_compile *c,
97                                    struct brw_reg vert,
98                                    int vue_slot)
99 {
100    GLuint off = vue_slot / 2 - c->urb_entry_read_offset;
101    GLuint sub = vue_slot % 2;
102 
103    return brw_vec4_grf(vert.nr + off, sub * 4);
104 }
105 
106 /**
107  * Determine the register corresponding to the given varying.
108  */
get_varying(struct brw_sf_compile * c,struct brw_reg vert,GLuint varying)109 static struct brw_reg get_varying(struct brw_sf_compile *c,
110                                   struct brw_reg vert,
111                                   GLuint varying)
112 {
113    int vue_slot = c->vue_map.varying_to_slot[varying];
114    assert (vue_slot >= c->urb_entry_read_offset);
115    return get_vue_slot(c, vert, vue_slot);
116 }
117 
118 static bool
have_attr(struct brw_sf_compile * c,GLuint attr)119 have_attr(struct brw_sf_compile *c, GLuint attr)
120 {
121    return (c->key.attrs & BITFIELD64_BIT(attr)) ? 1 : 0;
122 }
123 
124 /***********************************************************************
125  * Twoside lighting
126  */
copy_bfc(struct brw_sf_compile * c,struct brw_reg vert)127 static void copy_bfc( struct brw_sf_compile *c,
128 		      struct brw_reg vert )
129 {
130    struct brw_codegen *p = &c->func;
131    GLuint i;
132 
133    for (i = 0; i < 2; i++) {
134       if (have_attr(c, VARYING_SLOT_COL0+i) &&
135 	  have_attr(c, VARYING_SLOT_BFC0+i))
136 	 brw_MOV(p,
137 		 get_varying(c, vert, VARYING_SLOT_COL0+i),
138 		 get_varying(c, vert, VARYING_SLOT_BFC0+i));
139    }
140 }
141 
142 
do_twoside_color(struct brw_sf_compile * c)143 static void do_twoside_color( struct brw_sf_compile *c )
144 {
145    struct brw_codegen *p = &c->func;
146    GLuint backface_conditional = c->key.frontface_ccw ? BRW_CONDITIONAL_G : BRW_CONDITIONAL_L;
147 
148    /* Already done in clip program:
149     */
150    if (c->key.primitive == BRW_SF_PRIM_UNFILLED_TRIS)
151       return;
152 
153    /* If the vertex shader provides backface color, do the selection. The VS
154     * promises to set up the front color if the backface color is provided, but
155     * it may contain junk if never written to.
156     */
157    if (!(have_attr(c, VARYING_SLOT_COL0) && have_attr(c, VARYING_SLOT_BFC0)) &&
158        !(have_attr(c, VARYING_SLOT_COL1) && have_attr(c, VARYING_SLOT_BFC1)))
159       return;
160 
161    /* Need to use BRW_EXECUTE_4 and also do an 4-wide compare in order
162     * to get all channels active inside the IF.  In the clipping code
163     * we run with NoMask, so it's not an option and we can use
164     * BRW_EXECUTE_1 for all comparisions.
165     */
166    brw_CMP(p, vec4(brw_null_reg()), backface_conditional, c->det, brw_imm_f(0));
167    brw_IF(p, BRW_EXECUTE_4);
168    {
169       switch (c->nr_verts) {
170       case 3: copy_bfc(c, c->vert[2]); FALLTHROUGH;
171       case 2: copy_bfc(c, c->vert[1]); FALLTHROUGH;
172       case 1: copy_bfc(c, c->vert[0]);
173       }
174    }
175    brw_ENDIF(p);
176 }
177 
178 
179 
180 /***********************************************************************
181  * Flat shading
182  */
183 
copy_flatshaded_attributes(struct brw_sf_compile * c,struct brw_reg dst,struct brw_reg src)184 static void copy_flatshaded_attributes(struct brw_sf_compile *c,
185                                        struct brw_reg dst,
186                                        struct brw_reg src)
187 {
188    struct brw_codegen *p = &c->func;
189    int i;
190 
191    for (i = 0; i < c->vue_map.num_slots; i++) {
192       if (c->key.interp_mode[i] == INTERP_MODE_FLAT) {
193          brw_MOV(p,
194                  get_vue_slot(c, dst, i),
195                  get_vue_slot(c, src, i));
196       }
197    }
198 }
199 
count_flatshaded_attributes(struct brw_sf_compile * c)200 static int count_flatshaded_attributes(struct brw_sf_compile *c)
201 {
202    int i;
203    int count = 0;
204 
205    for (i = 0; i < c->vue_map.num_slots; i++)
206       if (c->key.interp_mode[i] == INTERP_MODE_FLAT)
207          count++;
208 
209    return count;
210 }
211 
212 
213 
214 /* Need to use a computed jump to copy flatshaded attributes as the
215  * vertices are ordered according to y-coordinate before reaching this
216  * point, so the PV could be anywhere.
217  */
do_flatshade_triangle(struct brw_sf_compile * c)218 static void do_flatshade_triangle( struct brw_sf_compile *c )
219 {
220    struct brw_codegen *p = &c->func;
221    GLuint nr;
222    GLuint jmpi = 1;
223 
224    /* Already done in clip program:
225     */
226    if (c->key.primitive == BRW_SF_PRIM_UNFILLED_TRIS)
227       return;
228 
229    if (p->devinfo->ver == 5)
230        jmpi = 2;
231 
232    nr = count_flatshaded_attributes(c);
233 
234    brw_MUL(p, c->pv, c->pv, brw_imm_d(jmpi*(nr*2+1)));
235    brw_JMPI(p, c->pv, BRW_PREDICATE_NONE);
236 
237    copy_flatshaded_attributes(c, c->vert[1], c->vert[0]);
238    copy_flatshaded_attributes(c, c->vert[2], c->vert[0]);
239    brw_JMPI(p, brw_imm_d(jmpi*(nr*4+1)), BRW_PREDICATE_NONE);
240 
241    copy_flatshaded_attributes(c, c->vert[0], c->vert[1]);
242    copy_flatshaded_attributes(c, c->vert[2], c->vert[1]);
243    brw_JMPI(p, brw_imm_d(jmpi*nr*2), BRW_PREDICATE_NONE);
244 
245    copy_flatshaded_attributes(c, c->vert[0], c->vert[2]);
246    copy_flatshaded_attributes(c, c->vert[1], c->vert[2]);
247 }
248 
249 
do_flatshade_line(struct brw_sf_compile * c)250 static void do_flatshade_line( struct brw_sf_compile *c )
251 {
252    struct brw_codegen *p = &c->func;
253    GLuint nr;
254    GLuint jmpi = 1;
255 
256    /* Already done in clip program:
257     */
258    if (c->key.primitive == BRW_SF_PRIM_UNFILLED_TRIS)
259       return;
260 
261    if (p->devinfo->ver == 5)
262        jmpi = 2;
263 
264    nr = count_flatshaded_attributes(c);
265 
266    brw_MUL(p, c->pv, c->pv, brw_imm_d(jmpi*(nr+1)));
267    brw_JMPI(p, c->pv, BRW_PREDICATE_NONE);
268    copy_flatshaded_attributes(c, c->vert[1], c->vert[0]);
269 
270    brw_JMPI(p, brw_imm_ud(jmpi*nr), BRW_PREDICATE_NONE);
271    copy_flatshaded_attributes(c, c->vert[0], c->vert[1]);
272 }
273 
274 
275 /***********************************************************************
276  * Triangle setup.
277  */
278 
279 
alloc_regs(struct brw_sf_compile * c)280 static void alloc_regs( struct brw_sf_compile *c )
281 {
282    GLuint reg, i;
283 
284    /* Values computed by fixed function unit:
285     */
286    c->pv  = retype(brw_vec1_grf(1, 1), BRW_REGISTER_TYPE_D);
287    c->det = brw_vec1_grf(1, 2);
288    c->dx0 = brw_vec1_grf(1, 3);
289    c->dx2 = brw_vec1_grf(1, 4);
290    c->dy0 = brw_vec1_grf(1, 5);
291    c->dy2 = brw_vec1_grf(1, 6);
292 
293    /* z and 1/w passed in seperately:
294     */
295    c->z[0]     = brw_vec1_grf(2, 0);
296    c->inv_w[0] = brw_vec1_grf(2, 1);
297    c->z[1]     = brw_vec1_grf(2, 2);
298    c->inv_w[1] = brw_vec1_grf(2, 3);
299    c->z[2]     = brw_vec1_grf(2, 4);
300    c->inv_w[2] = brw_vec1_grf(2, 5);
301 
302    /* The vertices:
303     */
304    reg = 3;
305    for (i = 0; i < c->nr_verts; i++) {
306       c->vert[i] = brw_vec8_grf(reg, 0);
307       reg += c->nr_attr_regs;
308    }
309 
310    /* Temporaries, allocated after last vertex reg.
311     */
312    c->inv_det = brw_vec1_grf(reg, 0);  reg++;
313    c->a1_sub_a0 = brw_vec8_grf(reg, 0);  reg++;
314    c->a2_sub_a0 = brw_vec8_grf(reg, 0);  reg++;
315    c->tmp = brw_vec8_grf(reg, 0);  reg++;
316 
317    /* Note grf allocation:
318     */
319    c->prog_data.total_grf = reg;
320 
321 
322    /* Outputs of this program - interpolation coefficients for
323     * rasterization:
324     */
325    c->m1Cx = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 1, 0);
326    c->m2Cy = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 2, 0);
327    c->m3C0 = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 3, 0);
328 }
329 
330 
copy_z_inv_w(struct brw_sf_compile * c)331 static void copy_z_inv_w( struct brw_sf_compile *c )
332 {
333    struct brw_codegen *p = &c->func;
334    GLuint i;
335 
336    /* Copy both scalars with a single MOV:
337     */
338    for (i = 0; i < c->nr_verts; i++)
339       brw_MOV(p, vec2(suboffset(c->vert[i], 2)), vec2(c->z[i]));
340 }
341 
342 
invert_det(struct brw_sf_compile * c)343 static void invert_det( struct brw_sf_compile *c)
344 {
345    /* Looks like we invert all 8 elements just to get 1/det in
346     * position 2 !?!
347     */
348    gfx4_math(&c->func,
349 	     c->inv_det,
350 	     BRW_MATH_FUNCTION_INV,
351 	     0,
352 	     c->det,
353 	     BRW_MATH_PRECISION_FULL);
354 
355 }
356 
357 
358 static bool
calculate_masks(struct brw_sf_compile * c,GLuint reg,GLushort * pc,GLushort * pc_persp,GLushort * pc_linear)359 calculate_masks(struct brw_sf_compile *c,
360                 GLuint reg,
361                 GLushort *pc,
362                 GLushort *pc_persp,
363                 GLushort *pc_linear)
364 {
365    bool is_last_attr = (reg == c->nr_setup_regs - 1);
366    enum glsl_interp_mode interp;
367 
368    *pc_persp = 0;
369    *pc_linear = 0;
370    *pc = 0xf;
371 
372    interp = c->key.interp_mode[vert_reg_to_vue_slot(c, reg, 0)];
373    if (interp == INTERP_MODE_SMOOTH) {
374       *pc_linear = 0xf;
375       *pc_persp = 0xf;
376    } else if (interp == INTERP_MODE_NOPERSPECTIVE)
377       *pc_linear = 0xf;
378 
379    /* Maybe only processs one attribute on the final round:
380     */
381    if (vert_reg_to_varying(c, reg, 1) != BRW_VARYING_SLOT_COUNT) {
382       *pc |= 0xf0;
383 
384       interp = c->key.interp_mode[vert_reg_to_vue_slot(c, reg, 1)];
385       if (interp == INTERP_MODE_SMOOTH) {
386          *pc_linear |= 0xf0;
387          *pc_persp |= 0xf0;
388       } else if (interp == INTERP_MODE_NOPERSPECTIVE)
389          *pc_linear |= 0xf0;
390    }
391 
392    return is_last_attr;
393 }
394 
395 /* Calculates the predicate control for which channels of a reg
396  * (containing 2 attrs) to do point sprite coordinate replacement on.
397  */
398 static uint16_t
calculate_point_sprite_mask(struct brw_sf_compile * c,GLuint reg)399 calculate_point_sprite_mask(struct brw_sf_compile *c, GLuint reg)
400 {
401    int varying1, varying2;
402    uint16_t pc = 0;
403 
404    varying1 = vert_reg_to_varying(c, reg, 0);
405    if (varying1 >= VARYING_SLOT_TEX0 && varying1 <= VARYING_SLOT_TEX7) {
406       if (c->key.point_sprite_coord_replace & (1 << (varying1 - VARYING_SLOT_TEX0)))
407 	 pc |= 0x0f;
408    }
409    if (varying1 == BRW_VARYING_SLOT_PNTC)
410       pc |= 0x0f;
411 
412    varying2 = vert_reg_to_varying(c, reg, 1);
413    if (varying2 >= VARYING_SLOT_TEX0 && varying2 <= VARYING_SLOT_TEX7) {
414       if (c->key.point_sprite_coord_replace & (1 << (varying2 -
415                                                      VARYING_SLOT_TEX0)))
416          pc |= 0xf0;
417    }
418    if (varying2 == BRW_VARYING_SLOT_PNTC)
419       pc |= 0xf0;
420 
421    return pc;
422 }
423 
424 static void
set_predicate_control_flag_value(struct brw_codegen * p,struct brw_sf_compile * c,unsigned value)425 set_predicate_control_flag_value(struct brw_codegen *p,
426                                  struct brw_sf_compile *c,
427                                  unsigned value)
428 {
429    brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
430 
431    if (value != 0xff) {
432       if (value != c->flag_value) {
433          brw_MOV(p, brw_flag_reg(0, 0), brw_imm_uw(value));
434          c->flag_value = value;
435       }
436 
437       brw_set_default_predicate_control(p, BRW_PREDICATE_NORMAL);
438    }
439 }
440 
brw_emit_tri_setup(struct brw_sf_compile * c,bool allocate)441 static void brw_emit_tri_setup(struct brw_sf_compile *c, bool allocate)
442 {
443    struct brw_codegen *p = &c->func;
444    GLuint i;
445 
446    c->flag_value = 0xff;
447    c->nr_verts = 3;
448 
449    if (allocate)
450       alloc_regs(c);
451 
452    invert_det(c);
453    copy_z_inv_w(c);
454 
455    if (c->key.do_twoside_color)
456       do_twoside_color(c);
457 
458    if (c->key.contains_flat_varying)
459       do_flatshade_triangle(c);
460 
461 
462    for (i = 0; i < c->nr_setup_regs; i++)
463    {
464       /* Pair of incoming attributes:
465        */
466       struct brw_reg a0 = offset(c->vert[0], i);
467       struct brw_reg a1 = offset(c->vert[1], i);
468       struct brw_reg a2 = offset(c->vert[2], i);
469       GLushort pc, pc_persp, pc_linear;
470       bool last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);
471 
472       if (pc_persp)
473       {
474 	 set_predicate_control_flag_value(p, c, pc_persp);
475 	 brw_MUL(p, a0, a0, c->inv_w[0]);
476 	 brw_MUL(p, a1, a1, c->inv_w[1]);
477 	 brw_MUL(p, a2, a2, c->inv_w[2]);
478       }
479 
480 
481       /* Calculate coefficients for interpolated values:
482        */
483       if (pc_linear)
484       {
485 	 set_predicate_control_flag_value(p, c, pc_linear);
486 
487 	 brw_ADD(p, c->a1_sub_a0, a1, negate(a0));
488 	 brw_ADD(p, c->a2_sub_a0, a2, negate(a0));
489 
490 	 /* calculate dA/dx
491 	  */
492 	 brw_MUL(p, brw_null_reg(), c->a1_sub_a0, c->dy2);
493 	 brw_MAC(p, c->tmp, c->a2_sub_a0, negate(c->dy0));
494 	 brw_MUL(p, c->m1Cx, c->tmp, c->inv_det);
495 
496 	 /* calculate dA/dy
497 	  */
498 	 brw_MUL(p, brw_null_reg(), c->a2_sub_a0, c->dx0);
499 	 brw_MAC(p, c->tmp, c->a1_sub_a0, negate(c->dx2));
500 	 brw_MUL(p, c->m2Cy, c->tmp, c->inv_det);
501       }
502 
503       {
504 	 set_predicate_control_flag_value(p, c, pc);
505 	 /* start point for interpolation
506 	  */
507 	 brw_MOV(p, c->m3C0, a0);
508 
509 	 /* Copy m0..m3 to URB.  m0 is implicitly copied from r0 in
510 	  * the send instruction:
511 	  */
512 	 brw_urb_WRITE(p,
513 		       brw_null_reg(),
514 		       0,
515 		       brw_vec8_grf(0, 0), /* r0, will be copied to m0 */
516                        last ? BRW_URB_WRITE_EOT_COMPLETE
517                        : BRW_URB_WRITE_NO_FLAGS,
518 		       4, 	/* msg len */
519 		       0,	/* response len */
520 		       i*4,	/* offset */
521 		       BRW_URB_SWIZZLE_TRANSPOSE); /* XXX: Swizzle control "SF to windower" */
522       }
523    }
524 
525    brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
526 }
527 
528 
529 
brw_emit_line_setup(struct brw_sf_compile * c,bool allocate)530 static void brw_emit_line_setup(struct brw_sf_compile *c, bool allocate)
531 {
532    struct brw_codegen *p = &c->func;
533    GLuint i;
534 
535    c->flag_value = 0xff;
536    c->nr_verts = 2;
537 
538    if (allocate)
539       alloc_regs(c);
540 
541    invert_det(c);
542    copy_z_inv_w(c);
543 
544    if (c->key.contains_flat_varying)
545       do_flatshade_line(c);
546 
547    for (i = 0; i < c->nr_setup_regs; i++)
548    {
549       /* Pair of incoming attributes:
550        */
551       struct brw_reg a0 = offset(c->vert[0], i);
552       struct brw_reg a1 = offset(c->vert[1], i);
553       GLushort pc, pc_persp, pc_linear;
554       bool last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);
555 
556       if (pc_persp)
557       {
558 	 set_predicate_control_flag_value(p, c, pc_persp);
559 	 brw_MUL(p, a0, a0, c->inv_w[0]);
560 	 brw_MUL(p, a1, a1, c->inv_w[1]);
561       }
562 
563       /* Calculate coefficients for position, color:
564        */
565       if (pc_linear) {
566 	 set_predicate_control_flag_value(p, c, pc_linear);
567 
568 	 brw_ADD(p, c->a1_sub_a0, a1, negate(a0));
569 
570 	 brw_MUL(p, c->tmp, c->a1_sub_a0, c->dx0);
571 	 brw_MUL(p, c->m1Cx, c->tmp, c->inv_det);
572 
573 	 brw_MUL(p, c->tmp, c->a1_sub_a0, c->dy0);
574 	 brw_MUL(p, c->m2Cy, c->tmp, c->inv_det);
575       }
576 
577       {
578 	 set_predicate_control_flag_value(p, c, pc);
579 
580 	 /* start point for interpolation
581 	  */
582 	 brw_MOV(p, c->m3C0, a0);
583 
584 	 /* Copy m0..m3 to URB.
585 	  */
586 	 brw_urb_WRITE(p,
587 		       brw_null_reg(),
588 		       0,
589 		       brw_vec8_grf(0, 0),
590                        last ? BRW_URB_WRITE_EOT_COMPLETE
591                        : BRW_URB_WRITE_NO_FLAGS,
592 		       4, 	/* msg len */
593 		       0,	/* response len */
594 		       i*4,	/* urb destination offset */
595 		       BRW_URB_SWIZZLE_TRANSPOSE);
596       }
597    }
598 
599    brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
600 }
601 
brw_emit_point_sprite_setup(struct brw_sf_compile * c,bool allocate)602 static void brw_emit_point_sprite_setup(struct brw_sf_compile *c, bool allocate)
603 {
604    struct brw_codegen *p = &c->func;
605    GLuint i;
606 
607    c->flag_value = 0xff;
608    c->nr_verts = 1;
609 
610    if (allocate)
611       alloc_regs(c);
612 
613    copy_z_inv_w(c);
614    for (i = 0; i < c->nr_setup_regs; i++)
615    {
616       struct brw_reg a0 = offset(c->vert[0], i);
617       GLushort pc, pc_persp, pc_linear, pc_coord_replace;
618       bool last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);
619 
620       pc_coord_replace = calculate_point_sprite_mask(c, i);
621       pc_persp &= ~pc_coord_replace;
622 
623       if (pc_persp) {
624 	 set_predicate_control_flag_value(p, c, pc_persp);
625 	 brw_MUL(p, a0, a0, c->inv_w[0]);
626       }
627 
628       /* Point sprite coordinate replacement: A texcoord with this
629        * enabled gets replaced with the value (x, y, 0, 1) where x and
630        * y vary from 0 to 1 across the horizontal and vertical of the
631        * point.
632        */
633       if (pc_coord_replace) {
634 	 set_predicate_control_flag_value(p, c, pc_coord_replace);
635 	 /* Caculate 1.0/PointWidth */
636 	 gfx4_math(&c->func,
637 		   c->tmp,
638 		   BRW_MATH_FUNCTION_INV,
639 		   0,
640 		   c->dx0,
641 		   BRW_MATH_PRECISION_FULL);
642 
643 	 brw_set_default_access_mode(p, BRW_ALIGN_16);
644 
645 	 /* dA/dx, dA/dy */
646 	 brw_MOV(p, c->m1Cx, brw_imm_f(0.0));
647 	 brw_MOV(p, c->m2Cy, brw_imm_f(0.0));
648 	 brw_MOV(p, brw_writemask(c->m1Cx, WRITEMASK_X), c->tmp);
649 	 if (c->key.sprite_origin_lower_left) {
650 	    brw_MOV(p, brw_writemask(c->m2Cy, WRITEMASK_Y), negate(c->tmp));
651 	 } else {
652 	    brw_MOV(p, brw_writemask(c->m2Cy, WRITEMASK_Y), c->tmp);
653 	 }
654 
655 	 /* attribute constant offset */
656 	 brw_MOV(p, c->m3C0, brw_imm_f(0.0));
657 	 if (c->key.sprite_origin_lower_left) {
658 	    brw_MOV(p, brw_writemask(c->m3C0, WRITEMASK_YW), brw_imm_f(1.0));
659 	 } else {
660 	    brw_MOV(p, brw_writemask(c->m3C0, WRITEMASK_W), brw_imm_f(1.0));
661 	 }
662 
663 	 brw_set_default_access_mode(p, BRW_ALIGN_1);
664       }
665 
666       if (pc & ~pc_coord_replace) {
667 	 set_predicate_control_flag_value(p, c, pc & ~pc_coord_replace);
668 	 brw_MOV(p, c->m1Cx, brw_imm_ud(0));
669 	 brw_MOV(p, c->m2Cy, brw_imm_ud(0));
670 	 brw_MOV(p, c->m3C0, a0); /* constant value */
671       }
672 
673 
674       set_predicate_control_flag_value(p, c, pc);
675       /* Copy m0..m3 to URB. */
676       brw_urb_WRITE(p,
677 		    brw_null_reg(),
678 		    0,
679 		    brw_vec8_grf(0, 0),
680                     last ? BRW_URB_WRITE_EOT_COMPLETE
681                     : BRW_URB_WRITE_NO_FLAGS,
682 		    4, 	/* msg len */
683 		    0,	/* response len */
684 		    i*4,	/* urb destination offset */
685 		    BRW_URB_SWIZZLE_TRANSPOSE);
686    }
687 
688    brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
689 }
690 
691 /* Points setup - several simplifications as all attributes are
692  * constant across the face of the point (point sprites excluded!)
693  */
brw_emit_point_setup(struct brw_sf_compile * c,bool allocate)694 static void brw_emit_point_setup(struct brw_sf_compile *c, bool allocate)
695 {
696    struct brw_codegen *p = &c->func;
697    GLuint i;
698 
699    c->flag_value = 0xff;
700    c->nr_verts = 1;
701 
702    if (allocate)
703       alloc_regs(c);
704 
705    copy_z_inv_w(c);
706 
707    brw_MOV(p, c->m1Cx, brw_imm_ud(0)); /* zero - move out of loop */
708    brw_MOV(p, c->m2Cy, brw_imm_ud(0)); /* zero - move out of loop */
709 
710    for (i = 0; i < c->nr_setup_regs; i++)
711    {
712       struct brw_reg a0 = offset(c->vert[0], i);
713       GLushort pc, pc_persp, pc_linear;
714       bool last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);
715 
716       if (pc_persp)
717       {
718 	 /* This seems odd as the values are all constant, but the
719 	  * fragment shader will be expecting it:
720 	  */
721 	 set_predicate_control_flag_value(p, c, pc_persp);
722 	 brw_MUL(p, a0, a0, c->inv_w[0]);
723       }
724 
725 
726       /* The delta values are always zero, just send the starting
727        * coordinate.  Again, this is to fit in with the interpolation
728        * code in the fragment shader.
729        */
730       {
731 	 set_predicate_control_flag_value(p, c, pc);
732 
733 	 brw_MOV(p, c->m3C0, a0); /* constant value */
734 
735 	 /* Copy m0..m3 to URB.
736 	  */
737 	 brw_urb_WRITE(p,
738 		       brw_null_reg(),
739 		       0,
740 		       brw_vec8_grf(0, 0),
741                        last ? BRW_URB_WRITE_EOT_COMPLETE
742                        : BRW_URB_WRITE_NO_FLAGS,
743 		       4, 	/* msg len */
744 		       0,	/* response len */
745 		       i*4,	/* urb destination offset */
746 		       BRW_URB_SWIZZLE_TRANSPOSE);
747       }
748    }
749 
750    brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
751 }
752 
brw_emit_anyprim_setup(struct brw_sf_compile * c)753 static void brw_emit_anyprim_setup( struct brw_sf_compile *c )
754 {
755    struct brw_codegen *p = &c->func;
756    struct brw_reg payload_prim = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, 1, 0);
757    struct brw_reg payload_attr = get_element_ud(brw_vec1_reg(BRW_GENERAL_REGISTER_FILE, 1, 0), 0);
758    struct brw_reg primmask;
759    int jmp;
760    struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
761 
762    c->nr_verts = 3;
763    alloc_regs(c);
764 
765    primmask = retype(get_element(c->tmp, 0), BRW_REGISTER_TYPE_UD);
766 
767    brw_MOV(p, primmask, brw_imm_ud(1));
768    brw_SHL(p, primmask, primmask, payload_prim);
769 
770    brw_AND(p, v1_null_ud, primmask, brw_imm_ud((1<<_3DPRIM_TRILIST) |
771 					       (1<<_3DPRIM_TRISTRIP) |
772 					       (1<<_3DPRIM_TRIFAN) |
773 					       (1<<_3DPRIM_TRISTRIP_REVERSE) |
774 					       (1<<_3DPRIM_POLYGON) |
775 					       (1<<_3DPRIM_RECTLIST) |
776 					       (1<<_3DPRIM_TRIFAN_NOSTIPPLE)));
777    brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_Z);
778    jmp = brw_JMPI(p, brw_imm_d(0), BRW_PREDICATE_NORMAL) - p->store;
779    brw_emit_tri_setup(c, false);
780    brw_land_fwd_jump(p, jmp);
781 
782    brw_AND(p, v1_null_ud, primmask, brw_imm_ud((1<<_3DPRIM_LINELIST) |
783 					       (1<<_3DPRIM_LINESTRIP) |
784 					       (1<<_3DPRIM_LINELOOP) |
785 					       (1<<_3DPRIM_LINESTRIP_CONT) |
786 					       (1<<_3DPRIM_LINESTRIP_BF) |
787 					       (1<<_3DPRIM_LINESTRIP_CONT_BF)));
788    brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_Z);
789    jmp = brw_JMPI(p, brw_imm_d(0), BRW_PREDICATE_NORMAL) - p->store;
790    brw_emit_line_setup(c, false);
791    brw_land_fwd_jump(p, jmp);
792 
793    brw_AND(p, v1_null_ud, payload_attr, brw_imm_ud(1<<BRW_SPRITE_POINT_ENABLE));
794    brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_Z);
795    jmp = brw_JMPI(p, brw_imm_d(0), BRW_PREDICATE_NORMAL) - p->store;
796    brw_emit_point_sprite_setup(c, false);
797    brw_land_fwd_jump(p, jmp);
798 
799    brw_emit_point_setup( c, false );
800 }
801 
802 const unsigned *
brw_compile_sf(const struct brw_compiler * compiler,void * mem_ctx,const struct brw_sf_prog_key * key,struct brw_sf_prog_data * prog_data,struct brw_vue_map * vue_map,unsigned * final_assembly_size)803 brw_compile_sf(const struct brw_compiler *compiler,
804                void *mem_ctx,
805                const struct brw_sf_prog_key *key,
806                struct brw_sf_prog_data *prog_data,
807                struct brw_vue_map *vue_map,
808                unsigned *final_assembly_size)
809 {
810    struct brw_sf_compile c;
811    memset(&c, 0, sizeof(c));
812 
813    /* Begin the compilation:
814     */
815    brw_init_codegen(compiler->devinfo, &c.func, mem_ctx);
816 
817    c.key = *key;
818    c.vue_map = *vue_map;
819    if (c.key.do_point_coord) {
820       /*
821        * gl_PointCoord is a FS instead of VS builtin variable, thus it's
822        * not included in c.vue_map generated in VS stage. Here we add
823        * it manually to let SF shader generate the needed interpolation
824        * coefficient for FS shader.
825        */
826       c.vue_map.varying_to_slot[BRW_VARYING_SLOT_PNTC] = c.vue_map.num_slots;
827       c.vue_map.slot_to_varying[c.vue_map.num_slots++] = BRW_VARYING_SLOT_PNTC;
828    }
829    c.urb_entry_read_offset = BRW_SF_URB_ENTRY_READ_OFFSET;
830    c.nr_attr_regs = (c.vue_map.num_slots + 1)/2 - c.urb_entry_read_offset;
831    c.nr_setup_regs = c.nr_attr_regs;
832 
833    c.prog_data.urb_read_length = c.nr_attr_regs;
834    c.prog_data.urb_entry_size = c.nr_setup_regs * 2;
835 
836    /* Which primitive?  Or all three?
837     */
838    switch (key->primitive) {
839    case BRW_SF_PRIM_TRIANGLES:
840       c.nr_verts = 3;
841       brw_emit_tri_setup( &c, true );
842       break;
843    case BRW_SF_PRIM_LINES:
844       c.nr_verts = 2;
845       brw_emit_line_setup( &c, true );
846       break;
847    case BRW_SF_PRIM_POINTS:
848       c.nr_verts = 1;
849       if (key->do_point_sprite)
850 	  brw_emit_point_sprite_setup( &c, true );
851       else
852 	  brw_emit_point_setup( &c, true );
853       break;
854    case BRW_SF_PRIM_UNFILLED_TRIS:
855       c.nr_verts = 3;
856       brw_emit_anyprim_setup( &c );
857       break;
858    default:
859       unreachable("not reached");
860    }
861 
862    /* FINISHME: SF programs use calculated jumps (i.e., JMPI with a register
863     * source). Compacting would be difficult.
864     */
865    /* brw_compact_instructions(&c.func, 0, 0, NULL); */
866 
867    *prog_data = c.prog_data;
868 
869    const unsigned *program = brw_get_program(&c.func, final_assembly_size);
870 
871    if (INTEL_DEBUG(DEBUG_SF)) {
872       fprintf(stderr, "sf:\n");
873       brw_disassemble_with_labels(compiler->devinfo,
874                                   program, 0, *final_assembly_size, stderr);
875       fprintf(stderr, "\n");
876    }
877 
878    return program;
879 }
880