1 /*
2  * Copyright © 2015 Broadcom
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 /**
25  * Implements most of the fixed function fragment pipeline in shader code.
26  *
27  * VC4 doesn't have any hardware support for blending, alpha test, logic ops,
28  * or color mask.  Instead, you read the current contents of the destination
29  * from the tile buffer after having waited for the scoreboard (which is
30  * handled by vc4_qpu_emit.c), then do math using your output color and that
31  * destination value, and update the output color appropriately.
32  *
33  * Once this pass is done, the color write will either have one component (for
34  * single sample) with packed argb8888, or 4 components with the per-sample
35  * argb8888 result.
36  */
37 
38 /**
39  * Lowers fixed-function blending to a load of the destination color and a
40  * series of ALU operations before the store of the output.
41  */
42 #include "util/format/u_format.h"
43 #include "vc4_qir.h"
44 #include "compiler/nir/nir_builder.h"
45 #include "compiler/nir/nir_format_convert.h"
46 #include "vc4_context.h"
47 
48 static bool
blend_depends_on_dst_color(struct vc4_compile * c)49 blend_depends_on_dst_color(struct vc4_compile *c)
50 {
51         return (c->fs_key->blend.blend_enable ||
52                 c->fs_key->blend.colormask != 0xf ||
53                 c->fs_key->logicop_func != PIPE_LOGICOP_COPY);
54 }
55 
56 /** Emits a load of the previous fragment color from the tile buffer. */
57 static nir_ssa_def *
vc4_nir_get_dst_color(nir_builder * b,int sample)58 vc4_nir_get_dst_color(nir_builder *b, int sample)
59 {
60         return nir_load_input(b, 1, 32, nir_imm_int(b, 0),
61                               .base = VC4_NIR_TLB_COLOR_READ_INPUT + sample);
62 }
63 
64 static nir_ssa_def *
vc4_blend_channel_f(nir_builder * b,nir_ssa_def ** src,nir_ssa_def ** dst,unsigned factor,int channel)65 vc4_blend_channel_f(nir_builder *b,
66                     nir_ssa_def **src,
67                     nir_ssa_def **dst,
68                     unsigned factor,
69                     int channel)
70 {
71         switch(factor) {
72         case PIPE_BLENDFACTOR_ONE:
73                 return nir_imm_float(b, 1.0);
74         case PIPE_BLENDFACTOR_SRC_COLOR:
75                 return src[channel];
76         case PIPE_BLENDFACTOR_SRC_ALPHA:
77                 return src[3];
78         case PIPE_BLENDFACTOR_DST_ALPHA:
79                 return dst[3];
80         case PIPE_BLENDFACTOR_DST_COLOR:
81                 return dst[channel];
82         case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
83                 if (channel != 3) {
84                         return nir_fmin(b,
85                                         src[3],
86                                         nir_fsub(b,
87                                                  nir_imm_float(b, 1.0),
88                                                  dst[3]));
89                 } else {
90                         return nir_imm_float(b, 1.0);
91                 }
92         case PIPE_BLENDFACTOR_CONST_COLOR:
93                 return nir_load_system_value(b,
94                                              nir_intrinsic_load_blend_const_color_r_float +
95                                              channel,
96                                              0, 1, 32);
97         case PIPE_BLENDFACTOR_CONST_ALPHA:
98                 return nir_load_blend_const_color_a_float(b);
99         case PIPE_BLENDFACTOR_ZERO:
100                 return nir_imm_float(b, 0.0);
101         case PIPE_BLENDFACTOR_INV_SRC_COLOR:
102                 return nir_fsub(b, nir_imm_float(b, 1.0), src[channel]);
103         case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
104                 return nir_fsub(b, nir_imm_float(b, 1.0), src[3]);
105         case PIPE_BLENDFACTOR_INV_DST_ALPHA:
106                 return nir_fsub(b, nir_imm_float(b, 1.0), dst[3]);
107         case PIPE_BLENDFACTOR_INV_DST_COLOR:
108                 return nir_fsub(b, nir_imm_float(b, 1.0), dst[channel]);
109         case PIPE_BLENDFACTOR_INV_CONST_COLOR:
110                 return nir_fsub(b, nir_imm_float(b, 1.0),
111                                 nir_load_system_value(b,
112                                                       nir_intrinsic_load_blend_const_color_r_float +
113                                                       channel,
114                                                       0, 1, 32));
115         case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
116                 return nir_fsub(b, nir_imm_float(b, 1.0),
117                                 nir_load_blend_const_color_a_float(b));
118 
119         default:
120         case PIPE_BLENDFACTOR_SRC1_COLOR:
121         case PIPE_BLENDFACTOR_SRC1_ALPHA:
122         case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
123         case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
124                 /* Unsupported. */
125                 fprintf(stderr, "Unknown blend factor %d\n", factor);
126                 return nir_imm_float(b, 1.0);
127         }
128 }
129 
130 static nir_ssa_def *
vc4_nir_set_packed_chan(nir_builder * b,nir_ssa_def * src0,nir_ssa_def * src1,int chan)131 vc4_nir_set_packed_chan(nir_builder *b, nir_ssa_def *src0, nir_ssa_def *src1,
132                         int chan)
133 {
134         unsigned chan_mask = 0xff << (chan * 8);
135         return nir_ior(b,
136                        nir_iand(b, src0, nir_imm_int(b, ~chan_mask)),
137                        nir_iand(b, src1, nir_imm_int(b, chan_mask)));
138 }
139 
140 static nir_ssa_def *
vc4_blend_channel_i(nir_builder * b,nir_ssa_def * src,nir_ssa_def * dst,nir_ssa_def * src_a,nir_ssa_def * dst_a,unsigned factor,int a_chan)141 vc4_blend_channel_i(nir_builder *b,
142                     nir_ssa_def *src,
143                     nir_ssa_def *dst,
144                     nir_ssa_def *src_a,
145                     nir_ssa_def *dst_a,
146                     unsigned factor,
147                     int a_chan)
148 {
149         switch (factor) {
150         case PIPE_BLENDFACTOR_ONE:
151                 return nir_imm_int(b, ~0);
152         case PIPE_BLENDFACTOR_SRC_COLOR:
153                 return src;
154         case PIPE_BLENDFACTOR_SRC_ALPHA:
155                 return src_a;
156         case PIPE_BLENDFACTOR_DST_ALPHA:
157                 return dst_a;
158         case PIPE_BLENDFACTOR_DST_COLOR:
159                 return dst;
160         case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
161                 return vc4_nir_set_packed_chan(b,
162                                                nir_umin_4x8_vc4(b,
163                                                             src_a,
164                                                             nir_inot(b, dst_a)),
165                                                nir_imm_int(b, ~0),
166                                                a_chan);
167         case PIPE_BLENDFACTOR_CONST_COLOR:
168                 return nir_load_blend_const_color_rgba8888_unorm(b);
169         case PIPE_BLENDFACTOR_CONST_ALPHA:
170                 return nir_load_blend_const_color_aaaa8888_unorm(b);
171         case PIPE_BLENDFACTOR_ZERO:
172                 return nir_imm_int(b, 0);
173         case PIPE_BLENDFACTOR_INV_SRC_COLOR:
174                 return nir_inot(b, src);
175         case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
176                 return nir_inot(b, src_a);
177         case PIPE_BLENDFACTOR_INV_DST_ALPHA:
178                 return nir_inot(b, dst_a);
179         case PIPE_BLENDFACTOR_INV_DST_COLOR:
180                 return nir_inot(b, dst);
181         case PIPE_BLENDFACTOR_INV_CONST_COLOR:
182                 return nir_inot(b,
183                                 nir_load_blend_const_color_rgba8888_unorm(b));
184         case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
185                 return nir_inot(b,
186                                 nir_load_blend_const_color_aaaa8888_unorm(b));
187 
188         default:
189         case PIPE_BLENDFACTOR_SRC1_COLOR:
190         case PIPE_BLENDFACTOR_SRC1_ALPHA:
191         case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
192         case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
193                 /* Unsupported. */
194                 fprintf(stderr, "Unknown blend factor %d\n", factor);
195                 return nir_imm_int(b, ~0);
196         }
197 }
198 
199 static nir_ssa_def *
vc4_blend_func_f(nir_builder * b,nir_ssa_def * src,nir_ssa_def * dst,unsigned func)200 vc4_blend_func_f(nir_builder *b, nir_ssa_def *src, nir_ssa_def *dst,
201                  unsigned func)
202 {
203         switch (func) {
204         case PIPE_BLEND_ADD:
205                 return nir_fadd(b, src, dst);
206         case PIPE_BLEND_SUBTRACT:
207                 return nir_fsub(b, src, dst);
208         case PIPE_BLEND_REVERSE_SUBTRACT:
209                 return nir_fsub(b, dst, src);
210         case PIPE_BLEND_MIN:
211                 return nir_fmin(b, src, dst);
212         case PIPE_BLEND_MAX:
213                 return nir_fmax(b, src, dst);
214 
215         default:
216                 /* Unsupported. */
217                 fprintf(stderr, "Unknown blend func %d\n", func);
218                 return src;
219 
220         }
221 }
222 
223 static nir_ssa_def *
vc4_blend_func_i(nir_builder * b,nir_ssa_def * src,nir_ssa_def * dst,unsigned func)224 vc4_blend_func_i(nir_builder *b, nir_ssa_def *src, nir_ssa_def *dst,
225                  unsigned func)
226 {
227         switch (func) {
228         case PIPE_BLEND_ADD:
229                 return nir_usadd_4x8_vc4(b, src, dst);
230         case PIPE_BLEND_SUBTRACT:
231                 return nir_ussub_4x8_vc4(b, src, dst);
232         case PIPE_BLEND_REVERSE_SUBTRACT:
233                 return nir_ussub_4x8_vc4(b, dst, src);
234         case PIPE_BLEND_MIN:
235                 return nir_umin_4x8_vc4(b, src, dst);
236         case PIPE_BLEND_MAX:
237                 return nir_umax_4x8_vc4(b, src, dst);
238 
239         default:
240                 /* Unsupported. */
241                 fprintf(stderr, "Unknown blend func %d\n", func);
242                 return src;
243 
244         }
245 }
246 
247 static void
vc4_do_blending_f(struct vc4_compile * c,nir_builder * b,nir_ssa_def ** result,nir_ssa_def ** src_color,nir_ssa_def ** dst_color)248 vc4_do_blending_f(struct vc4_compile *c, nir_builder *b, nir_ssa_def **result,
249                   nir_ssa_def **src_color, nir_ssa_def **dst_color)
250 {
251         struct pipe_rt_blend_state *blend = &c->fs_key->blend;
252 
253         if (!blend->blend_enable) {
254                 for (int i = 0; i < 4; i++)
255                         result[i] = src_color[i];
256                 return;
257         }
258 
259         /* Clamp the src color to [0, 1].  Dest is already clamped. */
260         for (int i = 0; i < 4; i++)
261                 src_color[i] = nir_fsat(b, src_color[i]);
262 
263         nir_ssa_def *src_blend[4], *dst_blend[4];
264         for (int i = 0; i < 4; i++) {
265                 int src_factor = ((i != 3) ? blend->rgb_src_factor :
266                                   blend->alpha_src_factor);
267                 int dst_factor = ((i != 3) ? blend->rgb_dst_factor :
268                                   blend->alpha_dst_factor);
269                 src_blend[i] = nir_fmul(b, src_color[i],
270                                         vc4_blend_channel_f(b,
271                                                             src_color, dst_color,
272                                                             src_factor, i));
273                 dst_blend[i] = nir_fmul(b, dst_color[i],
274                                         vc4_blend_channel_f(b,
275                                                             src_color, dst_color,
276                                                             dst_factor, i));
277         }
278 
279         for (int i = 0; i < 4; i++) {
280                 result[i] = vc4_blend_func_f(b, src_blend[i], dst_blend[i],
281                                              ((i != 3) ? blend->rgb_func :
282                                               blend->alpha_func));
283         }
284 }
285 
286 static nir_ssa_def *
vc4_nir_splat(nir_builder * b,nir_ssa_def * src)287 vc4_nir_splat(nir_builder *b, nir_ssa_def *src)
288 {
289         nir_ssa_def *or1 = nir_ior(b, src, nir_ishl(b, src, nir_imm_int(b, 8)));
290         return nir_ior(b, or1, nir_ishl(b, or1, nir_imm_int(b, 16)));
291 }
292 
293 static nir_ssa_def *
vc4_do_blending_i(struct vc4_compile * c,nir_builder * b,nir_ssa_def * src_color,nir_ssa_def * dst_color,nir_ssa_def * src_float_a)294 vc4_do_blending_i(struct vc4_compile *c, nir_builder *b,
295                   nir_ssa_def *src_color, nir_ssa_def *dst_color,
296                   nir_ssa_def *src_float_a)
297 {
298         struct pipe_rt_blend_state *blend = &c->fs_key->blend;
299 
300         if (!blend->blend_enable)
301                 return src_color;
302 
303         enum pipe_format color_format = c->fs_key->color_format;
304         const uint8_t *format_swiz = vc4_get_format_swizzle(color_format);
305         nir_ssa_def *imm_0xff = nir_imm_int(b, 0xff);
306         nir_ssa_def *src_a = nir_pack_unorm_4x8(b, src_float_a);
307         nir_ssa_def *dst_a;
308         int alpha_chan;
309         for (alpha_chan = 0; alpha_chan < 4; alpha_chan++) {
310                 if (format_swiz[alpha_chan] == 3)
311                         break;
312         }
313         if (alpha_chan != 4) {
314                 nir_ssa_def *shift = nir_imm_int(b, alpha_chan * 8);
315                 dst_a = vc4_nir_splat(b, nir_iand(b, nir_ushr(b, dst_color,
316                                                               shift), imm_0xff));
317         } else {
318                 dst_a = nir_imm_int(b, ~0);
319         }
320 
321         nir_ssa_def *src_factor = vc4_blend_channel_i(b,
322                                                       src_color, dst_color,
323                                                       src_a, dst_a,
324                                                       blend->rgb_src_factor,
325                                                       alpha_chan);
326         nir_ssa_def *dst_factor = vc4_blend_channel_i(b,
327                                                       src_color, dst_color,
328                                                       src_a, dst_a,
329                                                       blend->rgb_dst_factor,
330                                                       alpha_chan);
331 
332         if (alpha_chan != 4 &&
333             blend->alpha_src_factor != blend->rgb_src_factor) {
334                 nir_ssa_def *src_alpha_factor =
335                         vc4_blend_channel_i(b,
336                                             src_color, dst_color,
337                                             src_a, dst_a,
338                                             blend->alpha_src_factor,
339                                             alpha_chan);
340                 src_factor = vc4_nir_set_packed_chan(b, src_factor,
341                                                      src_alpha_factor,
342                                                      alpha_chan);
343         }
344         if (alpha_chan != 4 &&
345             blend->alpha_dst_factor != blend->rgb_dst_factor) {
346                 nir_ssa_def *dst_alpha_factor =
347                         vc4_blend_channel_i(b,
348                                             src_color, dst_color,
349                                             src_a, dst_a,
350                                             blend->alpha_dst_factor,
351                                             alpha_chan);
352                 dst_factor = vc4_nir_set_packed_chan(b, dst_factor,
353                                                      dst_alpha_factor,
354                                                      alpha_chan);
355         }
356         nir_ssa_def *src_blend = nir_umul_unorm_4x8_vc4(b, src_color, src_factor);
357         nir_ssa_def *dst_blend = nir_umul_unorm_4x8_vc4(b, dst_color, dst_factor);
358 
359         nir_ssa_def *result =
360                 vc4_blend_func_i(b, src_blend, dst_blend, blend->rgb_func);
361         if (alpha_chan != 4 && blend->alpha_func != blend->rgb_func) {
362                 nir_ssa_def *result_a = vc4_blend_func_i(b,
363                                                          src_blend,
364                                                          dst_blend,
365                                                          blend->alpha_func);
366                 result = vc4_nir_set_packed_chan(b, result, result_a,
367                                                  alpha_chan);
368         }
369         return result;
370 }
371 
372 static nir_ssa_def *
vc4_logicop(nir_builder * b,int logicop_func,nir_ssa_def * src,nir_ssa_def * dst)373 vc4_logicop(nir_builder *b, int logicop_func,
374             nir_ssa_def *src, nir_ssa_def *dst)
375 {
376         switch (logicop_func) {
377         case PIPE_LOGICOP_CLEAR:
378                 return nir_imm_int(b, 0);
379         case PIPE_LOGICOP_NOR:
380                 return nir_inot(b, nir_ior(b, src, dst));
381         case PIPE_LOGICOP_AND_INVERTED:
382                 return nir_iand(b, nir_inot(b, src), dst);
383         case PIPE_LOGICOP_COPY_INVERTED:
384                 return nir_inot(b, src);
385         case PIPE_LOGICOP_AND_REVERSE:
386                 return nir_iand(b, src, nir_inot(b, dst));
387         case PIPE_LOGICOP_INVERT:
388                 return nir_inot(b, dst);
389         case PIPE_LOGICOP_XOR:
390                 return nir_ixor(b, src, dst);
391         case PIPE_LOGICOP_NAND:
392                 return nir_inot(b, nir_iand(b, src, dst));
393         case PIPE_LOGICOP_AND:
394                 return nir_iand(b, src, dst);
395         case PIPE_LOGICOP_EQUIV:
396                 return nir_inot(b, nir_ixor(b, src, dst));
397         case PIPE_LOGICOP_NOOP:
398                 return dst;
399         case PIPE_LOGICOP_OR_INVERTED:
400                 return nir_ior(b, nir_inot(b, src), dst);
401         case PIPE_LOGICOP_OR_REVERSE:
402                 return nir_ior(b, src, nir_inot(b, dst));
403         case PIPE_LOGICOP_OR:
404                 return nir_ior(b, src, dst);
405         case PIPE_LOGICOP_SET:
406                 return nir_imm_int(b, ~0);
407         default:
408                 fprintf(stderr, "Unknown logic op %d\n", logicop_func);
409                 FALLTHROUGH;
410         case PIPE_LOGICOP_COPY:
411                 return src;
412         }
413 }
414 
415 static nir_ssa_def *
vc4_nir_swizzle_and_pack(struct vc4_compile * c,nir_builder * b,nir_ssa_def ** colors)416 vc4_nir_swizzle_and_pack(struct vc4_compile *c, nir_builder *b,
417                          nir_ssa_def **colors)
418 {
419         enum pipe_format color_format = c->fs_key->color_format;
420         const uint8_t *format_swiz = vc4_get_format_swizzle(color_format);
421 
422         nir_ssa_def *swizzled[4];
423         for (int i = 0; i < 4; i++) {
424                 swizzled[i] = vc4_nir_get_swizzled_channel(b, colors,
425                                                            format_swiz[i]);
426         }
427 
428         return nir_pack_unorm_4x8(b,
429                                   nir_vec4(b,
430                                            swizzled[0], swizzled[1],
431                                            swizzled[2], swizzled[3]));
432 
433 }
434 
435 static nir_ssa_def *
vc4_nir_blend_pipeline(struct vc4_compile * c,nir_builder * b,nir_ssa_def * src,int sample)436 vc4_nir_blend_pipeline(struct vc4_compile *c, nir_builder *b, nir_ssa_def *src,
437                        int sample)
438 {
439         enum pipe_format color_format = c->fs_key->color_format;
440         const uint8_t *format_swiz = vc4_get_format_swizzle(color_format);
441         bool srgb = util_format_is_srgb(color_format);
442 
443         /* Pull out the float src/dst color components. */
444         nir_ssa_def *packed_dst_color = vc4_nir_get_dst_color(b, sample);
445         nir_ssa_def *dst_vec4 = nir_unpack_unorm_4x8(b, packed_dst_color);
446         nir_ssa_def *src_color[4], *unpacked_dst_color[4];
447         for (unsigned i = 0; i < 4; i++) {
448                 src_color[i] = nir_channel(b, src, i);
449                 unpacked_dst_color[i] = nir_channel(b, dst_vec4, i);
450         }
451 
452         if (c->fs_key->sample_alpha_to_one && c->fs_key->msaa)
453                 src_color[3] = nir_imm_float(b, 1.0);
454 
455         nir_ssa_def *packed_color;
456         if (srgb) {
457                 /* Unswizzle the destination color. */
458                 nir_ssa_def *dst_color[4];
459                 for (unsigned i = 0; i < 4; i++) {
460                         dst_color[i] = vc4_nir_get_swizzled_channel(b,
461                                                                     unpacked_dst_color,
462                                                                     format_swiz[i]);
463                 }
464 
465                 /* Turn dst color to linear. */
466                 for (int i = 0; i < 3; i++)
467                         dst_color[i] = nir_format_srgb_to_linear(b, dst_color[i]);
468 
469                 nir_ssa_def *blend_color[4];
470                 vc4_do_blending_f(c, b, blend_color, src_color, dst_color);
471 
472                 /* sRGB encode the output color */
473                 for (int i = 0; i < 3; i++)
474                         blend_color[i] = nir_format_linear_to_srgb(b, blend_color[i]);
475 
476                 packed_color = vc4_nir_swizzle_and_pack(c, b, blend_color);
477         } else {
478                 nir_ssa_def *packed_src_color =
479                         vc4_nir_swizzle_and_pack(c, b, src_color);
480 
481                 packed_color =
482                         vc4_do_blending_i(c, b,
483                                           packed_src_color, packed_dst_color,
484                                           src_color[3]);
485         }
486 
487         packed_color = vc4_logicop(b, c->fs_key->logicop_func,
488                                    packed_color, packed_dst_color);
489 
490         /* If the bit isn't set in the color mask, then just return the
491          * original dst color, instead.
492          */
493         uint32_t colormask = 0xffffffff;
494         for (int i = 0; i < 4; i++) {
495                 if (format_swiz[i] < 4 &&
496                     !(c->fs_key->blend.colormask & (1 << format_swiz[i]))) {
497                         colormask &= ~(0xff << (i * 8));
498                 }
499         }
500 
501         return nir_ior(b,
502                        nir_iand(b, packed_color,
503                                 nir_imm_int(b, colormask)),
504                        nir_iand(b, packed_dst_color,
505                                 nir_imm_int(b, ~colormask)));
506 }
507 
508 static void
vc4_nir_store_sample_mask(struct vc4_compile * c,nir_builder * b,nir_ssa_def * val)509 vc4_nir_store_sample_mask(struct vc4_compile *c, nir_builder *b,
510                           nir_ssa_def *val)
511 {
512         nir_variable *sample_mask = nir_variable_create(c->s, nir_var_shader_out,
513                                                         glsl_uint_type(),
514                                                         "sample_mask");
515         sample_mask->data.driver_location = c->s->num_outputs++;
516         sample_mask->data.location = FRAG_RESULT_SAMPLE_MASK;
517 
518         nir_store_output(b, val, nir_imm_int(b, 0),
519                          .base = sample_mask->data.driver_location);
520 }
521 
522 static void
vc4_nir_lower_blend_instr(struct vc4_compile * c,nir_builder * b,nir_intrinsic_instr * intr)523 vc4_nir_lower_blend_instr(struct vc4_compile *c, nir_builder *b,
524                           nir_intrinsic_instr *intr)
525 {
526         nir_ssa_def *frag_color = intr->src[0].ssa;
527 
528         if (c->fs_key->sample_alpha_to_coverage) {
529                 nir_ssa_def *a = nir_channel(b, frag_color, 3);
530 
531                 /* XXX: We should do a nice dither based on the fragment
532                  * coordinate, instead.
533                  */
534                 nir_ssa_def *num_samples = nir_imm_float(b, VC4_MAX_SAMPLES);
535                 nir_ssa_def *num_bits = nir_f2i32(b, nir_fmul(b, a, num_samples));
536                 nir_ssa_def *bitmask = nir_isub(b,
537                                                 nir_ishl(b,
538                                                          nir_imm_int(b, 1),
539                                                          num_bits),
540                                                 nir_imm_int(b, 1));
541                 vc4_nir_store_sample_mask(c, b, bitmask);
542         }
543 
544         /* The TLB color read returns each sample in turn, so if our blending
545          * depends on the destination color, we're going to have to run the
546          * blending function separately for each destination sample value, and
547          * then output the per-sample color using TLB_COLOR_MS.
548          */
549         nir_ssa_def *blend_output;
550         if (c->fs_key->msaa && blend_depends_on_dst_color(c)) {
551                 c->msaa_per_sample_output = true;
552 
553                 nir_ssa_def *samples[4];
554                 for (int i = 0; i < VC4_MAX_SAMPLES; i++)
555                         samples[i] = vc4_nir_blend_pipeline(c, b, frag_color, i);
556                 blend_output = nir_vec4(b,
557                                         samples[0], samples[1],
558                                         samples[2], samples[3]);
559         } else {
560                 blend_output = vc4_nir_blend_pipeline(c, b, frag_color, 0);
561         }
562 
563         nir_instr_rewrite_src(&intr->instr, &intr->src[0],
564                               nir_src_for_ssa(blend_output));
565         intr->num_components = blend_output->num_components;
566 }
567 
568 static bool
vc4_nir_lower_blend_block(nir_block * block,struct vc4_compile * c)569 vc4_nir_lower_blend_block(nir_block *block, struct vc4_compile *c)
570 {
571         nir_foreach_instr_safe(instr, block) {
572                 if (instr->type != nir_instr_type_intrinsic)
573                         continue;
574                 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
575                 if (intr->intrinsic != nir_intrinsic_store_output)
576                         continue;
577 
578                 nir_variable *output_var = NULL;
579                 nir_foreach_shader_out_variable(var, c->s) {
580                         if (var->data.driver_location ==
581                             nir_intrinsic_base(intr)) {
582                                 output_var = var;
583                                 break;
584                         }
585                 }
586                 assert(output_var);
587 
588                 if (output_var->data.location != FRAG_RESULT_COLOR &&
589                     output_var->data.location != FRAG_RESULT_DATA0) {
590                         continue;
591                 }
592 
593                 nir_function_impl *impl =
594                         nir_cf_node_get_function(&block->cf_node);
595                 nir_builder b;
596                 nir_builder_init(&b, impl);
597                 b.cursor = nir_before_instr(&intr->instr);
598                 vc4_nir_lower_blend_instr(c, &b, intr);
599         }
600         return true;
601 }
602 
603 void
vc4_nir_lower_blend(nir_shader * s,struct vc4_compile * c)604 vc4_nir_lower_blend(nir_shader *s, struct vc4_compile *c)
605 {
606         nir_foreach_function(function, s) {
607                 if (function->impl) {
608                         nir_foreach_block(block, function->impl) {
609                                 vc4_nir_lower_blend_block(block, c);
610                         }
611 
612                         nir_metadata_preserve(function->impl,
613                                               nir_metadata_block_index |
614                                               nir_metadata_dominance);
615                 }
616         }
617 
618         /* If we didn't do alpha-to-coverage on the output color, we still
619          * need to pass glSampleMask() through.
620          */
621         if (c->fs_key->sample_coverage && !c->fs_key->sample_alpha_to_coverage) {
622                 nir_function_impl *impl = nir_shader_get_entrypoint(s);
623                 nir_builder b;
624                 nir_builder_init(&b, impl);
625                 b.cursor = nir_after_block(nir_impl_last_block(impl));
626 
627                 vc4_nir_store_sample_mask(c, &b, nir_load_sample_mask_in(&b));
628         }
629 }
630