1 /*
2 * Copyright (C) 2019-2021 Collabora, Ltd.
3 * Copyright (C) 2019 Alyssa Rosenzweig
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22 * IN THE SOFTWARE.
23 */
24
25 /**
26 * @file
27 *
28 * Implements the fragment pipeline (blending and writeout) in software, to be
29 * run as a dedicated "blend shader" stage on Midgard/Bifrost, or as a fragment
30 * shader variant on typical GPUs. This pass is useful if hardware lacks
31 * fixed-function blending in part or in full.
32 */
33
34 #include "compiler/nir/nir.h"
35 #include "compiler/nir/nir_builder.h"
36 #include "compiler/nir/nir_format_convert.h"
37 #include "nir_lower_blend.h"
38
39 /* Given processed factors, combine them per a blend function */
40
41 static nir_ssa_def *
nir_blend_func(nir_builder * b,enum blend_func func,nir_ssa_def * src,nir_ssa_def * dst)42 nir_blend_func(
43 nir_builder *b,
44 enum blend_func func,
45 nir_ssa_def *src, nir_ssa_def *dst)
46 {
47 switch (func) {
48 case BLEND_FUNC_ADD:
49 return nir_fadd(b, src, dst);
50 case BLEND_FUNC_SUBTRACT:
51 return nir_fsub(b, src, dst);
52 case BLEND_FUNC_REVERSE_SUBTRACT:
53 return nir_fsub(b, dst, src);
54 case BLEND_FUNC_MIN:
55 return nir_fmin(b, src, dst);
56 case BLEND_FUNC_MAX:
57 return nir_fmax(b, src, dst);
58 }
59
60 unreachable("Invalid blend function");
61 }
62
63 /* Does this blend function multiply by a blend factor? */
64
65 static bool
nir_blend_factored(enum blend_func func)66 nir_blend_factored(enum blend_func func)
67 {
68 switch (func) {
69 case BLEND_FUNC_ADD:
70 case BLEND_FUNC_SUBTRACT:
71 case BLEND_FUNC_REVERSE_SUBTRACT:
72 return true;
73 default:
74 return false;
75 }
76 }
77
78 /* Compute a src_alpha_saturate factor */
79 static nir_ssa_def *
nir_alpha_saturate(nir_builder * b,nir_ssa_def * src,nir_ssa_def * dst,unsigned chan)80 nir_alpha_saturate(
81 nir_builder *b,
82 nir_ssa_def *src, nir_ssa_def *dst,
83 unsigned chan)
84 {
85 nir_ssa_def *Asrc = nir_channel(b, src, 3);
86 nir_ssa_def *Adst = nir_channel(b, dst, 3);
87 nir_ssa_def *one = nir_imm_floatN_t(b, 1.0, src->bit_size);
88 nir_ssa_def *Adsti = nir_fsub(b, one, Adst);
89
90 return (chan < 3) ? nir_fmin(b, Asrc, Adsti) : one;
91 }
92
93 /* Returns a scalar single factor, unmultiplied */
94
95 static nir_ssa_def *
nir_blend_factor_value(nir_builder * b,nir_ssa_def * src,nir_ssa_def * src1,nir_ssa_def * dst,nir_ssa_def * bconst,unsigned chan,enum blend_factor factor)96 nir_blend_factor_value(
97 nir_builder *b,
98 nir_ssa_def *src, nir_ssa_def *src1, nir_ssa_def *dst, nir_ssa_def *bconst,
99 unsigned chan,
100 enum blend_factor factor)
101 {
102 switch (factor) {
103 case BLEND_FACTOR_ZERO:
104 return nir_imm_floatN_t(b, 0.0, src->bit_size);
105 case BLEND_FACTOR_SRC_COLOR:
106 return nir_channel(b, src, chan);
107 case BLEND_FACTOR_SRC1_COLOR:
108 return nir_channel(b, src1, chan);
109 case BLEND_FACTOR_DST_COLOR:
110 return nir_channel(b, dst, chan);
111 case BLEND_FACTOR_SRC_ALPHA:
112 return nir_channel(b, src, 3);
113 case BLEND_FACTOR_SRC1_ALPHA:
114 return nir_channel(b, src1, 3);
115 case BLEND_FACTOR_DST_ALPHA:
116 return nir_channel(b, dst, 3);
117 case BLEND_FACTOR_CONSTANT_COLOR:
118 return nir_channel(b, bconst, chan);
119 case BLEND_FACTOR_CONSTANT_ALPHA:
120 return nir_channel(b, bconst, 3);
121 case BLEND_FACTOR_SRC_ALPHA_SATURATE:
122 return nir_alpha_saturate(b, src, dst, chan);
123 }
124
125 unreachable("Invalid blend factor");
126 }
127
128 static nir_ssa_def *
nir_blend_factor(nir_builder * b,nir_ssa_def * raw_scalar,nir_ssa_def * src,nir_ssa_def * src1,nir_ssa_def * dst,nir_ssa_def * bconst,unsigned chan,enum blend_factor factor,bool inverted)129 nir_blend_factor(
130 nir_builder *b,
131 nir_ssa_def *raw_scalar,
132 nir_ssa_def *src, nir_ssa_def *src1, nir_ssa_def *dst, nir_ssa_def *bconst,
133 unsigned chan,
134 enum blend_factor factor,
135 bool inverted)
136 {
137 nir_ssa_def *f =
138 nir_blend_factor_value(b, src, src1, dst, bconst, chan, factor);
139
140 if (inverted)
141 f = nir_fadd_imm(b, nir_fneg(b, f), 1.0);
142
143 return nir_fmul(b, raw_scalar, f);
144 }
145
146 /* Given a colormask, "blend" with the destination */
147
148 static nir_ssa_def *
nir_color_mask(nir_builder * b,unsigned mask,nir_ssa_def * src,nir_ssa_def * dst)149 nir_color_mask(
150 nir_builder *b,
151 unsigned mask,
152 nir_ssa_def *src,
153 nir_ssa_def *dst)
154 {
155 return nir_vec4(b,
156 nir_channel(b, (mask & (1 << 0)) ? src : dst, 0),
157 nir_channel(b, (mask & (1 << 1)) ? src : dst, 1),
158 nir_channel(b, (mask & (1 << 2)) ? src : dst, 2),
159 nir_channel(b, (mask & (1 << 3)) ? src : dst, 3));
160 }
161
162 static nir_ssa_def *
nir_logicop_func(nir_builder * b,unsigned func,nir_ssa_def * src,nir_ssa_def * dst)163 nir_logicop_func(
164 nir_builder *b,
165 unsigned func,
166 nir_ssa_def *src, nir_ssa_def *dst)
167 {
168 switch (func) {
169 case PIPE_LOGICOP_CLEAR:
170 return nir_imm_ivec4(b, 0, 0, 0, 0);
171 case PIPE_LOGICOP_NOR:
172 return nir_inot(b, nir_ior(b, src, dst));
173 case PIPE_LOGICOP_AND_INVERTED:
174 return nir_iand(b, nir_inot(b, src), dst);
175 case PIPE_LOGICOP_COPY_INVERTED:
176 return nir_inot(b, src);
177 case PIPE_LOGICOP_AND_REVERSE:
178 return nir_iand(b, src, nir_inot(b, dst));
179 case PIPE_LOGICOP_INVERT:
180 return nir_inot(b, dst);
181 case PIPE_LOGICOP_XOR:
182 return nir_ixor(b, src, dst);
183 case PIPE_LOGICOP_NAND:
184 return nir_inot(b, nir_iand(b, src, dst));
185 case PIPE_LOGICOP_AND:
186 return nir_iand(b, src, dst);
187 case PIPE_LOGICOP_EQUIV:
188 return nir_inot(b, nir_ixor(b, src, dst));
189 case PIPE_LOGICOP_NOOP:
190 return dst;
191 case PIPE_LOGICOP_OR_INVERTED:
192 return nir_ior(b, nir_inot(b, src), dst);
193 case PIPE_LOGICOP_COPY:
194 return src;
195 case PIPE_LOGICOP_OR_REVERSE:
196 return nir_ior(b, src, nir_inot(b, dst));
197 case PIPE_LOGICOP_OR:
198 return nir_ior(b, src, dst);
199 case PIPE_LOGICOP_SET:
200 return nir_imm_ivec4(b, ~0, ~0, ~0, ~0);
201 }
202
203 unreachable("Invalid logciop function");
204 }
205
206 static nir_ssa_def *
nir_blend_logicop(nir_builder * b,nir_lower_blend_options options,unsigned rt,nir_ssa_def * src,nir_ssa_def * dst)207 nir_blend_logicop(
208 nir_builder *b,
209 nir_lower_blend_options options,
210 unsigned rt,
211 nir_ssa_def *src, nir_ssa_def *dst)
212 {
213 unsigned bit_size = src->bit_size;
214 const struct util_format_description *format_desc =
215 util_format_description(options.format[rt]);
216
217 if (bit_size != 32) {
218 src = nir_f2f32(b, src);
219 dst = nir_f2f32(b, dst);
220 }
221
222 assert(src->num_components <= 4);
223 assert(dst->num_components <= 4);
224
225 unsigned bits[4];
226 for (int i = 0; i < 4; ++i)
227 bits[i] = format_desc->channel[i].size;
228
229 src = nir_format_float_to_unorm(b, src, bits);
230 dst = nir_format_float_to_unorm(b, dst, bits);
231
232 nir_ssa_def *out = nir_logicop_func(b, options.logicop_func, src, dst);
233
234 if (bits[0] < 32) {
235 nir_const_value mask[4];
236 for (int i = 0; i < 4; ++i)
237 mask[i] = nir_const_value_for_int((1u << bits[i]) - 1, 32);
238
239 out = nir_iand(b, out, nir_build_imm(b, 4, 32, mask));
240 }
241
242 out = nir_format_unorm_to_float(b, out, bits);
243
244 if (bit_size == 16)
245 out = nir_f2f16(b, out);
246
247 return out;
248 }
249
250 /* Given a blend state, the source color, and the destination color,
251 * return the blended color
252 */
253
254 static nir_ssa_def *
nir_blend(nir_builder * b,nir_lower_blend_options options,unsigned rt,nir_ssa_def * src,nir_ssa_def * src1,nir_ssa_def * dst)255 nir_blend(
256 nir_builder *b,
257 nir_lower_blend_options options,
258 unsigned rt,
259 nir_ssa_def *src, nir_ssa_def *src1, nir_ssa_def *dst)
260 {
261 /* Grab the blend constant ahead of time */
262 nir_ssa_def *bconst;
263 if (options.scalar_blend_const) {
264 bconst = nir_vec4(b,
265 nir_load_blend_const_color_r_float(b),
266 nir_load_blend_const_color_g_float(b),
267 nir_load_blend_const_color_b_float(b),
268 nir_load_blend_const_color_a_float(b));
269 } else {
270 bconst = nir_load_blend_const_color_rgba(b);
271 }
272
273 if (src->bit_size == 16)
274 bconst = nir_f2f16(b, bconst);
275
276 /* Fixed-point framebuffers require their inputs clamped. */
277 enum pipe_format format = options.format[rt];
278
279 if (!util_format_is_float(format))
280 src = nir_fsat(b, src);
281
282 /* DST_ALPHA reads back 1.0 if there is no alpha channel */
283 const struct util_format_description *desc =
284 util_format_description(format);
285
286 if (desc->nr_channels < 4) {
287 nir_ssa_def *zero = nir_imm_floatN_t(b, 0.0, dst->bit_size);
288 nir_ssa_def *one = nir_imm_floatN_t(b, 1.0, dst->bit_size);
289
290 dst = nir_vec4(b, nir_channel(b, dst, 0),
291 desc->nr_channels > 1 ? nir_channel(b, dst, 1) : zero,
292 desc->nr_channels > 2 ? nir_channel(b, dst, 2) : zero,
293 desc->nr_channels > 3 ? nir_channel(b, dst, 3) : one);
294 }
295
296 /* We blend per channel and recombine later */
297 nir_ssa_def *channels[4];
298
299 for (unsigned c = 0; c < 4; ++c) {
300 /* Decide properties based on channel */
301 nir_lower_blend_channel chan =
302 (c < 3) ? options.rt[rt].rgb : options.rt[rt].alpha;
303
304 nir_ssa_def *psrc = nir_channel(b, src, c);
305 nir_ssa_def *pdst = nir_channel(b, dst, c);
306
307 if (nir_blend_factored(chan.func)) {
308 psrc = nir_blend_factor(
309 b, psrc,
310 src, src1, dst, bconst, c,
311 chan.src_factor, chan.invert_src_factor);
312
313 pdst = nir_blend_factor(
314 b, pdst,
315 src, src1, dst, bconst, c,
316 chan.dst_factor, chan.invert_dst_factor);
317 }
318
319 channels[c] = nir_blend_func(b, chan.func, psrc, pdst);
320 }
321
322 return nir_vec(b, channels, 4);
323 }
324
325 static bool
nir_lower_blend_instr(nir_builder * b,nir_instr * instr,void * data)326 nir_lower_blend_instr(nir_builder *b, nir_instr *instr, void *data)
327 {
328 nir_lower_blend_options *options = data;
329 if (instr->type != nir_instr_type_intrinsic)
330 return false;
331
332 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
333 if (intr->intrinsic != nir_intrinsic_store_deref)
334 return false;
335
336 nir_variable *var = nir_intrinsic_get_var(intr, 0);
337 if (var->data.mode != nir_var_shader_out ||
338 (var->data.location != FRAG_RESULT_COLOR &&
339 var->data.location < FRAG_RESULT_DATA0))
340 return false;
341
342 /* Determine render target for per-RT blending */
343 unsigned rt =
344 (var->data.location == FRAG_RESULT_COLOR) ? 0 :
345 (var->data.location - FRAG_RESULT_DATA0);
346
347 /* No blend lowering requested on this RT */
348 if (options->format[rt] == PIPE_FORMAT_NONE)
349 return false;
350
351 b->cursor = nir_before_instr(instr);
352
353 /* Grab the input color */
354 unsigned src_num_comps = nir_src_num_components(intr->src[1]);
355 nir_ssa_def *src =
356 nir_pad_vector(b, nir_ssa_for_src(b, intr->src[1], src_num_comps), 4);
357
358 /* Grab the previous fragment color */
359 var->data.fb_fetch_output = true;
360 b->shader->info.outputs_read |= BITFIELD64_BIT(var->data.location);
361 b->shader->info.fs.uses_fbfetch_output = true;
362 nir_ssa_def *dst = nir_load_var(b, var);
363
364 /* Blend the two colors per the passed options */
365 nir_ssa_def *blended = src;
366
367 if (options->logicop_enable) {
368 blended = nir_blend_logicop(b, *options, rt, src, dst);
369 } else if (!util_format_is_pure_integer(options->format[rt])) {
370 assert(!util_format_is_scaled(options->format[rt]));
371 blended = nir_blend(b, *options, rt, src, options->src1, dst);
372 }
373
374 /* Apply a colormask */
375 blended = nir_color_mask(b, options->rt[rt].colormask, blended, dst);
376
377 if (src_num_comps != 4)
378 blended = nir_channels(b, blended, BITFIELD_MASK(src_num_comps));
379
380 /* Write out the final color instead of the input */
381 nir_instr_rewrite_src_ssa(instr, &intr->src[1], blended);
382 return true;
383 }
384
385 void
nir_lower_blend(nir_shader * shader,nir_lower_blend_options options)386 nir_lower_blend(nir_shader *shader, nir_lower_blend_options options)
387 {
388 assert(shader->info.stage == MESA_SHADER_FRAGMENT);
389
390 nir_shader_instructions_pass(shader, nir_lower_blend_instr,
391 nir_metadata_block_index | nir_metadata_dominance, &options);
392 }
393