1 /*
2  * Copyright 2018 Advanced Micro Devices, Inc.
3  * All Rights Reserved.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * on the rights to use, copy, modify, merge, publish, distribute, sub
9  * license, and/or sell copies of the Software, and to permit persons to whom
10  * the Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the next
13  * paragraph) shall be included in all copies or substantial portions of the
14  * Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22  * USE OR OTHER DEALINGS IN THE SOFTWARE.
23  */
24 
25 #define AC_SURFACE_INCLUDE_NIR
26 #include "ac_surface.h"
27 #include "si_pipe.h"
28 
create_nir_cs(struct si_context * sctx,nir_builder * b)29 static void *create_nir_cs(struct si_context *sctx, nir_builder *b)
30 {
31    nir_shader_gather_info(b->shader, nir_shader_get_entrypoint(b->shader));
32 
33    struct pipe_compute_state state = {0};
34    state.ir_type = PIPE_SHADER_IR_NIR;
35    state.prog = b->shader;
36    sctx->b.screen->finalize_nir(sctx->b.screen, (void*)state.prog);
37    return sctx->b.create_compute_state(&sctx->b, &state);
38 }
39 
get_global_ids(nir_builder * b,unsigned num_components)40 static nir_ssa_def *get_global_ids(nir_builder *b, unsigned num_components)
41 {
42    unsigned mask = BITFIELD_MASK(num_components);
43 
44    nir_ssa_def *local_ids = nir_channels(b, nir_load_local_invocation_id(b), mask);
45    nir_ssa_def *block_ids = nir_channels(b, nir_load_workgroup_id(b, 32), mask);
46    nir_ssa_def *block_size = nir_channels(b, nir_load_workgroup_size(b), mask);
47    return nir_iadd(b, nir_imul(b, block_ids, block_size), local_ids);
48 }
49 
unpack_2x16(nir_builder * b,nir_ssa_def * src,nir_ssa_def ** x,nir_ssa_def ** y)50 static void unpack_2x16(nir_builder *b, nir_ssa_def *src, nir_ssa_def **x, nir_ssa_def **y)
51 {
52    *x = nir_iand(b, src, nir_imm_int(b, 0xffff));
53    *y = nir_ushr(b, src, nir_imm_int(b, 16));
54 }
55 
si_create_dcc_retile_cs(struct si_context * sctx,struct radeon_surf * surf)56 void *si_create_dcc_retile_cs(struct si_context *sctx, struct radeon_surf *surf)
57 {
58    const nir_shader_compiler_options *options =
59       sctx->b.screen->get_compiler_options(sctx->b.screen, PIPE_SHADER_IR_NIR, PIPE_SHADER_COMPUTE);
60 
61    nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, options, "dcc_retile");
62    b.shader->info.workgroup_size[0] = 8;
63    b.shader->info.workgroup_size[1] = 8;
64    b.shader->info.workgroup_size[2] = 1;
65    b.shader->info.cs.user_data_components_amd = 3;
66    b.shader->info.num_ssbos = 1;
67 
68    /* Get user data SGPRs. */
69    nir_ssa_def *user_sgprs = nir_load_user_data_amd(&b);
70 
71    /* Relative offset from the displayable DCC to the non-displayable DCC in the same buffer. */
72    nir_ssa_def *src_dcc_offset = nir_channel(&b, user_sgprs, 0);
73 
74    nir_ssa_def *src_dcc_pitch, *dst_dcc_pitch, *src_dcc_height, *dst_dcc_height;
75    unpack_2x16(&b, nir_channel(&b, user_sgprs, 1), &src_dcc_pitch, &src_dcc_height);
76    unpack_2x16(&b, nir_channel(&b, user_sgprs, 2), &dst_dcc_pitch, &dst_dcc_height);
77 
78    /* Get the 2D coordinates. */
79    nir_ssa_def *coord = get_global_ids(&b, 2);
80    nir_ssa_def *zero = nir_imm_int(&b, 0);
81 
82    /* Multiply the coordinates by the DCC block size (they are DCC block coordinates). */
83    coord = nir_imul(&b, coord, nir_imm_ivec2(&b, surf->u.gfx9.color.dcc_block_width,
84                                              surf->u.gfx9.color.dcc_block_height));
85 
86    nir_ssa_def *src_offset =
87       ac_nir_dcc_addr_from_coord(&b, &sctx->screen->info, surf->bpe, &surf->u.gfx9.color.dcc_equation,
88                                  src_dcc_pitch, src_dcc_height, zero, /* DCC slice size */
89                                  nir_channel(&b, coord, 0), nir_channel(&b, coord, 1), /* x, y */
90                                  zero, zero, zero); /* z, sample, pipe_xor */
91    src_offset = nir_iadd(&b, src_offset, src_dcc_offset);
92    nir_ssa_def *value = nir_load_ssbo(&b, 1, 8, zero, src_offset, .align_mul=1);
93 
94    nir_ssa_def *dst_offset =
95       ac_nir_dcc_addr_from_coord(&b, &sctx->screen->info, surf->bpe, &surf->u.gfx9.color.display_dcc_equation,
96                                  dst_dcc_pitch, dst_dcc_height, zero, /* DCC slice size */
97                                  nir_channel(&b, coord, 0), nir_channel(&b, coord, 1), /* x, y */
98                                  zero, zero, zero); /* z, sample, pipe_xor */
99    nir_store_ssbo(&b, value, zero, dst_offset, .write_mask=0x1, .align_mul=1);
100 
101    return create_nir_cs(sctx, &b);
102 }
103 
gfx9_create_clear_dcc_msaa_cs(struct si_context * sctx,struct si_texture * tex)104 void *gfx9_create_clear_dcc_msaa_cs(struct si_context *sctx, struct si_texture *tex)
105 {
106    const nir_shader_compiler_options *options =
107       sctx->b.screen->get_compiler_options(sctx->b.screen, PIPE_SHADER_IR_NIR, PIPE_SHADER_COMPUTE);
108 
109    nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, options, "clear_dcc_msaa");
110    b.shader->info.workgroup_size[0] = 8;
111    b.shader->info.workgroup_size[1] = 8;
112    b.shader->info.workgroup_size[2] = 1;
113    b.shader->info.cs.user_data_components_amd = 2;
114    b.shader->info.num_ssbos = 1;
115 
116    /* Get user data SGPRs. */
117    nir_ssa_def *user_sgprs = nir_load_user_data_amd(&b);
118    nir_ssa_def *dcc_pitch, *dcc_height, *clear_value, *pipe_xor;
119    unpack_2x16(&b, nir_channel(&b, user_sgprs, 0), &dcc_pitch, &dcc_height);
120    unpack_2x16(&b, nir_channel(&b, user_sgprs, 1), &clear_value, &pipe_xor);
121    clear_value = nir_u2u16(&b, clear_value);
122 
123    /* Get the 2D coordinates. */
124    nir_ssa_def *coord = get_global_ids(&b, 3);
125    nir_ssa_def *zero = nir_imm_int(&b, 0);
126 
127    /* Multiply the coordinates by the DCC block size (they are DCC block coordinates). */
128    coord = nir_imul(&b, coord,
129                     nir_channels(&b, nir_imm_ivec4(&b, tex->surface.u.gfx9.color.dcc_block_width,
130                                                    tex->surface.u.gfx9.color.dcc_block_height,
131                                                    tex->surface.u.gfx9.color.dcc_block_depth, 0), 0x7));
132 
133    nir_ssa_def *offset =
134       ac_nir_dcc_addr_from_coord(&b, &sctx->screen->info, tex->surface.bpe,
135                                  &tex->surface.u.gfx9.color.dcc_equation,
136                                  dcc_pitch, dcc_height, zero, /* DCC slice size */
137                                  nir_channel(&b, coord, 0), nir_channel(&b, coord, 1), /* x, y */
138                                  tex->buffer.b.b.array_size > 1 ? nir_channel(&b, coord, 2) : zero, /* z */
139                                  zero, pipe_xor); /* sample, pipe_xor */
140 
141    /* The trick here is that DCC elements for an even and the next odd sample are next to each other
142     * in memory, so we only need to compute the address for sample 0 and the next DCC byte is always
143     * sample 1. That's why the clear value has 2 bytes - we're clearing 2 samples at the same time.
144     */
145    nir_store_ssbo(&b, clear_value, zero, offset, .write_mask=0x1, .align_mul=2);
146 
147    return create_nir_cs(sctx, &b);
148 }
149