/*
 * Copyright (C) 2016 Rob Clark <robclark@freedesktop.org>
 * Copyright © 2018 Google, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "fd6_const.h"
#include "fd6_pack.h"

#define emit_const_user fd6_emit_const_user
#define emit_const_bo   fd6_emit_const_bo
#include "ir3_const.h"

/* regid:          base const register
 * prsc or dwords: buffer containing constant values
 * sizedwords:     size of const value buffer
 */
void
fd6_emit_const_user(struct fd_ringbuffer *ring,
                    const struct ir3_shader_variant *v, uint32_t regid,
                    uint32_t sizedwords, const uint32_t *dwords)
{
   emit_const_asserts(ring, v, regid, sizedwords);

   /* NOTE we cheat a bit here, since we know mesa is aligning
    * the size of the user buffer to 16 bytes.  And we want to
    * cut cycles in a hot path.
    */
   uint32_t align_sz = align(sizedwords, 4);
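   /* Worked example (illustrative values, not from any caller): with
    * sizedwords = 6, align_sz = 8 dwords are copied inline while NUM_UNIT
    * below is DIV_ROUND_UP(6, 4) = 2 vec4s; the 16-byte size alignment
    * mesa guarantees makes reading the two extra padding dwords safe.
    */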

   if (fd6_geom_stage(v->type)) {
      OUT_PKTBUF(
         ring, CP_LOAD_STATE6_GEOM, dwords, align_sz,
         CP_LOAD_STATE6_0(.dst_off = regid / 4, .state_type = ST6_CONSTANTS,
                          .state_src = SS6_DIRECT,
                          .state_block = fd6_stage2shadersb(v->type),
                          .num_unit = DIV_ROUND_UP(sizedwords, 4)),
         CP_LOAD_STATE6_1(), CP_LOAD_STATE6_2());
   } else {
      OUT_PKTBUF(
         ring, CP_LOAD_STATE6_FRAG, dwords, align_sz,
         CP_LOAD_STATE6_0(.dst_off = regid / 4, .state_type = ST6_CONSTANTS,
                          .state_src = SS6_DIRECT,
                          .state_block = fd6_stage2shadersb(v->type),
                          .num_unit = DIV_ROUND_UP(sizedwords, 4)),
         CP_LOAD_STATE6_1(), CP_LOAD_STATE6_2());
   }
}
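
/* Illustrative usage sketch (not part of the driver; names assumed): pushing
 * four vec4s of immediates through the direct path above.
 *
 *    uint32_t imm[16] = {0};
 *    fd6_emit_const_user(ring, v, 0, ARRAY_SIZE(imm), imm);
 *
 * regid counts scalar const components, so CP_LOAD_STATE6_0 gets regid / 4
 * as the destination vec4 and NUM_UNIT is the payload size in vec4s.
 */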
void
fd6_emit_const_bo(struct fd_ringbuffer *ring,
                  const struct ir3_shader_variant *v, uint32_t regid,
                  uint32_t offset, uint32_t sizedwords, struct fd_bo *bo)
{
   uint32_t dst_off = regid / 4;
   assert(dst_off % 4 == 0);
   uint32_t num_unit = DIV_ROUND_UP(sizedwords, 4);
   assert(num_unit % 4 == 0);

   emit_const_asserts(ring, v, regid, sizedwords);

   if (fd6_geom_stage(v->type)) {
      OUT_PKT(ring, CP_LOAD_STATE6_GEOM,
              CP_LOAD_STATE6_0(.dst_off = dst_off, .state_type = ST6_CONSTANTS,
                               .state_src = SS6_INDIRECT,
                               .state_block = fd6_stage2shadersb(v->type),
                               .num_unit = num_unit, ),
              CP_LOAD_STATE6_EXT_SRC_ADDR(.bo = bo, .bo_offset = offset));
   } else {
      OUT_PKT(ring, CP_LOAD_STATE6_FRAG,
              CP_LOAD_STATE6_0(.dst_off = dst_off, .state_type = ST6_CONSTANTS,
                               .state_src = SS6_INDIRECT,
                               .state_block = fd6_stage2shadersb(v->type),
                               .num_unit = num_unit, ),
              CP_LOAD_STATE6_EXT_SRC_ADDR(.bo = bo, .bo_offset = offset));
   }
}
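
/* Note on the indirect path above: the asserts require regid to be a
 * multiple of 16 scalar components and sizedwords to round up to a multiple
 * of 16 dwords, i.e. both the destination offset and NUM_UNIT are whole
 * multiples of four vec4s (presumably a requirement of the SS6_INDIRECT
 * source path).
 */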

static bool
is_stateobj(struct fd_ringbuffer *ring)
{
   return true;
}

static void
emit_const_ptrs(struct fd_ringbuffer *ring, const struct ir3_shader_variant *v,
                uint32_t dst_offset, uint32_t num, struct fd_bo **bos,
                uint32_t *offsets)
{
   unreachable("shouldn't be called on a6xx");
}

static void
emit_tess_bos(struct fd_ringbuffer *ring, struct fd6_emit *emit,
              struct ir3_shader_variant *s) assert_dt
{
   struct fd_context *ctx = emit->ctx;
   const struct ir3_const_state *const_state = ir3_const_state(s);
   const unsigned regid = const_state->offsets.primitive_param * 4 + 4;
   uint32_t dwords = 16;

   OUT_PKT7(ring, fd6_stage2opcode(s->type), 3);
   OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(regid / 4) |
                     CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) |
                     CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) |
                     CP_LOAD_STATE6_0_STATE_BLOCK(fd6_stage2shadersb(s->type)) |
                     CP_LOAD_STATE6_0_NUM_UNIT(dwords / 4));
   OUT_RB(ring, ctx->batch->tess_addrs_constobj);
}
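
/* In emit_tess_bos() above, dwords = 16 gives NUM_UNIT = 4, i.e. four vec4s
 * are loaded into the const file starting one vec4 past primitive_param.
 * Using SS6_INDIRECT from the batch's tess_addrs_constobj (rather than
 * inlining values) is assumed here to let the tess buffer addresses be
 * filled into that ringbuffer elsewhere, once the BOs actually exist.
 */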

static void
emit_stage_tess_consts(struct fd_ringbuffer *ring, struct ir3_shader_variant *v,
                       uint32_t *params, int num_params)
{
   const struct ir3_const_state *const_state = ir3_const_state(v);
   const unsigned regid = const_state->offsets.primitive_param;
   int size = MIN2(1 + regid, v->constlen) - regid;
   if (size > 0)
      fd6_emit_const_user(ring, v, regid * 4, num_params, params);
}
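
/* The size clamp above only checks whether the primitive_param vec4 itself
 * fits below v->constlen: MIN2(1 + regid, v->constlen) - regid is 1 when it
 * does, and <= 0 when the variant's const space ends before it, in which
 * case the params are unused and nothing is emitted.
 */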

struct fd_ringbuffer *
fd6_build_tess_consts(struct fd6_emit *emit)
{
   struct fd_context *ctx = emit->ctx;

   struct fd_ringbuffer *constobj = fd_submit_new_ringbuffer(
      ctx->batch->submit, 0x1000, FD_RINGBUFFER_STREAMING);

   /* VS sizes are in bytes since that's what STLW/LDLW use, while the HS
    * size is dwords, since that's what LDG/STG use.
    */
   unsigned num_vertices = emit->hs
                              ? emit->patch_vertices
                              : emit->gs->shader->nir->info.gs.vertices_in;

   uint32_t vs_params[4] = {
      emit->vs->output_size * num_vertices * 4, /* vs primitive stride */
      emit->vs->output_size * 4,                /* vs vertex stride */
      0, 0};
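   /* Worked example (illustrative values): with output_size = 8 and
    * num_vertices = 3, the VS primitive stride param is 8 * 3 * 4 = 96 and
    * the vertex stride is 8 * 4 = 32, matching the byte units noted in the
    * comment above.
    */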

   emit_stage_tess_consts(constobj, emit->vs, vs_params, ARRAY_SIZE(vs_params));

   if (emit->hs) {
      uint32_t hs_params[4] = {
         emit->vs->output_size * num_vertices * 4, /* vs primitive stride */
         emit->vs->output_size * 4,                /* vs vertex stride */
         emit->hs->output_size, emit->patch_vertices};

      emit_stage_tess_consts(constobj, emit->hs, hs_params,
                             ARRAY_SIZE(hs_params));
      emit_tess_bos(constobj, emit, emit->hs);

      if (emit->gs)
         num_vertices = emit->gs->shader->nir->info.gs.vertices_in;

      uint32_t ds_params[4] = {
         emit->ds->output_size * num_vertices * 4, /* ds primitive stride */
         emit->ds->output_size * 4,                /* ds vertex stride */
         emit->hs->output_size, /* hs vertex stride (dwords) */
         emit->hs->shader->nir->info.tess.tcs_vertices_out};

      emit_stage_tess_consts(constobj, emit->ds, ds_params,
                             ARRAY_SIZE(ds_params));
      emit_tess_bos(constobj, emit, emit->ds);
   }

   if (emit->gs) {
      struct ir3_shader_variant *prev;
      if (emit->ds)
         prev = emit->ds;
      else
         prev = emit->vs;

      uint32_t gs_params[4] = {
         prev->output_size * num_vertices * 4, /* ds primitive stride */
         prev->output_size * 4,                /* ds vertex stride */
         0,
         0,
      };

      num_vertices = emit->gs->shader->nir->info.gs.vertices_in;
      emit_stage_tess_consts(constobj, emit->gs, gs_params,
                             ARRAY_SIZE(gs_params));
   }

   return constobj;
}

static void
fd6_emit_ubos(struct fd_context *ctx, const struct ir3_shader_variant *v,
              struct fd_ringbuffer *ring, struct fd_constbuf_stateobj *constbuf)
{
   const struct ir3_const_state *const_state = ir3_const_state(v);
   int num_ubos = const_state->num_ubos;

   if (!num_ubos)
      return;

   OUT_PKT7(ring, fd6_stage2opcode(v->type), 3 + (2 * num_ubos));
   OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(0) |
                     CP_LOAD_STATE6_0_STATE_TYPE(ST6_UBO) |
                     CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
                     CP_LOAD_STATE6_0_STATE_BLOCK(fd6_stage2shadersb(v->type)) |
                     CP_LOAD_STATE6_0_NUM_UNIT(num_ubos));
   OUT_RING(ring, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
   OUT_RING(ring, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));

   for (int i = 0; i < num_ubos; i++) {
      /* NIR constant data is packed into the end of the shader. */
      if (i == const_state->constant_data_ubo) {
         int size_vec4s = DIV_ROUND_UP(v->constant_data_size, 16);
         OUT_RELOC(ring, v->bo, v->info.constant_data_offset,
                   (uint64_t)A6XX_UBO_1_SIZE(size_vec4s) << 32, 0);
         continue;
      }

      struct pipe_constant_buffer *cb = &constbuf->cb[i];

      /* If we have user pointers (constbuf 0, aka GL uniforms), upload them
       * to a buffer now, and save it in the constbuf so that we don't have
       * to reupload until they get changed.
       */
      if (cb->user_buffer) {
         struct pipe_context *pctx = &ctx->base;
         u_upload_data(pctx->stream_uploader, 0, cb->buffer_size, 64,
                       cb->user_buffer, &cb->buffer_offset, &cb->buffer);
         cb->user_buffer = NULL;
      }

      if (cb->buffer) {
         int size_vec4s = DIV_ROUND_UP(cb->buffer_size, 16);
         OUT_RELOC(ring, fd_resource(cb->buffer)->bo, cb->buffer_offset,
                   (uint64_t)A6XX_UBO_1_SIZE(size_vec4s) << 32, 0);
      } else {
         OUT_RING(ring, 0xbad00000 | (i << 16));
         OUT_RING(ring, A6XX_UBO_1_SIZE(0));
      }
   }
}
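
/* Each UBO "unit" emitted above is a two-dword descriptor: a 64-bit source
 * address with the buffer size (in vec4s) folded into the upper dword via
 * A6XX_UBO_1_SIZE.  For a hypothetical 256-byte buffer bound at offset 0 the
 * loop works out to:
 *
 *    OUT_RELOC(ring, fd_resource(cb->buffer)->bo, 0,
 *              (uint64_t)A6XX_UBO_1_SIZE(16) << 32, 0);
 *
 * since DIV_ROUND_UP(256, 16) == 16 vec4s.
 */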

static unsigned
user_consts_cmdstream_size(struct ir3_shader_variant *v)
{
   struct ir3_const_state *const_state = ir3_const_state(v);
   struct ir3_ubo_analysis_state *ubo_state = &const_state->ubo_state;

   if (unlikely(!ubo_state->cmdstream_size)) {
      unsigned packets, size;

      /* pre-calculate size required for userconst stateobj: */
      ir3_user_consts_size(ubo_state, &packets, &size);

      /* also account for UBO addresses: */
      packets += 1;
      size += 2 * const_state->num_ubos;

      unsigned sizedwords = (4 * packets) + size;
      ubo_state->cmdstream_size = sizedwords * 4;
   }

   return ubo_state->cmdstream_size;
}
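
/* Worked example (illustrative values): if ir3_user_consts_size() reports
 * packets = 1 with size = 16 payload dwords and the variant uses 4 UBOs,
 * packets becomes 2 and size becomes 24, so the stateobj needs
 * (4 * 2 + 24) * 4 = 128 bytes of cmdstream (4 header dwords per packet).
 */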

struct fd_ringbuffer *
fd6_build_user_consts(struct fd6_emit *emit)
{
   static const enum pipe_shader_type types[] = {
      PIPE_SHADER_VERTEX,   PIPE_SHADER_TESS_CTRL, PIPE_SHADER_TESS_EVAL,
      PIPE_SHADER_GEOMETRY, PIPE_SHADER_FRAGMENT,
   };
   struct ir3_shader_variant *variants[] = {
      emit->vs, emit->hs, emit->ds, emit->gs, emit->fs,
   };
   struct fd_context *ctx = emit->ctx;
   unsigned sz = 0;

   for (unsigned i = 0; i < ARRAY_SIZE(types); i++) {
      if (!variants[i])
         continue;
      sz += user_consts_cmdstream_size(variants[i]);
   }

   struct fd_ringbuffer *constobj =
      fd_submit_new_ringbuffer(ctx->batch->submit, sz, FD_RINGBUFFER_STREAMING);

   for (unsigned i = 0; i < ARRAY_SIZE(types); i++) {
      if (!variants[i])
         continue;
      ir3_emit_user_consts(ctx->screen, variants[i], constobj,
                           &ctx->constbuf[types[i]]);
      fd6_emit_ubos(ctx, variants[i], constobj, &ctx->constbuf[types[i]]);
   }

   return constobj;
}

struct fd_ringbuffer *
fd6_build_vs_driver_params(struct fd6_emit *emit)
{
   struct fd_context *ctx = emit->ctx;
   struct fd6_context *fd6_ctx = fd6_context(ctx);
   const struct ir3_shader_variant *vs = emit->vs;

   if (vs->need_driver_params) {
      struct fd_ringbuffer *dpconstobj = fd_submit_new_ringbuffer(
         ctx->batch->submit, IR3_DP_VS_COUNT * 4, FD_RINGBUFFER_STREAMING);
      ir3_emit_vs_driver_params(vs, dpconstobj, ctx, emit->info, emit->indirect,
                                emit->draw);
      fd6_ctx->has_dp_state = true;
      return dpconstobj;
   }

   fd6_ctx->has_dp_state = false;
   return NULL;
}

void
fd6_emit_cs_consts(const struct ir3_shader_variant *v,
                   struct fd_ringbuffer *ring, struct fd_context *ctx,
                   const struct pipe_grid_info *info)
{
   ir3_emit_cs_consts(v, ring, ctx, info);
   fd6_emit_ubos(ctx, v, ring, &ctx->constbuf[PIPE_SHADER_COMPUTE]);
}

void
fd6_emit_immediates(struct fd_screen *screen,
                    const struct ir3_shader_variant *v,
                    struct fd_ringbuffer *ring)
{
   ir3_emit_immediates(screen, v, ring);
}

void
fd6_emit_link_map(struct fd_screen *screen,
                  const struct ir3_shader_variant *producer,
                  const struct ir3_shader_variant *v,
                  struct fd_ringbuffer *ring)
{
   ir3_emit_link_map(screen, producer, v, ring);
}