1 #include "zink_compiler.h"
2 #include "zink_context.h"
3 #include "zink_program.h"
4 #include "zink_query.h"
5 #include "zink_resource.h"
6 #include "zink_screen.h"
7 #include "zink_state.h"
8 #include "zink_surface.h"
9 #include "zink_inlines.h"
10 
11 #include "tgsi/tgsi_from_mesa.h"
12 #include "util/hash_table.h"
13 #include "util/u_debug.h"
14 #include "util/u_helpers.h"
15 #include "util/u_inlines.h"
16 #include "util/u_prim.h"
17 #include "util/u_prim_restart.h"
18 
19 
20 static void
zink_emit_xfb_counter_barrier(struct zink_context * ctx)21 zink_emit_xfb_counter_barrier(struct zink_context *ctx)
22 {
23    /* Between the pause and resume there needs to be a memory barrier for the counter buffers
24     * with a source access of VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT
25     * at pipeline stage VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT
26     * to a destination access of VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_READ_BIT_EXT
27     * at pipeline stage VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT.
28     *
29     * - from VK_EXT_transform_feedback spec
30     */
31    for (unsigned i = 0; i < ctx->num_so_targets; i++) {
32       struct zink_so_target *t = zink_so_target(ctx->so_targets[i]);
33       if (!t)
34          continue;
35       struct zink_resource *res = zink_resource(t->counter_buffer);
36       if (t->counter_buffer_valid)
37           zink_resource_buffer_barrier(ctx, res, VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_READ_BIT_EXT,
38                                        VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT);
39       else
40           zink_resource_buffer_barrier(ctx, res, VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT,
41                                        VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT);
42    }
43    ctx->xfb_barrier = false;
44 }
45 
46 static void
zink_emit_xfb_vertex_input_barrier(struct zink_context * ctx,struct zink_resource * res)47 zink_emit_xfb_vertex_input_barrier(struct zink_context *ctx, struct zink_resource *res)
48 {
49    /* A pipeline barrier is required between using the buffers as
50     * transform feedback buffers and vertex buffers to
51     * ensure all writes to the transform feedback buffers are visible
52     * when the data is read as vertex attributes.
53     * The source access is VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT
54     * and the destination access is VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT
55     * for the pipeline stages VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT
56     * and VK_PIPELINE_STAGE_VERTEX_INPUT_BIT respectively.
57     *
58     * - 20.3.1. Drawing Transform Feedback
59     */
60    zink_resource_buffer_barrier(ctx, res, VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT,
61                                 VK_PIPELINE_STAGE_VERTEX_INPUT_BIT);
62 }
63 
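/* bind all stream-output targets for the current draw; a dummy buffer fills
 * any unset slot since the bind call requires valid buffer handles
 */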
static void
zink_emit_stream_output_targets(struct pipe_context *pctx)
{
   struct zink_context *ctx = zink_context(pctx);
   struct zink_batch *batch = &ctx->batch;
   VkBuffer buffers[PIPE_MAX_SO_OUTPUTS] = {0};
   VkDeviceSize buffer_offsets[PIPE_MAX_SO_OUTPUTS] = {0};
   VkDeviceSize buffer_sizes[PIPE_MAX_SO_OUTPUTS] = {0};

   for (unsigned i = 0; i < ctx->num_so_targets; i++) {
      struct zink_so_target *t = (struct zink_so_target *)ctx->so_targets[i];
      if (!t) {
         /* no need to reference this or anything */
         buffers[i] = zink_resource(ctx->dummy_xfb_buffer)->obj->buffer;
         buffer_offsets[i] = 0;
         buffer_sizes[i] = sizeof(uint8_t);
         continue;
      }
      struct zink_resource *res = zink_resource(t->base.buffer);
      if (!res->so_valid)
         /* resource has been rebound */
         t->counter_buffer_valid = false;
      buffers[i] = res->obj->buffer;
      zink_batch_reference_resource_rw(batch, res, true);
      buffer_offsets[i] = t->base.buffer_offset;
      buffer_sizes[i] = t->base.buffer_size;
      res->so_valid = true;
      util_range_add(t->base.buffer, &res->valid_buffer_range, t->base.buffer_offset,
                     t->base.buffer_offset + t->base.buffer_size);
   }

   VKCTX(CmdBindTransformFeedbackBuffersEXT)(batch->state->cmdbuf, 0, ctx->num_so_targets,
                                             buffers, buffer_offsets,
                                             buffer_sizes);
   ctx->dirty_so_targets = false;
}

ALWAYS_INLINE static void
check_buffer_barrier(struct zink_context *ctx, struct pipe_resource *pres, VkAccessFlags flags, VkPipelineStageFlags pipeline)
{
   struct zink_resource *res = zink_resource(pres);
   zink_resource_buffer_barrier(ctx, res, flags, pipeline);
}

ALWAYS_INLINE static void
barrier_draw_buffers(struct zink_context *ctx, const struct pipe_draw_info *dinfo,
                     const struct pipe_draw_indirect_info *dindirect, struct pipe_resource *index_buffer)
{
   if (index_buffer)
      check_buffer_barrier(ctx, index_buffer, VK_ACCESS_INDEX_READ_BIT, VK_PIPELINE_STAGE_VERTEX_INPUT_BIT);
   if (dindirect && dindirect->buffer) {
      check_buffer_barrier(ctx, dindirect->buffer,
                           VK_ACCESS_INDIRECT_COMMAND_READ_BIT, VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT);
      if (dindirect->indirect_draw_count)
         check_buffer_barrier(ctx, dindirect->indirect_draw_count,
                              VK_ACCESS_INDIRECT_COMMAND_READ_BIT, VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT);
   }
}

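/* the template parameters mirror device capabilities: with
 * VK_EXT_vertex_input_dynamic_state the bindings/attribs are set dynamically
 * via vkCmdSetVertexInputEXT, and with VK_EXT_extended_dynamic_state the
 * strides ride along in vkCmdBindVertexBuffers2EXT instead of pipeline state
 */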
template <zink_dynamic_state HAS_DYNAMIC_STATE, zink_dynamic_vertex_input HAS_VERTEX_INPUT>
static void
zink_bind_vertex_buffers(struct zink_batch *batch, struct zink_context *ctx)
{
   VkBuffer buffers[PIPE_MAX_ATTRIBS];
   VkDeviceSize buffer_offsets[PIPE_MAX_ATTRIBS];
   VkDeviceSize buffer_strides[PIPE_MAX_ATTRIBS];
   struct zink_vertex_elements_state *elems = ctx->element_state;
   struct zink_screen *screen = zink_screen(ctx->base.screen);

   if (!elems->hw_state.num_bindings)
      return;

   for (unsigned i = 0; i < elems->hw_state.num_bindings; i++) {
      struct pipe_vertex_buffer *vb = ctx->vertex_buffers + ctx->element_state->binding_map[i];
      assert(vb);
      if (vb->buffer.resource) {
         struct zink_resource *res = zink_resource(vb->buffer.resource);
         assert(res->obj->buffer);
         buffers[i] = res->obj->buffer;
         buffer_offsets[i] = vb->buffer_offset;
         buffer_strides[i] = vb->stride;
         if (HAS_VERTEX_INPUT)
            elems->hw_state.dynbindings[i].stride = vb->stride;
         zink_batch_resource_usage_set(&ctx->batch, zink_resource(vb->buffer.resource), false);
      } else {
         buffers[i] = zink_resource(ctx->dummy_vertex_buffer)->obj->buffer;
         buffer_offsets[i] = 0;
         buffer_strides[i] = 0;
         if (HAS_VERTEX_INPUT)
            elems->hw_state.dynbindings[i].stride = 0;
      }
   }

   if (HAS_DYNAMIC_STATE && !HAS_VERTEX_INPUT)
      VKCTX(CmdBindVertexBuffers2EXT)(batch->state->cmdbuf, 0,
                                      elems->hw_state.num_bindings,
                                      buffers, buffer_offsets, NULL, buffer_strides);
   else
      VKSCR(CmdBindVertexBuffers)(batch->state->cmdbuf, 0,
                                  elems->hw_state.num_bindings,
                                  buffers, buffer_offsets);

   if (HAS_VERTEX_INPUT)
      VKCTX(CmdSetVertexInputEXT)(batch->state->cmdbuf,
                                  elems->hw_state.num_bindings, elems->hw_state.dynbindings,
                                  elems->hw_state.num_attribs, elems->hw_state.dynattribs);

   ctx->vertex_buffers_dirty = false;
}

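/* look up (or create) the gfx program for the currently-bound shader set and
 * fold its variant hash into the pipeline state's final_hash; the previous
 * program's hash is XORed out and the new one XORed in so the pipeline cache
 * key always reflects the current shader variants
 */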
static void
update_gfx_program(struct zink_context *ctx)
{
   if (ctx->last_vertex_stage_dirty) {
      enum pipe_shader_type pstage = pipe_shader_type_from_mesa(ctx->last_vertex_stage->nir->info.stage);
      ctx->dirty_shader_stages |= BITFIELD_BIT(pstage);
      memcpy(&ctx->gfx_pipeline_state.shader_keys.key[pstage].key.vs_base,
             &ctx->gfx_pipeline_state.shader_keys.last_vertex.key.vs_base,
             sizeof(struct zink_vs_key_base));
      ctx->last_vertex_stage_dirty = false;
   }
   unsigned bits = BITFIELD_MASK(PIPE_SHADER_COMPUTE);
   if (ctx->gfx_dirty) {
      struct zink_gfx_program *prog = NULL;

      struct hash_table *ht = &ctx->program_cache[ctx->shader_stages >> 2];
      const uint32_t hash = ctx->gfx_hash;
      struct hash_entry *entry = _mesa_hash_table_search_pre_hashed(ht, hash, ctx->gfx_stages);
      if (entry) {
         prog = (struct zink_gfx_program*)entry->data;
         u_foreach_bit(stage, prog->stages_present & ~ctx->dirty_shader_stages)
            ctx->gfx_pipeline_state.modules[stage] = prog->modules[stage]->shader;
      } else {
         ctx->dirty_shader_stages |= bits;
         prog = zink_create_gfx_program(ctx, ctx->gfx_stages, ctx->gfx_pipeline_state.vertices_per_patch + 1);
         _mesa_hash_table_insert_pre_hashed(ht, hash, prog->shaders, prog);
      }
      zink_update_gfx_program(ctx, prog);
      if (prog && prog != ctx->curr_program)
         zink_batch_reference_program(&ctx->batch, &prog->base);
      if (ctx->curr_program)
         ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash;
      ctx->curr_program = prog;
      ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash;
      ctx->gfx_dirty = false;
   } else if (ctx->dirty_shader_stages & bits) {
      /* remove old hash */
      ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash;
      zink_update_gfx_program(ctx, ctx->curr_program);
      /* apply new hash */
      ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash;
   }
   ctx->dirty_shader_stages &= ~bits;
}

static bool
line_width_needed(enum pipe_prim_type reduced_prim,
                  unsigned polygon_mode)
{
   switch (reduced_prim) {
   case PIPE_PRIM_POINTS:
      return false;

   case PIPE_PRIM_LINES:
      return true;

   case PIPE_PRIM_TRIANGLES:
      return polygon_mode == VK_POLYGON_MODE_LINE;

   default:
      unreachable("unexpected reduced prim");
   }
}

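/* shaders that read gl_DrawID get the value via a push constant when the draw
 * parameters can't supply it directly (see the drawid_broken handling below)
 */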
ALWAYS_INLINE static void
update_drawid(struct zink_context *ctx, unsigned draw_id)
{
   VKCTX(CmdPushConstants)(ctx->batch.state->cmdbuf, ctx->curr_program->base.layout, VK_SHADER_STAGE_VERTEX_BIT,
                      offsetof(struct zink_gfx_push_constant, draw_id), sizeof(unsigned),
                      &draw_id);
}

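/* indexed-draw variant used when the index data has been rewritten into a new
 * buffer (e.g. re-uploaded for primitive restart translation), so each draw
 * begins at firstIndex 0 instead of draws[i].start
 */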
ALWAYS_INLINE static void
draw_indexed_need_index_buffer_unref(struct zink_context *ctx,
             const struct pipe_draw_info *dinfo,
             const struct pipe_draw_start_count_bias *draws,
             unsigned num_draws,
             unsigned draw_id,
             bool needs_drawid)
{
   VkCommandBuffer cmdbuf = ctx->batch.state->cmdbuf;
   if (dinfo->increment_draw_id && needs_drawid) {
      for (unsigned i = 0; i < num_draws; i++) {
         update_drawid(ctx, draw_id);
         VKCTX(CmdDrawIndexed)(cmdbuf,
            draws[i].count, dinfo->instance_count,
            0, draws[i].index_bias, dinfo->start_instance);
         draw_id++;
      }
   } else {
      if (needs_drawid)
         update_drawid(ctx, draw_id);
      for (unsigned i = 0; i < num_draws; i++)
         VKCTX(CmdDrawIndexed)(cmdbuf,
            draws[i].count, dinfo->instance_count,
            0, draws[i].index_bias, dinfo->start_instance);
   }
}

template <zink_multidraw HAS_MULTIDRAW>
ALWAYS_INLINE static void
draw_indexed(struct zink_context *ctx,
             const struct pipe_draw_info *dinfo,
             const struct pipe_draw_start_count_bias *draws,
             unsigned num_draws,
             unsigned draw_id,
             bool needs_drawid)
{
   VkCommandBuffer cmdbuf = ctx->batch.state->cmdbuf;
   if (dinfo->increment_draw_id && needs_drawid) {
      for (unsigned i = 0; i < num_draws; i++) {
         update_drawid(ctx, draw_id);
         VKCTX(CmdDrawIndexed)(cmdbuf,
            draws[i].count, dinfo->instance_count,
            draws[i].start, draws[i].index_bias, dinfo->start_instance);
         draw_id++;
      }
   } else {
      if (needs_drawid)
         update_drawid(ctx, draw_id);
      if (HAS_MULTIDRAW) {
         VKCTX(CmdDrawMultiIndexedEXT)(cmdbuf, num_draws, (const VkMultiDrawIndexedInfoEXT*)draws,
                                       dinfo->instance_count,
                                       dinfo->start_instance, sizeof(struct pipe_draw_start_count_bias),
                                       dinfo->index_bias_varies ? NULL : &draws[0].index_bias);
      } else {
         for (unsigned i = 0; i < num_draws; i++)
            VKCTX(CmdDrawIndexed)(cmdbuf,
               draws[i].count, dinfo->instance_count,
               draws[i].start, draws[i].index_bias, dinfo->start_instance);
      }
   }
}

template <zink_multidraw HAS_MULTIDRAW>
ALWAYS_INLINE static void
draw(struct zink_context *ctx,
     const struct pipe_draw_info *dinfo,
     const struct pipe_draw_start_count_bias *draws,
     unsigned num_draws,
     unsigned draw_id,
     bool needs_drawid)
{
   VkCommandBuffer cmdbuf = ctx->batch.state->cmdbuf;
   if (dinfo->increment_draw_id && needs_drawid) {
      for (unsigned i = 0; i < num_draws; i++) {
         update_drawid(ctx, draw_id);
         VKCTX(CmdDraw)(cmdbuf, draws[i].count, dinfo->instance_count, draws[i].start, dinfo->start_instance);
         draw_id++;
      }
   } else {
      if (needs_drawid)
         update_drawid(ctx, draw_id);
      if (HAS_MULTIDRAW)
         VKCTX(CmdDrawMultiEXT)(cmdbuf, num_draws, (const VkMultiDrawInfoEXT*)draws,
                                dinfo->instance_count, dinfo->start_instance,
                                sizeof(struct pipe_draw_start_count_bias));
      else {
         for (unsigned i = 0; i < num_draws; i++)
            VKCTX(CmdDraw)(cmdbuf, draws[i].count, dinfo->instance_count, draws[i].start, dinfo->start_instance);
      }
   }
}

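/* return the pipeline stage bits for the first shader stage that has a bind
 * set in the given per-stage mask array
 */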
ALWAYS_INLINE static VkPipelineStageFlags
find_pipeline_bits(uint32_t *mask)
{
   for (unsigned i = 0; i < ZINK_SHADER_COUNT; i++) {
      if (mask[i]) {
         return zink_pipeline_flags_from_pipe_stage((enum pipe_shader_type)i);
      }
   }
   return 0;
}

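/* drain the pending barrier set for the gfx or compute path: the two sets are
 * double-buffered so resources which still need a barrier before the next
 * draw/dispatch can be re-added while this set is iterated; access and stage
 * flags are derived from how each resource is currently bound
 */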
static void
update_barriers(struct zink_context *ctx, bool is_compute)
{
   if (!ctx->need_barriers[is_compute]->entries)
      return;
   struct set *need_barriers = ctx->need_barriers[is_compute];
   ctx->barrier_set_idx[is_compute] = !ctx->barrier_set_idx[is_compute];
   ctx->need_barriers[is_compute] = &ctx->update_barriers[is_compute][ctx->barrier_set_idx[is_compute]];
   set_foreach(need_barriers, he) {
      struct zink_resource *res = (struct zink_resource *)he->key;
      VkPipelineStageFlags pipeline = 0;
      VkAccessFlags access = 0;
      if (res->bind_count[is_compute]) {
         if (res->write_bind_count[is_compute])
            access |= VK_ACCESS_SHADER_WRITE_BIT;
         if (res->write_bind_count[is_compute] != res->bind_count[is_compute]) {
            unsigned bind_count = res->bind_count[is_compute] - res->write_bind_count[is_compute];
            if (res->obj->is_buffer) {
               if (res->ubo_bind_count[is_compute]) {
                  access |= VK_ACCESS_UNIFORM_READ_BIT;
                  bind_count -= res->ubo_bind_count[is_compute];
               }
               if (!is_compute && res->vbo_bind_mask) {
                  access |= VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT;
                  pipeline |= VK_PIPELINE_STAGE_VERTEX_INPUT_BIT;
                  bind_count -= util_bitcount(res->vbo_bind_mask);
                  if (res->write_bind_count[is_compute])
                     pipeline |= VK_PIPELINE_STAGE_VERTEX_SHADER_BIT;
               }
               bind_count -= res->so_bind_count;
            }
            if (bind_count)
               access |= VK_ACCESS_SHADER_READ_BIT;
         }
         if (is_compute)
            pipeline = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
         else if (!pipeline) {
            if (res->ubo_bind_count[0])
               pipeline |= find_pipeline_bits(res->ubo_bind_mask);
            if (!pipeline)
               pipeline |= find_pipeline_bits(res->ssbo_bind_mask);
            if (!pipeline)
               pipeline |= find_pipeline_bits(res->sampler_binds);
            if (!pipeline) //must be a shader image
               pipeline = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
         }
         if (res->base.b.target == PIPE_BUFFER)
            zink_resource_buffer_barrier(ctx, res, access, pipeline);
         else {
            VkImageLayout layout = zink_descriptor_util_image_layout_eval(res, is_compute);
            if (layout != res->layout)
               zink_resource_image_barrier(ctx, res, layout, access, pipeline);
         }
         /* always barrier on draw if this resource has either multiple image write binds or
          * image write binds and image read binds
          */
         if (res->write_bind_count[is_compute] && res->bind_count[is_compute] > 1)
            _mesa_set_add_pre_hashed(ctx->need_barriers[is_compute], he->hash, res);
      }
      _mesa_set_remove(need_barriers, he);
      if (!need_barriers->entries)
         break;
   }
}

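/* bind the current gfx pipeline, skipping the vkCmdBindPipeline call when
 * neither the pipeline handle nor the batch has changed; returns whether the
 * pipeline handle changed
 */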
template <bool BATCH_CHANGED>
static bool
update_gfx_pipeline(struct zink_context *ctx, struct zink_batch_state *bs, enum pipe_prim_type mode)
{
   VkPipeline prev_pipeline = ctx->gfx_pipeline_state.pipeline;
   update_gfx_program(ctx);
   VkPipeline pipeline = zink_get_gfx_pipeline(ctx, ctx->curr_program, &ctx->gfx_pipeline_state, mode);
   bool pipeline_changed = prev_pipeline != pipeline;
   if (BATCH_CHANGED || pipeline_changed)
      VKCTX(CmdBindPipeline)(bs->cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
   return pipeline_changed;
}

static bool
hack_conditional_render(struct pipe_context *pctx,
                        const struct pipe_draw_info *dinfo,
                        unsigned drawid_offset,
                        const struct pipe_draw_indirect_info *dindirect,
                        const struct pipe_draw_start_count_bias *draws,
                        unsigned num_draws)
{
   struct zink_context *ctx = zink_context(pctx);
   struct zink_batch_state *bs = ctx->batch.state;
   static bool warned;
   if (!warned) {
      fprintf(stderr, "ZINK: warning, this is cpu-based conditional rendering, say bye-bye to fps\n");
      warned = true;
   }
   if (!zink_check_conditional_render(ctx))
      return false;
   if (bs != ctx->batch.state) {
      bool prev = ctx->render_condition_active;
      ctx->render_condition_active = false;
      zink_select_draw_vbo(ctx);
      pctx->draw_vbo(pctx, dinfo, drawid_offset, dindirect, draws, num_draws);
      ctx->render_condition_active = prev;
      return false;
   }
   return true;
}

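/* the main draw entrypoint, templated on device capabilities (multidraw,
 * dynamic state 1/2, dynamic vertex input) and on whether the batch changed
 * since the last draw; zink_init_draw_functions() instantiates every
 * combination and the context dispatches the matching variant
 */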
template <zink_multidraw HAS_MULTIDRAW, zink_dynamic_state HAS_DYNAMIC_STATE, zink_dynamic_state2 HAS_DYNAMIC_STATE2,
          zink_dynamic_vertex_input HAS_VERTEX_INPUT, bool BATCH_CHANGED>
void
zink_draw_vbo(struct pipe_context *pctx,
              const struct pipe_draw_info *dinfo,
              unsigned drawid_offset,
              const struct pipe_draw_indirect_info *dindirect,
              const struct pipe_draw_start_count_bias *draws,
              unsigned num_draws)
{
   if (!dindirect && (!draws[0].count || !dinfo->instance_count))
      return;

   struct zink_context *ctx = zink_context(pctx);
   struct zink_screen *screen = zink_screen(pctx->screen);
   struct zink_rasterizer_state *rast_state = ctx->rast_state;
   struct zink_depth_stencil_alpha_state *dsa_state = ctx->dsa_state;
   struct zink_batch *batch = &ctx->batch;
   struct zink_so_target *so_target =
      dindirect && dindirect->count_from_stream_output ?
         zink_so_target(dindirect->count_from_stream_output) : NULL;
   VkBuffer counter_buffers[PIPE_MAX_SO_OUTPUTS];
   VkDeviceSize counter_buffer_offsets[PIPE_MAX_SO_OUTPUTS];
   bool need_index_buffer_unref = false;
   bool mode_changed = ctx->gfx_pipeline_state.gfx_prim_mode != dinfo->mode;
   bool reads_drawid = ctx->shader_reads_drawid;
   bool reads_basevertex = ctx->shader_reads_basevertex;
   unsigned work_count = ctx->batch.work_count;
   enum pipe_prim_type mode = (enum pipe_prim_type)dinfo->mode;

   if (unlikely(!screen->info.have_EXT_conditional_rendering)) {
      if (!hack_conditional_render(pctx, dinfo, drawid_offset, dindirect, draws, num_draws))
         return;
   }

   if (ctx->memory_barrier)
      zink_flush_memory_barrier(ctx, false);
   update_barriers(ctx, false);

   if (unlikely(ctx->buffer_rebind_counter < screen->buffer_rebind_counter)) {
      ctx->buffer_rebind_counter = screen->buffer_rebind_counter;
      zink_rebind_all_buffers(ctx);
   }

   unsigned index_offset = 0;
   unsigned index_size = dinfo->index_size;
   struct pipe_resource *index_buffer = NULL;
   if (index_size > 0) {
      if (dinfo->has_user_indices) {
         if (!util_upload_index_buffer(pctx, dinfo, &draws[0], &index_buffer, &index_offset, 4)) {
            debug_printf("util_upload_index_buffer() failed\n");
            return;
         }
         zink_batch_reference_resource_move(batch, zink_resource(index_buffer));
      } else {
         index_buffer = dinfo->index.resource;
         zink_batch_reference_resource_rw(batch, zink_resource(index_buffer), false);
      }
      assert(index_size <= 4 && index_size != 3);
      assert(index_size != 1 || screen->info.have_EXT_index_type_uint8);
   }

   bool have_streamout = !!ctx->num_so_targets;
   if (have_streamout) {
      if (ctx->xfb_barrier)
         zink_emit_xfb_counter_barrier(ctx);
      if (ctx->dirty_so_targets) {
         /* have to loop here and below because barriers must be emitted out of renderpass,
          * but xfb buffers can't be bound before the renderpass is active to avoid
          * breaking from recursion
          */
         for (unsigned i = 0; i < ctx->num_so_targets; i++) {
            struct zink_so_target *t = (struct zink_so_target *)ctx->so_targets[i];
            if (t)
               zink_resource_buffer_barrier(ctx, zink_resource(t->base.buffer),
                                            VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT, VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT);
         }
      }
   }

   if (so_target)
      zink_emit_xfb_vertex_input_barrier(ctx, zink_resource(so_target->base.buffer));

   barrier_draw_buffers(ctx, dinfo, dindirect, index_buffer);

   if (BATCH_CHANGED)
      zink_update_descriptor_refs(ctx, false);

   zink_batch_rp(ctx);

   /* these must be after renderpass start to avoid issues with recursion */
   uint8_t vertices_per_patch = ctx->gfx_pipeline_state.patch_vertices ? ctx->gfx_pipeline_state.patch_vertices - 1 : 0;
   if (ctx->gfx_pipeline_state.vertices_per_patch != vertices_per_patch)
      ctx->gfx_pipeline_state.dirty = true;
   bool drawid_broken = false;
   if (reads_drawid && (!dindirect || !dindirect->buffer))
      drawid_broken = (drawid_offset != 0 ||
                      (!HAS_MULTIDRAW && num_draws > 1) ||
                      (HAS_MULTIDRAW && num_draws > 1 && !dinfo->increment_draw_id));
   if (drawid_broken != zink_get_last_vertex_key(ctx)->push_drawid)
      zink_set_last_vertex_key(ctx)->push_drawid = drawid_broken;
   ctx->gfx_pipeline_state.vertices_per_patch = vertices_per_patch;
   if (mode_changed) {
      bool points_changed = false;
      if (mode == PIPE_PRIM_POINTS) {
         ctx->gfx_pipeline_state.has_points++;
         points_changed = true;
      } else if (ctx->gfx_pipeline_state.gfx_prim_mode == PIPE_PRIM_POINTS) {
         ctx->gfx_pipeline_state.has_points--;
         points_changed = true;
      }
      if (points_changed && ctx->rast_state->base.point_quad_rasterization)
         zink_set_fs_point_coord_key(ctx);
   }
   ctx->gfx_pipeline_state.gfx_prim_mode = mode;

   if (index_size) {
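      /* index_size is 1, 2, or 4 bytes, so index_size >> 1 maps to 0, 1, 2 */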
      const VkIndexType index_type[3] = {
         VK_INDEX_TYPE_UINT8_EXT,
         VK_INDEX_TYPE_UINT16,
         VK_INDEX_TYPE_UINT32,
      };
      struct zink_resource *res = zink_resource(index_buffer);
      VKCTX(CmdBindIndexBuffer)(batch->state->cmdbuf, res->obj->buffer, index_offset, index_type[index_size >> 1]);
   }
   if (!HAS_DYNAMIC_STATE2) {
      if (ctx->gfx_pipeline_state.primitive_restart != dinfo->primitive_restart)
         ctx->gfx_pipeline_state.dirty = true;
      ctx->gfx_pipeline_state.primitive_restart = dinfo->primitive_restart;
   }

   if (have_streamout && ctx->dirty_so_targets)
      zink_emit_stream_output_targets(pctx);

   bool pipeline_changed = false;
   if (!HAS_DYNAMIC_STATE)
      pipeline_changed = update_gfx_pipeline<BATCH_CHANGED>(ctx, batch->state, mode);

   if (BATCH_CHANGED || ctx->vp_state_changed || (!HAS_DYNAMIC_STATE && pipeline_changed)) {
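      /* convert gallium viewport state (translate/scale) to VkViewport:
       * origin = translate - scale, extent = 2 * scale; minDepth drops the
       * scale term when clip_halfz is set
       */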
      VkViewport viewports[PIPE_MAX_VIEWPORTS];
      for (unsigned i = 0; i < ctx->vp_state.num_viewports; i++) {
         VkViewport viewport = {
            ctx->vp_state.viewport_states[i].translate[0] - ctx->vp_state.viewport_states[i].scale[0],
            ctx->vp_state.viewport_states[i].translate[1] - ctx->vp_state.viewport_states[i].scale[1],
            ctx->vp_state.viewport_states[i].scale[0] * 2,
            ctx->vp_state.viewport_states[i].scale[1] * 2,
            ctx->rast_state->base.clip_halfz ?
               ctx->vp_state.viewport_states[i].translate[2] :
               ctx->vp_state.viewport_states[i].translate[2] - ctx->vp_state.viewport_states[i].scale[2],
            ctx->vp_state.viewport_states[i].translate[2] + ctx->vp_state.viewport_states[i].scale[2]
         };
         viewports[i] = viewport;
      }
      if (HAS_DYNAMIC_STATE)
         VKCTX(CmdSetViewportWithCountEXT)(batch->state->cmdbuf, ctx->vp_state.num_viewports, viewports);
      else
         VKCTX(CmdSetViewport)(batch->state->cmdbuf, 0, ctx->vp_state.num_viewports, viewports);
   }
   if (BATCH_CHANGED || ctx->scissor_changed || ctx->vp_state_changed || (!HAS_DYNAMIC_STATE && pipeline_changed)) {
      VkRect2D scissors[PIPE_MAX_VIEWPORTS];
      if (ctx->rast_state->base.scissor) {
         for (unsigned i = 0; i < ctx->vp_state.num_viewports; i++) {
            scissors[i].offset.x = ctx->vp_state.scissor_states[i].minx;
            scissors[i].offset.y = ctx->vp_state.scissor_states[i].miny;
            scissors[i].extent.width = ctx->vp_state.scissor_states[i].maxx - ctx->vp_state.scissor_states[i].minx;
            scissors[i].extent.height = ctx->vp_state.scissor_states[i].maxy - ctx->vp_state.scissor_states[i].miny;
         }
      } else {
         for (unsigned i = 0; i < ctx->vp_state.num_viewports; i++) {
            scissors[i].offset.x = 0;
            scissors[i].offset.y = 0;
            scissors[i].extent.width = ctx->fb_state.width;
            scissors[i].extent.height = ctx->fb_state.height;
         }
      }
      if (HAS_DYNAMIC_STATE)
         VKCTX(CmdSetScissorWithCountEXT)(batch->state->cmdbuf, ctx->vp_state.num_viewports, scissors);
      else
         VKCTX(CmdSetScissor)(batch->state->cmdbuf, 0, ctx->vp_state.num_viewports, scissors);
   }
   ctx->vp_state_changed = false;
   ctx->scissor_changed = false;

   if (BATCH_CHANGED || ctx->stencil_ref_changed) {
      VKCTX(CmdSetStencilReference)(batch->state->cmdbuf, VK_STENCIL_FACE_FRONT_BIT,
                               ctx->stencil_ref.ref_value[0]);
      VKCTX(CmdSetStencilReference)(batch->state->cmdbuf, VK_STENCIL_FACE_BACK_BIT,
                               ctx->stencil_ref.ref_value[1]);
      ctx->stencil_ref_changed = false;
   }

   if (HAS_DYNAMIC_STATE && (BATCH_CHANGED || ctx->dsa_state_changed)) {
      VKCTX(CmdSetDepthBoundsTestEnableEXT)(batch->state->cmdbuf, dsa_state->hw_state.depth_bounds_test);
      if (dsa_state->hw_state.depth_bounds_test)
         VKCTX(CmdSetDepthBounds)(batch->state->cmdbuf,
                             dsa_state->hw_state.min_depth_bounds,
                             dsa_state->hw_state.max_depth_bounds);
      VKCTX(CmdSetDepthTestEnableEXT)(batch->state->cmdbuf, dsa_state->hw_state.depth_test);
      if (dsa_state->hw_state.depth_test)
         VKCTX(CmdSetDepthCompareOpEXT)(batch->state->cmdbuf, dsa_state->hw_state.depth_compare_op);
      VKCTX(CmdSetDepthWriteEnableEXT)(batch->state->cmdbuf, dsa_state->hw_state.depth_write);
      VKCTX(CmdSetStencilTestEnableEXT)(batch->state->cmdbuf, dsa_state->hw_state.stencil_test);
      if (dsa_state->hw_state.stencil_test) {
         VKCTX(CmdSetStencilOpEXT)(batch->state->cmdbuf, VK_STENCIL_FACE_FRONT_BIT,
                                       dsa_state->hw_state.stencil_front.failOp,
                                       dsa_state->hw_state.stencil_front.passOp,
                                       dsa_state->hw_state.stencil_front.depthFailOp,
                                       dsa_state->hw_state.stencil_front.compareOp);
         VKCTX(CmdSetStencilOpEXT)(batch->state->cmdbuf, VK_STENCIL_FACE_BACK_BIT,
                                       dsa_state->hw_state.stencil_back.failOp,
                                       dsa_state->hw_state.stencil_back.passOp,
                                       dsa_state->hw_state.stencil_back.depthFailOp,
                                       dsa_state->hw_state.stencil_back.compareOp);
      }
      if (dsa_state->base.stencil[0].enabled) {
         if (dsa_state->base.stencil[1].enabled) {
            VKCTX(CmdSetStencilWriteMask)(batch->state->cmdbuf, VK_STENCIL_FACE_FRONT_BIT, dsa_state->hw_state.stencil_front.writeMask);
            VKCTX(CmdSetStencilWriteMask)(batch->state->cmdbuf, VK_STENCIL_FACE_BACK_BIT, dsa_state->hw_state.stencil_back.writeMask);
            VKCTX(CmdSetStencilCompareMask)(batch->state->cmdbuf, VK_STENCIL_FACE_FRONT_BIT, dsa_state->hw_state.stencil_front.compareMask);
            VKCTX(CmdSetStencilCompareMask)(batch->state->cmdbuf, VK_STENCIL_FACE_BACK_BIT, dsa_state->hw_state.stencil_back.compareMask);
         } else {
            VKCTX(CmdSetStencilWriteMask)(batch->state->cmdbuf, VK_STENCIL_FACE_FRONT_AND_BACK, dsa_state->hw_state.stencil_front.writeMask);
            VKCTX(CmdSetStencilCompareMask)(batch->state->cmdbuf, VK_STENCIL_FACE_FRONT_AND_BACK, dsa_state->hw_state.stencil_front.compareMask);
         }
      }
   }
   ctx->dsa_state_changed = false;

   bool rast_state_changed = ctx->rast_state_changed;
   if (HAS_DYNAMIC_STATE && (BATCH_CHANGED || rast_state_changed))
      VKCTX(CmdSetFrontFaceEXT)(batch->state->cmdbuf, ctx->gfx_pipeline_state.dyn_state1.front_face);
   if ((BATCH_CHANGED || rast_state_changed) &&
       screen->info.have_EXT_line_rasterization && rast_state->base.line_stipple_enable)
      VKCTX(CmdSetLineStippleEXT)(batch->state->cmdbuf, rast_state->base.line_stipple_factor, rast_state->base.line_stipple_pattern);

   if (BATCH_CHANGED || ctx->rast_state_changed || mode_changed) {
      enum pipe_prim_type reduced_prim = ctx->last_vertex_stage->reduced_prim;
      if (reduced_prim == PIPE_PRIM_MAX)
         reduced_prim = u_reduced_prim(mode);

      bool depth_bias = false;
      switch (reduced_prim) {
      case PIPE_PRIM_POINTS:
         depth_bias = rast_state->offset_point;
         break;

      case PIPE_PRIM_LINES:
         depth_bias = rast_state->offset_line;
         break;

      case PIPE_PRIM_TRIANGLES:
         depth_bias = rast_state->offset_tri;
         break;

      default:
         unreachable("unexpected reduced prim");
      }

      if (line_width_needed(reduced_prim, rast_state->hw_state.polygon_mode)) {
         if (screen->info.feats.features.wideLines || rast_state->line_width == 1.0f)
            VKCTX(CmdSetLineWidth)(batch->state->cmdbuf, rast_state->line_width);
         else
            debug_printf("BUG: wide lines not supported, needs fallback!");
      }
      if (depth_bias)
         VKCTX(CmdSetDepthBias)(batch->state->cmdbuf, rast_state->offset_units, rast_state->offset_clamp, rast_state->offset_scale);
      else
         VKCTX(CmdSetDepthBias)(batch->state->cmdbuf, 0.0f, 0.0f, 0.0f);
   }
   ctx->rast_state_changed = false;

   if (HAS_DYNAMIC_STATE) {
      if (ctx->sample_locations_changed) {
         VkSampleLocationsInfoEXT loc;
         zink_init_vk_sample_locations(ctx, &loc);
         VKCTX(CmdSetSampleLocationsEXT)(batch->state->cmdbuf, &loc);
      }
      ctx->sample_locations_changed = false;
   }

   if ((BATCH_CHANGED || ctx->blend_state_changed) &&
       ctx->gfx_pipeline_state.blend_state->need_blend_constants) {
      VKCTX(CmdSetBlendConstants)(batch->state->cmdbuf, ctx->blend_constants);
   }
   ctx->blend_state_changed = false;

   if (BATCH_CHANGED || ctx->vertex_buffers_dirty)
      zink_bind_vertex_buffers<HAS_DYNAMIC_STATE, HAS_VERTEX_INPUT>(batch, ctx);

   zink_query_update_gs_states(ctx);

   if (BATCH_CHANGED) {
      ctx->pipeline_changed[0] = false;
      zink_select_draw_vbo(ctx);
   }

   if (HAS_DYNAMIC_STATE) {
      update_gfx_pipeline<BATCH_CHANGED>(ctx, batch->state, mode);
      if (BATCH_CHANGED || mode_changed)
         VKCTX(CmdSetPrimitiveTopologyEXT)(batch->state->cmdbuf, zink_primitive_topology(mode));
   }

   if (HAS_DYNAMIC_STATE2 && (BATCH_CHANGED || ctx->primitive_restart != dinfo->primitive_restart)) {
      VKCTX(CmdSetPrimitiveRestartEnableEXT)(batch->state->cmdbuf, dinfo->primitive_restart);
      ctx->primitive_restart = dinfo->primitive_restart;
   }

   if (zink_program_has_descriptors(&ctx->curr_program->base))
      screen->descriptors_update(ctx, false);

   if (ctx->di.any_bindless_dirty && ctx->curr_program->base.dd->bindless)
      zink_descriptors_update_bindless(ctx);

   if (reads_basevertex) {
      unsigned draw_mode_is_indexed = index_size > 0;
      VKCTX(CmdPushConstants)(batch->state->cmdbuf, ctx->curr_program->base.layout, VK_SHADER_STAGE_VERTEX_BIT,
                         offsetof(struct zink_gfx_push_constant, draw_mode_is_indexed), sizeof(unsigned),
                         &draw_mode_is_indexed);
   }
   if (ctx->curr_program->shaders[PIPE_SHADER_TESS_CTRL] && ctx->curr_program->shaders[PIPE_SHADER_TESS_CTRL]->is_generated)
      VKCTX(CmdPushConstants)(batch->state->cmdbuf, ctx->curr_program->base.layout, VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT,
                         offsetof(struct zink_gfx_push_constant, default_inner_level), sizeof(float) * 6,
                         &ctx->tess_levels[0]);

   if (have_streamout) {
      for (unsigned i = 0; i < ctx->num_so_targets; i++) {
         struct zink_so_target *t = zink_so_target(ctx->so_targets[i]);
         counter_buffers[i] = VK_NULL_HANDLE;
         if (t) {
            struct zink_resource *res = zink_resource(t->counter_buffer);
            t->stride = ctx->last_vertex_stage->streamout.so_info.stride[i] * sizeof(uint32_t);
            zink_batch_reference_resource_rw(batch, res, true);
            if (t->counter_buffer_valid) {
               counter_buffers[i] = res->obj->buffer;
               counter_buffer_offsets[i] = t->counter_buffer_offset;
            }
         }
      }
      VKCTX(CmdBeginTransformFeedbackEXT)(batch->state->cmdbuf, 0, ctx->num_so_targets, counter_buffers, counter_buffer_offsets);
   }

   bool needs_drawid = reads_drawid && zink_get_last_vertex_key(ctx)->push_drawid;
   work_count += num_draws;
   if (index_size > 0) {
      if (dindirect && dindirect->buffer) {
         assert(num_draws == 1);
         if (needs_drawid)
            update_drawid(ctx, drawid_offset);
         struct zink_resource *indirect = zink_resource(dindirect->buffer);
         zink_batch_reference_resource_rw(batch, indirect, false);
         if (dindirect->indirect_draw_count) {
             struct zink_resource *indirect_draw_count = zink_resource(dindirect->indirect_draw_count);
             zink_batch_reference_resource_rw(batch, indirect_draw_count, false);
             VKCTX(CmdDrawIndexedIndirectCount)(batch->state->cmdbuf, indirect->obj->buffer, dindirect->offset,
                                                indirect_draw_count->obj->buffer, dindirect->indirect_draw_count_offset,
                                                dindirect->draw_count, dindirect->stride);
         } else
            VKCTX(CmdDrawIndexedIndirect)(batch->state->cmdbuf, indirect->obj->buffer, dindirect->offset, dindirect->draw_count, dindirect->stride);
      } else {
         if (need_index_buffer_unref)
            draw_indexed_need_index_buffer_unref(ctx, dinfo, draws, num_draws, drawid_offset, needs_drawid);
         else
            draw_indexed<HAS_MULTIDRAW>(ctx, dinfo, draws, num_draws, drawid_offset, needs_drawid);
      }
   } else {
      if (so_target && screen->info.tf_props.transformFeedbackDraw) {
         if (needs_drawid)
            update_drawid(ctx, drawid_offset);
         zink_batch_reference_resource_rw(batch, zink_resource(so_target->base.buffer), false);
         zink_batch_reference_resource_rw(batch, zink_resource(so_target->counter_buffer), true);
         VKCTX(CmdDrawIndirectByteCountEXT)(batch->state->cmdbuf, dinfo->instance_count, dinfo->start_instance,
                                       zink_resource(so_target->counter_buffer)->obj->buffer, so_target->counter_buffer_offset, 0,
                                       MIN2(so_target->stride, screen->info.tf_props.maxTransformFeedbackBufferDataStride));
      } else if (dindirect && dindirect->buffer) {
         assert(num_draws == 1);
         if (needs_drawid)
            update_drawid(ctx, drawid_offset);
         struct zink_resource *indirect = zink_resource(dindirect->buffer);
         zink_batch_reference_resource_rw(batch, indirect, false);
         if (dindirect->indirect_draw_count) {
             struct zink_resource *indirect_draw_count = zink_resource(dindirect->indirect_draw_count);
             zink_batch_reference_resource_rw(batch, indirect_draw_count, false);
             VKCTX(CmdDrawIndirectCount)(batch->state->cmdbuf, indirect->obj->buffer, dindirect->offset,
                                           indirect_draw_count->obj->buffer, dindirect->indirect_draw_count_offset,
                                           dindirect->draw_count, dindirect->stride);
         } else
            VKCTX(CmdDrawIndirect)(batch->state->cmdbuf, indirect->obj->buffer, dindirect->offset, dindirect->draw_count, dindirect->stride);
      } else {
         draw<HAS_MULTIDRAW>(ctx, dinfo, draws, num_draws, drawid_offset, needs_drawid);
      }
   }

   if (have_streamout) {
      for (unsigned i = 0; i < ctx->num_so_targets; i++) {
         struct zink_so_target *t = zink_so_target(ctx->so_targets[i]);
         if (t) {
            counter_buffers[i] = zink_resource(t->counter_buffer)->obj->buffer;
            counter_buffer_offsets[i] = t->counter_buffer_offset;
            t->counter_buffer_valid = true;
         }
      }
      VKCTX(CmdEndTransformFeedbackEXT)(batch->state->cmdbuf, 0, ctx->num_so_targets, counter_buffers, counter_buffer_offsets);
   }
   batch->has_work = true;
   batch->last_was_compute = false;
   ctx->batch.work_count = work_count;
   /* flush if there's >30k draws */
   if (unlikely(work_count >= 30000) || ctx->oom_flush)
      pctx->flush(pctx, NULL, 0);
}

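/* the compute dispatch path mirrors the draw path: flush barriers, update
 * descriptors, bind the compute pipeline if it changed, then dispatch
 */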
template <bool BATCH_CHANGED>
static void
zink_launch_grid(struct pipe_context *pctx, const struct pipe_grid_info *info)
{
   struct zink_context *ctx = zink_context(pctx);
   struct zink_screen *screen = zink_screen(pctx->screen);
   struct zink_batch *batch = &ctx->batch;

   update_barriers(ctx, true);
   if (ctx->memory_barrier)
      zink_flush_memory_barrier(ctx, true);

   if (zink_program_has_descriptors(&ctx->curr_compute->base))
      screen->descriptors_update(ctx, true);
   if (ctx->di.any_bindless_dirty && ctx->curr_compute->base.dd->bindless)
      zink_descriptors_update_bindless(ctx);

   zink_program_update_compute_pipeline_state(ctx, ctx->curr_compute, info->block);
   VkPipeline prev_pipeline = ctx->compute_pipeline_state.pipeline;
   VkPipeline pipeline = zink_get_compute_pipeline(screen, ctx->curr_compute,
                                                   &ctx->compute_pipeline_state);

   if (BATCH_CHANGED) {
      zink_update_descriptor_refs(ctx, true);
      zink_batch_reference_program(&ctx->batch, &ctx->curr_compute->base);
   }

   if (prev_pipeline != pipeline || BATCH_CHANGED)
      VKCTX(CmdBindPipeline)(batch->state->cmdbuf, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
   if (BATCH_CHANGED) {
      ctx->pipeline_changed[1] = false;
      zink_select_launch_grid(ctx);
   }

   if (BITSET_TEST(ctx->compute_stage->nir->info.system_values_read, SYSTEM_VALUE_WORK_DIM))
      VKCTX(CmdPushConstants)(batch->state->cmdbuf, ctx->curr_compute->base.layout, VK_SHADER_STAGE_COMPUTE_BIT,
                         offsetof(struct zink_cs_push_constant, work_dim), sizeof(uint32_t),
                         &info->work_dim);

   batch->work_count++;
   zink_batch_no_rp(ctx);
   if (info->indirect) {
      /*
         VK_ACCESS_INDIRECT_COMMAND_READ_BIT specifies read access to indirect command data read as
         part of an indirect build, trace, drawing or dispatching command. Such access occurs in the
         VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT pipeline stage.

         - Chapter 7. Synchronization and Cache Control
       */
      check_buffer_barrier(ctx, info->indirect, VK_ACCESS_INDIRECT_COMMAND_READ_BIT, VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT);
      VKCTX(CmdDispatchIndirect)(batch->state->cmdbuf, zink_resource(info->indirect)->obj->buffer, info->indirect_offset);
      zink_batch_reference_resource_rw(batch, zink_resource(info->indirect), false);
   } else
      VKCTX(CmdDispatch)(batch->state->cmdbuf, info->grid[0], info->grid[1], info->grid[2]);
   batch->has_work = true;
   batch->last_was_compute = true;
   /* flush if there's >30k computes */
   if (unlikely(ctx->batch.work_count >= 30000) || ctx->oom_flush)
      pctx->flush(pctx, NULL, 0);
}

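/* the template cascade below instantiates zink_draw_vbo for every combination
 * of multidraw, dynamic state, dynamic state2, dynamic vertex input, and
 * batch-changed, filling the 5-dimensional draw_vbo function table
 */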
template <zink_multidraw HAS_MULTIDRAW, zink_dynamic_state HAS_DYNAMIC_STATE, zink_dynamic_state2 HAS_DYNAMIC_STATE2,
          zink_dynamic_vertex_input HAS_VERTEX_INPUT, bool BATCH_CHANGED>
static void
init_batch_changed_functions(struct zink_context *ctx, pipe_draw_vbo_func draw_vbo_array[2][2][2][2][2])
{
   draw_vbo_array[HAS_MULTIDRAW][HAS_DYNAMIC_STATE][HAS_DYNAMIC_STATE2][HAS_VERTEX_INPUT][BATCH_CHANGED] =
   zink_draw_vbo<HAS_MULTIDRAW, HAS_DYNAMIC_STATE, HAS_DYNAMIC_STATE2, HAS_VERTEX_INPUT, BATCH_CHANGED>;
}

template <zink_multidraw HAS_MULTIDRAW, zink_dynamic_state HAS_DYNAMIC_STATE, zink_dynamic_state2 HAS_DYNAMIC_STATE2,
          zink_dynamic_vertex_input HAS_VERTEX_INPUT>
static void
init_vertex_input_functions(struct zink_context *ctx, pipe_draw_vbo_func draw_vbo_array[2][2][2][2][2])
{
   init_batch_changed_functions<HAS_MULTIDRAW, HAS_DYNAMIC_STATE, HAS_DYNAMIC_STATE2, HAS_VERTEX_INPUT, false>(ctx, draw_vbo_array);
   init_batch_changed_functions<HAS_MULTIDRAW, HAS_DYNAMIC_STATE, HAS_DYNAMIC_STATE2, HAS_VERTEX_INPUT, true>(ctx, draw_vbo_array);
}

template <zink_multidraw HAS_MULTIDRAW, zink_dynamic_state HAS_DYNAMIC_STATE, zink_dynamic_state2 HAS_DYNAMIC_STATE2>
static void
init_dynamic_state2_functions(struct zink_context *ctx, pipe_draw_vbo_func draw_vbo_array[2][2][2][2][2])
{
   init_vertex_input_functions<HAS_MULTIDRAW, HAS_DYNAMIC_STATE, HAS_DYNAMIC_STATE2, ZINK_NO_DYNAMIC_VERTEX_INPUT>(ctx, draw_vbo_array);
   init_vertex_input_functions<HAS_MULTIDRAW, HAS_DYNAMIC_STATE, HAS_DYNAMIC_STATE2, ZINK_DYNAMIC_VERTEX_INPUT>(ctx, draw_vbo_array);
}

template <zink_multidraw HAS_MULTIDRAW, zink_dynamic_state HAS_DYNAMIC_STATE>
static void
init_dynamic_state_functions(struct zink_context *ctx, pipe_draw_vbo_func draw_vbo_array[2][2][2][2][2])
{
   init_dynamic_state2_functions<HAS_MULTIDRAW, HAS_DYNAMIC_STATE, ZINK_NO_DYNAMIC_STATE2>(ctx, draw_vbo_array);
   init_dynamic_state2_functions<HAS_MULTIDRAW, HAS_DYNAMIC_STATE, ZINK_DYNAMIC_STATE2>(ctx, draw_vbo_array);
}

template <zink_multidraw HAS_MULTIDRAW>
static void
init_multidraw_functions(struct zink_context *ctx, pipe_draw_vbo_func draw_vbo_array[2][2][2][2][2])
{
   init_dynamic_state_functions<HAS_MULTIDRAW, ZINK_NO_DYNAMIC_STATE>(ctx, draw_vbo_array);
   init_dynamic_state_functions<HAS_MULTIDRAW, ZINK_DYNAMIC_STATE>(ctx, draw_vbo_array);
}

static void
init_all_draw_functions(struct zink_context *ctx, pipe_draw_vbo_func draw_vbo_array[2][2][2][2][2])
{
   init_multidraw_functions<ZINK_NO_MULTIDRAW>(ctx, draw_vbo_array);
   init_multidraw_functions<ZINK_MULTIDRAW>(ctx, draw_vbo_array);
}

template <bool BATCH_CHANGED>
static void
init_grid_batch_changed_functions(struct zink_context *ctx)
{
   ctx->launch_grid[BATCH_CHANGED] = zink_launch_grid<BATCH_CHANGED>;
}

static void
init_all_grid_functions(struct zink_context *ctx)
{
   init_grid_batch_changed_functions<false>(ctx);
   init_grid_batch_changed_functions<true>(ctx);
}

static void
zink_invalid_draw_vbo(struct pipe_context *pipe,
                      const struct pipe_draw_info *dinfo,
                      unsigned drawid_offset,
                      const struct pipe_draw_indirect_info *dindirect,
                      const struct pipe_draw_start_count_bias *draws,
                      unsigned num_draws)
{
   unreachable("vertex shader not bound");
}

static void
zink_invalid_launch_grid(struct pipe_context *pctx, const struct pipe_grid_info *info)
{
   unreachable("compute shader not bound");
}

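/* STAGE_MASK is the bound-stage bitfield with the always-present VS and FS
 * bits shifted off (see ctx->shader_stages >> 2 in update_gfx_program):
 * bit 0 = geometry, bit 1 = tess ctrl, bit 2 = tess eval
 */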
template <unsigned STAGE_MASK>
static uint32_t
hash_gfx_program(const void *key)
{
   const struct zink_shader **shaders = (const struct zink_shader**)key;
   uint32_t base_hash = shaders[PIPE_SHADER_VERTEX]->hash ^ shaders[PIPE_SHADER_FRAGMENT]->hash;
   if (STAGE_MASK == 0) //VS+FS
      return base_hash;
   if (STAGE_MASK == 1) //VS+GS+FS
      return base_hash ^ shaders[PIPE_SHADER_GEOMETRY]->hash;
   /* VS+TCS+FS isn't a thing */
   /* VS+TCS+GS+FS isn't a thing */
   if (STAGE_MASK == 4) //VS+TES+FS
      return base_hash ^ shaders[PIPE_SHADER_TESS_EVAL]->hash;
   if (STAGE_MASK == 5) //VS+TES+GS+FS
      return base_hash ^ shaders[PIPE_SHADER_GEOMETRY]->hash ^ shaders[PIPE_SHADER_TESS_EVAL]->hash;
   if (STAGE_MASK == 6) //VS+TCS+TES+FS
      return base_hash ^ shaders[PIPE_SHADER_TESS_CTRL]->hash ^ shaders[PIPE_SHADER_TESS_EVAL]->hash;

   /* all stages */
   return base_hash ^ shaders[PIPE_SHADER_GEOMETRY]->hash ^ shaders[PIPE_SHADER_TESS_CTRL]->hash ^ shaders[PIPE_SHADER_TESS_EVAL]->hash;
}

template <unsigned STAGE_MASK>
static bool
equals_gfx_program(const void *a, const void *b)
{
   const void **sa = (const void**)a;
   const void **sb = (const void**)b;
   if (STAGE_MASK == 0) //VS+FS
      return !memcmp(a, b, sizeof(void*) * 2);
   if (STAGE_MASK == 1) //VS+GS+FS
      return !memcmp(a, b, sizeof(void*) * 3);
   /* VS+TCS+FS isn't a thing */
   /* VS+TCS+GS+FS isn't a thing */
   if (STAGE_MASK == 4) //VS+TES+FS
      return sa[PIPE_SHADER_TESS_EVAL] == sb[PIPE_SHADER_TESS_EVAL] && !memcmp(a, b, sizeof(void*) * 2);
   if (STAGE_MASK == 5) //VS+TES+GS+FS
      return sa[PIPE_SHADER_TESS_EVAL] == sb[PIPE_SHADER_TESS_EVAL] && !memcmp(a, b, sizeof(void*) * 3);
   if (STAGE_MASK == 6) //VS+TCS+TES+FS
      return !memcmp(&sa[PIPE_SHADER_TESS_CTRL], &sb[PIPE_SHADER_TESS_CTRL], sizeof(void*) * 2) &&
             !memcmp(a, b, sizeof(void*) * 2);

   /* all stages */
   return !memcmp(a, b, sizeof(void*) * ZINK_SHADER_COUNT);
}

extern "C"
void
zink_init_draw_functions(struct zink_context *ctx, struct zink_screen *screen)
{
   pipe_draw_vbo_func draw_vbo_array[2][2][2][2] //multidraw, dynamic state, dynamic state2, dynamic vertex input,
                                    [2];   //batch changed
   init_all_draw_functions(ctx, draw_vbo_array);
   memcpy(ctx->draw_vbo, &draw_vbo_array[screen->info.have_EXT_multi_draw]
                                        [screen->info.have_EXT_extended_dynamic_state]
                                        [screen->info.have_EXT_extended_dynamic_state2]
                                        [screen->info.have_EXT_vertex_input_dynamic_state],
                                        sizeof(ctx->draw_vbo));

   /* Bind a fake draw_vbo, so that draw_vbo isn't NULL, which would skip
    * initialization of callbacks in upper layers (such as u_threaded_context).
    */
   ctx->base.draw_vbo = zink_invalid_draw_vbo;

   _mesa_hash_table_init(&ctx->program_cache[0], ctx, hash_gfx_program<0>, equals_gfx_program<0>);
   _mesa_hash_table_init(&ctx->program_cache[1], ctx, hash_gfx_program<1>, equals_gfx_program<1>);
   _mesa_hash_table_init(&ctx->program_cache[2], ctx, hash_gfx_program<2>, equals_gfx_program<2>);
   _mesa_hash_table_init(&ctx->program_cache[3], ctx, hash_gfx_program<3>, equals_gfx_program<3>);
   _mesa_hash_table_init(&ctx->program_cache[4], ctx, hash_gfx_program<4>, equals_gfx_program<4>);
   _mesa_hash_table_init(&ctx->program_cache[5], ctx, hash_gfx_program<5>, equals_gfx_program<5>);
   _mesa_hash_table_init(&ctx->program_cache[6], ctx, hash_gfx_program<6>, equals_gfx_program<6>);
   _mesa_hash_table_init(&ctx->program_cache[7], ctx, hash_gfx_program<7>, equals_gfx_program<7>);
}

void
zink_init_grid_functions(struct zink_context *ctx)
{
   init_all_grid_functions(ctx);
   /* Bind a fake launch_grid, so that launch_grid isn't NULL, which would skip
    * initialization of callbacks in upper layers (such as u_threaded_context).
    */
   ctx->base.launch_grid = zink_invalid_launch_grid;
}