/*
 * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark <robclark@freedesktop.org>
 */

#ifndef FREEDRENO_CONTEXT_H_
#define FREEDRENO_CONTEXT_H_

#include "pipe/p_context.h"
#include "util/libsync.h"
#include "util/list.h"
#include "util/slab.h"
#include "util/u_blitter.h"
#include "util/u_string.h"
#include "util/u_threaded_context.h"
#include "util/perf/u_trace.h"

#include "freedreno_autotune.h"
#include "freedreno_gmem.h"
#include "freedreno_perfetto.h"
#include "freedreno_screen.h"
#include "freedreno_util.h"

#ifdef __cplusplus
extern "C" {
#endif

#define BORDER_COLOR_UPLOAD_SIZE (2 * PIPE_MAX_SAMPLERS * BORDERCOLOR_SIZE)

struct fd_vertex_stateobj;
struct fd_batch;

struct fd_texture_stateobj {
   struct pipe_sampler_view *textures[PIPE_MAX_SAMPLERS];
   unsigned num_textures;
   unsigned valid_textures;
   struct pipe_sampler_state *samplers[PIPE_MAX_SAMPLERS];
   unsigned num_samplers;
   unsigned valid_samplers;
};

struct fd_program_stateobj {
   void *vs, *hs, *ds, *gs, *fs;
};

struct fd_constbuf_stateobj {
   struct pipe_constant_buffer cb[PIPE_MAX_CONSTANT_BUFFERS];
   uint32_t enabled_mask;
};

struct fd_shaderbuf_stateobj {
   struct pipe_shader_buffer sb[PIPE_MAX_SHADER_BUFFERS];
   uint32_t enabled_mask;
   uint32_t writable_mask;
};

struct fd_shaderimg_stateobj {
   struct pipe_image_view si[PIPE_MAX_SHADER_IMAGES];
   uint32_t enabled_mask;
};

struct fd_vertexbuf_stateobj {
   struct pipe_vertex_buffer vb[PIPE_MAX_ATTRIBS];
   unsigned count;
   uint32_t enabled_mask;
};

struct fd_vertex_stateobj {
   struct pipe_vertex_element pipe[PIPE_MAX_ATTRIBS];
   unsigned num_elements;
};

struct fd_stream_output_target {
   struct pipe_stream_output_target base;
   struct pipe_resource *offset_buf;
   /* stride of the last stream out recorded to this target, for
    * glDrawTransformFeedback(). */
   uint32_t stride;
};

struct fd_streamout_stateobj {
   struct pipe_stream_output_target *targets[PIPE_MAX_SO_BUFFERS];
   /* Bitmask of streams that should be reset. */
   unsigned reset;

   unsigned num_targets;
   /* Track offset from vtxcnt for streamout data.  This counter is
    * just incremented by # of vertices on each draw until reset or
    * a new streamout buffer is bound.
    *
    * When we eventually have GS, the CPU won't actually know the
    * number of vertices per draw, so I think we'll have to do
    * something more clever.
    */
   unsigned offsets[PIPE_MAX_SO_BUFFERS];

   /* Pre-a6xx, the maximum number of vertices that could be recorded to this
    * set of targets with the current vertex shader.  On a6xx and newer,
    * hardware queries are used instead.
    */
   unsigned max_tf_vtx;

   /* Pre-a6xx, the number of verts written to the buffers since the last
    * Begin.  Used for overflow checking for SW queries.
    */
   unsigned verts_written;
};

#define MAX_GLOBAL_BUFFERS 16
struct fd_global_bindings_stateobj {
   struct pipe_resource *buf[MAX_GLOBAL_BUFFERS];
   uint32_t enabled_mask;
};

/* group together the vertex and vertexbuf state.. for ease of passing
 * around, and because various internal operations (gmem<->mem, etc)
 * need their own vertex state:
 */
struct fd_vertex_state {
   struct fd_vertex_stateobj *vtx;
   struct fd_vertexbuf_stateobj vertexbuf;
};

/* global 3d pipeline dirty state: */
enum fd_dirty_3d_state {
   FD_DIRTY_BLEND = BIT(0),
   FD_DIRTY_RASTERIZER = BIT(1),
   FD_DIRTY_ZSA = BIT(2),
   FD_DIRTY_BLEND_COLOR = BIT(3),
   FD_DIRTY_STENCIL_REF = BIT(4),
   FD_DIRTY_SAMPLE_MASK = BIT(5),
   FD_DIRTY_FRAMEBUFFER = BIT(6),
   FD_DIRTY_STIPPLE = BIT(7),
   FD_DIRTY_VIEWPORT = BIT(8),
   FD_DIRTY_VTXSTATE = BIT(9),
   FD_DIRTY_VTXBUF = BIT(10),
   FD_DIRTY_MIN_SAMPLES = BIT(11),
   FD_DIRTY_SCISSOR = BIT(12),
   FD_DIRTY_STREAMOUT = BIT(13),
   FD_DIRTY_UCP = BIT(14),
   FD_DIRTY_PROG = BIT(15),
   FD_DIRTY_CONST = BIT(16),
   FD_DIRTY_TEX = BIT(17),
   FD_DIRTY_IMAGE = BIT(18),
   FD_DIRTY_SSBO = BIT(19),

   /* only used by a2xx.. possibly can be removed.. */
   FD_DIRTY_TEXSTATE = BIT(20),

   /* fine grained state changes, for cases where state is not orthogonal
    * from hw perspective:
    */
   FD_DIRTY_RASTERIZER_DISCARD = BIT(24),
   FD_DIRTY_RASTERIZER_CLIP_PLANE_ENABLE = BIT(25),
   FD_DIRTY_BLEND_DUAL = BIT(26),
#define NUM_DIRTY_BITS 27

   /* additional flag for state that requires updated resource tracking: */
   FD_DIRTY_RESOURCE = BIT(31),
};

/* per shader-stage dirty state: */
enum fd_dirty_shader_state {
   FD_DIRTY_SHADER_PROG = BIT(0),
   FD_DIRTY_SHADER_CONST = BIT(1),
   FD_DIRTY_SHADER_TEX = BIT(2),
   FD_DIRTY_SHADER_SSBO = BIT(3),
   FD_DIRTY_SHADER_IMAGE = BIT(4),
#define NUM_DIRTY_SHADER_BITS 5
};

#define MAX_HW_SAMPLE_PROVIDERS 7
struct fd_hw_sample_provider;
struct fd_hw_sample;

struct ir3_shader_key;

struct fd_context {
   struct pipe_context base;

   struct threaded_context *tc;

   struct list_head node; /* node in screen->context_list */

   /* We currently need to serialize emitting GMEM batches, because of
    * VSC state access in the context.
    *
    * In practice this lock should not be contended, since pipe_context
    * use should be single threaded.  But it is needed to protect the
    * case, with batch reordering, where a ctxB batch triggers flushing
    * a ctxA batch.
    */
   simple_mtx_t gmem_lock;

   struct fd_device *dev;
   struct fd_screen *screen;
   struct fd_pipe *pipe;

   struct blitter_context *blitter dt;
   void *clear_rs_state[2] dt;

   /* slab for pipe_transfer allocations: */
   struct slab_child_pool transfer_pool dt;
   struct slab_child_pool transfer_pool_unsync; /* for threaded_context */

   struct fd_autotune autotune dt;

   /**
    * query related state:
    */
   /*@{*/
   /* slabs for fd_hw_sample and fd_hw_sample_period allocations: */
   struct slab_mempool sample_pool dt;
   struct slab_mempool sample_period_pool dt;

   /* sample-providers for hw queries: */
   const struct fd_hw_sample_provider
      *hw_sample_providers[MAX_HW_SAMPLE_PROVIDERS];

   /* list of active queries: */
   struct list_head hw_active_queries dt;

   /* sample-providers for accumulating hw queries: */
   const struct fd_acc_sample_provider
      *acc_sample_providers[MAX_HW_SAMPLE_PROVIDERS];

   /* list of active accumulating queries: */
   struct list_head acc_active_queries dt;
   /*@}*/

   uint8_t patch_vertices;

   /* Whether we need to recheck the active_queries list next
    * fd_batch_update_queries().
    */
   bool update_active_queries dt;

   /* Current state of pctx->set_active_query_state() (i.e. "should drawing
    * be counted against non-perfcounter queries")
    */
   bool active_queries dt;

   /* shaders used by clear, and gmem->mem blits: */
   struct fd_program_stateobj solid_prog; // TODO move to screen?
   struct fd_program_stateobj solid_layered_prog;

   /* shaders used by mem->gmem blits: */
   struct fd_program_stateobj
      blit_prog[MAX_RENDER_TARGETS]; // TODO move to screen?
   struct fd_program_stateobj blit_z, blit_zs;

   /* Stats/counters:
    */
   struct {
      uint64_t prims_emitted;
      uint64_t prims_generated;
      uint64_t draw_calls;
      uint64_t batch_total, batch_sysmem, batch_gmem, batch_nondraw,
         batch_restore;
      uint64_t staging_uploads, shadow_uploads;
      uint64_t vs_regs, hs_regs, ds_regs, gs_regs, fs_regs;
   } stats dt;

   /* Counter for number of users who need sw counters (so we can
    * skip collecting them when not needed)
    */
   unsigned stats_users;

   /* Current batch.. the rule here is that you can deref ctx->batch
    * in codepaths from pipe_context entrypoints.  But not in codepaths
    * from fd_batch_flush() (basically, the stuff that gets called from
    * GMEM code), since in those codepaths the batch you care about is
    * not necessarily the same as ctx->batch.
    */
   struct fd_batch *batch dt;

   /* NULL if there has been rendering since last flush.  Otherwise
    * keeps a reference to the last fence so we can re-use it rather
    * than having to flush a no-op batch.
    */
   struct pipe_fence_handle *last_fence dt;

   /* Fence fd we are told to wait on via ->fence_server_sync() (or -1
    * if none).  The in-fence is transferred over to the batch on the
    * next draw/blit/grid.
    *
    * The reason for this extra complexity is that apps will typically
    * do eglWaitSyncKHR()/etc at the beginning of the frame, before the
    * first draw.  But mesa/st doesn't flush down the framebuffer state
    * change until we hit a draw, so at ->fence_server_sync() time, we
    * don't yet have the correct batch.  If we created a batch at that
    * point, it would be the wrong one, and we'd have to flush it
    * prematurely, causing us to stall early in the frame where we
    * could be building up cmdstream.
    */
   int in_fence_fd dt;

   /* track last known reset status globally and per-context to
    * determine if more resets occurred since then.  If global reset
    * count increases, it means some other context crashed.  If
    * per-context reset count increases, it means we crashed the
    * gpu.
    *
    * Only accessed by front-end thread, never accessed by TC driver
    * thread.
    */
   uint32_t context_reset_count;
   uint32_t global_reset_count;

   /* Context sequence #, used for batch-cache key: */
   uint16_t seqno;

   /* Cost per draw, used in conjunction with samples-passed history to
    * estimate whether GMEM or bypass is the better option.
    */
   uint8_t draw_cost;

   /* Are we in the process of shadowing a resource? Used to detect recursion
    * in transfer_map, and skip unneeded synchronization.
    */
   bool in_shadow : 1 dt;

   /* For catching recursion problems with blit fallback: */
   bool in_blit : 1 dt;

   /* points to either scissor or disabled_scissor depending on rast state: */
   struct pipe_scissor_state *current_scissor dt;

   struct pipe_scissor_state scissor dt;

   /* we don't have a disable/enable bit for scissor, so instead we keep
    * a disabled-scissor state which matches the entire bound framebuffer
    * and use that when scissor is not enabled.
    */
   struct pipe_scissor_state disabled_scissor dt;

   /* Per vsc pipe bo's (a2xx-a5xx): */
   struct fd_bo *vsc_pipe_bo[32] dt;

   /* Maps generic gallium oriented fd_dirty_3d_state bits to generation
    * specific bitmask of state "groups".
    */
   uint32_t gen_dirty_map[NUM_DIRTY_BITS];
   uint32_t gen_dirty_shader_map[PIPE_SHADER_TYPES][NUM_DIRTY_SHADER_BITS];

   /* Bitmask of all possible gen_dirty bits: */
   uint32_t gen_all_dirty;

   /* Generation specific bitmask of dirty state groups: */
   uint32_t gen_dirty;

   /* which state objects need to be re-emit'd: */
   enum fd_dirty_3d_state dirty dt;

   /* per shader-stage dirty status: */
   enum fd_dirty_shader_state dirty_shader[PIPE_SHADER_TYPES] dt;

   void *compute dt;
   struct pipe_blend_state *blend dt;
   struct pipe_rasterizer_state *rasterizer dt;
   struct pipe_depth_stencil_alpha_state *zsa dt;

   struct fd_texture_stateobj tex[PIPE_SHADER_TYPES] dt;

   struct fd_program_stateobj prog dt;
   uint32_t bound_shader_stages dt;

   struct fd_vertex_state vtx dt;

   struct pipe_blend_color blend_color dt;
   struct pipe_stencil_ref stencil_ref dt;
   unsigned sample_mask dt;
   unsigned min_samples dt;
   /* local context fb state, for when ctx->batch is null: */
   struct pipe_framebuffer_state framebuffer dt;
   struct pipe_poly_stipple stipple dt;
   struct pipe_viewport_state viewport dt;
   struct pipe_scissor_state viewport_scissor dt;
   struct fd_constbuf_stateobj constbuf[PIPE_SHADER_TYPES] dt;
   struct fd_shaderbuf_stateobj shaderbuf[PIPE_SHADER_TYPES] dt;
   struct fd_shaderimg_stateobj shaderimg[PIPE_SHADER_TYPES] dt;
   struct fd_streamout_stateobj streamout dt;
   struct fd_global_bindings_stateobj global_bindings dt;
   struct pipe_clip_state ucp dt;

   struct pipe_query *cond_query dt;
   bool cond_cond dt; /* inverted rendering condition */
   uint cond_mode dt;

   /* Private memory is a memory space where each fiber gets its own piece of
    * memory, in addition to registers. It is backed by a buffer which needs
    * to be large enough to hold the contents of every possible wavefront in
    * every core of the GPU. Because it allocates space via the internal
    * wavefront ID which is shared between all currently executing shaders,
    * the same buffer can be reused by all shaders, as long as all shaders
    * sharing the same buffer use the exact same configuration. There are two
    * inputs to the configuration, the amount of per-fiber space and whether
    * to use the newer per-wave or older per-fiber layout. We only ever
    * increase the size, and shaders with a smaller size requirement simply
    * use the larger existing buffer, so that we only need to keep track of
    * one buffer and its size, but we still need to keep track of per-fiber
    * and per-wave buffers separately so that we never use the same buffer
    * for different layouts. pvtmem[0] is for per-fiber, and pvtmem[1] is for
    * per-wave.
    */
   struct {
      struct fd_bo *bo;
      uint32_t per_fiber_size;
   } pvtmem[2] dt;

   /* maps per-shader-stage state plus variant key to hw
    * program stateobj:
    */
   struct ir3_cache *shader_cache;

   struct pipe_debug_callback debug;

   struct u_trace_context trace_context dt;

#ifdef HAVE_PERFETTO
   struct fd_perfetto_state perfetto;
#endif

   /*
    * Counter to generate submit-ids
    */
   uint32_t submit_count;

   /* Called on rebind_resource() for any per-gen cleanup required: */
   void (*rebind_resource)(struct fd_context *ctx, struct fd_resource *rsc) dt;

   /* GMEM/tile handling fxns: */
   void (*emit_tile_init)(struct fd_batch *batch) dt;
   void (*emit_tile_prep)(struct fd_batch *batch,
                          const struct fd_tile *tile) dt;
   void (*emit_tile_mem2gmem)(struct fd_batch *batch,
                              const struct fd_tile *tile) dt;
   void (*emit_tile_renderprep)(struct fd_batch *batch,
                                const struct fd_tile *tile) dt;
   void (*emit_tile)(struct fd_batch *batch, const struct fd_tile *tile) dt;
   void (*emit_tile_gmem2mem)(struct fd_batch *batch,
                              const struct fd_tile *tile) dt;
   void (*emit_tile_fini)(struct fd_batch *batch) dt; /* optional */

   /* optional, for GMEM bypass: */
   void (*emit_sysmem_prep)(struct fd_batch *batch) dt;
   void (*emit_sysmem_fini)(struct fd_batch *batch) dt;

   /* draw: */
   bool (*draw_vbo)(struct fd_context *ctx, const struct pipe_draw_info *info,
                    unsigned drawid_offset,
                    const struct pipe_draw_indirect_info *indirect,
                    const struct pipe_draw_start_count_bias *draw,
                    unsigned index_offset) dt;
   bool (*clear)(struct fd_context *ctx, unsigned buffers,
                 const union pipe_color_union *color, double depth,
                 unsigned stencil) dt;

   /* compute: */
   void (*launch_grid)(struct fd_context *ctx,
                       const struct pipe_grid_info *info) dt;

   /* query: */
   struct fd_query *(*create_query)(struct fd_context *ctx, unsigned query_type,
                                    unsigned index);
   void (*query_prepare)(struct fd_batch *batch, uint32_t num_tiles) dt;
   void (*query_prepare_tile)(struct fd_batch *batch, uint32_t n,
                              struct fd_ringbuffer *ring) dt;
   void (*query_update_batch)(struct fd_batch *batch, bool disable_all) dt;

   /* blitter: */
   bool (*blit)(struct fd_context *ctx, const struct pipe_blit_info *info) dt;
   void (*clear_ubwc)(struct fd_batch *batch, struct fd_resource *rsc) dt;

   /* uncompress resource, if necessary, to use as the specified format: */
   void (*validate_format)(struct fd_context *ctx, struct fd_resource *rsc,
                           enum pipe_format format) dt;

   /* handling for barriers: */
   void (*framebuffer_barrier)(struct fd_context *ctx) dt;

   /* logger: */
   void (*record_timestamp)(struct fd_ringbuffer *ring, struct fd_bo *bo,
                            unsigned offset);
   uint64_t (*ts_to_ns)(uint64_t ts);

   /*
    * Common pre-cooked VBO state (used for a3xx and later):
    */

   /* for clear/gmem->mem vertices, and mem->gmem */
   struct pipe_resource *solid_vbuf;

   /* for mem->gmem tex coords: */
   struct pipe_resource *blit_texcoord_vbuf;

   /* vertex state for solid_vbuf:
    *    - solid_vbuf / 12 / R32G32B32_FLOAT
    */
   struct fd_vertex_state solid_vbuf_state;

   /* vertex state for blit_prog:
    *    - blit_texcoord_vbuf / 8 / R32G32_FLOAT
    *    - solid_vbuf / 12 / R32G32B32_FLOAT
    */
   struct fd_vertex_state blit_vbuf_state;

   /*
    * Info about state of previous draw, for state that comes from
    * pipe_draw_info (ie. not part of a CSO).  This allows us to
    * skip some register emit when the state doesn't change from
    * draw-to-draw
    */
   struct {
      bool dirty; /* last draw state unknown */
      bool primitive_restart;
      uint32_t index_start;
      uint32_t instance_start;
      uint32_t restart_index;
      uint32_t streamout_mask;

      /* some state changes require a different shader variant.  Keep
       * track of this so we know when we need to re-emit shader state
       * due to variant change.  See ir3_fixup_shader_state()
       *
       * (used for a3xx+, NULL otherwise)
       */
      struct ir3_shader_key *key;

   } last dt;
};

static inline struct fd_context *
fd_context(struct pipe_context *pctx)
{
   return (struct fd_context *)pctx;
}

static inline struct fd_stream_output_target *
fd_stream_output_target(struct pipe_stream_output_target *target)
{
   return (struct fd_stream_output_target *)target;
}

/**
 * Does the dirty state require resource tracking, ie. in general
 * does it reference some resource.  There are some special cases:
 *
 * - FD_DIRTY_CONST can reference a resource, but cb0 is handled
 *   specially: if it is not a user-buffer, we expect it to be
 *   coming from const_uploader, so we can make some assumptions
 *   that future transfer_map will be UNSYNCHRONIZED
 * - FD_DIRTY_ZSA controls how the framebuffer is accessed
 * - FD_DIRTY_BLEND needs to update the batch's GMEM reason
 *
 * TODO if we can make assumptions that framebuffer state is bound
 * first, before blend/zsa/etc state we can move some of the ZSA/
 * BLEND state handling from draw time to bind time.  I think this
 * is true of mesa/st, perhaps we can just document it to be a
 * frontend requirement?
 */
static inline bool
fd_context_dirty_resource(enum fd_dirty_3d_state dirty)
{
   return dirty & (FD_DIRTY_FRAMEBUFFER | FD_DIRTY_ZSA | FD_DIRTY_BLEND |
                   FD_DIRTY_SSBO | FD_DIRTY_IMAGE | FD_DIRTY_VTXBUF |
                   FD_DIRTY_TEX | FD_DIRTY_STREAMOUT);
}

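/* Helper to OR bits into an enum-typed dirty field.  In C++ the explicit
 * casts are needed because bitwise-OR on enum operands yields an int,
 * which cannot be implicitly converted back to the enum type.
 */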
#ifdef __cplusplus
#define or_dirty(d, mask)                                                      \
   do {                                                                        \
      decltype(mask) _d = (d);                                                 \
      d = (decltype(mask))(_d | (mask));                                       \
   } while (0)
#else
#define or_dirty(d, mask)                                                      \
   do {                                                                        \
      d |= (mask);                                                             \
   } while (0)
#endif

/* Mark specified non-shader-stage related state as dirty: */
static inline void
fd_context_dirty(struct fd_context *ctx, enum fd_dirty_3d_state dirty) assert_dt
{
   assert(util_is_power_of_two_nonzero(dirty));
   STATIC_ASSERT(ffs(dirty) <= ARRAY_SIZE(ctx->gen_dirty_map));

   ctx->gen_dirty |= ctx->gen_dirty_map[ffs(dirty) - 1];

   if (fd_context_dirty_resource(dirty))
      or_dirty(dirty, FD_DIRTY_RESOURCE);

   or_dirty(ctx->dirty, dirty);
}
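
/* Illustrative usage (a sketch, not part of this header): a pipe_context
 * state-bind entrypoint would typically stash the new state and then flag
 * the corresponding dirty bit, roughly:
 *
 *    static void
 *    fd_set_blend_color(struct pipe_context *pctx,
 *                       const struct pipe_blend_color *blend_color)
 *    {
 *       struct fd_context *ctx = fd_context(pctx);
 *       ctx->blend_color = *blend_color;
 *       fd_context_dirty(ctx, FD_DIRTY_BLEND_COLOR);
 *    }
 *
 * The draw path then consumes ctx->dirty / ctx->gen_dirty to decide which
 * state groups need to be re-emitted.
 */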

static inline void
fd_context_dirty_shader(struct fd_context *ctx, enum pipe_shader_type shader,
                        enum fd_dirty_shader_state dirty) assert_dt
{
   const enum fd_dirty_3d_state map[] = {
      FD_DIRTY_PROG, FD_DIRTY_CONST, FD_DIRTY_TEX,
      FD_DIRTY_SSBO, FD_DIRTY_IMAGE,
   };

   /* Need to update the table above if these shift: */
   STATIC_ASSERT(FD_DIRTY_SHADER_PROG == BIT(0));
   STATIC_ASSERT(FD_DIRTY_SHADER_CONST == BIT(1));
   STATIC_ASSERT(FD_DIRTY_SHADER_TEX == BIT(2));
   STATIC_ASSERT(FD_DIRTY_SHADER_SSBO == BIT(3));
   STATIC_ASSERT(FD_DIRTY_SHADER_IMAGE == BIT(4));

   assert(util_is_power_of_two_nonzero(dirty));
   assert(ffs(dirty) <= ARRAY_SIZE(map));

   ctx->gen_dirty |= ctx->gen_dirty_shader_map[shader][ffs(dirty) - 1];

   or_dirty(ctx->dirty_shader[shader], dirty);
   fd_context_dirty(ctx, map[ffs(dirty) - 1]);
}

/* mark all state dirty: */
static inline void
fd_context_all_dirty(struct fd_context *ctx) assert_dt
{
   ctx->last.dirty = true;
   ctx->dirty = (enum fd_dirty_3d_state) ~0;

   /* NOTE: don't use ~0 for gen_dirty, because the gen specific
    * emit code will loop over all the bits:
    */
   ctx->gen_dirty = ctx->gen_all_dirty;

   for (unsigned i = 0; i < PIPE_SHADER_TYPES; i++)
      ctx->dirty_shader[i] = (enum fd_dirty_shader_state) ~0;
}

static inline void
fd_context_all_clean(struct fd_context *ctx) assert_dt
{
   ctx->last.dirty = false;
   ctx->dirty = (enum fd_dirty_3d_state)0;
   ctx->gen_dirty = 0;
   for (unsigned i = 0; i < PIPE_SHADER_TYPES; i++) {
      /* don't mark compute state as clean, since it is not emitted
       * during a normal draw call.  For the places that call _all_dirty(),
       * it is safe to mark compute state dirty as well, but the
       * inverse is not true.
       */
      if (i == PIPE_SHADER_COMPUTE)
         continue;
      ctx->dirty_shader[i] = (enum fd_dirty_shader_state)0;
   }
}

/**
 * Add mapping between global dirty bit and generation specific dirty
 * bit.
 */
static inline void
fd_context_add_map(struct fd_context *ctx, enum fd_dirty_3d_state dirty,
                   uint32_t gen_dirty)
{
   u_foreach_bit (b, dirty) {
      ctx->gen_dirty_map[b] |= gen_dirty;
   }
   ctx->gen_all_dirty |= gen_dirty;
}

/**
 * Add mapping between shader stage specific dirty bit and generation
 * specific dirty bit
 */
static inline void
fd_context_add_shader_map(struct fd_context *ctx, enum pipe_shader_type shader,
                          enum fd_dirty_shader_state dirty, uint32_t gen_dirty)
{
   u_foreach_bit (b, dirty) {
      ctx->gen_dirty_shader_map[shader][b] |= gen_dirty;
   }
   ctx->gen_all_dirty |= gen_dirty;
}
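
/* Illustrative example (the BACKEND_GROUP_* names are hypothetical): a
 * generation backend would register its mappings at context-create time so
 * that generic dirty bits can be translated into its own emit-group mask:
 *
 *    fd_context_add_map(ctx, FD_DIRTY_VTXSTATE | FD_DIRTY_VTXBUF,
 *                       BACKEND_GROUP_VTXSTATE);
 *    fd_context_add_shader_map(ctx, PIPE_SHADER_FRAGMENT,
 *                              FD_DIRTY_SHADER_TEX, BACKEND_GROUP_FS_TEX);
 *
 * fd_context_dirty() / fd_context_dirty_shader() then accumulate the mapped
 * groups into ctx->gen_dirty for the backend's emit code to consume.
 */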

static inline struct pipe_scissor_state *
fd_context_get_scissor(struct fd_context *ctx) assert_dt
{
   return ctx->current_scissor;
}

void fd_context_switch_from(struct fd_context *ctx) assert_dt;
void fd_context_switch_to(struct fd_context *ctx,
                          struct fd_batch *batch) assert_dt;
struct fd_batch *fd_context_batch(struct fd_context *ctx) assert_dt;
struct fd_batch *fd_context_batch_locked(struct fd_context *ctx) assert_dt;

void fd_context_setup_common_vbos(struct fd_context *ctx);
void fd_context_cleanup_common_vbos(struct fd_context *ctx);
void fd_emit_string(struct fd_ringbuffer *ring, const char *string, int len);
void fd_emit_string5(struct fd_ringbuffer *ring, const char *string, int len);

struct pipe_context *fd_context_init(struct fd_context *ctx,
                                     struct pipe_screen *pscreen,
                                     void *priv, unsigned flags);
struct pipe_context *fd_context_init_tc(struct pipe_context *pctx,
                                        unsigned flags);

void fd_context_destroy(struct pipe_context *pctx) assert_dt;

#ifdef __cplusplus
}
#endif

#endif /* FREEDRENO_CONTEXT_H_ */