1 /**************************************************************************
2  *
3  * Copyright 2017 Advanced Micro Devices, Inc.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * on the rights to use, copy, modify, merge, publish, distribute, sub
10  * license, and/or sell copies of the Software, and to permit persons to whom
11  * the Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice (including the next
14  * paragraph) shall be included in all copies or substantial portions of the
15  * Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
20  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
21  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
22  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
23  * USE OR OTHER DEALINGS IN THE SOFTWARE.
24  *
25  **************************************************************************/
26 
27 #include "util/u_threaded_context.h"
28 #include "util/u_cpu_detect.h"
29 #include "util/format/u_format.h"
30 #include "util/u_inlines.h"
31 #include "util/u_memory.h"
32 #include "util/u_upload_mgr.h"
33 #include "driver_trace/tr_context.h"
34 #include "util/log.h"
35 #include "compiler/shader_info.h"
36 
37 #if TC_DEBUG >= 1
38 #define tc_assert assert
39 #else
40 #define tc_assert(x)
41 #endif
42 
43 #if TC_DEBUG >= 2
44 #define tc_printf mesa_logi
45 #define tc_asprintf asprintf
46 #define tc_strcmp strcmp
47 #else
48 #define tc_printf(...)
49 #define tc_asprintf(...) 0
50 #define tc_strcmp(...) 0
51 #endif
52 
53 #define TC_SENTINEL 0x5ca1ab1e
54 
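/* Each queued call is identified by one of these IDs. The list is generated
 * from u_threaded_context_calls.h via the CALL() macro.
 */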
55 enum tc_call_id {
56 #define CALL(name) TC_CALL_##name,
57 #include "u_threaded_context_calls.h"
58 #undef CALL
59    TC_NUM_CALLS,
60 };
61 
62 #if TC_DEBUG >= 3
63 static const char *tc_call_names[] = {
64 #define CALL(name) #name,
65 #include "u_threaded_context_calls.h"
66 #undef CALL
67 };
68 #endif
69 
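/* Executes one queued call on the driver thread and returns the call size
 * in 64-bit slots, so the batch iterator can advance to the next call.
 */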
70 typedef uint16_t (*tc_execute)(struct pipe_context *pipe, void *call, uint64_t *last);
71 
72 static const tc_execute execute_func[TC_NUM_CALLS];
73 
74 static void
75 tc_buffer_subdata(struct pipe_context *_pipe,
76                   struct pipe_resource *resource,
77                   unsigned usage, unsigned offset,
78                   unsigned size, const void *data);
79 
80 static void
81 tc_batch_check(UNUSED struct tc_batch *batch)
82 {
83    tc_assert(batch->sentinel == TC_SENTINEL);
84    tc_assert(batch->num_total_slots <= TC_SLOTS_PER_BATCH);
85 }
86 
87 static void
88 tc_debug_check(struct threaded_context *tc)
89 {
90    for (unsigned i = 0; i < TC_MAX_BATCHES; i++) {
91       tc_batch_check(&tc->batch_slots[i]);
92       tc_assert(tc->batch_slots[i].tc == tc);
93    }
94 }
95 
96 static void
97 tc_set_driver_thread(struct threaded_context *tc)
98 {
99 #ifndef NDEBUG
100    tc->driver_thread = util_get_thread_id();
101 #endif
102 }
103 
104 static void
105 tc_clear_driver_thread(struct threaded_context *tc)
106 {
107 #ifndef NDEBUG
108    memset(&tc->driver_thread, 0, sizeof(tc->driver_thread));
109 #endif
110 }
111 
112 static void *
113 to_call_check(void *ptr, unsigned num_slots)
114 {
115 #if TC_DEBUG >= 1
116    struct tc_call_base *call = ptr;
117    tc_assert(call->num_slots == num_slots);
118 #endif
119    return ptr;
120 }
121 #define to_call(ptr, type) ((struct type *)to_call_check((void *)(ptr), call_size(type)))
122 
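/* Batch memory is an array of 64-bit slots; every call struct is rounded up
 * to a whole number of slots.
 */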
123 #define size_to_slots(size)      DIV_ROUND_UP(size, 8)
124 #define call_size(type)          size_to_slots(sizeof(struct type))
125 #define call_size_with_slots(type, num_slots) size_to_slots( \
126    sizeof(struct type) + sizeof(((struct type*)NULL)->slot[0]) * (num_slots))
127 #define get_next_call(ptr, type) ((struct type*)((uint64_t*)ptr + call_size(type)))
128 
129 /* Assign src to dst while dst is uninitialized. */
130 static inline void
131 tc_set_resource_reference(struct pipe_resource **dst, struct pipe_resource *src)
132 {
133    *dst = src;
134    pipe_reference(NULL, &src->reference); /* only increment refcount */
135 }
136 
137 /* Assign src to dst while dst is uninitialized. */
138 static inline void
139 tc_set_vertex_state_reference(struct pipe_vertex_state **dst,
140                               struct pipe_vertex_state *src)
141 {
142    *dst = src;
143    pipe_reference(NULL, &src->reference); /* only increment refcount */
144 }
145 
146 /* Unreference dst but don't touch the dst pointer. */
147 static inline void
148 tc_drop_resource_reference(struct pipe_resource *dst)
149 {
150    if (pipe_reference(&dst->reference, NULL)) /* only decrement refcount */
151       pipe_resource_destroy(dst);
152 }
153 
154 /* Unreference dst but don't touch the dst pointer. */
155 static inline void
156 tc_drop_surface_reference(struct pipe_surface *dst)
157 {
158    if (pipe_reference(&dst->reference, NULL)) /* only decrement refcount */
159       dst->context->surface_destroy(dst->context, dst);
160 }
161 
162 /* Unreference dst but don't touch the dst pointer. */
163 static inline void
164 tc_drop_so_target_reference(struct pipe_stream_output_target *dst)
165 {
166    if (pipe_reference(&dst->reference, NULL)) /* only decrement refcount */
167       dst->context->stream_output_target_destroy(dst->context, dst);
168 }
169 
170 /**
171  * Subtract the given number of references.
172  */
173 static inline void
174 tc_drop_vertex_state_references(struct pipe_vertex_state *dst, int num_refs)
175 {
176    int count = p_atomic_add_return(&dst->reference.count, -num_refs);
177 
178    assert(count >= 0);
179    /* Underflows shouldn't happen, but let's be safe. */
180    if (count <= 0)
181       dst->screen->vertex_state_destroy(dst->screen, dst);
182 }
183 
184 /* We don't want to read or write min_index and max_index, because
185  * it shouldn't be needed by drivers at this point.
186  */
187 #define DRAW_INFO_SIZE_WITHOUT_MIN_MAX_INDEX \
188    offsetof(struct pipe_draw_info, min_index)
189 
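/* The util_queue job that executes all calls recorded in a batch on the
 * driver thread.
 */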
190 static void
191 tc_batch_execute(void *job, UNUSED void *gdata, int thread_index)
192 {
193    struct tc_batch *batch = job;
194    struct pipe_context *pipe = batch->tc->pipe;
195    uint64_t *last = &batch->slots[batch->num_total_slots];
196 
197    tc_batch_check(batch);
198    tc_set_driver_thread(batch->tc);
199 
200    assert(!batch->token);
201 
202    for (uint64_t *iter = batch->slots; iter != last;) {
203       struct tc_call_base *call = (struct tc_call_base *)iter;
204 
205       tc_assert(call->sentinel == TC_SENTINEL);
206 
207 #if TC_DEBUG >= 3
208       tc_printf("CALL: %s", tc_call_names[call->call_id]);
209 #endif
210 
211       iter += execute_func[call->call_id](pipe, call, last);
212    }
213 
214    /* Add the fence to the list of fences for the driver to signal at the next
215     * flush, which we use for tracking which buffers are referenced by
216     * an unflushed command buffer.
217     */
218    struct threaded_context *tc = batch->tc;
219    struct util_queue_fence *fence =
220       &tc->buffer_lists[batch->buffer_list_index].driver_flushed_fence;
221 
222    if (tc->options.driver_calls_flush_notify) {
223       tc->signal_fences_next_flush[tc->num_signal_fences_next_flush++] = fence;
224 
225       /* Since our buffer lists are chained as a ring, we need to flush
226        * the context twice as we go around the ring to make the driver signal
227        * the buffer list fences, so that the producer thread can reuse the buffer
228        * list structures for the next batches without waiting.
229        */
230       unsigned half_ring = TC_MAX_BUFFER_LISTS / 2;
231       if (batch->buffer_list_index % half_ring == half_ring - 1)
232          pipe->flush(pipe, NULL, PIPE_FLUSH_ASYNC);
233    } else {
234       util_queue_fence_signal(fence);
235    }
236 
237    tc_clear_driver_thread(batch->tc);
238    tc_batch_check(batch);
239    batch->num_total_slots = 0;
240 }
241 
242 static void
243 tc_begin_next_buffer_list(struct threaded_context *tc)
244 {
245    tc->next_buf_list = (tc->next_buf_list + 1) % TC_MAX_BUFFER_LISTS;
246 
247    tc->batch_slots[tc->next].buffer_list_index = tc->next_buf_list;
248 
249    /* Clear the buffer list in the new empty batch. */
250    struct tc_buffer_list *buf_list = &tc->buffer_lists[tc->next_buf_list];
251    assert(util_queue_fence_is_signalled(&buf_list->driver_flushed_fence));
252    util_queue_fence_reset(&buf_list->driver_flushed_fence); /* set to unsignalled */
253    BITSET_ZERO(buf_list->buffer_list);
254 
255    tc->add_all_gfx_bindings_to_buffer_list = true;
256    tc->add_all_compute_bindings_to_buffer_list = true;
257 }
258 
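/* Hand the current batch off to the driver thread and switch to the next
 * batch slot and buffer list.
 */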
259 static void
260 tc_batch_flush(struct threaded_context *tc)
261 {
262    struct tc_batch *next = &tc->batch_slots[tc->next];
263 
264    tc_assert(next->num_total_slots != 0);
265    tc_batch_check(next);
266    tc_debug_check(tc);
267    tc->bytes_mapped_estimate = 0;
268    p_atomic_add(&tc->num_offloaded_slots, next->num_total_slots);
269 
270    if (next->token) {
271       next->token->tc = NULL;
272       tc_unflushed_batch_token_reference(&next->token, NULL);
273    }
274 
275    util_queue_add_job(&tc->queue, next, &next->fence, tc_batch_execute,
276                       NULL, 0);
277    tc->last = tc->next;
278    tc->next = (tc->next + 1) % TC_MAX_BATCHES;
279    tc_begin_next_buffer_list(tc);
280 }
281 
282 /* This is the function that adds variable-sized calls into the current
283  * batch. It also flushes the batch if there is not enough space there.
284  * All other higher-level "add" functions use it.
285  */
286 static void *
287 tc_add_sized_call(struct threaded_context *tc, enum tc_call_id id,
288                   unsigned num_slots)
289 {
290    struct tc_batch *next = &tc->batch_slots[tc->next];
291    assert(num_slots <= TC_SLOTS_PER_BATCH);
292    tc_debug_check(tc);
293 
294    if (unlikely(next->num_total_slots + num_slots > TC_SLOTS_PER_BATCH)) {
295       tc_batch_flush(tc);
296       next = &tc->batch_slots[tc->next];
297       tc_assert(next->num_total_slots == 0);
298    }
299 
300    tc_assert(util_queue_fence_is_signalled(&next->fence));
301 
302    struct tc_call_base *call = (struct tc_call_base*)&next->slots[next->num_total_slots];
303    next->num_total_slots += num_slots;
304 
305 #if !defined(NDEBUG) && TC_DEBUG >= 1
306    call->sentinel = TC_SENTINEL;
307 #endif
308    call->call_id = id;
309    call->num_slots = num_slots;
310 
311 #if TC_DEBUG >= 3
312    tc_printf("ENQUEUE: %s", tc_call_names[id]);
313 #endif
314 
315    tc_debug_check(tc);
316    return call;
317 }
318 
319 #define tc_add_call(tc, execute, type) \
320    ((struct type*)tc_add_sized_call(tc, execute, call_size(type)))
321 
322 #define tc_add_slot_based_call(tc, execute, type, num_slots) \
323    ((struct type*)tc_add_sized_call(tc, execute, \
324                                     call_size_with_slots(type, num_slots)))
325 
326 static bool
327 tc_is_sync(struct threaded_context *tc)
328 {
329    struct tc_batch *last = &tc->batch_slots[tc->last];
330    struct tc_batch *next = &tc->batch_slots[tc->next];
331 
332    return util_queue_fence_is_signalled(&last->fence) &&
333           !next->num_total_slots;
334 }
335 
336 static void
337 _tc_sync(struct threaded_context *tc, UNUSED const char *info, UNUSED const char *func)
338 {
339    struct tc_batch *last = &tc->batch_slots[tc->last];
340    struct tc_batch *next = &tc->batch_slots[tc->next];
341    bool synced = false;
342 
343    tc_debug_check(tc);
344 
345    /* Only wait for queued calls... */
346    if (!util_queue_fence_is_signalled(&last->fence)) {
347       util_queue_fence_wait(&last->fence);
348       synced = true;
349    }
350 
351    tc_debug_check(tc);
352 
353    if (next->token) {
354       next->token->tc = NULL;
355       tc_unflushed_batch_token_reference(&next->token, NULL);
356    }
357 
358    /* .. and execute unflushed calls directly. */
359    if (next->num_total_slots) {
360       p_atomic_add(&tc->num_direct_slots, next->num_total_slots);
361       tc->bytes_mapped_estimate = 0;
362       tc_batch_execute(next, NULL, 0);
363       tc_begin_next_buffer_list(tc);
364       synced = true;
365    }
366 
367    if (synced) {
368       p_atomic_inc(&tc->num_syncs);
369 
370       if (tc_strcmp(func, "tc_destroy") != 0) {
371          tc_printf("sync %s %s", func, info);
372       }
373    }
374 
375    tc_debug_check(tc);
376 }
377 
378 #define tc_sync(tc) _tc_sync(tc, "", __func__)
379 #define tc_sync_msg(tc, info) _tc_sync(tc, info, __func__)
380 
381 /**
382  * Call this from fence_finish for same-context fence waits of deferred fences
383  * that haven't been flushed yet.
384  *
385  * The passed pipe_context must be the one passed to pipe_screen::fence_finish,
386  * i.e., the wrapped one.
387  */
388 void
389 threaded_context_flush(struct pipe_context *_pipe,
390                        struct tc_unflushed_batch_token *token,
391                        bool prefer_async)
392 {
393    struct threaded_context *tc = threaded_context(_pipe);
394 
395    /* This is called from the gallium frontend / application thread. */
396    if (token->tc && token->tc == tc) {
397       struct tc_batch *last = &tc->batch_slots[tc->last];
398 
399       /* Prefer to do the flush in the driver thread if it is already
400        * running. That should be better for cache locality.
401        */
402       if (prefer_async || !util_queue_fence_is_signalled(&last->fence))
403          tc_batch_flush(tc);
404       else
405          tc_sync(token->tc);
406    }
407 }
408 
409 static void
410 tc_add_to_buffer_list(struct tc_buffer_list *next, struct pipe_resource *buf)
411 {
412    uint32_t id = threaded_resource(buf)->buffer_id_unique;
413    BITSET_SET(next->buffer_list, id & TC_BUFFER_ID_MASK);
414 }
415 
416 /* Set a buffer binding and add it to the buffer list. */
417 static void
418 tc_bind_buffer(uint32_t *binding, struct tc_buffer_list *next, struct pipe_resource *buf)
419 {
420    uint32_t id = threaded_resource(buf)->buffer_id_unique;
421    *binding = id;
422    BITSET_SET(next->buffer_list, id & TC_BUFFER_ID_MASK);
423 }
424 
425 /* Reset a buffer binding. */
426 static void
427 tc_unbind_buffer(uint32_t *binding)
428 {
429    *binding = 0;
430 }
431 
432 /* Reset a range of buffer binding slots. */
433 static void
434 tc_unbind_buffers(uint32_t *binding, unsigned count)
435 {
436    if (count)
437       memset(binding, 0, sizeof(*binding) * count);
438 }
439 
440 static void
441 tc_add_bindings_to_buffer_list(BITSET_WORD *buffer_list, const uint32_t *bindings,
442                                unsigned count)
443 {
444    for (unsigned i = 0; i < count; i++) {
445       if (bindings[i])
446          BITSET_SET(buffer_list, bindings[i] & TC_BUFFER_ID_MASK);
447    }
448 }
449 
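/* Replace old_id with new_id in a binding array. Returns the number of
 * rewritten bindings.
 */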
450 static unsigned
451 tc_rebind_bindings(uint32_t old_id, uint32_t new_id, uint32_t *bindings,
452                    unsigned count)
453 {
454    unsigned rebind_count = 0;
455 
456    for (unsigned i = 0; i < count; i++) {
457       if (bindings[i] == old_id) {
458          bindings[i] = new_id;
459          rebind_count++;
460       }
461    }
462    return rebind_count;
463 }
464 
465 static void
466 tc_add_shader_bindings_to_buffer_list(struct threaded_context *tc,
467                                       BITSET_WORD *buffer_list,
468                                       enum pipe_shader_type shader)
469 {
470    tc_add_bindings_to_buffer_list(buffer_list, tc->const_buffers[shader],
471                                   tc->max_const_buffers);
472    if (tc->seen_shader_buffers[shader]) {
473       tc_add_bindings_to_buffer_list(buffer_list, tc->shader_buffers[shader],
474                                      tc->max_shader_buffers);
475    }
476    if (tc->seen_image_buffers[shader]) {
477       tc_add_bindings_to_buffer_list(buffer_list, tc->image_buffers[shader],
478                                      tc->max_images);
479    }
480    if (tc->seen_sampler_buffers[shader]) {
481       tc_add_bindings_to_buffer_list(buffer_list, tc->sampler_buffers[shader],
482                                      tc->max_samplers);
483    }
484 }
485 
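/* Rebind a buffer in all per-shader binding arrays (constant buffers, SSBOs,
 * images, sampler buffers) and accumulate the affected binding types in
 * rebind_mask. Returns the number of rewritten bindings.
 */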
486 static unsigned
487 tc_rebind_shader_bindings(struct threaded_context *tc, uint32_t old_id,
488                           uint32_t new_id, enum pipe_shader_type shader, uint32_t *rebind_mask)
489 {
490    unsigned ubo = 0, ssbo = 0, img = 0, sampler = 0;
491 
492    ubo = tc_rebind_bindings(old_id, new_id, tc->const_buffers[shader],
493                             tc->max_const_buffers);
494    if (ubo)
495       *rebind_mask |= BITFIELD_BIT(TC_BINDING_UBO_VS) << shader;
496    if (tc->seen_shader_buffers[shader]) {
497       ssbo = tc_rebind_bindings(old_id, new_id, tc->shader_buffers[shader],
498                                 tc->max_shader_buffers);
499       if (ssbo)
500          *rebind_mask |= BITFIELD_BIT(TC_BINDING_SSBO_VS) << shader;
501    }
502    if (tc->seen_image_buffers[shader]) {
503       img = tc_rebind_bindings(old_id, new_id, tc->image_buffers[shader],
504                                tc->max_images);
505       if (img)
506          *rebind_mask |= BITFIELD_BIT(TC_BINDING_IMAGE_VS) << shader;
507    }
508    if (tc->seen_sampler_buffers[shader]) {
509       sampler = tc_rebind_bindings(old_id, new_id, tc->sampler_buffers[shader],
510                                    tc->max_samplers);
511       if (sampler)
512          *rebind_mask |= BITFIELD_BIT(TC_BINDING_SAMPLERVIEW_VS) << shader;
513    }
514    return ubo + ssbo + img + sampler;
515 }
516 
517 /* Add all bound buffers used by VS/TCS/TES/GS/FS to the buffer list.
518  * This is called by the first draw call in a batch when we want to inherit
519  * all bindings set by the previous batch.
520  */
521 static void
522 tc_add_all_gfx_bindings_to_buffer_list(struct threaded_context *tc)
523 {
524    BITSET_WORD *buffer_list = tc->buffer_lists[tc->next_buf_list].buffer_list;
525 
526    tc_add_bindings_to_buffer_list(buffer_list, tc->vertex_buffers, tc->max_vertex_buffers);
527    if (tc->seen_streamout_buffers)
528       tc_add_bindings_to_buffer_list(buffer_list, tc->streamout_buffers, PIPE_MAX_SO_BUFFERS);
529 
530    tc_add_shader_bindings_to_buffer_list(tc, buffer_list, PIPE_SHADER_VERTEX);
531    tc_add_shader_bindings_to_buffer_list(tc, buffer_list, PIPE_SHADER_FRAGMENT);
532 
533    if (tc->seen_tcs)
534       tc_add_shader_bindings_to_buffer_list(tc, buffer_list, PIPE_SHADER_TESS_CTRL);
535    if (tc->seen_tes)
536       tc_add_shader_bindings_to_buffer_list(tc, buffer_list, PIPE_SHADER_TESS_EVAL);
537    if (tc->seen_gs)
538       tc_add_shader_bindings_to_buffer_list(tc, buffer_list, PIPE_SHADER_GEOMETRY);
539 
540    tc->add_all_gfx_bindings_to_buffer_list = false;
541 }
542 
543 /* Add all bound buffers used by compute to the buffer list.
544  * This is called by the first compute call in a batch when we want to inherit
545  * all bindings set by the previous batch.
546  */
547 static void
548 tc_add_all_compute_bindings_to_buffer_list(struct threaded_context *tc)
549 {
550    BITSET_WORD *buffer_list = tc->buffer_lists[tc->next_buf_list].buffer_list;
551 
552    tc_add_shader_bindings_to_buffer_list(tc, buffer_list, PIPE_SHADER_COMPUTE);
553    tc->add_all_compute_bindings_to_buffer_list = false;
554 }
555 
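/* Rewrite every binding of the buffer identified by old_id to new_id (used
 * when a buffer's storage is replaced) and add the new ID to the current
 * buffer list if anything was rebound.
 */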
556 static unsigned
557 tc_rebind_buffer(struct threaded_context *tc, uint32_t old_id, uint32_t new_id, uint32_t *rebind_mask)
558 {
559    unsigned vbo = 0, so = 0;
560 
561    vbo = tc_rebind_bindings(old_id, new_id, tc->vertex_buffers,
562                             tc->max_vertex_buffers);
563    if (vbo)
564       *rebind_mask |= BITFIELD_BIT(TC_BINDING_VERTEX_BUFFER);
565 
566    if (tc->seen_streamout_buffers) {
567       so = tc_rebind_bindings(old_id, new_id, tc->streamout_buffers,
568                               PIPE_MAX_SO_BUFFERS);
569       if (so)
570          *rebind_mask |= BITFIELD_BIT(TC_BINDING_STREAMOUT_BUFFER);
571    }
572    unsigned rebound = vbo + so;
573 
574    rebound += tc_rebind_shader_bindings(tc, old_id, new_id, PIPE_SHADER_VERTEX, rebind_mask);
575    rebound += tc_rebind_shader_bindings(tc, old_id, new_id, PIPE_SHADER_FRAGMENT, rebind_mask);
576 
577    if (tc->seen_tcs)
578       rebound += tc_rebind_shader_bindings(tc, old_id, new_id, PIPE_SHADER_TESS_CTRL, rebind_mask);
579    if (tc->seen_tes)
580       rebound += tc_rebind_shader_bindings(tc, old_id, new_id, PIPE_SHADER_TESS_EVAL, rebind_mask);
581    if (tc->seen_gs)
582       rebound += tc_rebind_shader_bindings(tc, old_id, new_id, PIPE_SHADER_GEOMETRY, rebind_mask);
583 
584    rebound += tc_rebind_shader_bindings(tc, old_id, new_id, PIPE_SHADER_COMPUTE, rebind_mask);
585 
586    if (rebound)
587       BITSET_SET(tc->buffer_lists[tc->next_buf_list].buffer_list, new_id & TC_BUFFER_ID_MASK);
588    return rebound;
589 }
590 
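/* Return true if any binding slot selected by binding_mask holds the given
 * buffer ID.
 */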
591 static bool
592 tc_is_buffer_bound_with_mask(uint32_t id, uint32_t *bindings, unsigned binding_mask)
593 {
594    while (binding_mask) {
595       if (bindings[u_bit_scan(&binding_mask)] == id)
596          return true;
597    }
598    return false;
599 }
600 
601 static bool
602 tc_is_buffer_shader_bound_for_write(struct threaded_context *tc, uint32_t id,
603                                     enum pipe_shader_type shader)
604 {
605    if (tc->seen_shader_buffers[shader] &&
606        tc_is_buffer_bound_with_mask(id, tc->shader_buffers[shader],
607                                     tc->shader_buffers_writeable_mask[shader]))
608       return true;
609 
610    if (tc->seen_image_buffers[shader] &&
611        tc_is_buffer_bound_with_mask(id, tc->image_buffers[shader],
612                                     tc->image_buffers_writeable_mask[shader]))
613       return true;
614 
615    return false;
616 }
617 
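/* Return true if the buffer is bound for write in any shader stage or as a
 * streamout target.
 */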
618 static bool
619 tc_is_buffer_bound_for_write(struct threaded_context *tc, uint32_t id)
620 {
621    if (tc->seen_streamout_buffers &&
622        tc_is_buffer_bound_with_mask(id, tc->streamout_buffers,
623                                     BITFIELD_MASK(PIPE_MAX_SO_BUFFERS)))
624       return true;
625 
626    if (tc_is_buffer_shader_bound_for_write(tc, id, PIPE_SHADER_VERTEX) ||
627        tc_is_buffer_shader_bound_for_write(tc, id, PIPE_SHADER_FRAGMENT) ||
628        tc_is_buffer_shader_bound_for_write(tc, id, PIPE_SHADER_COMPUTE))
629       return true;
630 
631    if (tc->seen_tcs &&
632        tc_is_buffer_shader_bound_for_write(tc, id, PIPE_SHADER_TESS_CTRL))
633       return true;
634 
635    if (tc->seen_tes &&
636        tc_is_buffer_shader_bound_for_write(tc, id, PIPE_SHADER_TESS_EVAL))
637       return true;
638 
639    if (tc->seen_gs &&
640        tc_is_buffer_shader_bound_for_write(tc, id, PIPE_SHADER_GEOMETRY))
641       return true;
642 
643    return false;
644 }
645 
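/* Conservatively determine whether a buffer may be busy. Without a driver
 * is_resource_busy callback, buffers are always treated as busy; otherwise a
 * buffer is busy if an unflushed batch references it or the driver says so.
 */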
646 static bool
647 tc_is_buffer_busy(struct threaded_context *tc, struct threaded_resource *tbuf,
648                   unsigned map_usage)
649 {
650    if (!tc->options.is_resource_busy)
651       return true;
652 
653    uint32_t id_hash = tbuf->buffer_id_unique & TC_BUFFER_ID_MASK;
654 
655    for (unsigned i = 0; i < TC_MAX_BUFFER_LISTS; i++) {
656       struct tc_buffer_list *buf_list = &tc->buffer_lists[i];
657 
658       /* If the buffer is referenced by a batch that hasn't been flushed (by tc or the driver),
659        * then the buffer is considered busy. */
660       if (!util_queue_fence_is_signalled(&buf_list->driver_flushed_fence) &&
661           BITSET_TEST(buf_list->buffer_list, id_hash))
662          return true;
663    }
664 
665    /* The buffer isn't referenced by any unflushed batch: we can safely ask to the driver whether
666     * this buffer is busy or not. */
667    return tc->options.is_resource_busy(tc->pipe->screen, tbuf->latest, map_usage);
668 }
669 
670 /**
671  * allow_cpu_storage should be false for user memory and imported buffers.
672  */
673 void
674 threaded_resource_init(struct pipe_resource *res, bool allow_cpu_storage)
675 {
676    struct threaded_resource *tres = threaded_resource(res);
677 
678    tres->latest = &tres->b;
679    tres->cpu_storage = NULL;
680    util_range_init(&tres->valid_buffer_range);
681    tres->is_shared = false;
682    tres->is_user_ptr = false;
683    tres->buffer_id_unique = 0;
684    tres->pending_staging_uploads = 0;
685    util_range_init(&tres->pending_staging_uploads_range);
686 
687    if (allow_cpu_storage &&
688        !(res->flags & (PIPE_RESOURCE_FLAG_MAP_PERSISTENT |
689                        PIPE_RESOURCE_FLAG_SPARSE |
690                        PIPE_RESOURCE_FLAG_ENCRYPTED)) &&
691        /* We need buffer invalidation and buffer busyness tracking for the CPU
692         * storage, which aren't supported with pipe_vertex_state. */
693        !(res->bind & PIPE_BIND_VERTEX_STATE))
694       tres->allow_cpu_storage = true;
695    else
696       tres->allow_cpu_storage = false;
697 }
698 
699 void
700 threaded_resource_deinit(struct pipe_resource *res)
701 {
702    struct threaded_resource *tres = threaded_resource(res);
703 
704    if (tres->latest != &tres->b)
705       pipe_resource_reference(&tres->latest, NULL);
706    util_range_destroy(&tres->valid_buffer_range);
707    util_range_destroy(&tres->pending_staging_uploads_range);
708    align_free(tres->cpu_storage);
709 }
710 
711 struct pipe_context *
712 threaded_context_unwrap_sync(struct pipe_context *pipe)
713 {
714    if (!pipe || !pipe->priv)
715       return pipe;
716 
717    tc_sync(threaded_context(pipe));
718    return (struct pipe_context*)pipe->priv;
719 }
720 
721 
722 /********************************************************************
723  * simple functions
724  */
725 
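/* TC_FUNC1 generates, for a state setter that takes a single value or struct,
 * the queued call struct, the driver-thread execute callback, and the
 * frontend-facing function that records the call.
 */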
726 #define TC_FUNC1(func, qualifier, type, deref, addr, ...) \
727    struct tc_call_##func { \
728       struct tc_call_base base; \
729       type state; \
730    }; \
731    \
732    static uint16_t \
733    tc_call_##func(struct pipe_context *pipe, void *call, uint64_t *last) \
734    { \
735       pipe->func(pipe, addr(to_call(call, tc_call_##func)->state)); \
736       return call_size(tc_call_##func); \
737    } \
738    \
739    static void \
740    tc_##func(struct pipe_context *_pipe, qualifier type deref param) \
741    { \
742       struct threaded_context *tc = threaded_context(_pipe); \
743       struct tc_call_##func *p = (struct tc_call_##func*) \
744                      tc_add_call(tc, TC_CALL_##func, tc_call_##func); \
745       p->state = deref(param); \
746       __VA_ARGS__; \
747    }
748 
749 TC_FUNC1(set_active_query_state, , bool, , )
750 
751 TC_FUNC1(set_blend_color, const, struct pipe_blend_color, *, &)
752 TC_FUNC1(set_stencil_ref, const, struct pipe_stencil_ref, , )
753 TC_FUNC1(set_clip_state, const, struct pipe_clip_state, *, &)
754 TC_FUNC1(set_sample_mask, , unsigned, , )
755 TC_FUNC1(set_min_samples, , unsigned, , )
756 TC_FUNC1(set_polygon_stipple, const, struct pipe_poly_stipple, *, &)
757 
758 TC_FUNC1(texture_barrier, , unsigned, , )
759 TC_FUNC1(memory_barrier, , unsigned, , )
760 TC_FUNC1(delete_texture_handle, , uint64_t, , )
761 TC_FUNC1(delete_image_handle, , uint64_t, , )
762 TC_FUNC1(set_frontend_noop, , bool, , )
763 
764 
765 /********************************************************************
766  * queries
767  */
768 
769 static struct pipe_query *
770 tc_create_query(struct pipe_context *_pipe, unsigned query_type,
771                 unsigned index)
772 {
773    struct threaded_context *tc = threaded_context(_pipe);
774    struct pipe_context *pipe = tc->pipe;
775 
776    return pipe->create_query(pipe, query_type, index);
777 }
778 
779 static struct pipe_query *
780 tc_create_batch_query(struct pipe_context *_pipe, unsigned num_queries,
781                       unsigned *query_types)
782 {
783    struct threaded_context *tc = threaded_context(_pipe);
784    struct pipe_context *pipe = tc->pipe;
785 
786    return pipe->create_batch_query(pipe, num_queries, query_types);
787 }
788 
789 struct tc_query_call {
790    struct tc_call_base base;
791    struct pipe_query *query;
792 };
793 
794 static uint16_t
795 tc_call_destroy_query(struct pipe_context *pipe, void *call, uint64_t *last)
796 {
797    struct pipe_query *query = to_call(call, tc_query_call)->query;
798    struct threaded_query *tq = threaded_query(query);
799 
800    if (list_is_linked(&tq->head_unflushed))
801       list_del(&tq->head_unflushed);
802 
803    pipe->destroy_query(pipe, query);
804    return call_size(tc_query_call);
805 }
806 
807 static void
808 tc_destroy_query(struct pipe_context *_pipe, struct pipe_query *query)
809 {
810    struct threaded_context *tc = threaded_context(_pipe);
811 
812    tc_add_call(tc, TC_CALL_destroy_query, tc_query_call)->query = query;
813 }
814 
815 static uint16_t
816 tc_call_begin_query(struct pipe_context *pipe, void *call, uint64_t *last)
817 {
818    pipe->begin_query(pipe, to_call(call, tc_query_call)->query);
819    return call_size(tc_query_call);
820 }
821 
822 static bool
823 tc_begin_query(struct pipe_context *_pipe, struct pipe_query *query)
824 {
825    struct threaded_context *tc = threaded_context(_pipe);
826 
827    tc_add_call(tc, TC_CALL_begin_query, tc_query_call)->query = query;
828    return true; /* we don't care about the return value for this call */
829 }
830 
831 struct tc_end_query_call {
832    struct tc_call_base base;
833    struct threaded_context *tc;
834    struct pipe_query *query;
835 };
836 
837 static uint16_t
838 tc_call_end_query(struct pipe_context *pipe, void *call, uint64_t *last)
839 {
840    struct tc_end_query_call *p = to_call(call, tc_end_query_call);
841    struct threaded_query *tq = threaded_query(p->query);
842 
843    if (!list_is_linked(&tq->head_unflushed))
844       list_add(&tq->head_unflushed, &p->tc->unflushed_queries);
845 
846    pipe->end_query(pipe, p->query);
847    return call_size(tc_end_query_call);
848 }
849 
850 static bool
851 tc_end_query(struct pipe_context *_pipe, struct pipe_query *query)
852 {
853    struct threaded_context *tc = threaded_context(_pipe);
854    struct threaded_query *tq = threaded_query(query);
855    struct tc_end_query_call *call =
856       tc_add_call(tc, TC_CALL_end_query, tc_end_query_call);
857 
858    call->tc = tc;
859    call->query = query;
860 
861    tq->flushed = false;
862 
863    return true; /* we don't care about the return value for this call */
864 }
865 
866 static bool
867 tc_get_query_result(struct pipe_context *_pipe,
868                     struct pipe_query *query, bool wait,
869                     union pipe_query_result *result)
870 {
871    struct threaded_context *tc = threaded_context(_pipe);
872    struct threaded_query *tq = threaded_query(query);
873    struct pipe_context *pipe = tc->pipe;
874    bool flushed = tq->flushed;
875 
876    if (!flushed) {
877       tc_sync_msg(tc, wait ? "wait" : "nowait");
878       tc_set_driver_thread(tc);
879    }
880 
881    bool success = pipe->get_query_result(pipe, query, wait, result);
882 
883    if (!flushed)
884       tc_clear_driver_thread(tc);
885 
886    if (success) {
887       tq->flushed = true;
888       if (list_is_linked(&tq->head_unflushed)) {
889          /* This is safe because it can only happen after we sync'd. */
890          list_del(&tq->head_unflushed);
891       }
892    }
893    return success;
894 }
895 
896 struct tc_query_result_resource {
897    struct tc_call_base base;
898    enum pipe_query_flags flags:8;
899    enum pipe_query_value_type result_type:8;
900    int8_t index; /* it can be -1 */
901    unsigned offset;
902    struct pipe_query *query;
903    struct pipe_resource *resource;
904 };
905 
906 static uint16_t
907 tc_call_get_query_result_resource(struct pipe_context *pipe, void *call, uint64_t *last)
908 {
909    struct tc_query_result_resource *p = to_call(call, tc_query_result_resource);
910 
911    pipe->get_query_result_resource(pipe, p->query, p->flags, p->result_type,
912                                    p->index, p->resource, p->offset);
913    tc_drop_resource_reference(p->resource);
914    return call_size(tc_query_result_resource);
915 }
916 
917 static void
918 tc_get_query_result_resource(struct pipe_context *_pipe,
919                              struct pipe_query *query,
920                              enum pipe_query_flags flags,
921                              enum pipe_query_value_type result_type, int index,
922                              struct pipe_resource *resource, unsigned offset)
923 {
924    struct threaded_context *tc = threaded_context(_pipe);
925 
926    tc_buffer_disable_cpu_storage(resource);
927 
928    struct tc_query_result_resource *p =
929       tc_add_call(tc, TC_CALL_get_query_result_resource,
930                   tc_query_result_resource);
931    p->query = query;
932    p->flags = flags;
933    p->result_type = result_type;
934    p->index = index;
935    tc_set_resource_reference(&p->resource, resource);
936    tc_add_to_buffer_list(&tc->buffer_lists[tc->next_buf_list], resource);
937    p->offset = offset;
938 }
939 
940 struct tc_render_condition {
941    struct tc_call_base base;
942    bool condition;
943    unsigned mode;
944    struct pipe_query *query;
945 };
946 
947 static uint16_t
948 tc_call_render_condition(struct pipe_context *pipe, void *call, uint64_t *last)
949 {
950    struct tc_render_condition *p = to_call(call, tc_render_condition);
951    pipe->render_condition(pipe, p->query, p->condition, p->mode);
952    return call_size(tc_render_condition);
953 }
954 
955 static void
956 tc_render_condition(struct pipe_context *_pipe,
957                     struct pipe_query *query, bool condition,
958                     enum pipe_render_cond_flag mode)
959 {
960    struct threaded_context *tc = threaded_context(_pipe);
961    struct tc_render_condition *p =
962       tc_add_call(tc, TC_CALL_render_condition, tc_render_condition);
963 
964    p->query = query;
965    p->condition = condition;
966    p->mode = mode;
967 }
968 
969 
970 /********************************************************************
971  * constant (immutable) states
972  */
973 
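/* CSO create functions are forwarded directly to the driver (creation is
 * expected to be safe to call from the application thread when TC is used),
 * while bind and delete are queued via TC_FUNC1.
 */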
974 #define TC_CSO_CREATE(name, sname) \
975    static void * \
976    tc_create_##name##_state(struct pipe_context *_pipe, \
977                             const struct pipe_##sname##_state *state) \
978    { \
979       struct pipe_context *pipe = threaded_context(_pipe)->pipe; \
980       return pipe->create_##name##_state(pipe, state); \
981    }
982 
983 #define TC_CSO_BIND(name, ...) TC_FUNC1(bind_##name##_state, , void *, , , ##__VA_ARGS__)
984 #define TC_CSO_DELETE(name) TC_FUNC1(delete_##name##_state, , void *, , )
985 
986 #define TC_CSO(name, sname, ...) \
987    TC_CSO_CREATE(name, sname) \
988    TC_CSO_BIND(name, ##__VA_ARGS__) \
989    TC_CSO_DELETE(name)
990 
991 #define TC_CSO_WHOLE(name) TC_CSO(name, name)
992 #define TC_CSO_SHADER(name) TC_CSO(name, shader)
993 #define TC_CSO_SHADER_TRACK(name) TC_CSO(name, shader, tc->seen_##name = true;)
994 
995 TC_CSO_WHOLE(blend)
996 TC_CSO_WHOLE(rasterizer)
997 TC_CSO_WHOLE(depth_stencil_alpha)
998 TC_CSO_WHOLE(compute)
999 TC_CSO_SHADER(fs)
1000 TC_CSO_SHADER(vs)
1001 TC_CSO_SHADER_TRACK(gs)
1002 TC_CSO_SHADER_TRACK(tcs)
1003 TC_CSO_SHADER_TRACK(tes)
1004 TC_CSO_CREATE(sampler, sampler)
1005 TC_CSO_DELETE(sampler)
1006 TC_CSO_BIND(vertex_elements)
1007 TC_CSO_DELETE(vertex_elements)
1008 
1009 static void *
1010 tc_create_vertex_elements_state(struct pipe_context *_pipe, unsigned count,
1011                                 const struct pipe_vertex_element *elems)
1012 {
1013    struct pipe_context *pipe = threaded_context(_pipe)->pipe;
1014 
1015    return pipe->create_vertex_elements_state(pipe, count, elems);
1016 }
1017 
1018 struct tc_sampler_states {
1019    struct tc_call_base base;
1020    ubyte shader, start, count;
1021    void *slot[0]; /* more will be allocated if needed */
1022 };
1023 
1024 static uint16_t
1025 tc_call_bind_sampler_states(struct pipe_context *pipe, void *call, uint64_t *last)
1026 {
1027    struct tc_sampler_states *p = (struct tc_sampler_states *)call;
1028 
1029    pipe->bind_sampler_states(pipe, p->shader, p->start, p->count, p->slot);
1030    return p->base.num_slots;
1031 }
1032 
1033 static void
1034 tc_bind_sampler_states(struct pipe_context *_pipe,
1035                        enum pipe_shader_type shader,
1036                        unsigned start, unsigned count, void **states)
1037 {
1038    if (!count)
1039       return;
1040 
1041    struct threaded_context *tc = threaded_context(_pipe);
1042    struct tc_sampler_states *p =
1043       tc_add_slot_based_call(tc, TC_CALL_bind_sampler_states, tc_sampler_states, count);
1044 
1045    p->shader = shader;
1046    p->start = start;
1047    p->count = count;
1048    memcpy(p->slot, states, count * sizeof(states[0]));
1049 }
1050 
1051 
1052 /********************************************************************
1053  * immediate states
1054  */
1055 
1056 struct tc_framebuffer {
1057    struct tc_call_base base;
1058    struct pipe_framebuffer_state state;
1059 };
1060 
1061 static uint16_t
1062 tc_call_set_framebuffer_state(struct pipe_context *pipe, void *call, uint64_t *last)
1063 {
1064    struct pipe_framebuffer_state *p = &to_call(call, tc_framebuffer)->state;
1065 
1066    pipe->set_framebuffer_state(pipe, p);
1067 
1068    unsigned nr_cbufs = p->nr_cbufs;
1069    for (unsigned i = 0; i < nr_cbufs; i++)
1070       tc_drop_surface_reference(p->cbufs[i]);
1071    tc_drop_surface_reference(p->zsbuf);
1072    return call_size(tc_framebuffer);
1073 }
1074 
1075 static void
1076 tc_set_framebuffer_state(struct pipe_context *_pipe,
1077                          const struct pipe_framebuffer_state *fb)
1078 {
1079    struct threaded_context *tc = threaded_context(_pipe);
1080    struct tc_framebuffer *p =
1081       tc_add_call(tc, TC_CALL_set_framebuffer_state, tc_framebuffer);
1082    unsigned nr_cbufs = fb->nr_cbufs;
1083 
1084    p->state.width = fb->width;
1085    p->state.height = fb->height;
1086    p->state.samples = fb->samples;
1087    p->state.layers = fb->layers;
1088    p->state.nr_cbufs = nr_cbufs;
1089 
1090    for (unsigned i = 0; i < nr_cbufs; i++) {
1091       p->state.cbufs[i] = NULL;
1092       pipe_surface_reference(&p->state.cbufs[i], fb->cbufs[i]);
1093    }
1094    p->state.zsbuf = NULL;
1095    pipe_surface_reference(&p->state.zsbuf, fb->zsbuf);
1096 }
1097 
1098 struct tc_tess_state {
1099    struct tc_call_base base;
1100    float state[6];
1101 };
1102 
1103 static uint16_t
1104 tc_call_set_tess_state(struct pipe_context *pipe, void *call, uint64_t *last)
1105 {
1106    float *p = to_call(call, tc_tess_state)->state;
1107 
1108    pipe->set_tess_state(pipe, p, p + 4);
1109    return call_size(tc_tess_state);
1110 }
1111 
1112 static void
1113 tc_set_tess_state(struct pipe_context *_pipe,
1114                   const float default_outer_level[4],
1115                   const float default_inner_level[2])
1116 {
1117    struct threaded_context *tc = threaded_context(_pipe);
1118    float *p = tc_add_call(tc, TC_CALL_set_tess_state, tc_tess_state)->state;
1119 
1120    memcpy(p, default_outer_level, 4 * sizeof(float));
1121    memcpy(p + 4, default_inner_level, 2 * sizeof(float));
1122 }
1123 
1124 struct tc_patch_vertices {
1125    struct tc_call_base base;
1126    ubyte patch_vertices;
1127 };
1128 
1129 static uint16_t
1130 tc_call_set_patch_vertices(struct pipe_context *pipe, void *call, uint64_t *last)
1131 {
1132    uint8_t patch_vertices = to_call(call, tc_patch_vertices)->patch_vertices;
1133 
1134    pipe->set_patch_vertices(pipe, patch_vertices);
1135    return call_size(tc_patch_vertices);
1136 }
1137 
1138 static void
1139 tc_set_patch_vertices(struct pipe_context *_pipe, uint8_t patch_vertices)
1140 {
1141    struct threaded_context *tc = threaded_context(_pipe);
1142 
1143    tc_add_call(tc, TC_CALL_set_patch_vertices,
1144                tc_patch_vertices)->patch_vertices = patch_vertices;
1145 }
1146 
1147 struct tc_constant_buffer_base {
1148    struct tc_call_base base;
1149    ubyte shader, index;
1150    bool is_null;
1151 };
1152 
1153 struct tc_constant_buffer {
1154    struct tc_constant_buffer_base base;
1155    struct pipe_constant_buffer cb;
1156 };
1157 
1158 static uint16_t
1159 tc_call_set_constant_buffer(struct pipe_context *pipe, void *call, uint64_t *last)
1160 {
1161    struct tc_constant_buffer *p = (struct tc_constant_buffer *)call;
1162 
1163    if (unlikely(p->base.is_null)) {
1164       pipe->set_constant_buffer(pipe, p->base.shader, p->base.index, false, NULL);
1165       return call_size(tc_constant_buffer_base);
1166    }
1167 
1168    pipe->set_constant_buffer(pipe, p->base.shader, p->base.index, true, &p->cb);
1169    return call_size(tc_constant_buffer);
1170 }
1171 
1172 static void
1173 tc_set_constant_buffer(struct pipe_context *_pipe,
1174                        enum pipe_shader_type shader, uint index,
1175                        bool take_ownership,
1176                        const struct pipe_constant_buffer *cb)
1177 {
1178    struct threaded_context *tc = threaded_context(_pipe);
1179 
1180    if (unlikely(!cb || (!cb->buffer && !cb->user_buffer))) {
1181       struct tc_constant_buffer_base *p =
1182          tc_add_call(tc, TC_CALL_set_constant_buffer, tc_constant_buffer_base);
1183       p->shader = shader;
1184       p->index = index;
1185       p->is_null = true;
1186       tc_unbind_buffer(&tc->const_buffers[shader][index]);
1187       return;
1188    }
1189 
1190    struct pipe_resource *buffer;
1191    unsigned offset;
1192 
1193    if (cb->user_buffer) {
1194       /* This must be done before adding set_constant_buffer, because it could
1195        * generate e.g. transfer_unmap and flush partially-uninitialized
1196        * set_constant_buffer to the driver if it was done afterwards.
1197        */
1198       buffer = NULL;
1199       u_upload_data(tc->base.const_uploader, 0, cb->buffer_size,
1200                     tc->ubo_alignment, cb->user_buffer, &offset, &buffer);
1201       u_upload_unmap(tc->base.const_uploader);
1202       take_ownership = true;
1203    } else {
1204       buffer = cb->buffer;
1205       offset = cb->buffer_offset;
1206    }
1207 
1208    struct tc_constant_buffer *p =
1209       tc_add_call(tc, TC_CALL_set_constant_buffer, tc_constant_buffer);
1210    p->base.shader = shader;
1211    p->base.index = index;
1212    p->base.is_null = false;
1213    p->cb.user_buffer = NULL;
1214    p->cb.buffer_offset = offset;
1215    p->cb.buffer_size = cb->buffer_size;
1216 
1217    if (take_ownership)
1218       p->cb.buffer = buffer;
1219    else
1220       tc_set_resource_reference(&p->cb.buffer, buffer);
1221 
1222    if (buffer) {
1223       tc_bind_buffer(&tc->const_buffers[shader][index],
1224                      &tc->buffer_lists[tc->next_buf_list], buffer);
1225    } else {
1226       tc_unbind_buffer(&tc->const_buffers[shader][index]);
1227    }
1228 }
1229 
1230 struct tc_inlinable_constants {
1231    struct tc_call_base base;
1232    ubyte shader;
1233    ubyte num_values;
1234    uint32_t values[MAX_INLINABLE_UNIFORMS];
1235 };
1236 
1237 static uint16_t
1238 tc_call_set_inlinable_constants(struct pipe_context *pipe, void *call, uint64_t *last)
1239 {
1240    struct tc_inlinable_constants *p = to_call(call, tc_inlinable_constants);
1241 
1242    pipe->set_inlinable_constants(pipe, p->shader, p->num_values, p->values);
1243    return call_size(tc_inlinable_constants);
1244 }
1245 
1246 static void
1247 tc_set_inlinable_constants(struct pipe_context *_pipe,
1248                            enum pipe_shader_type shader,
1249                            uint num_values, uint32_t *values)
1250 {
1251    struct threaded_context *tc = threaded_context(_pipe);
1252    struct tc_inlinable_constants *p =
1253       tc_add_call(tc, TC_CALL_set_inlinable_constants, tc_inlinable_constants);
1254    p->shader = shader;
1255    p->num_values = num_values;
1256    memcpy(p->values, values, num_values * 4);
1257 }
1258 
1259 struct tc_sample_locations {
1260    struct tc_call_base base;
1261    uint16_t size;
1262    uint8_t slot[0];
1263 };
1264 
1265 
1266 static uint16_t
1267 tc_call_set_sample_locations(struct pipe_context *pipe, void *call, uint64_t *last)
1268 {
1269    struct tc_sample_locations *p = (struct tc_sample_locations *)call;
1270 
1271    pipe->set_sample_locations(pipe, p->size, p->slot);
1272    return p->base.num_slots;
1273 }
1274 
1275 static void
1276 tc_set_sample_locations(struct pipe_context *_pipe, size_t size, const uint8_t *locations)
1277 {
1278    struct threaded_context *tc = threaded_context(_pipe);
1279    struct tc_sample_locations *p =
1280       tc_add_slot_based_call(tc, TC_CALL_set_sample_locations,
1281                              tc_sample_locations, size);
1282 
1283    p->size = size;
1284    memcpy(p->slot, locations, size);
1285 }
1286 
1287 struct tc_scissors {
1288    struct tc_call_base base;
1289    ubyte start, count;
1290    struct pipe_scissor_state slot[0]; /* more will be allocated if needed */
1291 };
1292 
1293 static uint16_t
1294 tc_call_set_scissor_states(struct pipe_context *pipe, void *call, uint64_t *last)
1295 {
1296    struct tc_scissors *p = (struct tc_scissors *)call;
1297 
1298    pipe->set_scissor_states(pipe, p->start, p->count, p->slot);
1299    return p->base.num_slots;
1300 }
1301 
1302 static void
1303 tc_set_scissor_states(struct pipe_context *_pipe,
1304                       unsigned start, unsigned count,
1305                       const struct pipe_scissor_state *states)
1306 {
1307    struct threaded_context *tc = threaded_context(_pipe);
1308    struct tc_scissors *p =
1309       tc_add_slot_based_call(tc, TC_CALL_set_scissor_states, tc_scissors, count);
1310 
1311    p->start = start;
1312    p->count = count;
1313    memcpy(&p->slot, states, count * sizeof(states[0]));
1314 }
1315 
1316 struct tc_viewports {
1317    struct tc_call_base base;
1318    ubyte start, count;
1319    struct pipe_viewport_state slot[0]; /* more will be allocated if needed */
1320 };
1321 
1322 static uint16_t
1323 tc_call_set_viewport_states(struct pipe_context *pipe, void *call, uint64_t *last)
1324 {
1325    struct tc_viewports *p = (struct tc_viewports *)call;
1326 
1327    pipe->set_viewport_states(pipe, p->start, p->count, p->slot);
1328    return p->base.num_slots;
1329 }
1330 
1331 static void
1332 tc_set_viewport_states(struct pipe_context *_pipe,
1333                        unsigned start, unsigned count,
1334                        const struct pipe_viewport_state *states)
1335 {
1336    if (!count)
1337       return;
1338 
1339    struct threaded_context *tc = threaded_context(_pipe);
1340    struct tc_viewports *p =
1341       tc_add_slot_based_call(tc, TC_CALL_set_viewport_states, tc_viewports, count);
1342 
1343    p->start = start;
1344    p->count = count;
1345    memcpy(&p->slot, states, count * sizeof(states[0]));
1346 }
1347 
1348 struct tc_window_rects {
1349    struct tc_call_base base;
1350    bool include;
1351    ubyte count;
1352    struct pipe_scissor_state slot[0]; /* more will be allocated if needed */
1353 };
1354 
1355 static uint16_t
1356 tc_call_set_window_rectangles(struct pipe_context *pipe, void *call, uint64_t *last)
1357 {
1358    struct tc_window_rects *p = (struct tc_window_rects *)call;
1359 
1360    pipe->set_window_rectangles(pipe, p->include, p->count, p->slot);
1361    return p->base.num_slots;
1362 }
1363 
1364 static void
1365 tc_set_window_rectangles(struct pipe_context *_pipe, bool include,
1366                          unsigned count,
1367                          const struct pipe_scissor_state *rects)
1368 {
1369    struct threaded_context *tc = threaded_context(_pipe);
1370    struct tc_window_rects *p =
1371       tc_add_slot_based_call(tc, TC_CALL_set_window_rectangles, tc_window_rects, count);
1372 
1373    p->include = include;
1374    p->count = count;
1375    memcpy(p->slot, rects, count * sizeof(rects[0]));
1376 }
1377 
1378 struct tc_sampler_views {
1379    struct tc_call_base base;
1380    ubyte shader, start, count, unbind_num_trailing_slots;
1381    struct pipe_sampler_view *slot[0]; /* more will be allocated if needed */
1382 };
1383 
1384 static uint16_t
1385 tc_call_set_sampler_views(struct pipe_context *pipe, void *call, uint64_t *last)
1386 {
1387    struct tc_sampler_views *p = (struct tc_sampler_views *)call;
1388 
1389    pipe->set_sampler_views(pipe, p->shader, p->start, p->count,
1390                            p->unbind_num_trailing_slots, true, p->slot);
1391    return p->base.num_slots;
1392 }
1393 
1394 static void
1395 tc_set_sampler_views(struct pipe_context *_pipe,
1396                      enum pipe_shader_type shader,
1397                      unsigned start, unsigned count,
1398                      unsigned unbind_num_trailing_slots, bool take_ownership,
1399                      struct pipe_sampler_view **views)
1400 {
1401    if (!count && !unbind_num_trailing_slots)
1402       return;
1403 
1404    struct threaded_context *tc = threaded_context(_pipe);
1405    struct tc_sampler_views *p =
1406       tc_add_slot_based_call(tc, TC_CALL_set_sampler_views, tc_sampler_views,
1407                              views ? count : 0);
1408 
1409    p->shader = shader;
1410    p->start = start;
1411 
1412    if (views) {
1413       struct tc_buffer_list *next = &tc->buffer_lists[tc->next_buf_list];
1414 
1415       p->count = count;
1416       p->unbind_num_trailing_slots = unbind_num_trailing_slots;
1417 
1418       if (take_ownership) {
1419          memcpy(p->slot, views, sizeof(*views) * count);
1420 
1421          for (unsigned i = 0; i < count; i++) {
1422             if (views[i] && views[i]->target == PIPE_BUFFER) {
1423                tc_bind_buffer(&tc->sampler_buffers[shader][start + i], next,
1424                               views[i]->texture);
1425             } else {
1426                tc_unbind_buffer(&tc->sampler_buffers[shader][start + i]);
1427             }
1428          }
1429       } else {
1430          for (unsigned i = 0; i < count; i++) {
1431             p->slot[i] = NULL;
1432             pipe_sampler_view_reference(&p->slot[i], views[i]);
1433 
1434             if (views[i] && views[i]->target == PIPE_BUFFER) {
1435                tc_bind_buffer(&tc->sampler_buffers[shader][start + i], next,
1436                               views[i]->texture);
1437             } else {
1438                tc_unbind_buffer(&tc->sampler_buffers[shader][start + i]);
1439             }
1440          }
1441       }
1442 
1443       tc_unbind_buffers(&tc->sampler_buffers[shader][start + count],
1444                         unbind_num_trailing_slots);
1445       tc->seen_sampler_buffers[shader] = true;
1446    } else {
1447       p->count = 0;
1448       p->unbind_num_trailing_slots = count + unbind_num_trailing_slots;
1449 
1450       tc_unbind_buffers(&tc->sampler_buffers[shader][start],
1451                         count + unbind_num_trailing_slots);
1452    }
1453 }
1454 
1455 struct tc_shader_images {
1456    struct tc_call_base base;
1457    ubyte shader, start, count;
1458    ubyte unbind_num_trailing_slots;
1459    struct pipe_image_view slot[0]; /* more will be allocated if needed */
1460 };
1461 
1462 static uint16_t
1463 tc_call_set_shader_images(struct pipe_context *pipe, void *call, uint64_t *last)
1464 {
1465    struct tc_shader_images *p = (struct tc_shader_images *)call;
1466    unsigned count = p->count;
1467 
1468    if (!p->count) {
1469       pipe->set_shader_images(pipe, p->shader, p->start, 0,
1470                               p->unbind_num_trailing_slots, NULL);
1471       return call_size(tc_shader_images);
1472    }
1473 
1474    pipe->set_shader_images(pipe, p->shader, p->start, p->count,
1475                            p->unbind_num_trailing_slots, p->slot);
1476 
1477    for (unsigned i = 0; i < count; i++)
1478       tc_drop_resource_reference(p->slot[i].resource);
1479 
1480    return p->base.num_slots;
1481 }
1482 
1483 static void
1484 tc_set_shader_images(struct pipe_context *_pipe,
1485                      enum pipe_shader_type shader,
1486                      unsigned start, unsigned count,
1487                      unsigned unbind_num_trailing_slots,
1488                      const struct pipe_image_view *images)
1489 {
1490    if (!count && !unbind_num_trailing_slots)
1491       return;
1492 
1493    struct threaded_context *tc = threaded_context(_pipe);
1494    struct tc_shader_images *p =
1495       tc_add_slot_based_call(tc, TC_CALL_set_shader_images, tc_shader_images,
1496                              images ? count : 0);
1497    unsigned writable_buffers = 0;
1498 
1499    p->shader = shader;
1500    p->start = start;
1501 
1502    if (images) {
1503       p->count = count;
1504       p->unbind_num_trailing_slots = unbind_num_trailing_slots;
1505 
1506       struct tc_buffer_list *next = &tc->buffer_lists[tc->next_buf_list];
1507 
1508       for (unsigned i = 0; i < count; i++) {
1509          struct pipe_resource *resource = images[i].resource;
1510 
1511          tc_set_resource_reference(&p->slot[i].resource, resource);
1512 
1513          if (resource && resource->target == PIPE_BUFFER) {
1514             tc_bind_buffer(&tc->image_buffers[shader][start + i], next, resource);
1515 
1516             if (images[i].access & PIPE_IMAGE_ACCESS_WRITE) {
1517                struct threaded_resource *tres = threaded_resource(resource);
1518 
1519                tc_buffer_disable_cpu_storage(resource);
1520                util_range_add(&tres->b, &tres->valid_buffer_range,
1521                               images[i].u.buf.offset,
1522                               images[i].u.buf.offset + images[i].u.buf.size);
1523                writable_buffers |= BITFIELD_BIT(start + i);
1524             }
1525          } else {
1526             tc_unbind_buffer(&tc->image_buffers[shader][start + i]);
1527          }
1528       }
1529       memcpy(p->slot, images, count * sizeof(images[0]));
1530 
1531       tc_unbind_buffers(&tc->image_buffers[shader][start + count],
1532                         unbind_num_trailing_slots);
1533       tc->seen_image_buffers[shader] = true;
1534    } else {
1535       p->count = 0;
1536       p->unbind_num_trailing_slots = count + unbind_num_trailing_slots;
1537 
1538       tc_unbind_buffers(&tc->image_buffers[shader][start],
1539                         count + unbind_num_trailing_slots);
1540    }
1541 
1542    tc->image_buffers_writeable_mask[shader] &= ~BITFIELD_RANGE(start, count);
1543    tc->image_buffers_writeable_mask[shader] |= writable_buffers;
1544 }
1545 
1546 struct tc_shader_buffers {
1547    struct tc_call_base base;
1548    ubyte shader, start, count;
1549    bool unbind;
1550    unsigned writable_bitmask;
1551    struct pipe_shader_buffer slot[0]; /* more will be allocated if needed */
1552 };
1553 
1554 static uint16_t
1555 tc_call_set_shader_buffers(struct pipe_context *pipe, void *call, uint64_t *last)
1556 {
1557    struct tc_shader_buffers *p = (struct tc_shader_buffers *)call;
1558    unsigned count = p->count;
1559 
1560    if (p->unbind) {
1561       pipe->set_shader_buffers(pipe, p->shader, p->start, p->count, NULL, 0);
1562       return call_size(tc_shader_buffers);
1563    }
1564 
1565    pipe->set_shader_buffers(pipe, p->shader, p->start, p->count, p->slot,
1566                             p->writable_bitmask);
1567 
1568    for (unsigned i = 0; i < count; i++)
1569       tc_drop_resource_reference(p->slot[i].buffer);
1570 
1571    return p->base.num_slots;
1572 }
1573 
1574 static void
1575 tc_set_shader_buffers(struct pipe_context *_pipe,
1576                       enum pipe_shader_type shader,
1577                       unsigned start, unsigned count,
1578                       const struct pipe_shader_buffer *buffers,
1579                       unsigned writable_bitmask)
1580 {
1581    if (!count)
1582       return;
1583 
1584    struct threaded_context *tc = threaded_context(_pipe);
1585    struct tc_shader_buffers *p =
1586       tc_add_slot_based_call(tc, TC_CALL_set_shader_buffers, tc_shader_buffers,
1587                              buffers ? count : 0);
1588 
1589    p->shader = shader;
1590    p->start = start;
1591    p->count = count;
1592    p->unbind = buffers == NULL;
1593    p->writable_bitmask = writable_bitmask;
1594 
1595    if (buffers) {
1596       struct tc_buffer_list *next = &tc->buffer_lists[tc->next_buf_list];
1597 
1598       for (unsigned i = 0; i < count; i++) {
1599          struct pipe_shader_buffer *dst = &p->slot[i];
1600          const struct pipe_shader_buffer *src = buffers + i;
1601 
1602          tc_set_resource_reference(&dst->buffer, src->buffer);
1603          dst->buffer_offset = src->buffer_offset;
1604          dst->buffer_size = src->buffer_size;
1605 
1606          if (src->buffer) {
1607             struct threaded_resource *tres = threaded_resource(src->buffer);
1608 
1609             tc_bind_buffer(&tc->shader_buffers[shader][start + i], next, &tres->b);
1610 
1611             if (writable_bitmask & BITFIELD_BIT(i)) {
1612                tc_buffer_disable_cpu_storage(src->buffer);
1613                util_range_add(&tres->b, &tres->valid_buffer_range,
1614                               src->buffer_offset,
1615                               src->buffer_offset + src->buffer_size);
1616             }
1617          } else {
1618             tc_unbind_buffer(&tc->shader_buffers[shader][start + i]);
1619          }
1620       }
1621       tc->seen_shader_buffers[shader] = true;
1622    } else {
1623       tc_unbind_buffers(&tc->shader_buffers[shader][start], count);
1624    }
1625 
1626    tc->shader_buffers_writeable_mask[shader] &= ~BITFIELD_RANGE(start, count);
1627    tc->shader_buffers_writeable_mask[shader] |= writable_bitmask << start;
1628 }
1629 
1630 struct tc_vertex_buffers {
1631    struct tc_call_base base;
1632    ubyte start, count;
1633    ubyte unbind_num_trailing_slots;
1634    struct pipe_vertex_buffer slot[0]; /* more will be allocated if needed */
1635 };
1636 
1637 static uint16_t
1638 tc_call_set_vertex_buffers(struct pipe_context *pipe, void *call, uint64_t *last)
1639 {
1640    struct tc_vertex_buffers *p = (struct tc_vertex_buffers *)call;
1641    unsigned count = p->count;
1642 
1643    if (!count) {
1644       pipe->set_vertex_buffers(pipe, p->start, 0,
1645                                p->unbind_num_trailing_slots, false, NULL);
1646       return call_size(tc_vertex_buffers);
1647    }
1648 
1649    for (unsigned i = 0; i < count; i++)
1650       tc_assert(!p->slot[i].is_user_buffer);
1651 
1652    pipe->set_vertex_buffers(pipe, p->start, count,
1653                             p->unbind_num_trailing_slots, true, p->slot);
1654    return p->base.num_slots;
1655 }
1656 
1657 static void
1658 tc_set_vertex_buffers(struct pipe_context *_pipe,
1659                       unsigned start, unsigned count,
1660                       unsigned unbind_num_trailing_slots,
1661                       bool take_ownership,
1662                       const struct pipe_vertex_buffer *buffers)
1663 {
1664    struct threaded_context *tc = threaded_context(_pipe);
1665 
1666    if (!count && !unbind_num_trailing_slots)
1667       return;
1668 
1669    if (count && buffers) {
1670       struct tc_vertex_buffers *p =
1671          tc_add_slot_based_call(tc, TC_CALL_set_vertex_buffers, tc_vertex_buffers, count);
1672       p->start = start;
1673       p->count = count;
1674       p->unbind_num_trailing_slots = unbind_num_trailing_slots;
1675 
1676       struct tc_buffer_list *next = &tc->buffer_lists[tc->next_buf_list];
1677 
1678       if (take_ownership) {
1679          memcpy(p->slot, buffers, count * sizeof(struct pipe_vertex_buffer));
1680 
1681          for (unsigned i = 0; i < count; i++) {
1682             struct pipe_resource *buf = buffers[i].buffer.resource;
1683 
1684             if (buf) {
1685                tc_bind_buffer(&tc->vertex_buffers[start + i], next, buf);
1686             } else {
1687                tc_unbind_buffer(&tc->vertex_buffers[start + i]);
1688             }
1689          }
1690       } else {
1691          for (unsigned i = 0; i < count; i++) {
1692             struct pipe_vertex_buffer *dst = &p->slot[i];
1693             const struct pipe_vertex_buffer *src = buffers + i;
1694             struct pipe_resource *buf = src->buffer.resource;
1695 
1696             tc_assert(!src->is_user_buffer);
1697             dst->stride = src->stride;
1698             dst->is_user_buffer = false;
1699             tc_set_resource_reference(&dst->buffer.resource, buf);
1700             dst->buffer_offset = src->buffer_offset;
1701 
1702             if (buf) {
1703                tc_bind_buffer(&tc->vertex_buffers[start + i], next, buf);
1704             } else {
1705                tc_unbind_buffer(&tc->vertex_buffers[start + i]);
1706             }
1707          }
1708       }
1709 
1710       tc_unbind_buffers(&tc->vertex_buffers[start + count],
1711                         unbind_num_trailing_slots);
1712    } else {
1713       struct tc_vertex_buffers *p =
1714          tc_add_slot_based_call(tc, TC_CALL_set_vertex_buffers, tc_vertex_buffers, 0);
1715       p->start = start;
1716       p->count = 0;
1717       p->unbind_num_trailing_slots = count + unbind_num_trailing_slots;
1718 
1719       tc_unbind_buffers(&tc->vertex_buffers[start],
1720                         count + unbind_num_trailing_slots);
1721    }
1722 }
1723 
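/* Stream-output targets use a fixed-size payload (PIPE_MAX_SO_BUFFERS).
 * CPU storage is disabled on every bound target's buffer because streamout
 * writes to it on the GPU, and all trailing slots are explicitly unbound.
 */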
1724 struct tc_stream_outputs {
1725    struct tc_call_base base;
1726    unsigned count;
1727    struct pipe_stream_output_target *targets[PIPE_MAX_SO_BUFFERS];
1728    unsigned offsets[PIPE_MAX_SO_BUFFERS];
1729 };
1730 
1731 static uint16_t
1732 tc_call_set_stream_output_targets(struct pipe_context *pipe, void *call, uint64_t *last)
1733 {
1734    struct tc_stream_outputs *p = to_call(call, tc_stream_outputs);
1735    unsigned count = p->count;
1736 
1737    pipe->set_stream_output_targets(pipe, count, p->targets, p->offsets);
1738    for (unsigned i = 0; i < count; i++)
1739       tc_drop_so_target_reference(p->targets[i]);
1740 
1741    return call_size(tc_stream_outputs);
1742 }
1743 
1744 static void
1745 tc_set_stream_output_targets(struct pipe_context *_pipe,
1746                              unsigned count,
1747                              struct pipe_stream_output_target **tgs,
1748                              const unsigned *offsets)
1749 {
1750    struct threaded_context *tc = threaded_context(_pipe);
1751    struct tc_stream_outputs *p =
1752       tc_add_call(tc, TC_CALL_set_stream_output_targets, tc_stream_outputs);
1753    struct tc_buffer_list *next = &tc->buffer_lists[tc->next_buf_list];
1754 
1755    for (unsigned i = 0; i < count; i++) {
1756       p->targets[i] = NULL;
1757       pipe_so_target_reference(&p->targets[i], tgs[i]);
1758       if (tgs[i]) {
1759          tc_buffer_disable_cpu_storage(tgs[i]->buffer);
1760          tc_bind_buffer(&tc->streamout_buffers[i], next, tgs[i]->buffer);
1761       } else {
1762          tc_unbind_buffer(&tc->streamout_buffers[i]);
1763       }
1764    }
1765    p->count = count;
1766    memcpy(p->offsets, offsets, count * sizeof(unsigned));
1767 
1768    tc_unbind_buffers(&tc->streamout_buffers[count], PIPE_MAX_SO_BUFFERS - count);
1769    if (count)
1770       tc->seen_streamout_buffers = true;
1771 }
1772 
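/* The following two entry points have no queued counterpart: they
 * synchronize with the driver thread and forward the call directly.
 */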
1773 static void
1774 tc_set_compute_resources(struct pipe_context *_pipe, unsigned start,
1775                          unsigned count, struct pipe_surface **resources)
1776 {
1777    struct threaded_context *tc = threaded_context(_pipe);
1778    struct pipe_context *pipe = tc->pipe;
1779 
1780    tc_sync(tc);
1781    pipe->set_compute_resources(pipe, start, count, resources);
1782 }
1783 
1784 static void
1785 tc_set_global_binding(struct pipe_context *_pipe, unsigned first,
1786                       unsigned count, struct pipe_resource **resources,
1787                       uint32_t **handles)
1788 {
1789    struct threaded_context *tc = threaded_context(_pipe);
1790    struct pipe_context *pipe = tc->pipe;
1791 
1792    tc_sync(tc);
1793    pipe->set_global_binding(pipe, first, count, resources, handles);
1794 }
1795 
1796 
1797 /********************************************************************
1798  * views
1799  */
1800 
1801 static struct pipe_surface *
1802 tc_create_surface(struct pipe_context *_pipe,
1803                   struct pipe_resource *resource,
1804                   const struct pipe_surface *surf_tmpl)
1805 {
1806    struct pipe_context *pipe = threaded_context(_pipe)->pipe;
1807    struct pipe_surface *view =
1808          pipe->create_surface(pipe, resource, surf_tmpl);
1809 
1810    if (view)
1811       view->context = _pipe;
1812    return view;
1813 }
1814 
1815 static void
1816 tc_surface_destroy(struct pipe_context *_pipe,
1817                    struct pipe_surface *surf)
1818 {
1819    struct pipe_context *pipe = threaded_context(_pipe)->pipe;
1820 
1821    pipe->surface_destroy(pipe, surf);
1822 }
1823 
1824 static struct pipe_sampler_view *
1825 tc_create_sampler_view(struct pipe_context *_pipe,
1826                        struct pipe_resource *resource,
1827                        const struct pipe_sampler_view *templ)
1828 {
1829    struct pipe_context *pipe = threaded_context(_pipe)->pipe;
1830    struct pipe_sampler_view *view =
1831          pipe->create_sampler_view(pipe, resource, templ);
1832 
1833    if (view)
1834       view->context = _pipe;
1835    return view;
1836 }
1837 
1838 static void
1839 tc_sampler_view_destroy(struct pipe_context *_pipe,
1840                         struct pipe_sampler_view *view)
1841 {
1842    struct pipe_context *pipe = threaded_context(_pipe)->pipe;
1843 
1844    pipe->sampler_view_destroy(pipe, view);
1845 }
1846 
1847 static struct pipe_stream_output_target *
1848 tc_create_stream_output_target(struct pipe_context *_pipe,
1849                                struct pipe_resource *res,
1850                                unsigned buffer_offset,
1851                                unsigned buffer_size)
1852 {
1853    struct pipe_context *pipe = threaded_context(_pipe)->pipe;
1854    struct threaded_resource *tres = threaded_resource(res);
1855    struct pipe_stream_output_target *view;
1856 
1857    util_range_add(&tres->b, &tres->valid_buffer_range, buffer_offset,
1858                   buffer_offset + buffer_size);
1859 
1860    view = pipe->create_stream_output_target(pipe, res, buffer_offset,
1861                                             buffer_size);
1862    if (view)
1863       view->context = _pipe;
1864    return view;
1865 }
1866 
1867 static void
1868 tc_stream_output_target_destroy(struct pipe_context *_pipe,
1869                                 struct pipe_stream_output_target *target)
1870 {
1871    struct pipe_context *pipe = threaded_context(_pipe)->pipe;
1872 
1873    pipe->stream_output_target_destroy(pipe, target);
1874 }
1875 
1876 
1877 /********************************************************************
1878  * bindless
1879  */
1880 
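/* Bindless handle creation is synchronous because the handle value must be
 * returned to the caller immediately.  Residency changes have no return
 * value, so they are queued as regular calls.
 */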
1881 static uint64_t
1882 tc_create_texture_handle(struct pipe_context *_pipe,
1883                          struct pipe_sampler_view *view,
1884                          const struct pipe_sampler_state *state)
1885 {
1886    struct threaded_context *tc = threaded_context(_pipe);
1887    struct pipe_context *pipe = tc->pipe;
1888 
1889    tc_sync(tc);
1890    return pipe->create_texture_handle(pipe, view, state);
1891 }
1892 
1893 struct tc_make_texture_handle_resident {
1894    struct tc_call_base base;
1895    bool resident;
1896    uint64_t handle;
1897 };
1898 
1899 static uint16_t
1900 tc_call_make_texture_handle_resident(struct pipe_context *pipe, void *call, uint64_t *last)
1901 {
1902    struct tc_make_texture_handle_resident *p =
1903       to_call(call, tc_make_texture_handle_resident);
1904 
1905    pipe->make_texture_handle_resident(pipe, p->handle, p->resident);
1906    return call_size(tc_make_texture_handle_resident);
1907 }
1908 
1909 static void
1910 tc_make_texture_handle_resident(struct pipe_context *_pipe, uint64_t handle,
1911                                 bool resident)
1912 {
1913    struct threaded_context *tc = threaded_context(_pipe);
1914    struct tc_make_texture_handle_resident *p =
1915       tc_add_call(tc, TC_CALL_make_texture_handle_resident,
1916                   tc_make_texture_handle_resident);
1917 
1918    p->handle = handle;
1919    p->resident = resident;
1920 }
1921 
1922 static uint64_t
1923 tc_create_image_handle(struct pipe_context *_pipe,
1924                        const struct pipe_image_view *image)
1925 {
1926    struct threaded_context *tc = threaded_context(_pipe);
1927    struct pipe_context *pipe = tc->pipe;
1928 
1929    if (image->resource->target == PIPE_BUFFER)
1930       tc_buffer_disable_cpu_storage(image->resource);
1931 
1932    tc_sync(tc);
1933    return pipe->create_image_handle(pipe, image);
1934 }
1935 
1936 struct tc_make_image_handle_resident {
1937    struct tc_call_base base;
1938    bool resident;
1939    unsigned access;
1940    uint64_t handle;
1941 };
1942 
1943 static uint16_t
1944 tc_call_make_image_handle_resident(struct pipe_context *pipe, void *call, uint64_t *last)
1945 {
1946    struct tc_make_image_handle_resident *p =
1947       to_call(call, tc_make_image_handle_resident);
1948 
1949    pipe->make_image_handle_resident(pipe, p->handle, p->access, p->resident);
1950    return call_size(tc_make_image_handle_resident);
1951 }
1952 
1953 static void
1954 tc_make_image_handle_resident(struct pipe_context *_pipe, uint64_t handle,
1955                               unsigned access, bool resident)
1956 {
1957    struct threaded_context *tc = threaded_context(_pipe);
1958    struct tc_make_image_handle_resident *p =
1959       tc_add_call(tc, TC_CALL_make_image_handle_resident,
1960                   tc_make_image_handle_resident);
1961 
1962    p->handle = handle;
1963    p->access = access;
1964    p->resident = resident;
1965 }
1966 
1967 
1968 /********************************************************************
1969  * transfer
1970  */
1971 
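/* Buffer invalidation works by replacing the buffer's storage: the frontend
 * thread allocates new storage right away, points tbuf->latest at it, and
 * queues this call so the driver-provided replace_buffer_storage callback
 * swaps the storage underneath the original resource (see
 * tc_invalidate_buffer below).
 */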
1972 struct tc_replace_buffer_storage {
1973    struct tc_call_base base;
1974    uint16_t num_rebinds;
1975    uint32_t rebind_mask;
1976    uint32_t delete_buffer_id;
1977    struct pipe_resource *dst;
1978    struct pipe_resource *src;
1979    tc_replace_buffer_storage_func func;
1980 };
1981 
1982 static uint16_t
1983 tc_call_replace_buffer_storage(struct pipe_context *pipe, void *call, uint64_t *last)
1984 {
1985    struct tc_replace_buffer_storage *p = to_call(call, tc_replace_buffer_storage);
1986 
1987    p->func(pipe, p->dst, p->src, p->num_rebinds, p->rebind_mask, p->delete_buffer_id);
1988 
1989    tc_drop_resource_reference(p->dst);
1990    tc_drop_resource_reference(p->src);
1991    return call_size(tc_replace_buffer_storage);
1992 }
1993 
1994 /* Return true if the buffer has been invalidated or is idle. */
1995 static bool
1996 tc_invalidate_buffer(struct threaded_context *tc,
1997                      struct threaded_resource *tbuf)
1998 {
1999    if (!tc_is_buffer_busy(tc, tbuf, PIPE_MAP_READ_WRITE)) {
2000       /* The buffer is idle, so invalidation would be a no-op. We can still
2001        * clear the valid range because this is technically an invalidation;
2002        * we merely skip the useless reallocation.
2003        *
2004        * If the buffer is bound for write, we can't clear the valid range.
2005        */
2006       if (!tc_is_buffer_bound_for_write(tc, tbuf->buffer_id_unique))
2007          util_range_set_empty(&tbuf->valid_buffer_range);
2008       return true;
2009    }
2010 
2011    struct pipe_screen *screen = tc->base.screen;
2012    struct pipe_resource *new_buf;
2013 
2014    /* Shared, pinned, and sparse buffers can't be reallocated. */
2015    if (tbuf->is_shared ||
2016        tbuf->is_user_ptr ||
2017        tbuf->b.flags & (PIPE_RESOURCE_FLAG_SPARSE | PIPE_RESOURCE_FLAG_UNMAPPABLE))
2018       return false;
2019 
2020    /* Allocate a new one. */
2021    new_buf = screen->resource_create(screen, &tbuf->b);
2022    if (!new_buf)
2023       return false;
2024 
2025    /* Replace the "latest" pointer. */
2026    if (tbuf->latest != &tbuf->b)
2027       pipe_resource_reference(&tbuf->latest, NULL);
2028 
2029    tbuf->latest = new_buf;
2030 
2031    uint32_t delete_buffer_id = tbuf->buffer_id_unique;
2032 
2033    /* Enqueue storage replacement of the original buffer. */
2034    struct tc_replace_buffer_storage *p =
2035       tc_add_call(tc, TC_CALL_replace_buffer_storage,
2036                   tc_replace_buffer_storage);
2037 
2038    p->func = tc->replace_buffer_storage;
2039    tc_set_resource_reference(&p->dst, &tbuf->b);
2040    tc_set_resource_reference(&p->src, new_buf);
2041    p->delete_buffer_id = delete_buffer_id;
2042    p->rebind_mask = 0;
2043 
2044    /* Treat the current buffer as the new buffer. */
2045    bool bound_for_write = tc_is_buffer_bound_for_write(tc, tbuf->buffer_id_unique);
2046    p->num_rebinds = tc_rebind_buffer(tc, tbuf->buffer_id_unique,
2047                                      threaded_resource(new_buf)->buffer_id_unique,
2048                                      &p->rebind_mask);
2049 
2050    /* If the buffer is not bound for write, clear the valid range. */
2051    if (!bound_for_write)
2052       util_range_set_empty(&tbuf->valid_buffer_range);
2053 
2054    tbuf->buffer_id_unique = threaded_resource(new_buf)->buffer_id_unique;
2055    threaded_resource(new_buf)->buffer_id_unique = 0;
2056 
2057    return true;
2058 }
2059 
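/* Adjust buffer-map flags so that as many mappings as possible can proceed
 * without synchronizing with the driver thread: writes to an idle buffer or
 * to a range that has never been initialized become UNSYNCHRONIZED, a
 * DISCARD_RANGE covering the whole buffer is promoted to a whole-resource
 * invalidation, and whole-resource discards are resolved here so drivers
 * never see them (TC_TRANSFER_MAP_NO_INVALIDATE is always added).
 */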
2060 static unsigned
2061 tc_improve_map_buffer_flags(struct threaded_context *tc,
2062                             struct threaded_resource *tres, unsigned usage,
2063                             unsigned offset, unsigned size)
2064 {
2065    /* Never invalidate inside the driver and never infer "unsynchronized". */
2066    unsigned tc_flags = TC_TRANSFER_MAP_NO_INVALIDATE |
2067                        TC_TRANSFER_MAP_NO_INFER_UNSYNCHRONIZED;
2068 
2069    /* Prevent a reentry. */
2070    if (usage & tc_flags)
2071       return usage;
2072 
2073    /* Use the staging upload if it's preferred. */
2074    if (usage & (PIPE_MAP_DISCARD_RANGE |
2075                 PIPE_MAP_DISCARD_WHOLE_RESOURCE) &&
2076        !(usage & PIPE_MAP_PERSISTENT) &&
2077        tres->b.flags & PIPE_RESOURCE_FLAG_DONT_MAP_DIRECTLY &&
2078        tc->use_forced_staging_uploads) {
2079       usage &= ~(PIPE_MAP_DISCARD_WHOLE_RESOURCE |
2080                  PIPE_MAP_UNSYNCHRONIZED);
2081 
2082       return usage | tc_flags | PIPE_MAP_DISCARD_RANGE;
2083    }
2084 
2085    /* Sparse buffers can't be mapped directly and can't be reallocated
2086     * (fully invalidated). That may just be a radeonsi limitation, but
2087     * the threaded context must obey it with radeonsi.
2088     */
2089    if (tres->b.flags & (PIPE_RESOURCE_FLAG_SPARSE | PIPE_RESOURCE_FLAG_UNMAPPABLE)) {
2090       /* We can use DISCARD_RANGE instead of full discard. This is the only
2091        * fast path for sparse buffers that doesn't need thread synchronization.
2092        */
2093       if (usage & PIPE_MAP_DISCARD_WHOLE_RESOURCE)
2094          usage |= PIPE_MAP_DISCARD_RANGE;
2095 
2096       /* Allow DISCARD_WHOLE_RESOURCE and inferring UNSYNCHRONIZED in drivers.
2097        * The threaded context doesn't do unsynchronized mappings and
2098        * invalidations of sparse buffers, so correct driver behavior can't
2099        * lead to incorrect behavior with the threaded context.
2100        */
2101       return usage;
2102    }
2103 
2104    usage |= tc_flags;
2105 
2106    /* Handle CPU reads trivially. */
2107    if (usage & PIPE_MAP_READ) {
2108       if (usage & PIPE_MAP_UNSYNCHRONIZED)
2109          usage |= TC_TRANSFER_MAP_THREADED_UNSYNC; /* don't sync */
2110 
2111       /* Drivers aren't allowed to do buffer invalidations. */
2112       return usage & ~PIPE_MAP_DISCARD_WHOLE_RESOURCE;
2113    }
2114 
2115    /* See if the buffer range being mapped has never been initialized or
2116     * the buffer is idle, in which case it can be mapped unsynchronized. */
2117    if (!(usage & PIPE_MAP_UNSYNCHRONIZED) &&
2118        ((!tres->is_shared &&
2119          !util_ranges_intersect(&tres->valid_buffer_range, offset, offset + size)) ||
2120         !tc_is_buffer_busy(tc, tres, usage)))
2121       usage |= PIPE_MAP_UNSYNCHRONIZED;
2122 
2123    if (!(usage & PIPE_MAP_UNSYNCHRONIZED)) {
2124       /* If discarding the entire range, discard the whole resource instead. */
2125       if (usage & PIPE_MAP_DISCARD_RANGE &&
2126           offset == 0 && size == tres->b.width0)
2127          usage |= PIPE_MAP_DISCARD_WHOLE_RESOURCE;
2128 
2129       /* Discard the whole resource if needed. */
2130       if (usage & PIPE_MAP_DISCARD_WHOLE_RESOURCE) {
2131          if (tc_invalidate_buffer(tc, tres))
2132             usage |= PIPE_MAP_UNSYNCHRONIZED;
2133          else
2134             usage |= PIPE_MAP_DISCARD_RANGE; /* fallback */
2135       }
2136    }
2137 
2138    /* We won't need this flag anymore. */
2139    /* TODO: We might not need TC_TRANSFER_MAP_NO_INVALIDATE with this. */
2140    usage &= ~PIPE_MAP_DISCARD_WHOLE_RESOURCE;
2141 
2142    /* GL_AMD_pinned_memory and persistent mappings can't use staging
2143     * buffers. */
2144    if (usage & (PIPE_MAP_UNSYNCHRONIZED |
2145                 PIPE_MAP_PERSISTENT) ||
2146        tres->is_user_ptr)
2147       usage &= ~PIPE_MAP_DISCARD_RANGE;
2148 
2149    /* Unsynchronized buffer mappings don't have to synchronize the thread. */
2150    if (usage & PIPE_MAP_UNSYNCHRONIZED) {
2151       usage &= ~PIPE_MAP_DISCARD_RANGE;
2152       usage |= TC_TRANSFER_MAP_THREADED_UNSYNC; /* notify the driver */
2153    }
2154 
2155    return usage;
2156 }
2157 
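/* Buffer mapping has three paths:
 *  - if CPU storage is enabled for the resource, return a pointer into it
 *    without involving the driver at all,
 *  - for DISCARD_RANGE, allocate a staging area from the stream uploader and
 *    copy it back on flush/unmap (the driver only sees resource_copy_region),
 *  - otherwise synchronize with the driver thread (unless UNSYNCHRONIZED)
 *    and map the buffer directly.
 */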
2158 static void *
2159 tc_buffer_map(struct pipe_context *_pipe,
2160               struct pipe_resource *resource, unsigned level,
2161               unsigned usage, const struct pipe_box *box,
2162               struct pipe_transfer **transfer)
2163 {
2164    struct threaded_context *tc = threaded_context(_pipe);
2165    struct threaded_resource *tres = threaded_resource(resource);
2166    struct pipe_context *pipe = tc->pipe;
2167 
2168    /* PIPE_MAP_THREAD_SAFE is for glthread, which shouldn't use the CPU storage.
2169     * This shouldn't normally be necessary because glthread only uses large buffers.
2170     */
2171    if (usage & PIPE_MAP_THREAD_SAFE)
2172       tc_buffer_disable_cpu_storage(resource);
2173 
2174    usage = tc_improve_map_buffer_flags(tc, tres, usage, box->x, box->width);
2175 
2176    /* If the CPU storage is enabled, return it directly. */
2177    if (tres->allow_cpu_storage && !(usage & TC_TRANSFER_MAP_UPLOAD_CPU_STORAGE)) {
2178       /* We can't let resource_copy_region disable the CPU storage. */
2179       assert(!(tres->b.flags & PIPE_RESOURCE_FLAG_DONT_MAP_DIRECTLY));
2180 
2181       if (!tres->cpu_storage)
2182          tres->cpu_storage = align_malloc(resource->width0, tc->map_buffer_alignment);
2183 
2184       if (tres->cpu_storage) {
2185          struct threaded_transfer *ttrans = slab_zalloc(&tc->pool_transfers);
2186          ttrans->b.resource = resource;
2187          ttrans->b.usage = usage;
2188          ttrans->b.box = *box;
2189          ttrans->valid_buffer_range = &tres->valid_buffer_range;
2190          ttrans->cpu_storage_mapped = true;
2191          *transfer = &ttrans->b;
2192 
2193          return (uint8_t*)tres->cpu_storage + box->x;
2194       } else {
2195          tres->allow_cpu_storage = false;
2196       }
2197    }
2198 
2199    /* Do a staging transfer within the threaded context. The driver should
2200     * only get resource_copy_region.
2201     */
2202    if (usage & PIPE_MAP_DISCARD_RANGE) {
2203       struct threaded_transfer *ttrans = slab_zalloc(&tc->pool_transfers);
2204       uint8_t *map;
2205 
2206       u_upload_alloc(tc->base.stream_uploader, 0,
2207                      box->width + (box->x % tc->map_buffer_alignment),
2208                      tc->map_buffer_alignment, &ttrans->b.offset,
2209                      &ttrans->staging, (void**)&map);
2210       if (!map) {
2211          slab_free(&tc->pool_transfers, ttrans);
2212          return NULL;
2213       }
2214 
2215       ttrans->b.resource = resource;
2216       ttrans->b.level = 0;
2217       ttrans->b.usage = usage;
2218       ttrans->b.box = *box;
2219       ttrans->b.stride = 0;
2220       ttrans->b.layer_stride = 0;
2221       ttrans->valid_buffer_range = &tres->valid_buffer_range;
2222       ttrans->cpu_storage_mapped = false;
2223       *transfer = &ttrans->b;
2224 
2225       p_atomic_inc(&tres->pending_staging_uploads);
2226       util_range_add(resource, &tres->pending_staging_uploads_range,
2227                      box->x, box->x + box->width);
2228 
2229       return map + (box->x % tc->map_buffer_alignment);
2230    }
2231 
2232    if (usage & PIPE_MAP_UNSYNCHRONIZED &&
2233        p_atomic_read(&tres->pending_staging_uploads) &&
2234        util_ranges_intersect(&tres->pending_staging_uploads_range, box->x, box->x + box->width)) {
2235       /* Write conflict detected between a staging transfer and the direct mapping we're
2236        * going to do. Resolve the conflict by ignoring UNSYNCHRONIZED so the direct mapping
2237        * will have to wait for the staging transfer completion.
2238        * Note: The conflict detection is only based on the mapped range, not on the actual
2239        * written range(s).
2240        */
2241       usage &= ~PIPE_MAP_UNSYNCHRONIZED & ~TC_TRANSFER_MAP_THREADED_UNSYNC;
2242       tc->use_forced_staging_uploads = false;
2243    }
2244 
2245    /* Unsynchronized buffer mappings don't have to synchronize the thread. */
2246    if (!(usage & TC_TRANSFER_MAP_THREADED_UNSYNC)) {
2247       tc_sync_msg(tc, usage & PIPE_MAP_DISCARD_RANGE ? "  discard_range" :
2248                       usage & PIPE_MAP_READ ? "  read" : "  staging conflict");
2249       tc_set_driver_thread(tc);
2250    }
2251 
2252    tc->bytes_mapped_estimate += box->width;
2253 
2254    void *ret = pipe->buffer_map(pipe, tres->latest ? tres->latest : resource,
2255                                 level, usage, box, transfer);
2256    threaded_transfer(*transfer)->valid_buffer_range = &tres->valid_buffer_range;
2257    threaded_transfer(*transfer)->cpu_storage_mapped = false;
2258 
2259    if (!(usage & TC_TRANSFER_MAP_THREADED_UNSYNC))
2260       tc_clear_driver_thread(tc);
2261 
2262    return ret;
2263 }
2264 
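/* Texture mappings always synchronize with the driver thread before mapping;
 * only buffer mappings have unsynchronized fast paths.
 */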
2265 static void *
2266 tc_texture_map(struct pipe_context *_pipe,
2267                struct pipe_resource *resource, unsigned level,
2268                unsigned usage, const struct pipe_box *box,
2269                struct pipe_transfer **transfer)
2270 {
2271    struct threaded_context *tc = threaded_context(_pipe);
2272    struct threaded_resource *tres = threaded_resource(resource);
2273    struct pipe_context *pipe = tc->pipe;
2274 
2275    tc_sync_msg(tc, "texture");
2276    tc_set_driver_thread(tc);
2277 
2278    tc->bytes_mapped_estimate += box->width;
2279 
2280    void *ret = pipe->texture_map(pipe, tres->latest ? tres->latest : resource,
2281                                  level, usage, box, transfer);
2282 
2283    if (!(usage & TC_TRANSFER_MAP_THREADED_UNSYNC))
2284       tc_clear_driver_thread(tc);
2285 
2286    return ret;
2287 }
2288 
2289 struct tc_transfer_flush_region {
2290    struct tc_call_base base;
2291    struct pipe_box box;
2292    struct pipe_transfer *transfer;
2293 };
2294 
2295 static uint16_t
2296 tc_call_transfer_flush_region(struct pipe_context *pipe, void *call, uint64_t *last)
2297 {
2298    struct tc_transfer_flush_region *p = to_call(call, tc_transfer_flush_region);
2299 
2300    pipe->transfer_flush_region(pipe, p->transfer, &p->box);
2301    return call_size(tc_transfer_flush_region);
2302 }
2303 
2304 struct tc_resource_copy_region {
2305    struct tc_call_base base;
2306    unsigned dst_level;
2307    unsigned dstx, dsty, dstz;
2308    unsigned src_level;
2309    struct pipe_box src_box;
2310    struct pipe_resource *dst;
2311    struct pipe_resource *src;
2312 };
2313 
2314 static void
2315 tc_resource_copy_region(struct pipe_context *_pipe,
2316                         struct pipe_resource *dst, unsigned dst_level,
2317                         unsigned dstx, unsigned dsty, unsigned dstz,
2318                         struct pipe_resource *src, unsigned src_level,
2319                         const struct pipe_box *src_box);
2320 
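/* Copy the flushed part of a staging upload back into the real buffer with
 * resource_copy_region and grow the valid range accordingly (except for CPU
 * storage uploads, whose range also covers uninitialized data).
 */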
2321 static void
2322 tc_buffer_do_flush_region(struct threaded_context *tc,
2323                           struct threaded_transfer *ttrans,
2324                           const struct pipe_box *box)
2325 {
2326    struct threaded_resource *tres = threaded_resource(ttrans->b.resource);
2327 
2328    if (ttrans->staging) {
2329       struct pipe_box src_box;
2330 
2331       u_box_1d(ttrans->b.offset + ttrans->b.box.x % tc->map_buffer_alignment +
2332                (box->x - ttrans->b.box.x),
2333                box->width, &src_box);
2334 
2335       /* Copy the staging buffer into the original one. */
2336       tc_resource_copy_region(&tc->base, ttrans->b.resource, 0, box->x, 0, 0,
2337                               ttrans->staging, 0, &src_box);
2338    }
2339 
2340    /* Don't update the valid range when we're uploading the CPU storage
2341     * because it includes the uninitialized range too.
2342     */
2343    if (!(ttrans->b.usage & TC_TRANSFER_MAP_UPLOAD_CPU_STORAGE)) {
2344       util_range_add(&tres->b, ttrans->valid_buffer_range,
2345                      box->x, box->x + box->width);
2346    }
2347 }
2348 
2349 static void
2350 tc_transfer_flush_region(struct pipe_context *_pipe,
2351                          struct pipe_transfer *transfer,
2352                          const struct pipe_box *rel_box)
2353 {
2354    struct threaded_context *tc = threaded_context(_pipe);
2355    struct threaded_transfer *ttrans = threaded_transfer(transfer);
2356    struct threaded_resource *tres = threaded_resource(transfer->resource);
2357    unsigned required_usage = PIPE_MAP_WRITE |
2358                              PIPE_MAP_FLUSH_EXPLICIT;
2359 
2360    if (tres->b.target == PIPE_BUFFER) {
2361       if ((transfer->usage & required_usage) == required_usage) {
2362          struct pipe_box box;
2363 
2364          u_box_1d(transfer->box.x + rel_box->x, rel_box->width, &box);
2365          tc_buffer_do_flush_region(tc, ttrans, &box);
2366       }
2367 
2368       /* Staging transfers don't send the call to the driver. */
2369       if (ttrans->staging)
2370          return;
2371    }
2372 
2373    struct tc_transfer_flush_region *p =
2374       tc_add_call(tc, TC_CALL_transfer_flush_region, tc_transfer_flush_region);
2375    p->transfer = transfer;
2376    p->box = *rel_box;
2377 }
2378 
2379 static void
2380 tc_flush(struct pipe_context *_pipe, struct pipe_fence_handle **fence,
2381          unsigned flags);
2382 
2383 struct tc_buffer_unmap {
2384    struct tc_call_base base;
2385    bool was_staging_transfer;
2386    union {
2387       struct pipe_transfer *transfer;
2388       struct pipe_resource *resource;
2389    };
2390 };
2391 
2392 static uint16_t
2393 tc_call_buffer_unmap(struct pipe_context *pipe, void *call, uint64_t *last)
2394 {
2395    struct tc_buffer_unmap *p = to_call(call, tc_buffer_unmap);
2396 
2397    if (p->was_staging_transfer) {
2398       struct threaded_resource *tres = threaded_resource(p->resource);
2399       /* Nothing to do except keeping track of staging uploads */
2400       assert(tres->pending_staging_uploads > 0);
2401       p_atomic_dec(&tres->pending_staging_uploads);
2402       tc_drop_resource_reference(p->resource);
2403    } else {
2404       pipe->buffer_unmap(pipe, p->transfer);
2405    }
2406 
2407    return call_size(tc_buffer_unmap);
2408 }
2409 
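/* Unmap paths:
 *  - PIPE_MAP_THREAD_SAFE unmaps immediately (it bypasses the queues),
 *  - CPU-storage mappings re-upload the whole storage with tc_buffer_subdata,
 *  - staging transfers only need driver-side bookkeeping of pending uploads,
 *  - direct mappings queue the real buffer_unmap for the driver thread.
 */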
2410 static void
2411 tc_buffer_unmap(struct pipe_context *_pipe, struct pipe_transfer *transfer)
2412 {
2413    struct threaded_context *tc = threaded_context(_pipe);
2414    struct threaded_transfer *ttrans = threaded_transfer(transfer);
2415    struct threaded_resource *tres = threaded_resource(transfer->resource);
2416 
2417    /* PIPE_MAP_THREAD_SAFE is only valid with UNSYNCHRONIZED. It can be
2418     * called from any thread and bypasses all multithreaded queues.
2419     */
2420    if (transfer->usage & PIPE_MAP_THREAD_SAFE) {
2421       assert(transfer->usage & PIPE_MAP_UNSYNCHRONIZED);
2422       assert(!(transfer->usage & (PIPE_MAP_FLUSH_EXPLICIT |
2423                                   PIPE_MAP_DISCARD_RANGE)));
2424 
2425       struct pipe_context *pipe = tc->pipe;
2426       util_range_add(&tres->b, ttrans->valid_buffer_range,
2427                       transfer->box.x, transfer->box.x + transfer->box.width);
2428 
2429       pipe->buffer_unmap(pipe, transfer);
2430       return;
2431    }
2432 
2433    if (transfer->usage & PIPE_MAP_WRITE &&
2434        !(transfer->usage & PIPE_MAP_FLUSH_EXPLICIT))
2435       tc_buffer_do_flush_region(tc, ttrans, &transfer->box);
2436 
2437    if (ttrans->cpu_storage_mapped) {
2438       /* GL allows simultaneous GPU stores with mapped buffers as long as GPU stores don't
2439        * touch the mapped range. That's a problem because GPU stores free the CPU storage.
2440        * If that happens, we just ignore the unmap call and don't upload anything to prevent
2441        * a crash.
2442        *
2443        * Disallow the CPU storage in the driver to work around this.
2444        */
2445       assert(tres->cpu_storage);
2446 
2447       if (tres->cpu_storage) {
2448          tc_invalidate_buffer(tc, tres);
2449          tc_buffer_subdata(&tc->base, &tres->b,
2450                            PIPE_MAP_UNSYNCHRONIZED |
2451                            TC_TRANSFER_MAP_UPLOAD_CPU_STORAGE,
2452                            0, tres->b.width0, tres->cpu_storage);
2453          /* This shouldn't have been freed by buffer_subdata. */
2454          assert(tres->cpu_storage);
2455       } else {
2456          static bool warned_once = false;
2457          if (!warned_once) {
2458             fprintf(stderr, "This application is incompatible with cpu_storage.\n");
2459             fprintf(stderr, "Use tc_max_cpu_storage_size=0 to disable it and report this issue to Mesa.\n");
2460             warned_once = true;
2461          }
2462       }
2463 
2464       tc_drop_resource_reference(ttrans->staging);
2465       slab_free(&tc->pool_transfers, ttrans);
2466       return;
2467    }
2468 
2469    bool was_staging_transfer = false;
2470 
2471    if (ttrans->staging) {
2472       was_staging_transfer = true;
2473 
2474       tc_drop_resource_reference(ttrans->staging);
2475       slab_free(&tc->pool_transfers, ttrans);
2476    }
2477 
2478    struct tc_buffer_unmap *p = tc_add_call(tc, TC_CALL_buffer_unmap,
2479                                            tc_buffer_unmap);
2480    if (was_staging_transfer) {
2481       tc_set_resource_reference(&p->resource, &tres->b);
2482       p->was_staging_transfer = true;
2483    } else {
2484       p->transfer = transfer;
2485       p->was_staging_transfer = false;
2486    }
2487 
2488    /* tc_buffer_map maps buffers directly, but tc_buffer_unmap defers the
2489     * unmap operation to batch execution.
2490     * bytes_mapped_estimate is an estimate of the map/unmap bytes delta;
2491     * if it exceeds the optional limit, the current batch is flushed to
2492     * reclaim some RAM. */
2493    if (!ttrans->staging && tc->bytes_mapped_limit &&
2494        tc->bytes_mapped_estimate > tc->bytes_mapped_limit) {
2495       tc_flush(_pipe, NULL, PIPE_FLUSH_ASYNC);
2496    }
2497 }
2498 
2499 struct tc_texture_unmap {
2500    struct tc_call_base base;
2501    struct pipe_transfer *transfer;
2502 };
2503 
2504 static uint16_t
2505 tc_call_texture_unmap(struct pipe_context *pipe, void *call, uint64_t *last)
2506 {
2507    struct tc_texture_unmap *p = (struct tc_texture_unmap *) call;
2508 
2509    pipe->texture_unmap(pipe, p->transfer);
2510    return call_size(tc_texture_unmap);
2511 }
2512 
2513 static void
2514 tc_texture_unmap(struct pipe_context *_pipe, struct pipe_transfer *transfer)
2515 {
2516    struct threaded_context *tc = threaded_context(_pipe);
2517    struct threaded_transfer *ttrans = threaded_transfer(transfer);
2518 
2519    tc_add_call(tc, TC_CALL_texture_unmap, tc_texture_unmap)->transfer = transfer;
2520 
2521    /* tc_texture_map maps textures directly, but tc_texture_unmap defers the
2522     * unmap operation to batch execution.
2523     * bytes_mapped_estimate is an estimate of the map/unmap bytes delta;
2524     * if it exceeds the optional limit, the current batch is flushed to
2525     * reclaim some RAM. */
2526    if (!ttrans->staging && tc->bytes_mapped_limit &&
2527        tc->bytes_mapped_estimate > tc->bytes_mapped_limit) {
2528       tc_flush(_pipe, NULL, PIPE_FLUSH_ASYNC);
2529    }
2530 }
2531 
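/* buffer_subdata: small synchronized writes are copied into the batch and
 * replayed with pipe->buffer_subdata on the driver thread.  Unsynchronized,
 * large (> TC_MAX_SUBDATA_BYTES), whole-resource-invalidating, or
 * CPU-storage-backed writes go through tc_buffer_map/unmap instead.
 */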
2532 struct tc_buffer_subdata {
2533    struct tc_call_base base;
2534    unsigned usage, offset, size;
2535    struct pipe_resource *resource;
2536    char slot[0]; /* more will be allocated if needed */
2537 };
2538 
2539 static uint16_t
2540 tc_call_buffer_subdata(struct pipe_context *pipe, void *call, uint64_t *last)
2541 {
2542    struct tc_buffer_subdata *p = (struct tc_buffer_subdata *)call;
2543 
2544    pipe->buffer_subdata(pipe, p->resource, p->usage, p->offset, p->size,
2545                         p->slot);
2546    tc_drop_resource_reference(p->resource);
2547    return p->base.num_slots;
2548 }
2549 
2550 static void
2551 tc_buffer_subdata(struct pipe_context *_pipe,
2552                   struct pipe_resource *resource,
2553                   unsigned usage, unsigned offset,
2554                   unsigned size, const void *data)
2555 {
2556    struct threaded_context *tc = threaded_context(_pipe);
2557    struct threaded_resource *tres = threaded_resource(resource);
2558 
2559    if (!size)
2560       return;
2561 
2562    usage |= PIPE_MAP_WRITE;
2563 
2564    /* PIPE_MAP_DIRECTLY suppresses implicit DISCARD_RANGE. */
2565    if (!(usage & PIPE_MAP_DIRECTLY))
2566       usage |= PIPE_MAP_DISCARD_RANGE;
2567 
2568    usage = tc_improve_map_buffer_flags(tc, tres, usage, offset, size);
2569 
2570    /* Unsynchronized and big transfers should use transfer_map. Also handle
2571     * full invalidations, because drivers aren't allowed to do them.
2572     */
2573    if (usage & (PIPE_MAP_UNSYNCHRONIZED |
2574                 PIPE_MAP_DISCARD_WHOLE_RESOURCE) ||
2575        size > TC_MAX_SUBDATA_BYTES ||
2576        tres->cpu_storage) {
2577       struct pipe_transfer *transfer;
2578       struct pipe_box box;
2579       uint8_t *map = NULL;
2580 
2581       u_box_1d(offset, size, &box);
2582 
2583       map = tc_buffer_map(_pipe, resource, 0, usage, &box, &transfer);
2584       if (map) {
2585          memcpy(map, data, size);
2586          tc_buffer_unmap(_pipe, transfer);
2587       }
2588       return;
2589    }
2590 
2591    util_range_add(&tres->b, &tres->valid_buffer_range, offset, offset + size);
2592 
2593    /* The upload is small. Enqueue it. */
2594    struct tc_buffer_subdata *p =
2595       tc_add_slot_based_call(tc, TC_CALL_buffer_subdata, tc_buffer_subdata, size);
2596 
2597    tc_set_resource_reference(&p->resource, resource);
2598    /* This will always be busy because if it wasn't, tc_improve_map_buffer_flags
2599     * would have set UNSYNCHRONIZED and we wouldn't get here.
2600     */
2601    tc_add_to_buffer_list(&tc->buffer_lists[tc->next_buf_list], resource);
2602    p->usage = usage;
2603    p->offset = offset;
2604    p->size = size;
2605    memcpy(p->slot, data, size);
2606 }
2607 
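/* texture_subdata: uploads up to TC_MAX_SUBDATA_BYTES are copied into the
 * batch; anything larger synchronizes and calls the driver directly.
 */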
2608 struct tc_texture_subdata {
2609    struct tc_call_base base;
2610    unsigned level, usage, stride, layer_stride;
2611    struct pipe_box box;
2612    struct pipe_resource *resource;
2613    char slot[0]; /* more will be allocated if needed */
2614 };
2615 
2616 static uint16_t
2617 tc_call_texture_subdata(struct pipe_context *pipe, void *call, uint64_t *last)
2618 {
2619    struct tc_texture_subdata *p = (struct tc_texture_subdata *)call;
2620 
2621    pipe->texture_subdata(pipe, p->resource, p->level, p->usage, &p->box,
2622                          p->slot, p->stride, p->layer_stride);
2623    tc_drop_resource_reference(p->resource);
2624    return p->base.num_slots;
2625 }
2626 
2627 static void
2628 tc_texture_subdata(struct pipe_context *_pipe,
2629                    struct pipe_resource *resource,
2630                    unsigned level, unsigned usage,
2631                    const struct pipe_box *box,
2632                    const void *data, unsigned stride,
2633                    unsigned layer_stride)
2634 {
2635    struct threaded_context *tc = threaded_context(_pipe);
2636    unsigned size;
2637 
2638    assert(box->height >= 1);
2639    assert(box->depth >= 1);
2640 
2641    size = (box->depth - 1) * layer_stride +
2642           (box->height - 1) * stride +
2643           box->width * util_format_get_blocksize(resource->format);
2644    if (!size)
2645       return;
2646 
2647    /* Small uploads can be enqueued, big uploads must sync. */
2648    if (size <= TC_MAX_SUBDATA_BYTES) {
2649       struct tc_texture_subdata *p =
2650          tc_add_slot_based_call(tc, TC_CALL_texture_subdata, tc_texture_subdata, size);
2651 
2652       tc_set_resource_reference(&p->resource, resource);
2653       p->level = level;
2654       p->usage = usage;
2655       p->box = *box;
2656       p->stride = stride;
2657       p->layer_stride = layer_stride;
2658       memcpy(p->slot, data, size);
2659    } else {
2660       struct pipe_context *pipe = tc->pipe;
2661 
2662       tc_sync(tc);
2663       tc_set_driver_thread(tc);
2664       pipe->texture_subdata(pipe, resource, level, usage, box, data,
2665                             stride, layer_stride);
2666       tc_clear_driver_thread(tc);
2667    }
2668 }
2669 
2670 
2671 /********************************************************************
2672  * miscellaneous
2673  */
2674 
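/* Generate a trivial synchronous wrapper for a no-argument query: sync with
 * the driver thread and forward the call, returning its result.
 */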
2675 #define TC_FUNC_SYNC_RET0(ret_type, func) \
2676    static ret_type \
2677    tc_##func(struct pipe_context *_pipe) \
2678    { \
2679       struct threaded_context *tc = threaded_context(_pipe); \
2680       struct pipe_context *pipe = tc->pipe; \
2681       tc_sync(tc); \
2682       return pipe->func(pipe); \
2683    }
2684 
2685 TC_FUNC_SYNC_RET0(uint64_t, get_timestamp)
2686 
2687 static void
2688 tc_get_sample_position(struct pipe_context *_pipe,
2689                        unsigned sample_count, unsigned sample_index,
2690                        float *out_value)
2691 {
2692    struct threaded_context *tc = threaded_context(_pipe);
2693    struct pipe_context *pipe = tc->pipe;
2694 
2695    tc_sync(tc);
2696    pipe->get_sample_position(pipe, sample_count, sample_index,
2697                              out_value);
2698 }
2699 
2700 static enum pipe_reset_status
2701 tc_get_device_reset_status(struct pipe_context *_pipe)
2702 {
2703    struct threaded_context *tc = threaded_context(_pipe);
2704    struct pipe_context *pipe = tc->pipe;
2705 
2706    if (!tc->options.unsynchronized_get_device_reset_status)
2707       tc_sync(tc);
2708 
2709    return pipe->get_device_reset_status(pipe);
2710 }
2711 
2712 static void
2713 tc_set_device_reset_callback(struct pipe_context *_pipe,
2714                              const struct pipe_device_reset_callback *cb)
2715 {
2716    struct threaded_context *tc = threaded_context(_pipe);
2717    struct pipe_context *pipe = tc->pipe;
2718 
2719    tc_sync(tc);
2720    pipe->set_device_reset_callback(pipe, cb);
2721 }
2722 
2723 struct tc_string_marker {
2724    struct tc_call_base base;
2725    int len;
2726    char slot[0]; /* more will be allocated if needed */
2727 };
2728 
2729 static uint16_t
2730 tc_call_emit_string_marker(struct pipe_context *pipe, void *call, uint64_t *last)
2731 {
2732    struct tc_string_marker *p = (struct tc_string_marker *)call;
2733    pipe->emit_string_marker(pipe, p->slot, p->len);
2734    return p->base.num_slots;
2735 }
2736 
2737 static void
2738 tc_emit_string_marker(struct pipe_context *_pipe,
2739                       const char *string, int len)
2740 {
2741    struct threaded_context *tc = threaded_context(_pipe);
2742 
2743    if (len <= TC_MAX_STRING_MARKER_BYTES) {
2744       struct tc_string_marker *p =
2745          tc_add_slot_based_call(tc, TC_CALL_emit_string_marker, tc_string_marker, len);
2746 
2747       memcpy(p->slot, string, len);
2748       p->len = len;
2749    } else {
2750       struct pipe_context *pipe = tc->pipe;
2751 
2752       tc_sync(tc);
2753       tc_set_driver_thread(tc);
2754       pipe->emit_string_marker(pipe, string, len);
2755       tc_clear_driver_thread(tc);
2756    }
2757 }
2758 
2759 static void
2760 tc_dump_debug_state(struct pipe_context *_pipe, FILE *stream,
2761                     unsigned flags)
2762 {
2763    struct threaded_context *tc = threaded_context(_pipe);
2764    struct pipe_context *pipe = tc->pipe;
2765 
2766    tc_sync(tc);
2767    pipe->dump_debug_state(pipe, stream, flags);
2768 }
2769 
2770 static void
2771 tc_set_debug_callback(struct pipe_context *_pipe,
2772                       const struct pipe_debug_callback *cb)
2773 {
2774    struct threaded_context *tc = threaded_context(_pipe);
2775    struct pipe_context *pipe = tc->pipe;
2776 
2777    /* Drop all synchronous debug callbacks. Drivers are expected to be OK
2778     * with this. shader-db will use an environment variable to disable
2779     * the threaded context.
2780     */
2781    if (cb && cb->debug_message && !cb->async)
2782       return;
2783 
2784    tc_sync(tc);
2785    pipe->set_debug_callback(pipe, cb);
2786 }
2787 
2788 static void
2789 tc_set_log_context(struct pipe_context *_pipe, struct u_log_context *log)
2790 {
2791    struct threaded_context *tc = threaded_context(_pipe);
2792    struct pipe_context *pipe = tc->pipe;
2793 
2794    tc_sync(tc);
2795    pipe->set_log_context(pipe, log);
2796 }
2797 
2798 static void
2799 tc_create_fence_fd(struct pipe_context *_pipe,
2800                    struct pipe_fence_handle **fence, int fd,
2801                    enum pipe_fd_type type)
2802 {
2803    struct threaded_context *tc = threaded_context(_pipe);
2804    struct pipe_context *pipe = tc->pipe;
2805 
2806    tc_sync(tc);
2807    pipe->create_fence_fd(pipe, fence, fd, type);
2808 }
2809 
2810 struct tc_fence_call {
2811    struct tc_call_base base;
2812    struct pipe_fence_handle *fence;
2813 };
2814 
2815 static uint16_t
2816 tc_call_fence_server_sync(struct pipe_context *pipe, void *call, uint64_t *last)
2817 {
2818    struct pipe_fence_handle *fence = to_call(call, tc_fence_call)->fence;
2819 
2820    pipe->fence_server_sync(pipe, fence);
2821    pipe->screen->fence_reference(pipe->screen, &fence, NULL);
2822    return call_size(tc_fence_call);
2823 }
2824 
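/* Queue fence_server_sync. A reference to the fence is taken here and
 * dropped by the driver thread once the call has executed.
 */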
2825 static void
2826 tc_fence_server_sync(struct pipe_context *_pipe,
2827                      struct pipe_fence_handle *fence)
2828 {
2829    struct threaded_context *tc = threaded_context(_pipe);
2830    struct pipe_screen *screen = tc->pipe->screen;
2831    struct tc_fence_call *call = tc_add_call(tc, TC_CALL_fence_server_sync,
2832                                             tc_fence_call);
2833 
2834    call->fence = NULL;
2835    screen->fence_reference(screen, &call->fence, fence);
2836 }
2837 
2838 static uint16_t
2839 tc_call_fence_server_signal(struct pipe_context *pipe, void *call, uint64_t *last)
2840 {
2841    struct pipe_fence_handle *fence = to_call(call, tc_fence_call)->fence;
2842 
2843    pipe->fence_server_signal(pipe, fence);
2844    pipe->screen->fence_reference(pipe->screen, &fence, NULL);
2845    return call_size(tc_fence_call);
2846 }
2847 
2848 static void
2849 tc_fence_server_signal(struct pipe_context *_pipe,
2850                            struct pipe_fence_handle *fence)
2851 {
2852    struct threaded_context *tc = threaded_context(_pipe);
2853    struct pipe_screen *screen = tc->pipe->screen;
2854    struct tc_fence_call *call = tc_add_call(tc, TC_CALL_fence_server_signal,
2855                                             tc_fence_call);
2856 
2857    call->fence = NULL;
2858    screen->fence_reference(screen, &call->fence, fence);
2859 }
2860 
2861 static struct pipe_video_codec *
2862 tc_create_video_codec(UNUSED struct pipe_context *_pipe,
2863                       UNUSED const struct pipe_video_codec *templ)
2864 {
2865    unreachable("Threaded context should not be enabled for video APIs");
2866    return NULL;
2867 }
2868 
2869 static struct pipe_video_buffer *
2870 tc_create_video_buffer(UNUSED struct pipe_context *_pipe,
2871                        UNUSED const struct pipe_video_buffer *templ)
2872 {
2873    unreachable("Threaded context should not be enabled for video APIs");
2874    return NULL;
2875 }
2876 
2877 struct tc_context_param {
2878    struct tc_call_base base;
2879    enum pipe_context_param param;
2880    unsigned value;
2881 };
2882 
2883 static uint16_t
2884 tc_call_set_context_param(struct pipe_context *pipe, void *call, uint64_t *last)
2885 {
2886    struct tc_context_param *p = to_call(call, tc_context_param);
2887 
2888    if (pipe->set_context_param)
2889       pipe->set_context_param(pipe, p->param, p->value);
2890 
2891    return call_size(tc_context_param);
2892 }
2893 
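/* Thread pinning is applied to the gallium thread and forwarded to the
 * driver immediately; all other parameters are queued as a regular call.
 */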
2894 static void
2895 tc_set_context_param(struct pipe_context *_pipe,
2896                            enum pipe_context_param param,
2897                            unsigned value)
2898 {
2899    struct threaded_context *tc = threaded_context(_pipe);
2900 
2901    if (param == PIPE_CONTEXT_PARAM_PIN_THREADS_TO_L3_CACHE) {
2902       /* Pin the gallium thread as requested. */
2903       util_set_thread_affinity(tc->queue.threads[0],
2904                                util_get_cpu_caps()->L3_affinity_mask[value],
2905                                NULL, util_get_cpu_caps()->num_cpu_mask_bits);
2906 
2907       /* Execute this immediately (without enqueuing); the driver's
2908        * set_context_param must be thread-safe for this parameter.
2909        */
2910       struct pipe_context *pipe = tc->pipe;
2911       if (pipe->set_context_param)
2912          pipe->set_context_param(pipe, param, value);
2913       return;
2914    }
2915 
2916    if (tc->pipe->set_context_param) {
2917       struct tc_context_param *call =
2918          tc_add_call(tc, TC_CALL_set_context_param, tc_context_param);
2919 
2920       call->param = param;
2921       call->value = value;
2922    }
2923 }
2924 
2925 
2926 /********************************************************************
2927  * draw, launch, clear, blit, copy, flush
2928  */
2929 
2930 struct tc_flush_call {
2931    struct tc_call_base base;
2932    unsigned flags;
2933    struct threaded_context *tc;
2934    struct pipe_fence_handle *fence;
2935 };
2936 
2937 static void
2938 tc_flush_queries(struct threaded_context *tc)
2939 {
2940    struct threaded_query *tq, *tmp;
2941    LIST_FOR_EACH_ENTRY_SAFE(tq, tmp, &tc->unflushed_queries, head_unflushed) {
2942       list_del(&tq->head_unflushed);
2943 
2944       /* Memory release semantics: due to a possible race with
2945        * tc_get_query_result, we must ensure that the linked list changes
2946        * are visible before setting tq->flushed.
2947        */
2948       p_atomic_set(&tq->flushed, true);
2949    }
2950 }
2951 
2952 static uint16_t
2953 tc_call_flush(struct pipe_context *pipe, void *call, uint64_t *last)
2954 {
2955    struct tc_flush_call *p = to_call(call, tc_flush_call);
2956    struct pipe_screen *screen = pipe->screen;
2957 
2958    pipe->flush(pipe, p->fence ? &p->fence : NULL, p->flags);
2959    screen->fence_reference(screen, &p->fence, NULL);
2960 
2961    if (!(p->flags & PIPE_FLUSH_DEFERRED))
2962       tc_flush_queries(p->tc);
2963 
2964    return call_size(tc_flush_call);
2965 }
2966 
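/* If the driver supplies create_fence and the flush is deferred or
 * asynchronous, create the fence from the batch token and queue the
 * flush; otherwise (or on allocation failure) synchronize and flush
 * directly on the application thread.
 */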
2967 static void
2968 tc_flush(struct pipe_context *_pipe, struct pipe_fence_handle **fence,
2969          unsigned flags)
2970 {
2971    struct threaded_context *tc = threaded_context(_pipe);
2972    struct pipe_context *pipe = tc->pipe;
2973    struct pipe_screen *screen = pipe->screen;
2974    bool async = flags & (PIPE_FLUSH_DEFERRED | PIPE_FLUSH_ASYNC);
2975 
2976    if (async && tc->options.create_fence) {
2977       if (fence) {
2978          struct tc_batch *next = &tc->batch_slots[tc->next];
2979 
2980          if (!next->token) {
2981             next->token = malloc(sizeof(*next->token));
2982             if (!next->token)
2983                goto out_of_memory;
2984 
2985             pipe_reference_init(&next->token->ref, 1);
2986             next->token->tc = tc;
2987          }
2988 
2989          screen->fence_reference(screen, fence,
2990                                  tc->options.create_fence(pipe, next->token));
2991          if (!*fence)
2992             goto out_of_memory;
2993       }
2994 
2995       struct tc_flush_call *p = tc_add_call(tc, TC_CALL_flush, tc_flush_call);
2996       p->tc = tc;
2997       p->fence = fence ? *fence : NULL;
2998       p->flags = flags | TC_FLUSH_ASYNC;
2999 
3000       if (!(flags & PIPE_FLUSH_DEFERRED))
3001          tc_batch_flush(tc);
3002       return;
3003    }
3004 
3005 out_of_memory:
3006    tc_sync_msg(tc, flags & PIPE_FLUSH_END_OF_FRAME ? "end of frame" :
3007                    flags & PIPE_FLUSH_DEFERRED ? "deferred fence" : "normal");
3008 
3009    if (!(flags & PIPE_FLUSH_DEFERRED))
3010       tc_flush_queries(tc);
3011    tc_set_driver_thread(tc);
3012    pipe->flush(pipe, fence, flags);
3013    tc_clear_driver_thread(tc);
3014 }
3015 
3016 struct tc_draw_single {
3017    struct tc_call_base base;
3018    unsigned index_bias;
3019    struct pipe_draw_info info;
3020 };
3021 
3022 struct tc_draw_single_drawid {
3023    struct tc_draw_single base;
3024    unsigned drawid_offset;
3025 };
3026 
3027 static uint16_t
3028 tc_call_draw_single_drawid(struct pipe_context *pipe, void *call, uint64_t *last)
3029 {
3030    struct tc_draw_single_drawid *info_drawid = to_call(call, tc_draw_single_drawid);
3031    struct tc_draw_single *info = &info_drawid->base;
3032 
3033    /* u_threaded_context stores start/count in min/max_index for single draws. */
3034    /* Drivers using u_threaded_context shouldn't use min/max_index. */
3035    struct pipe_draw_start_count_bias draw;
3036 
3037    draw.start = info->info.min_index;
3038    draw.count = info->info.max_index;
3039    draw.index_bias = info->index_bias;
3040 
3041    info->info.index_bounds_valid = false;
3042    info->info.has_user_indices = false;
3043    info->info.take_index_buffer_ownership = false;
3044 
3045    pipe->draw_vbo(pipe, &info->info, info_drawid->drawid_offset, NULL, &draw, 1);
3046    if (info->info.index_size)
3047       tc_drop_resource_reference(info->info.index.resource);
3048 
3049    return call_size(tc_draw_single_drawid);
3050 }
3051 
3052 static void
3053 simplify_draw_info(struct pipe_draw_info *info)
3054 {
3055    /* Clear these fields to facilitate draw merging.
3056     * Drivers shouldn't use them.
3057     */
3058    info->has_user_indices = false;
3059    info->index_bounds_valid = false;
3060    info->take_index_buffer_ownership = false;
3061    info->index_bias_varies = false;
3062    info->_pad = 0;
3063 
3064    /* This shouldn't be set when merging single draws. */
3065    info->increment_draw_id = false;
3066 
3067    if (info->index_size) {
3068       if (!info->primitive_restart)
3069          info->restart_index = 0;
3070    } else {
3071       assert(!info->primitive_restart);
3072       info->primitive_restart = false;
3073       info->restart_index = 0;
3074       info->index.resource = NULL;
3075    }
3076 }
3077 
3078 static bool
3079 is_next_call_a_mergeable_draw(struct tc_draw_single *first,
3080                               struct tc_draw_single *next)
3081 {
3082    if (next->base.call_id != TC_CALL_draw_single)
3083       return false;
3084 
3085    simplify_draw_info(&next->info);
3086 
3087    STATIC_ASSERT(offsetof(struct pipe_draw_info, min_index) ==
3088                  sizeof(struct pipe_draw_info) - 8);
3089    STATIC_ASSERT(offsetof(struct pipe_draw_info, max_index) ==
3090                  sizeof(struct pipe_draw_info) - 4);
3091    /* All fields must be the same except start and count. */
3092    /* u_threaded_context stores start/count in min/max_index for single draws. */
3093    return memcmp((uint32_t*)&first->info, (uint32_t*)&next->info,
3094                  DRAW_INFO_SIZE_WITHOUT_MIN_MAX_INDEX) == 0;
3095 }
3096 
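/* Execute a single draw, merging it with any immediately following
 * tc_draw_single calls that differ only in start/count/index_bias into
 * one multi-draw call.
 */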
3097 static uint16_t
3098 tc_call_draw_single(struct pipe_context *pipe, void *call, uint64_t *last_ptr)
3099 {
3100    /* Draw call merging. */
3101    struct tc_draw_single *first = to_call(call, tc_draw_single);
3102    struct tc_draw_single *last = (struct tc_draw_single *)last_ptr;
3103    struct tc_draw_single *next = get_next_call(first, tc_draw_single);
3104 
3105    /* If at least 2 consecutive draw calls can be merged... */
3106    if (next != last &&
3107        next->base.call_id == TC_CALL_draw_single) {
3108       simplify_draw_info(&first->info);
3109 
3110       if (is_next_call_a_mergeable_draw(first, next)) {
3111          /* The maximum number of merged draws is given by the batch size. */
3112          struct pipe_draw_start_count_bias multi[TC_SLOTS_PER_BATCH / call_size(tc_draw_single)];
3113          unsigned num_draws = 2;
3114          bool index_bias_varies = first->index_bias != next->index_bias;
3115 
3116          /* u_threaded_context stores start/count in min/max_index for single draws. */
3117          multi[0].start = first->info.min_index;
3118          multi[0].count = first->info.max_index;
3119          multi[0].index_bias = first->index_bias;
3120          multi[1].start = next->info.min_index;
3121          multi[1].count = next->info.max_index;
3122          multi[1].index_bias = next->index_bias;
3123 
3124          /* Find how many other draws can be merged. */
3125          next = get_next_call(next, tc_draw_single);
3126          for (; next != last && is_next_call_a_mergeable_draw(first, next);
3127               next = get_next_call(next, tc_draw_single), num_draws++) {
3128             /* u_threaded_context stores start/count in min/max_index for single draws. */
3129             multi[num_draws].start = next->info.min_index;
3130             multi[num_draws].count = next->info.max_index;
3131             multi[num_draws].index_bias = next->index_bias;
3132             index_bias_varies |= first->index_bias != next->index_bias;
3133          }
3134 
3135          first->info.index_bias_varies = index_bias_varies;
3136          pipe->draw_vbo(pipe, &first->info, 0, NULL, multi, num_draws);
3137 
3138          /* Since all draws use the same index buffer, drop all references at once. */
3139          if (first->info.index_size)
3140             pipe_drop_resource_references(first->info.index.resource, num_draws);
3141 
3142          return call_size(tc_draw_single) * num_draws;
3143       }
3144    }
3145 
3146    /* u_threaded_context stores start/count in min/max_index for single draws. */
3147    /* Drivers using u_threaded_context shouldn't use min/max_index. */
3148    struct pipe_draw_start_count_bias draw;
3149 
3150    draw.start = first->info.min_index;
3151    draw.count = first->info.max_index;
3152    draw.index_bias = first->index_bias;
3153 
3154    first->info.index_bounds_valid = false;
3155    first->info.has_user_indices = false;
3156    first->info.take_index_buffer_ownership = false;
3157 
3158    pipe->draw_vbo(pipe, &first->info, 0, NULL, &draw, 1);
3159    if (first->info.index_size)
3160       tc_drop_resource_reference(first->info.index.resource);
3161 
3162    return call_size(tc_draw_single);
3163 }
3164 
3165 struct tc_draw_indirect {
3166    struct tc_call_base base;
3167    struct pipe_draw_start_count_bias draw;
3168    struct pipe_draw_info info;
3169    struct pipe_draw_indirect_info indirect;
3170 };
3171 
3172 static uint16_t
3173 tc_call_draw_indirect(struct pipe_context *pipe, void *call, uint64_t *last)
3174 {
3175    struct tc_draw_indirect *info = to_call(call, tc_draw_indirect);
3176 
3177    info->info.index_bounds_valid = false;
3178    info->info.take_index_buffer_ownership = false;
3179 
3180    pipe->draw_vbo(pipe, &info->info, 0, &info->indirect, &info->draw, 1);
3181    if (info->info.index_size)
3182       tc_drop_resource_reference(info->info.index.resource);
3183 
3184    tc_drop_resource_reference(info->indirect.buffer);
3185    tc_drop_resource_reference(info->indirect.indirect_draw_count);
3186    tc_drop_so_target_reference(info->indirect.count_from_stream_output);
3187    return call_size(tc_draw_indirect);
3188 }
3189 
3190 struct tc_draw_multi {
3191    struct tc_call_base base;
3192    unsigned num_draws;
3193    struct pipe_draw_info info;
3194    struct pipe_draw_start_count_bias slot[]; /* variable-sized array */
3195 };
3196 
3197 static uint16_t
3198 tc_call_draw_multi(struct pipe_context *pipe, void *call, uint64_t *last)
3199 {
3200    struct tc_draw_multi *info = (struct tc_draw_multi*)call;
3201 
3202    info->info.has_user_indices = false;
3203    info->info.index_bounds_valid = false;
3204    info->info.take_index_buffer_ownership = false;
3205 
3206    pipe->draw_vbo(pipe, &info->info, 0, NULL, info->slot, info->num_draws);
3207    if (info->info.index_size)
3208       tc_drop_resource_reference(info->info.index.resource);
3209 
3210    return info->base.num_slots;
3211 }
3212 
3213 #define DRAW_INFO_SIZE_WITHOUT_INDEXBUF_AND_MIN_MAX_INDEX \
3214    offsetof(struct pipe_draw_info, index)
3215 
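/* Record a draw. Indirect draws use a dedicated call type, user index
 * buffers are uploaded to a real buffer first, and multi draws are split
 * across batches when they don't fit in the current one.
 */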
3216 void
3217 tc_draw_vbo(struct pipe_context *_pipe, const struct pipe_draw_info *info,
3218             unsigned drawid_offset,
3219             const struct pipe_draw_indirect_info *indirect,
3220             const struct pipe_draw_start_count_bias *draws,
3221             unsigned num_draws)
3222 {
3223    STATIC_ASSERT(DRAW_INFO_SIZE_WITHOUT_INDEXBUF_AND_MIN_MAX_INDEX +
3224                  sizeof(intptr_t) == offsetof(struct pipe_draw_info, min_index));
3225 
3226    struct threaded_context *tc = threaded_context(_pipe);
3227    unsigned index_size = info->index_size;
3228    bool has_user_indices = info->has_user_indices;
3229 
3230    if (unlikely(tc->add_all_gfx_bindings_to_buffer_list))
3231       tc_add_all_gfx_bindings_to_buffer_list(tc);
3232 
3233    if (unlikely(indirect)) {
3234       assert(!has_user_indices);
3235       assert(num_draws == 1);
3236 
3237       struct tc_draw_indirect *p =
3238          tc_add_call(tc, TC_CALL_draw_indirect, tc_draw_indirect);
3239       struct tc_buffer_list *next = &tc->buffer_lists[tc->next_buf_list];
3240 
3241       if (index_size) {
3242          if (!info->take_index_buffer_ownership) {
3243             tc_set_resource_reference(&p->info.index.resource,
3244                                       info->index.resource);
3245          }
3246          tc_add_to_buffer_list(next, info->index.resource);
3247       }
3248       memcpy(&p->info, info, DRAW_INFO_SIZE_WITHOUT_MIN_MAX_INDEX);
3249 
3250       tc_set_resource_reference(&p->indirect.buffer, indirect->buffer);
3251       tc_set_resource_reference(&p->indirect.indirect_draw_count,
3252                                 indirect->indirect_draw_count);
3253       p->indirect.count_from_stream_output = NULL;
3254       pipe_so_target_reference(&p->indirect.count_from_stream_output,
3255                                indirect->count_from_stream_output);
3256 
3257       if (indirect->buffer)
3258          tc_add_to_buffer_list(next, indirect->buffer);
3259       if (indirect->indirect_draw_count)
3260          tc_add_to_buffer_list(next, indirect->indirect_draw_count);
3261       if (indirect->count_from_stream_output)
3262          tc_add_to_buffer_list(next, indirect->count_from_stream_output->buffer);
3263 
3264       memcpy(&p->indirect, indirect, sizeof(*indirect));
3265       p->draw.start = draws[0].start;
3266       return;
3267    }
3268 
3269    if (num_draws == 1) {
3270       /* Single draw. */
3271       if (index_size && has_user_indices) {
3272          unsigned size = draws[0].count * index_size;
3273          struct pipe_resource *buffer = NULL;
3274          unsigned offset;
3275 
3276          if (!size)
3277             return;
3278 
3279          /* This must be done before adding the draw_vbo call, because the
3280           * upload could trigger e.g. transfer_unmap and flush a partially
3281           * initialized draw_vbo call to the driver if done afterwards.
3282           */
3283          u_upload_data(tc->base.stream_uploader, 0, size, 4,
3284                        (uint8_t*)info->index.user + draws[0].start * index_size,
3285                        &offset, &buffer);
3286          if (unlikely(!buffer))
3287             return;
3288 
3289          struct tc_draw_single *p = drawid_offset > 0 ?
3290             &tc_add_call(tc, TC_CALL_draw_single_drawid, tc_draw_single_drawid)->base :
3291             tc_add_call(tc, TC_CALL_draw_single, tc_draw_single);
3292          memcpy(&p->info, info, DRAW_INFO_SIZE_WITHOUT_INDEXBUF_AND_MIN_MAX_INDEX);
3293          p->info.index.resource = buffer;
3294          if (drawid_offset > 0)
3295             ((struct tc_draw_single_drawid*)p)->drawid_offset = drawid_offset;
3296          /* u_threaded_context stores start/count in min/max_index for single draws. */
3297          p->info.min_index = offset >> util_logbase2(index_size);
3298          p->info.max_index = draws[0].count;
3299          p->index_bias = draws[0].index_bias;
3300       } else {
3301          /* Non-indexed call or indexed with a real index buffer. */
3302          struct tc_draw_single *p = drawid_offset > 0 ?
3303             &tc_add_call(tc, TC_CALL_draw_single_drawid, tc_draw_single_drawid)->base :
3304             tc_add_call(tc, TC_CALL_draw_single, tc_draw_single);
3305          if (index_size) {
3306             if (!info->take_index_buffer_ownership) {
3307                tc_set_resource_reference(&p->info.index.resource,
3308                                          info->index.resource);
3309             }
3310             tc_add_to_buffer_list(&tc->buffer_lists[tc->next_buf_list], info->index.resource);
3311          }
3312          if (drawid_offset > 0)
3313             ((struct tc_draw_single_drawid*)p)->drawid_offset = drawid_offset;
3314          memcpy(&p->info, info, DRAW_INFO_SIZE_WITHOUT_MIN_MAX_INDEX);
3315          /* u_threaded_context stores start/count in min/max_index for single draws. */
3316          p->info.min_index = draws[0].start;
3317          p->info.max_index = draws[0].count;
3318          p->index_bias = draws[0].index_bias;
3319       }
3320       return;
3321    }
3322 
3323    const int draw_overhead_bytes = sizeof(struct tc_draw_multi);
3324    const int one_draw_slot_bytes = sizeof(((struct tc_draw_multi*)NULL)->slot[0]);
3325    const int slots_for_one_draw = DIV_ROUND_UP(draw_overhead_bytes + one_draw_slot_bytes,
3326                                                sizeof(struct tc_call_base));
3327    /* Multi draw. */
3328    if (index_size && has_user_indices) {
3329       struct pipe_resource *buffer = NULL;
3330       unsigned buffer_offset, total_count = 0;
3331       unsigned index_size_shift = util_logbase2(index_size);
3332       uint8_t *ptr = NULL;
3333 
3334       /* Get the total count. */
3335       for (unsigned i = 0; i < num_draws; i++)
3336          total_count += draws[i].count;
3337 
3338       if (!total_count)
3339          return;
3340 
3341       /* Allocate space for all index buffers.
3342        *
3343        * This must be done before adding the draw_vbo call, because the
3344        * upload could trigger e.g. transfer_unmap and flush a partially
3345        * initialized draw_vbo call to the driver if done afterwards.
3346        */
3347       u_upload_alloc(tc->base.stream_uploader, 0,
3348                      total_count << index_size_shift, 4,
3349                      &buffer_offset, &buffer, (void**)&ptr);
3350       if (unlikely(!buffer))
3351          return;
3352 
3353       int total_offset = 0;
3354       while (num_draws) {
3355          struct tc_batch *next = &tc->batch_slots[tc->next];
3356 
3357          int nb_slots_left = TC_SLOTS_PER_BATCH - next->num_total_slots;
3358          /* If there isn't enough room for even one draw, start filling the next batch. */
3359          if (nb_slots_left < slots_for_one_draw)
3360             nb_slots_left = TC_SLOTS_PER_BATCH;
3361          const int size_left_bytes = nb_slots_left * sizeof(struct tc_call_base);
3362 
3363          /* How many draws can we fit in the current batch */
3364          const int dr = MIN2(num_draws, (size_left_bytes - draw_overhead_bytes) / one_draw_slot_bytes);
3365 
3366          struct tc_draw_multi *p =
3367             tc_add_slot_based_call(tc, TC_CALL_draw_multi, tc_draw_multi,
3368                                    dr);
3369          memcpy(&p->info, info, DRAW_INFO_SIZE_WITHOUT_INDEXBUF_AND_MIN_MAX_INDEX);
3370          p->info.index.resource = buffer;
3371          p->num_draws = dr;
3372 
3373          /* Upload index buffers. */
3374          for (unsigned i = 0, offset = 0; i < dr; i++) {
3375             unsigned count = draws[i + total_offset].count;
3376 
3377             if (!count) {
3378                p->slot[i].start = 0;
3379                p->slot[i].count = 0;
3380                p->slot[i].index_bias = 0;
3381                continue;
3382             }
3383 
3384             unsigned size = count << index_size_shift;
3385             memcpy(ptr + offset,
3386                    (uint8_t*)info->index.user +
3387                    (draws[i + total_offset].start << index_size_shift), size);
3388             p->slot[i].start = (buffer_offset + offset) >> index_size_shift;
3389             p->slot[i].count = count;
3390             p->slot[i].index_bias = draws[i + total_offset].index_bias;
3391             offset += size;
3392          }
3393 
3394          total_offset += dr;
3395          num_draws -= dr;
3396       }
3397    } else {
3398       int total_offset = 0;
3399       bool take_index_buffer_ownership = info->take_index_buffer_ownership;
3400       while (num_draws) {
3401          struct tc_batch *next = &tc->batch_slots[tc->next];
3402 
3403          int nb_slots_left = TC_SLOTS_PER_BATCH - next->num_total_slots;
3404          /* If there isn't enough room for even one draw, start filling the next batch. */
3405          if (nb_slots_left < slots_for_one_draw)
3406             nb_slots_left = TC_SLOTS_PER_BATCH;
3407          const int size_left_bytes = nb_slots_left * sizeof(struct tc_call_base);
3408 
3409          /* How many draws can we fit in the current batch */
3410          const int dr = MIN2(num_draws, (size_left_bytes - draw_overhead_bytes) / one_draw_slot_bytes);
3411 
3412          /* Non-indexed call or indexed with a real index buffer. */
3413          struct tc_draw_multi *p =
3414             tc_add_slot_based_call(tc, TC_CALL_draw_multi, tc_draw_multi,
3415                                    dr);
3416          if (index_size) {
3417             if (!take_index_buffer_ownership) {
3418                tc_set_resource_reference(&p->info.index.resource,
3419                                          info->index.resource);
3420             }
3421             tc_add_to_buffer_list(&tc->buffer_lists[tc->next_buf_list], info->index.resource);
3422          }
3423          take_index_buffer_ownership = false;
3424          memcpy(&p->info, info, DRAW_INFO_SIZE_WITHOUT_MIN_MAX_INDEX);
3425          p->num_draws = dr;
3426          memcpy(p->slot, &draws[total_offset], sizeof(draws[0]) * dr);
3427          num_draws -= dr;
3428 
3429          total_offset += dr;
3430       }
3431    }
3432 }
3433 
3434 struct tc_draw_vstate_single {
3435    struct tc_call_base base;
3436    struct pipe_draw_start_count_bias draw;
3437 
3438    /* The following states must be together without holes because they are
3439     * compared by draw merging.
3440     */
3441    struct pipe_vertex_state *state;
3442    uint32_t partial_velem_mask;
3443    struct pipe_draw_vertex_state_info info;
3444 };
3445 
3446 static bool
3447 is_next_call_a_mergeable_draw_vstate(struct tc_draw_vstate_single *first,
3448                                      struct tc_draw_vstate_single *next)
3449 {
3450    if (next->base.call_id != TC_CALL_draw_vstate_single)
3451       return false;
3452 
3453    return !memcmp(&first->state, &next->state,
3454                   offsetof(struct tc_draw_vstate_single, info) +
3455                   sizeof(struct pipe_draw_vertex_state_info) -
3456                   offsetof(struct tc_draw_vstate_single, state));
3457 }
3458 
3459 static uint16_t
3460 tc_call_draw_vstate_single(struct pipe_context *pipe, void *call, uint64_t *last_ptr)
3461 {
3462    /* Draw call merging. */
3463    struct tc_draw_vstate_single *first = to_call(call, tc_draw_vstate_single);
3464    struct tc_draw_vstate_single *last = (struct tc_draw_vstate_single *)last_ptr;
3465    struct tc_draw_vstate_single *next = get_next_call(first, tc_draw_vstate_single);
3466 
3467    /* If at least 2 consecutive draw calls can be merged... */
3468    if (next != last &&
3469        is_next_call_a_mergeable_draw_vstate(first, next)) {
3470       /* The maximum number of merged draws is given by the batch size. */
3471       struct pipe_draw_start_count_bias draws[TC_SLOTS_PER_BATCH /
3472                                               call_size(tc_draw_vstate_single)];
3473       unsigned num_draws = 2;
3474 
3475       draws[0] = first->draw;
3476       draws[1] = next->draw;
3477 
3478       /* Find how many other draws can be merged. */
3479       next = get_next_call(next, tc_draw_vstate_single);
3480       for (; next != last &&
3481            is_next_call_a_mergeable_draw_vstate(first, next);
3482            next = get_next_call(next, tc_draw_vstate_single),
3483            num_draws++)
3484          draws[num_draws] = next->draw;
3485 
3486       pipe->draw_vertex_state(pipe, first->state, first->partial_velem_mask,
3487                               first->info, draws, num_draws);
3488       /* Since all draws use the same state, drop all references at once. */
3489       tc_drop_vertex_state_references(first->state, num_draws);
3490 
3491       return call_size(tc_draw_vstate_single) * num_draws;
3492    }
3493 
3494    pipe->draw_vertex_state(pipe, first->state, first->partial_velem_mask,
3495                            first->info, &first->draw, 1);
3496    tc_drop_vertex_state_references(first->state, 1);
3497    return call_size(tc_draw_vstate_single);
3498 }
3499 
3500 struct tc_draw_vstate_multi {
3501    struct tc_call_base base;
3502    uint32_t partial_velem_mask;
3503    struct pipe_draw_vertex_state_info info;
3504    unsigned num_draws;
3505    struct pipe_vertex_state *state;
3506    struct pipe_draw_start_count_bias slot[0];
3507 };
3508 
3509 static uint16_t
3510 tc_call_draw_vstate_multi(struct pipe_context *pipe, void *call, uint64_t *last)
3511 {
3512    struct tc_draw_vstate_multi *info = (struct tc_draw_vstate_multi*)call;
3513 
3514    pipe->draw_vertex_state(pipe, info->state, info->partial_velem_mask,
3515                            info->info, info->slot, info->num_draws);
3516    tc_drop_vertex_state_references(info->state, 1);
3517    return info->base.num_slots;
3518 }
3519 
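/* Record a draw_vertex_state call. Multi draws are split to fit the
 * current batch; each queued call holds (or takes over) a vertex state
 * reference that is dropped when the call executes.
 */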
3520 static void
3521 tc_draw_vertex_state(struct pipe_context *_pipe,
3522                      struct pipe_vertex_state *state,
3523                      uint32_t partial_velem_mask,
3524                      struct pipe_draw_vertex_state_info info,
3525                      const struct pipe_draw_start_count_bias *draws,
3526                      unsigned num_draws)
3527 {
3528    struct threaded_context *tc = threaded_context(_pipe);
3529 
3530    if (unlikely(tc->add_all_gfx_bindings_to_buffer_list))
3531       tc_add_all_gfx_bindings_to_buffer_list(tc);
3532 
3533    if (num_draws == 1) {
3534       /* Single draw. */
3535       struct tc_draw_vstate_single *p =
3536          tc_add_call(tc, TC_CALL_draw_vstate_single, tc_draw_vstate_single);
3537       p->partial_velem_mask = partial_velem_mask;
3538       p->draw = draws[0];
3539       p->info.mode = info.mode;
3540       p->info.take_vertex_state_ownership = false;
3541 
3542       /* This should always be 0 for simplicity because we assume that
3543        * index_bias doesn't vary.
3544        */
3545       assert(draws[0].index_bias == 0);
3546 
3547       if (!info.take_vertex_state_ownership)
3548          tc_set_vertex_state_reference(&p->state, state);
3549       else
3550          p->state = state;
3551       return;
3552    }
3553 
3554    const int draw_overhead_bytes = sizeof(struct tc_draw_vstate_multi);
3555    const int one_draw_slot_bytes = sizeof(((struct tc_draw_vstate_multi*)NULL)->slot[0]);
3556    const int slots_for_one_draw = DIV_ROUND_UP(draw_overhead_bytes + one_draw_slot_bytes,
3557                                                sizeof(struct tc_call_base));
3558    /* Multi draw. */
3559    int total_offset = 0;
3560    bool take_vertex_state_ownership = info.take_vertex_state_ownership;
3561    while (num_draws) {
3562       struct tc_batch *next = &tc->batch_slots[tc->next];
3563 
3564       int nb_slots_left = TC_SLOTS_PER_BATCH - next->num_total_slots;
3565       /* If there isn't enough room for even one draw, start filling the next batch. */
3566       if (nb_slots_left < slots_for_one_draw)
3567          nb_slots_left = TC_SLOTS_PER_BATCH;
3568       const int size_left_bytes = nb_slots_left * sizeof(struct tc_call_base);
3569 
3570       /* How many draws can we fit in the current batch */
3571       const int dr = MIN2(num_draws, (size_left_bytes - draw_overhead_bytes) / one_draw_slot_bytes);
3572 
3573       /* Non-indexed call or indexed with a real index buffer. */
3574       struct tc_draw_vstate_multi *p =
3575          tc_add_slot_based_call(tc, TC_CALL_draw_vstate_multi, tc_draw_vstate_multi, dr);
3576 
3577       if (!take_vertex_state_ownership)
3578          tc_set_vertex_state_reference(&p->state, state);
3579       else
3580          p->state = state;
3581 
3582       take_vertex_state_ownership = false;
3583       p->partial_velem_mask = partial_velem_mask;
3584       p->info.mode = info.mode;
3585       p->info.take_vertex_state_ownership = false;
3586       p->num_draws = dr;
3587       memcpy(p->slot, &draws[total_offset], sizeof(draws[0]) * dr);
3588       num_draws -= dr;
3589 
3590       total_offset += dr;
3591    }
3592 }
3593 
3594 struct tc_launch_grid_call {
3595    struct tc_call_base base;
3596    struct pipe_grid_info info;
3597 };
3598 
3599 static uint16_t
3600 tc_call_launch_grid(struct pipe_context *pipe, void *call, uint64_t *last)
3601 {
3602    struct pipe_grid_info *p = &to_call(call, tc_launch_grid_call)->info;
3603 
3604    pipe->launch_grid(pipe, p);
3605    tc_drop_resource_reference(p->indirect);
3606    return call_size(tc_launch_grid_call);
3607 }
3608 
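/* Queue a compute dispatch. The indirect buffer, if any, is referenced
 * and added to the current buffer list so its pending use is tracked.
 */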
3609 static void
3610 tc_launch_grid(struct pipe_context *_pipe,
3611                const struct pipe_grid_info *info)
3612 {
3613    struct threaded_context *tc = threaded_context(_pipe);
3614    struct tc_launch_grid_call *p = tc_add_call(tc, TC_CALL_launch_grid,
3615                                                tc_launch_grid_call);
3616    assert(info->input == NULL);
3617 
3618    if (unlikely(tc->add_all_compute_bindings_to_buffer_list))
3619       tc_add_all_compute_bindings_to_buffer_list(tc);
3620 
3621    tc_set_resource_reference(&p->info.indirect, info->indirect);
3622    memcpy(&p->info, info, sizeof(*info));
3623 
3624    if (info->indirect)
3625       tc_add_to_buffer_list(&tc->buffer_lists[tc->next_buf_list], info->indirect);
3626 }
3627 
3628 static uint16_t
3629 tc_call_resource_copy_region(struct pipe_context *pipe, void *call, uint64_t *last)
3630 {
3631    struct tc_resource_copy_region *p = to_call(call, tc_resource_copy_region);
3632 
3633    pipe->resource_copy_region(pipe, p->dst, p->dst_level, p->dstx, p->dsty,
3634                               p->dstz, p->src, p->src_level, &p->src_box);
3635    tc_drop_resource_reference(p->dst);
3636    tc_drop_resource_reference(p->src);
3637    return call_size(tc_resource_copy_region);
3638 }
3639 
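/* Queue resource_copy_region. For buffer destinations, CPU storage is
 * disabled, both buffers are added to the current buffer list, and the
 * destination's valid range is extended to cover the written region.
 */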
3640 static void
3641 tc_resource_copy_region(struct pipe_context *_pipe,
3642                         struct pipe_resource *dst, unsigned dst_level,
3643                         unsigned dstx, unsigned dsty, unsigned dstz,
3644                         struct pipe_resource *src, unsigned src_level,
3645                         const struct pipe_box *src_box)
3646 {
3647    struct threaded_context *tc = threaded_context(_pipe);
3648    struct threaded_resource *tdst = threaded_resource(dst);
3649    struct tc_resource_copy_region *p =
3650       tc_add_call(tc, TC_CALL_resource_copy_region,
3651                   tc_resource_copy_region);
3652 
3653    if (dst->target == PIPE_BUFFER)
3654       tc_buffer_disable_cpu_storage(dst);
3655 
3656    tc_set_resource_reference(&p->dst, dst);
3657    p->dst_level = dst_level;
3658    p->dstx = dstx;
3659    p->dsty = dsty;
3660    p->dstz = dstz;
3661    tc_set_resource_reference(&p->src, src);
3662    p->src_level = src_level;
3663    p->src_box = *src_box;
3664 
3665    if (dst->target == PIPE_BUFFER) {
3666       struct tc_buffer_list *next = &tc->buffer_lists[tc->next_buf_list];
3667 
3668       tc_add_to_buffer_list(next, src);
3669       tc_add_to_buffer_list(next, dst);
3670 
3671       util_range_add(&tdst->b, &tdst->valid_buffer_range,
3672                      dstx, dstx + src_box->width);
3673    }
3674 }
3675 
3676 struct tc_blit_call {
3677    struct tc_call_base base;
3678    struct pipe_blit_info info;
3679 };
3680 
3681 static uint16_t
3682 tc_call_blit(struct pipe_context *pipe, void *call, uint64_t *last)
3683 {
3684    struct pipe_blit_info *blit = &to_call(call, tc_blit_call)->info;
3685 
3686    pipe->blit(pipe, blit);
3687    tc_drop_resource_reference(blit->dst.resource);
3688    tc_drop_resource_reference(blit->src.resource);
3689    return call_size(tc_blit_call);
3690 }
3691 
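/* Queue a blit; the source and destination resources stay referenced
 * until the call executes.
 */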
3692 static void
3693 tc_blit(struct pipe_context *_pipe, const struct pipe_blit_info *info)
3694 {
3695    struct threaded_context *tc = threaded_context(_pipe);
3696    struct tc_blit_call *blit = tc_add_call(tc, TC_CALL_blit, tc_blit_call);
3697 
3698    tc_set_resource_reference(&blit->info.dst.resource, info->dst.resource);
3699    tc_set_resource_reference(&blit->info.src.resource, info->src.resource);
3700    memcpy(&blit->info, info, sizeof(*info));
3701 }
3702 
3703 struct tc_generate_mipmap {
3704    struct tc_call_base base;
3705    enum pipe_format format;
3706    unsigned base_level;
3707    unsigned last_level;
3708    unsigned first_layer;
3709    unsigned last_layer;
3710    struct pipe_resource *res;
3711 };
3712 
3713 static uint16_t
3714 tc_call_generate_mipmap(struct pipe_context *pipe, void *call, uint64_t *last)
3715 {
3716    struct tc_generate_mipmap *p = to_call(call, tc_generate_mipmap);
3717    ASSERTED bool result = pipe->generate_mipmap(pipe, p->res, p->format,
3718                                                     p->base_level,
3719                                                     p->last_level,
3720                                                     p->first_layer,
3721                                                     p->last_layer);
3722    assert(result);
3723    tc_drop_resource_reference(p->res);
3724    return call_size(tc_generate_mipmap);
3725 }
3726 
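/* Format support is checked directly on the screen (screen functions are
 * expected to be thread-safe); mipmap generation is queued only if the
 * format is supported.
 */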
3727 static bool
3728 tc_generate_mipmap(struct pipe_context *_pipe,
3729                    struct pipe_resource *res,
3730                    enum pipe_format format,
3731                    unsigned base_level,
3732                    unsigned last_level,
3733                    unsigned first_layer,
3734                    unsigned last_layer)
3735 {
3736    struct threaded_context *tc = threaded_context(_pipe);
3737    struct pipe_context *pipe = tc->pipe;
3738    struct pipe_screen *screen = pipe->screen;
3739    unsigned bind = PIPE_BIND_SAMPLER_VIEW;
3740 
3741    if (util_format_is_depth_or_stencil(format))
3742       bind = PIPE_BIND_DEPTH_STENCIL;
3743    else
3744       bind = PIPE_BIND_RENDER_TARGET;
3745 
3746    if (!screen->is_format_supported(screen, format, res->target,
3747                                     res->nr_samples, res->nr_storage_samples,
3748                                     bind))
3749       return false;
3750 
3751    struct tc_generate_mipmap *p =
3752       tc_add_call(tc, TC_CALL_generate_mipmap, tc_generate_mipmap);
3753 
3754    tc_set_resource_reference(&p->res, res);
3755    p->format = format;
3756    p->base_level = base_level;
3757    p->last_level = last_level;
3758    p->first_layer = first_layer;
3759    p->last_layer = last_layer;
3760    return true;
3761 }
3762 
3763 struct tc_resource_call {
3764    struct tc_call_base base;
3765    struct pipe_resource *resource;
3766 };
3767 
3768 static uint16_t
3769 tc_call_flush_resource(struct pipe_context *pipe, void *call, uint64_t *last)
3770 {
3771    struct pipe_resource *resource = to_call(call, tc_resource_call)->resource;
3772 
3773    pipe->flush_resource(pipe, resource);
3774    tc_drop_resource_reference(resource);
3775    return call_size(tc_resource_call);
3776 }
3777 
3778 static void
3779 tc_flush_resource(struct pipe_context *_pipe, struct pipe_resource *resource)
3780 {
3781    struct threaded_context *tc = threaded_context(_pipe);
3782    struct tc_resource_call *call = tc_add_call(tc, TC_CALL_flush_resource,
3783                                                tc_resource_call);
3784 
3785    tc_set_resource_reference(&call->resource, resource);
3786 }
3787 
3788 static uint16_t
3789 tc_call_invalidate_resource(struct pipe_context *pipe, void *call, uint64_t *last)
3790 {
3791    struct pipe_resource *resource = to_call(call, tc_resource_call)->resource;
3792 
3793    pipe->invalidate_resource(pipe, resource);
3794    tc_drop_resource_reference(resource);
3795    return call_size(tc_resource_call);
3796 }
3797 
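/* Buffer invalidation is handled by u_threaded_context itself through
 * tc_invalidate_buffer; other resources are queued for the driver.
 */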
3798 static void
3799 tc_invalidate_resource(struct pipe_context *_pipe,
3800                        struct pipe_resource *resource)
3801 {
3802    struct threaded_context *tc = threaded_context(_pipe);
3803 
3804    if (resource->target == PIPE_BUFFER) {
3805       tc_invalidate_buffer(tc, threaded_resource(resource));
3806       return;
3807    }
3808 
3809    struct tc_resource_call *call = tc_add_call(tc, TC_CALL_invalidate_resource,
3810                                                tc_resource_call);
3811    tc_set_resource_reference(&call->resource, resource);
3812 }
3813 
3814 struct tc_clear {
3815    struct tc_call_base base;
3816    bool scissor_state_set;
3817    uint8_t stencil;
3818    uint16_t buffers;
3819    float depth;
3820    struct pipe_scissor_state scissor_state;
3821    union pipe_color_union color;
3822 };
3823 
3824 static uint16_t
3825 tc_call_clear(struct pipe_context *pipe, void *call, uint64_t *last)
3826 {
3827    struct tc_clear *p = to_call(call, tc_clear);
3828 
3829    pipe->clear(pipe, p->buffers, p->scissor_state_set ? &p->scissor_state : NULL, &p->color, p->depth, p->stencil);
3830    return call_size(tc_clear);
3831 }
3832 
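/* Queue a framebuffer clear; the optional scissor state is copied by
 * value and its presence recorded in scissor_state_set.
 */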
3833 static void
3834 tc_clear(struct pipe_context *_pipe, unsigned buffers, const struct pipe_scissor_state *scissor_state,
3835          const union pipe_color_union *color, double depth,
3836          unsigned stencil)
3837 {
3838    struct threaded_context *tc = threaded_context(_pipe);
3839    struct tc_clear *p = tc_add_call(tc, TC_CALL_clear, tc_clear);
3840 
3841    p->buffers = buffers;
3842    if (scissor_state)
3843       p->scissor_state = *scissor_state;
3844    p->scissor_state_set = !!scissor_state;
3845    p->color = *color;
3846    p->depth = depth;
3847    p->stencil = stencil;
3848 }
3849 
3850 struct tc_clear_render_target {
3851    struct tc_call_base base;
3852    bool render_condition_enabled;
3853    unsigned dstx;
3854    unsigned dsty;
3855    unsigned width;
3856    unsigned height;
3857    union pipe_color_union color;
3858    struct pipe_surface *dst;
3859 };
3860 
3861 static uint16_t
3862 tc_call_clear_render_target(struct pipe_context *pipe, void *call, uint64_t *last)
3863 {
3864    struct tc_clear_render_target *p = to_call(call, tc_clear_render_target);
3865 
3866    pipe->clear_render_target(pipe, p->dst, &p->color, p->dstx, p->dsty, p->width, p->height,
3867                              p->render_condition_enabled);
3868    tc_drop_surface_reference(p->dst);
3869    return call_size(tc_clear_render_target);
3870 }
3871 
3872 static void
3873 tc_clear_render_target(struct pipe_context *_pipe,
3874                        struct pipe_surface *dst,
3875                        const union pipe_color_union *color,
3876                        unsigned dstx, unsigned dsty,
3877                        unsigned width, unsigned height,
3878                        bool render_condition_enabled)
3879 {
3880    struct threaded_context *tc = threaded_context(_pipe);
3881    struct tc_clear_render_target *p = tc_add_call(tc, TC_CALL_clear_render_target, tc_clear_render_target);
3882    p->dst = NULL;
3883    pipe_surface_reference(&p->dst, dst);
3884    p->color = *color;
3885    p->dstx = dstx;
3886    p->dsty = dsty;
3887    p->width = width;
3888    p->height = height;
3889    p->render_condition_enabled = render_condition_enabled;
3890 }
3891 
3892 
3893 struct tc_clear_depth_stencil {
3894    struct tc_call_base base;
3895    bool render_condition_enabled;
3896    float depth;
3897    unsigned clear_flags;
3898    unsigned stencil;
3899    unsigned dstx;
3900    unsigned dsty;
3901    unsigned width;
3902    unsigned height;
3903    struct pipe_surface *dst;
3904 };
3905 
3906 
3907 static uint16_t
3908 tc_call_clear_depth_stencil(struct pipe_context *pipe, void *call, uint64_t *last)
3909 {
3910    struct tc_clear_depth_stencil *p = to_call(call, tc_clear_depth_stencil);
3911 
3912    pipe->clear_depth_stencil(pipe, p->dst, p->clear_flags, p->depth, p->stencil,
3913                              p->dstx, p->dsty, p->width, p->height,
3914                              p->render_condition_enabled);
3915    tc_drop_surface_reference(p->dst);
3916    return call_size(tc_clear_depth_stencil);
3917 }
3918 
3919 static void
3920 tc_clear_depth_stencil(struct pipe_context *_pipe,
3921                        struct pipe_surface *dst, unsigned clear_flags,
3922                        double depth, unsigned stencil, unsigned dstx,
3923                        unsigned dsty, unsigned width, unsigned height,
3924                        bool render_condition_enabled)
3925 {
3926    struct threaded_context *tc = threaded_context(_pipe);
3927    struct tc_clear_depth_stencil *p = tc_add_call(tc, TC_CALL_clear_depth_stencil, tc_clear_depth_stencil);
3928    p->dst = NULL;
3929    pipe_surface_reference(&p->dst, dst);
3930    p->clear_flags = clear_flags;
3931    p->depth = depth;
3932    p->stencil = stencil;
3933    p->dstx = dstx;
3934    p->dsty = dsty;
3935    p->width = width;
3936    p->height = height;
3937    p->render_condition_enabled = render_condition_enabled;
3938 }
3939 
3940 struct tc_clear_buffer {
3941    struct tc_call_base base;
3942    uint8_t clear_value_size;
3943    unsigned offset;
3944    unsigned size;
3945    char clear_value[16];
3946    struct pipe_resource *res;
3947 };
3948 
3949 static uint16_t
3950 tc_call_clear_buffer(struct pipe_context *pipe, void *call, uint64_t *last)
3951 {
3952    struct tc_clear_buffer *p = to_call(call, tc_clear_buffer);
3953 
3954    pipe->clear_buffer(pipe, p->res, p->offset, p->size, p->clear_value,
3955                       p->clear_value_size);
3956    tc_drop_resource_reference(p->res);
3957    return call_size(tc_clear_buffer);
3958 }
3959 
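/* Queue clear_buffer: disable CPU storage, reference the buffer, add it
 * to the current buffer list, and extend its valid range.
 */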
3960 static void
3961 tc_clear_buffer(struct pipe_context *_pipe, struct pipe_resource *res,
3962                 unsigned offset, unsigned size,
3963                 const void *clear_value, int clear_value_size)
3964 {
3965    struct threaded_context *tc = threaded_context(_pipe);
3966    struct threaded_resource *tres = threaded_resource(res);
3967    struct tc_clear_buffer *p =
3968       tc_add_call(tc, TC_CALL_clear_buffer, tc_clear_buffer);
3969 
3970    tc_buffer_disable_cpu_storage(res);
3971 
3972    tc_set_resource_reference(&p->res, res);
3973    tc_add_to_buffer_list(&tc->buffer_lists[tc->next_buf_list], res);
3974    p->offset = offset;
3975    p->size = size;
3976    memcpy(p->clear_value, clear_value, clear_value_size);
3977    p->clear_value_size = clear_value_size;
3978 
3979    util_range_add(&tres->b, &tres->valid_buffer_range, offset, offset + size);
3980 }
3981 
3982 struct tc_clear_texture {
3983    struct tc_call_base base;
3984    unsigned level;
3985    struct pipe_box box;
3986    char data[16];
3987    struct pipe_resource *res;
3988 };
3989 
3990 static uint16_t
3991 tc_call_clear_texture(struct pipe_context *pipe, void *call, uint64_t *last)
3992 {
3993    struct tc_clear_texture *p = to_call(call, tc_clear_texture);
3994 
3995    pipe->clear_texture(pipe, p->res, p->level, &p->box, p->data);
3996    tc_drop_resource_reference(p->res);
3997    return call_size(tc_clear_texture);
3998 }
3999 
4000 static void
4001 tc_clear_texture(struct pipe_context *_pipe, struct pipe_resource *res,
4002                  unsigned level, const struct pipe_box *box, const void *data)
4003 {
4004    struct threaded_context *tc = threaded_context(_pipe);
4005    struct tc_clear_texture *p =
4006       tc_add_call(tc, TC_CALL_clear_texture, tc_clear_texture);
4007 
4008    tc_set_resource_reference(&p->res, res);
4009    p->level = level;
4010    p->box = *box;
4011    memcpy(p->data, data,
4012           util_format_get_blocksize(res->format));
4013 }
4014 
4015 struct tc_resource_commit {
4016    struct tc_call_base base;
4017    bool commit;
4018    unsigned level;
4019    struct pipe_box box;
4020    struct pipe_resource *res;
4021 };
4022 
4023 static uint16_t
4024 tc_call_resource_commit(struct pipe_context *pipe, void *call, uint64_t *last)
4025 {
4026    struct tc_resource_commit *p = to_call(call, tc_resource_commit);
4027 
4028    pipe->resource_commit(pipe, p->res, p->level, &p->box, p->commit);
4029    tc_drop_resource_reference(p->res);
4030    return call_size(tc_resource_commit);
4031 }
4032 
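/* Queue resource_commit. The driver's return value cannot be observed
 * from the application thread, so this always reports success.
 */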
4033 static bool
4034 tc_resource_commit(struct pipe_context *_pipe, struct pipe_resource *res,
4035                    unsigned level, struct pipe_box *box, bool commit)
4036 {
4037    struct threaded_context *tc = threaded_context(_pipe);
4038    struct tc_resource_commit *p =
4039       tc_add_call(tc, TC_CALL_resource_commit, tc_resource_commit);
4040 
4041    tc_set_resource_reference(&p->res, res);
4042    p->level = level;
4043    p->box = *box;
4044    p->commit = commit;
4045    return true; /* we don't care about the return value for this call */
4046 }
4047 
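/* Intel performance queries return data to the application thread, so
 * most of these entry points synchronize with the driver thread before
 * calling into the driver.
 */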
4048 static unsigned
4049 tc_init_intel_perf_query_info(struct pipe_context *_pipe)
4050 {
4051    struct threaded_context *tc = threaded_context(_pipe);
4052    struct pipe_context *pipe = tc->pipe;
4053 
4054    return pipe->init_intel_perf_query_info(pipe);
4055 }
4056 
4057 static void
4058 tc_get_intel_perf_query_info(struct pipe_context *_pipe,
4059                              unsigned query_index,
4060                              const char **name,
4061                              uint32_t *data_size,
4062                              uint32_t *n_counters,
4063                              uint32_t *n_active)
4064 {
4065    struct threaded_context *tc = threaded_context(_pipe);
4066    struct pipe_context *pipe = tc->pipe;
4067 
4068    tc_sync(tc); /* n_active vs begin/end_intel_perf_query */
4069    pipe->get_intel_perf_query_info(pipe, query_index, name, data_size,
4070          n_counters, n_active);
4071 }
4072 
4073 static void
4074 tc_get_intel_perf_query_counter_info(struct pipe_context *_pipe,
4075                                      unsigned query_index,
4076                                      unsigned counter_index,
4077                                      const char **name,
4078                                      const char **desc,
4079                                      uint32_t *offset,
4080                                      uint32_t *data_size,
4081                                      uint32_t *type_enum,
4082                                      uint32_t *data_type_enum,
4083                                      uint64_t *raw_max)
4084 {
4085    struct threaded_context *tc = threaded_context(_pipe);
4086    struct pipe_context *pipe = tc->pipe;
4087 
4088    pipe->get_intel_perf_query_counter_info(pipe, query_index, counter_index,
4089          name, desc, offset, data_size, type_enum, data_type_enum, raw_max);
4090 }
4091 
4092 static struct pipe_query *
4093 tc_new_intel_perf_query_obj(struct pipe_context *_pipe, unsigned query_index)
4094 {
4095    struct threaded_context *tc = threaded_context(_pipe);
4096    struct pipe_context *pipe = tc->pipe;
4097 
4098    return pipe->new_intel_perf_query_obj(pipe, query_index);
4099 }
4100 
4101 static uint16_t
4102 tc_call_begin_intel_perf_query(struct pipe_context *pipe, void *call, uint64_t *last)
4103 {
4104    (void)pipe->begin_intel_perf_query(pipe, to_call(call, tc_query_call)->query);
4105    return call_size(tc_query_call);
4106 }

static bool
tc_begin_intel_perf_query(struct pipe_context *_pipe, struct pipe_query *q)
{
   struct threaded_context *tc = threaded_context(_pipe);

   tc_add_call(tc, TC_CALL_begin_intel_perf_query, tc_query_call)->query = q;

   /* assume success, begin failure can be signaled from get_intel_perf_query_data */
   return true;
}

static uint16_t
tc_call_end_intel_perf_query(struct pipe_context *pipe, void *call, uint64_t *last)
{
   pipe->end_intel_perf_query(pipe, to_call(call, tc_query_call)->query);
   return call_size(tc_query_call);
}

static void
tc_end_intel_perf_query(struct pipe_context *_pipe, struct pipe_query *q)
{
   struct threaded_context *tc = threaded_context(_pipe);

   tc_add_call(tc, TC_CALL_end_intel_perf_query, tc_query_call)->query = q;
}

static void
tc_delete_intel_perf_query(struct pipe_context *_pipe, struct pipe_query *q)
{
   struct threaded_context *tc = threaded_context(_pipe);
   struct pipe_context *pipe = tc->pipe;

   tc_sync(tc); /* flush potentially pending begin/end_intel_perf_queries */
   pipe->delete_intel_perf_query(pipe, q);
}

static void
tc_wait_intel_perf_query(struct pipe_context *_pipe, struct pipe_query *q)
{
   struct threaded_context *tc = threaded_context(_pipe);
   struct pipe_context *pipe = tc->pipe;

   tc_sync(tc); /* flush potentially pending begin/end_intel_perf_queries */
   pipe->wait_intel_perf_query(pipe, q);
}

static bool
tc_is_intel_perf_query_ready(struct pipe_context *_pipe, struct pipe_query *q)
{
   struct threaded_context *tc = threaded_context(_pipe);
   struct pipe_context *pipe = tc->pipe;

   tc_sync(tc); /* flush potentially pending begin/end_intel_perf_queries */
   return pipe->is_intel_perf_query_ready(pipe, q);
}

static bool
tc_get_intel_perf_query_data(struct pipe_context *_pipe,
                             struct pipe_query *q,
                             size_t data_size,
                             uint32_t *data,
                             uint32_t *bytes_written)
{
   struct threaded_context *tc = threaded_context(_pipe);
   struct pipe_context *pipe = tc->pipe;

   tc_sync(tc); /* flush potentially pending begin/end_intel_perf_queries */
   return pipe->get_intel_perf_query_data(pipe, q, data_size, data, bytes_written);
}

/********************************************************************
 * callback
 */

struct tc_callback_call {
   struct tc_call_base base;
   void (*fn)(void *data);
   void *data;
};

static uint16_t
tc_call_callback(UNUSED struct pipe_context *pipe, void *call, uint64_t *last)
{
   struct tc_callback_call *p = to_call(call, tc_callback_call);

   p->fn(p->data);
   return call_size(tc_callback_call);
}

static void
tc_callback(struct pipe_context *_pipe, void (*fn)(void *), void *data,
            bool asap)
{
   struct threaded_context *tc = threaded_context(_pipe);

   if (asap && tc_is_sync(tc)) {
      fn(data);
      return;
   }

   struct tc_callback_call *p =
      tc_add_call(tc, TC_CALL_callback, tc_callback_call);
   p->fn = fn;
   p->data = data;
}
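
/* Usage note (illustrative sketch, not taken from any particular frontend):
 * the "callback" hook lets a Gallium frontend run a small function from the
 * driver thread, in order with the commands queued before it, or immediately
 * when "asap" is set and TC is currently synced.  The helper and variable
 * names below are hypothetical.
 *
 *    static void
 *    free_zombie_objects(void *data)
 *    {
 *       // Release objects that must not be freed while the driver thread
 *       // might still be executing calls that reference them.
 *    }
 *
 *    // Queue the cleanup; with asap = true it may run right away if the
 *    // driver thread has nothing pending.
 *    pipe->callback(pipe, free_zombie_objects, zombie_list, true);
 */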


/********************************************************************
 * create & destroy
 */

static void
tc_destroy(struct pipe_context *_pipe)
{
   struct threaded_context *tc = threaded_context(_pipe);
   struct pipe_context *pipe = tc->pipe;

   if (tc->base.const_uploader &&
       tc->base.stream_uploader != tc->base.const_uploader)
      u_upload_destroy(tc->base.const_uploader);

   if (tc->base.stream_uploader)
      u_upload_destroy(tc->base.stream_uploader);

   tc_sync(tc);

   if (util_queue_is_initialized(&tc->queue)) {
      util_queue_destroy(&tc->queue);

      for (unsigned i = 0; i < TC_MAX_BATCHES; i++) {
         util_queue_fence_destroy(&tc->batch_slots[i].fence);
         assert(!tc->batch_slots[i].token);
      }
   }

   slab_destroy_child(&tc->pool_transfers);
   assert(tc->batch_slots[tc->next].num_total_slots == 0);
   pipe->destroy(pipe);

   for (unsigned i = 0; i < TC_MAX_BUFFER_LISTS; i++) {
      if (!util_queue_fence_is_signalled(&tc->buffer_lists[i].driver_flushed_fence))
         util_queue_fence_signal(&tc->buffer_lists[i].driver_flushed_fence);
      util_queue_fence_destroy(&tc->buffer_lists[i].driver_flushed_fence);
   }

   FREE(tc);
}

static const tc_execute execute_func[TC_NUM_CALLS] = {
#define CALL(name) tc_call_##name,
#include "u_threaded_context_calls.h"
#undef CALL
};

void tc_driver_internal_flush_notify(struct threaded_context *tc)
{
   /* Allow drivers to call this function even for internal contexts that
    * don't have tc. It simplifies drivers.
    */
   if (!tc)
      return;

   /* Signal fences set by tc_batch_execute. */
   for (unsigned i = 0; i < tc->num_signal_fences_next_flush; i++)
      util_queue_fence_signal(tc->signal_fences_next_flush[i]);

   tc->num_signal_fences_next_flush = 0;
}
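
/* Usage note (illustrative sketch): a driver calls this from its internal
 * command-stream flush path so that fences deferred by tc_batch_execute are
 * signaled once the recorded work has actually been submitted.  The "foo_*"
 * names are hypothetical.
 *
 *    static void
 *    foo_flush_internal(struct foo_context *ctx)
 *    {
 *       ...submit the current command stream to the kernel...
 *
 *       // Safe even when threading is disabled and ctx->tc is NULL,
 *       // thanks to the early return above.
 *       tc_driver_internal_flush_notify(ctx->tc);
 *    }
 */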

/**
 * Wrap an existing pipe_context into a threaded_context.
 *
 * \param pipe                 pipe_context to wrap
 * \param parent_transfer_pool parent slab pool set up for creating
 *                             pipe_transfer objects; the driver should have
 *                             one in pipe_screen.
 * \param replace_buffer  callback for replacing a pipe_resource's storage
 *                        with another pipe_resource's storage.
 * \param options         optional TC options/callbacks
 * \param out             if not NULL and creation succeeds, the
 *                        threaded_context is also returned here in addition
 *                        to the return value.
 */
struct pipe_context *
threaded_context_create(struct pipe_context *pipe,
                        struct slab_parent_pool *parent_transfer_pool,
                        tc_replace_buffer_storage_func replace_buffer,
                        const struct threaded_context_options *options,
                        struct threaded_context **out)
{
   struct threaded_context *tc;

   if (!pipe)
      return NULL;

   util_cpu_detect();

   if (!debug_get_bool_option("GALLIUM_THREAD", util_get_cpu_caps()->nr_cpus > 1))
      return pipe;

   tc = CALLOC_STRUCT(threaded_context);
   if (!tc) {
      pipe->destroy(pipe);
      return NULL;
   }

   if (options)
      tc->options = *options;

   pipe = trace_context_create_threaded(pipe->screen, pipe, &replace_buffer, &tc->options);

   /* The driver context isn't wrapped, so set its "priv" to NULL. */
   pipe->priv = NULL;

   tc->pipe = pipe;
   tc->replace_buffer_storage = replace_buffer;
   tc->map_buffer_alignment =
      pipe->screen->get_param(pipe->screen, PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT);
   tc->ubo_alignment =
      MAX2(pipe->screen->get_param(pipe->screen, PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT), 64);
   tc->base.priv = pipe; /* priv points to the wrapped driver context */
   tc->base.screen = pipe->screen;
   tc->base.destroy = tc_destroy;
   tc->base.callback = tc_callback;

   tc->base.stream_uploader = u_upload_clone(&tc->base, pipe->stream_uploader);
   if (pipe->stream_uploader == pipe->const_uploader)
      tc->base.const_uploader = tc->base.stream_uploader;
   else
      tc->base.const_uploader = u_upload_clone(&tc->base, pipe->const_uploader);

   if (!tc->base.stream_uploader || !tc->base.const_uploader)
      goto fail;

   tc->use_forced_staging_uploads = true;

   /* The queue size is the number of batches "waiting". Batches are removed
    * from the queue before being executed, so keep one tc_batch slot for that
    * execution. Also, keep one unused slot for an unflushed batch.
    */
   if (!util_queue_init(&tc->queue, "gdrv", TC_MAX_BATCHES - 2, 1, 0, NULL))
      goto fail;

   for (unsigned i = 0; i < TC_MAX_BATCHES; i++) {
#if !defined(NDEBUG) && TC_DEBUG >= 1
      tc->batch_slots[i].sentinel = TC_SENTINEL;
#endif
      tc->batch_slots[i].tc = tc;
      util_queue_fence_init(&tc->batch_slots[i].fence);
   }
   for (unsigned i = 0; i < TC_MAX_BUFFER_LISTS; i++)
      util_queue_fence_init(&tc->buffer_lists[i].driver_flushed_fence);

   list_inithead(&tc->unflushed_queries);

   slab_create_child(&tc->pool_transfers, parent_transfer_pool);

   /* If you have different limits in each shader stage, set the maximum. */
   struct pipe_screen *screen = pipe->screen;
   tc->max_vertex_buffers =
      screen->get_param(screen, PIPE_CAP_MAX_VERTEX_BUFFERS);
   tc->max_const_buffers =
      screen->get_shader_param(screen, PIPE_SHADER_FRAGMENT,
                               PIPE_SHADER_CAP_MAX_CONST_BUFFERS);
   tc->max_shader_buffers =
      screen->get_shader_param(screen, PIPE_SHADER_FRAGMENT,
                               PIPE_SHADER_CAP_MAX_SHADER_BUFFERS);
   tc->max_images =
      screen->get_shader_param(screen, PIPE_SHADER_FRAGMENT,
                               PIPE_SHADER_CAP_MAX_SHADER_IMAGES);
   tc->max_samplers =
      screen->get_shader_param(screen, PIPE_SHADER_FRAGMENT,
                               PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS);

   tc->base.set_context_param = tc_set_context_param; /* always set this */

#define CTX_INIT(_member) \
   tc->base._member = tc->pipe->_member ? tc_##_member : NULL

   CTX_INIT(flush);
   CTX_INIT(draw_vbo);
   CTX_INIT(draw_vertex_state);
   CTX_INIT(launch_grid);
   CTX_INIT(resource_copy_region);
   CTX_INIT(blit);
   CTX_INIT(clear);
   CTX_INIT(clear_render_target);
   CTX_INIT(clear_depth_stencil);
   CTX_INIT(clear_buffer);
   CTX_INIT(clear_texture);
   CTX_INIT(flush_resource);
   CTX_INIT(generate_mipmap);
   CTX_INIT(render_condition);
   CTX_INIT(create_query);
   CTX_INIT(create_batch_query);
   CTX_INIT(destroy_query);
   CTX_INIT(begin_query);
   CTX_INIT(end_query);
   CTX_INIT(get_query_result);
   CTX_INIT(get_query_result_resource);
   CTX_INIT(set_active_query_state);
   CTX_INIT(create_blend_state);
   CTX_INIT(bind_blend_state);
   CTX_INIT(delete_blend_state);
   CTX_INIT(create_sampler_state);
   CTX_INIT(bind_sampler_states);
   CTX_INIT(delete_sampler_state);
   CTX_INIT(create_rasterizer_state);
   CTX_INIT(bind_rasterizer_state);
   CTX_INIT(delete_rasterizer_state);
   CTX_INIT(create_depth_stencil_alpha_state);
   CTX_INIT(bind_depth_stencil_alpha_state);
   CTX_INIT(delete_depth_stencil_alpha_state);
   CTX_INIT(create_fs_state);
   CTX_INIT(bind_fs_state);
   CTX_INIT(delete_fs_state);
   CTX_INIT(create_vs_state);
   CTX_INIT(bind_vs_state);
   CTX_INIT(delete_vs_state);
   CTX_INIT(create_gs_state);
   CTX_INIT(bind_gs_state);
   CTX_INIT(delete_gs_state);
   CTX_INIT(create_tcs_state);
   CTX_INIT(bind_tcs_state);
   CTX_INIT(delete_tcs_state);
   CTX_INIT(create_tes_state);
   CTX_INIT(bind_tes_state);
   CTX_INIT(delete_tes_state);
   CTX_INIT(create_compute_state);
   CTX_INIT(bind_compute_state);
   CTX_INIT(delete_compute_state);
   CTX_INIT(create_vertex_elements_state);
   CTX_INIT(bind_vertex_elements_state);
   CTX_INIT(delete_vertex_elements_state);
   CTX_INIT(set_blend_color);
   CTX_INIT(set_stencil_ref);
   CTX_INIT(set_sample_mask);
   CTX_INIT(set_min_samples);
   CTX_INIT(set_clip_state);
   CTX_INIT(set_constant_buffer);
   CTX_INIT(set_inlinable_constants);
   CTX_INIT(set_framebuffer_state);
   CTX_INIT(set_polygon_stipple);
   CTX_INIT(set_sample_locations);
   CTX_INIT(set_scissor_states);
   CTX_INIT(set_viewport_states);
   CTX_INIT(set_window_rectangles);
   CTX_INIT(set_sampler_views);
   CTX_INIT(set_tess_state);
   CTX_INIT(set_patch_vertices);
   CTX_INIT(set_shader_buffers);
   CTX_INIT(set_shader_images);
   CTX_INIT(set_vertex_buffers);
   CTX_INIT(create_stream_output_target);
   CTX_INIT(stream_output_target_destroy);
   CTX_INIT(set_stream_output_targets);
   CTX_INIT(create_sampler_view);
   CTX_INIT(sampler_view_destroy);
   CTX_INIT(create_surface);
   CTX_INIT(surface_destroy);
   CTX_INIT(buffer_map);
   CTX_INIT(texture_map);
   CTX_INIT(transfer_flush_region);
   CTX_INIT(buffer_unmap);
   CTX_INIT(texture_unmap);
   CTX_INIT(buffer_subdata);
   CTX_INIT(texture_subdata);
   CTX_INIT(texture_barrier);
   CTX_INIT(memory_barrier);
   CTX_INIT(resource_commit);
   CTX_INIT(create_video_codec);
   CTX_INIT(create_video_buffer);
   CTX_INIT(set_compute_resources);
   CTX_INIT(set_global_binding);
   CTX_INIT(get_sample_position);
   CTX_INIT(invalidate_resource);
   CTX_INIT(get_device_reset_status);
   CTX_INIT(set_device_reset_callback);
   CTX_INIT(dump_debug_state);
   CTX_INIT(set_log_context);
   CTX_INIT(emit_string_marker);
   CTX_INIT(set_debug_callback);
   CTX_INIT(create_fence_fd);
   CTX_INIT(fence_server_sync);
   CTX_INIT(fence_server_signal);
   CTX_INIT(get_timestamp);
   CTX_INIT(create_texture_handle);
   CTX_INIT(delete_texture_handle);
   CTX_INIT(make_texture_handle_resident);
   CTX_INIT(create_image_handle);
   CTX_INIT(delete_image_handle);
   CTX_INIT(make_image_handle_resident);
   CTX_INIT(set_frontend_noop);
   CTX_INIT(init_intel_perf_query_info);
   CTX_INIT(get_intel_perf_query_info);
   CTX_INIT(get_intel_perf_query_counter_info);
   CTX_INIT(new_intel_perf_query_obj);
   CTX_INIT(begin_intel_perf_query);
   CTX_INIT(end_intel_perf_query);
   CTX_INIT(delete_intel_perf_query);
   CTX_INIT(wait_intel_perf_query);
   CTX_INIT(is_intel_perf_query_ready);
   CTX_INIT(get_intel_perf_query_data);
#undef CTX_INIT

   if (out)
      *out = tc;

   tc_begin_next_buffer_list(tc);
   return &tc->base;

fail:
   tc_destroy(&tc->base);
   return NULL;
}
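
/* Usage note (illustrative sketch, hypothetical "foo" driver): a driver's
 * context-creation entry point typically just wraps the real context.  When
 * threading is disabled, the unwrapped context is returned unchanged and
 * "out" is left untouched; on failure the given context is destroyed.
 *
 *    static struct pipe_context *
 *    foo_create_context(struct pipe_screen *screen, void *priv, unsigned flags)
 *    {
 *       struct foo_screen *fscreen = foo_screen(screen);
 *       struct pipe_context *ctx = foo_create_context_raw(screen, priv, flags);
 *
 *       if (!ctx)
 *          return NULL;
 *
 *       return threaded_context_create(ctx, &fscreen->pool_transfers,
 *                                      foo_replace_buffer_storage,
 *                                      NULL,   // no extra options
 *                                      &foo_context(ctx)->tc);
 *    }
 */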

void
threaded_context_init_bytes_mapped_limit(struct threaded_context *tc, unsigned divisor)
{
   uint64_t total_ram;
   if (os_get_total_physical_memory(&total_ram)) {
      tc->bytes_mapped_limit = total_ram / divisor;
      if (sizeof(void*) == 4)
         tc->bytes_mapped_limit = MIN2(tc->bytes_mapped_limit, 512*1024*1024UL);
   }
}
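
/* Usage note (illustrative sketch): drivers usually call this right after
 * wrapping their context, choosing what fraction of system RAM TC may treat
 * as its budget for mapped buffer memory.  The divisor is driver policy;
 * 4 is used here only as an example, and "fctx" is a hypothetical driver
 * context holding the threaded_context pointer returned via "out".
 *
 *    // Budget roughly 1/4 of system RAM (the helper itself clamps the
 *    // limit to 512 MB on 32-bit builds).
 *    if (fctx->tc)
 *       threaded_context_init_bytes_mapped_limit(fctx->tc, 4);
 */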