/**************************************************************************
 *
 * Copyright 2017 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

#include "util/u_threaded_context.h"
#include "util/u_cpu_detect.h"
#include "util/format/u_format.h"
#include "util/u_inlines.h"
#include "util/u_memory.h"
#include "util/u_upload_mgr.h"
#include "driver_trace/tr_context.h"
#include "util/log.h"
#include "compiler/shader_info.h"

#if TC_DEBUG >= 1
#define tc_assert assert
#else
#define tc_assert(x)
#endif

#if TC_DEBUG >= 2
#define tc_printf mesa_logi
#define tc_asprintf asprintf
#define tc_strcmp strcmp
#else
#define tc_printf(...)
#define tc_asprintf(...) 0
#define tc_strcmp(...) 0
#endif

#define TC_SENTINEL 0x5ca1ab1e

enum tc_call_id {
#define CALL(name) TC_CALL_##name,
#include "u_threaded_context_calls.h"
#undef CALL
   TC_NUM_CALLS,
};

#if TC_DEBUG >= 3
static const char *tc_call_names[] = {
#define CALL(name) #name,
#include "u_threaded_context_calls.h"
#undef CALL
};
#endif

typedef uint16_t (*tc_execute)(struct pipe_context *pipe, void *call, uint64_t *last);

static const tc_execute execute_func[TC_NUM_CALLS];

static void
tc_batch_check(UNUSED struct tc_batch *batch)
{
   tc_assert(batch->sentinel == TC_SENTINEL);
   tc_assert(batch->num_total_slots <= TC_SLOTS_PER_BATCH);
}

static void
tc_debug_check(struct threaded_context *tc)
{
   for (unsigned i = 0; i < TC_MAX_BATCHES; i++) {
      tc_batch_check(&tc->batch_slots[i]);
      tc_assert(tc->batch_slots[i].tc == tc);
   }
}

static void
tc_set_driver_thread(struct threaded_context *tc)
{
#ifndef NDEBUG
   tc->driver_thread = util_get_thread_id();
#endif
}

static void
tc_clear_driver_thread(struct threaded_context *tc)
{
#ifndef NDEBUG
   memset(&tc->driver_thread, 0, sizeof(tc->driver_thread));
#endif
}

static void *
to_call_check(void *ptr, unsigned num_slots)
{
#if TC_DEBUG >= 1
   struct tc_call_base *call = ptr;
   tc_assert(call->num_slots == num_slots);
#endif
   return ptr;
}
#define to_call(ptr, type) ((struct type *)to_call_check((void *)(ptr), call_size(type)))

#define size_to_slots(size)      DIV_ROUND_UP(size, 8)
#define call_size(type)          size_to_slots(sizeof(struct type))
#define call_size_with_slots(type, num_slots) size_to_slots( \
   sizeof(struct type) + sizeof(((struct type*)NULL)->slot[0]) * (num_slots))
#define get_next_call(ptr, type) ((struct type*)((uint64_t*)ptr + call_size(type)))
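
/* Calls are packed into the batch as a sequence of 8-byte slots. As a quick
 * illustration of the macros above (not used by the code): a call struct of
 * 20 bytes occupies size_to_slots(20) = DIV_ROUND_UP(20, 8) = 3 slots, i.e.
 * 24 bytes in the batch.
 */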

/* Assign src to dst while dst is uninitialized. */
static inline void
tc_set_resource_reference(struct pipe_resource **dst, struct pipe_resource *src)
{
   *dst = src;
   pipe_reference(NULL, &src->reference); /* only increment refcount */
}

/* Assign src to dst while dst is uninitialized. */
static inline void
tc_set_vertex_state_reference(struct pipe_vertex_state **dst,
                              struct pipe_vertex_state *src)
{
   *dst = src;
   pipe_reference(NULL, &src->reference); /* only increment refcount */
}

/* Unreference dst but don't touch the dst pointer. */
static inline void
tc_drop_resource_reference(struct pipe_resource *dst)
{
   if (pipe_reference(&dst->reference, NULL)) /* only decrement refcount */
      pipe_resource_destroy(dst);
}

/* Unreference dst but don't touch the dst pointer. */
static inline void
tc_drop_surface_reference(struct pipe_surface *dst)
{
   if (pipe_reference(&dst->reference, NULL)) /* only decrement refcount */
      dst->context->surface_destroy(dst->context, dst);
}

/* Unreference dst but don't touch the dst pointer. */
static inline void
tc_drop_sampler_view_reference(struct pipe_sampler_view *dst)
{
   if (pipe_reference(&dst->reference, NULL)) /* only decrement refcount */
      dst->context->sampler_view_destroy(dst->context, dst);
}

/* Unreference dst but don't touch the dst pointer. */
static inline void
tc_drop_so_target_reference(struct pipe_stream_output_target *dst)
{
   if (pipe_reference(&dst->reference, NULL)) /* only decrement refcount */
      dst->context->stream_output_target_destroy(dst->context, dst);
}

/**
 * Subtract the given number of references.
 */
static inline void
tc_drop_vertex_state_references(struct pipe_vertex_state *dst, int num_refs)
{
   int count = p_atomic_add_return(&dst->reference.count, -num_refs);

   assert(count >= 0);
   /* Underflows shouldn't happen, but let's be safe. */
   if (count <= 0)
      dst->screen->vertex_state_destroy(dst->screen, dst);
}

/* We don't want to read or write min_index and max_index, because
 * it shouldn't be needed by drivers at this point.
 */
#define DRAW_INFO_SIZE_WITHOUT_MIN_MAX_INDEX \
   offsetof(struct pipe_draw_info, min_index)

static void
tc_batch_execute(void *job, UNUSED void *gdata, int thread_index)
{
   struct tc_batch *batch = job;
   struct pipe_context *pipe = batch->tc->pipe;
   uint64_t *last = &batch->slots[batch->num_total_slots];

   tc_batch_check(batch);
   tc_set_driver_thread(batch->tc);

   assert(!batch->token);

   for (uint64_t *iter = batch->slots; iter != last;) {
      struct tc_call_base *call = (struct tc_call_base *)iter;

      tc_assert(call->sentinel == TC_SENTINEL);

#if TC_DEBUG >= 3
      tc_printf("CALL: %s", tc_call_names[call->call_id]);
#endif

      iter += execute_func[call->call_id](pipe, call, last);
   }

   /* Add the fence to the list of fences for the driver to signal at the next
    * flush, which we use for tracking which buffers are referenced by
    * an unflushed command buffer.
    */
   struct threaded_context *tc = batch->tc;
   struct util_queue_fence *fence =
      &tc->buffer_lists[batch->buffer_list_index].driver_flushed_fence;

   if (tc->options.driver_calls_flush_notify) {
      tc->signal_fences_next_flush[tc->num_signal_fences_next_flush++] = fence;

      /* Since our buffer lists are chained as a ring, we need to flush
       * the context twice as we go around the ring to make the driver signal
       * the buffer list fences, so that the producer thread can reuse the buffer
       * list structures for the next batches without waiting.
       */
      unsigned half_ring = TC_MAX_BUFFER_LISTS / 2;
      if (batch->buffer_list_index % half_ring == half_ring - 1)
         pipe->flush(pipe, NULL, PIPE_FLUSH_ASYNC);
   } else {
      util_queue_fence_signal(fence);
   }

   tc_clear_driver_thread(batch->tc);
   tc_batch_check(batch);
   batch->num_total_slots = 0;
}

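/* Advance to the next buffer list in the ring and reset it for the batch
 * that is about to be recorded.
 */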
static void
tc_begin_next_buffer_list(struct threaded_context *tc)
{
   tc->next_buf_list = (tc->next_buf_list + 1) % TC_MAX_BUFFER_LISTS;

   tc->batch_slots[tc->next].buffer_list_index = tc->next_buf_list;

   /* Clear the buffer list in the new empty batch. */
   struct tc_buffer_list *buf_list = &tc->buffer_lists[tc->next_buf_list];
   assert(util_queue_fence_is_signalled(&buf_list->driver_flushed_fence));
   util_queue_fence_reset(&buf_list->driver_flushed_fence); /* set to unsignalled */
   BITSET_ZERO(buf_list->buffer_list);

   tc->add_all_gfx_bindings_to_buffer_list = true;
   tc->add_all_compute_bindings_to_buffer_list = true;
}

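/* Hand the current batch off to the worker thread via util_queue and advance
 * to the next batch slot and buffer list.
 */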
static void
tc_batch_flush(struct threaded_context *tc)
{
   struct tc_batch *next = &tc->batch_slots[tc->next];

   tc_assert(next->num_total_slots != 0);
   tc_batch_check(next);
   tc_debug_check(tc);
   tc->bytes_mapped_estimate = 0;
   p_atomic_add(&tc->num_offloaded_slots, next->num_total_slots);

   if (next->token) {
      next->token->tc = NULL;
      tc_unflushed_batch_token_reference(&next->token, NULL);
   }

   util_queue_add_job(&tc->queue, next, &next->fence, tc_batch_execute,
                      NULL, 0);
   tc->last = tc->next;
   tc->next = (tc->next + 1) % TC_MAX_BATCHES;
   tc_begin_next_buffer_list(tc);
}

/* This is the function that adds variable-sized calls into the current
 * batch. It also flushes the batch if there is not enough space there.
 * All other higher-level "add" functions use it.
 */
static void *
tc_add_sized_call(struct threaded_context *tc, enum tc_call_id id,
                  unsigned num_slots)
{
   struct tc_batch *next = &tc->batch_slots[tc->next];
   assert(num_slots <= TC_SLOTS_PER_BATCH);
   tc_debug_check(tc);

   if (unlikely(next->num_total_slots + num_slots > TC_SLOTS_PER_BATCH)) {
      tc_batch_flush(tc);
      next = &tc->batch_slots[tc->next];
      tc_assert(next->num_total_slots == 0);
   }

   tc_assert(util_queue_fence_is_signalled(&next->fence));

   struct tc_call_base *call = (struct tc_call_base*)&next->slots[next->num_total_slots];
   next->num_total_slots += num_slots;

#if !defined(NDEBUG) && TC_DEBUG >= 1
   call->sentinel = TC_SENTINEL;
#endif
   call->call_id = id;
   call->num_slots = num_slots;

#if TC_DEBUG >= 3
   tc_printf("ENQUEUE: %s", tc_call_names[id]);
#endif

   tc_debug_check(tc);
   return call;
}

#define tc_add_call(tc, execute, type) \
   ((struct type*)tc_add_sized_call(tc, execute, call_size(type)))

#define tc_add_slot_based_call(tc, execute, type, num_slots) \
   ((struct type*)tc_add_sized_call(tc, execute, \
                                    call_size_with_slots(type, num_slots)))
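
/* Typical pattern for a queued call (an illustrative sketch only; "foo" is a
 * hypothetical pipe_context method, the real call types are defined
 * throughout this file, e.g. tc_query_call below):
 *
 *    struct tc_call_foo {
 *       struct tc_call_base base;
 *       unsigned param;
 *    };
 *
 *    static uint16_t
 *    tc_call_foo(struct pipe_context *pipe, void *call, uint64_t *last)
 *    {
 *       pipe->foo(pipe, to_call(call, tc_call_foo)->param);
 *       return call_size(tc_call_foo);
 *    }
 *
 *    static void
 *    tc_foo(struct pipe_context *_pipe, unsigned param)
 *    {
 *       struct threaded_context *tc = threaded_context(_pipe);
 *       tc_add_call(tc, TC_CALL_foo, tc_call_foo)->param = param;
 *    }
 */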

static bool
tc_is_sync(struct threaded_context *tc)
{
   struct tc_batch *last = &tc->batch_slots[tc->last];
   struct tc_batch *next = &tc->batch_slots[tc->next];

   return util_queue_fence_is_signalled(&last->fence) &&
          !next->num_total_slots;
}

static void
_tc_sync(struct threaded_context *tc, UNUSED const char *info, UNUSED const char *func)
{
   struct tc_batch *last = &tc->batch_slots[tc->last];
   struct tc_batch *next = &tc->batch_slots[tc->next];
   bool synced = false;

   tc_debug_check(tc);

   /* Only wait for queued calls... */
   if (!util_queue_fence_is_signalled(&last->fence)) {
      util_queue_fence_wait(&last->fence);
      synced = true;
   }

   tc_debug_check(tc);

   if (next->token) {
      next->token->tc = NULL;
      tc_unflushed_batch_token_reference(&next->token, NULL);
   }

   /* .. and execute unflushed calls directly. */
   if (next->num_total_slots) {
      p_atomic_add(&tc->num_direct_slots, next->num_total_slots);
      tc->bytes_mapped_estimate = 0;
      tc_batch_execute(next, NULL, 0);
      tc_begin_next_buffer_list(tc);
      synced = true;
   }

   if (synced) {
      p_atomic_inc(&tc->num_syncs);

      if (tc_strcmp(func, "tc_destroy") != 0) {
         tc_printf("sync %s %s", func, info);
      }
   }

   tc_debug_check(tc);
}

#define tc_sync(tc) _tc_sync(tc, "", __func__)
#define tc_sync_msg(tc, info) _tc_sync(tc, info, __func__)
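
/* Note: tc_sync drains all queued batches and executes any unflushed calls
 * directly in the calling thread, serializing the application thread with
 * the driver thread.
 */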

/**
 * Call this from fence_finish for same-context fence waits of deferred fences
 * that haven't been flushed yet.
 *
 * The passed pipe_context must be the one passed to pipe_screen::fence_finish,
 * i.e., the wrapped one.
 */
void
threaded_context_flush(struct pipe_context *_pipe,
                       struct tc_unflushed_batch_token *token,
                       bool prefer_async)
{
   struct threaded_context *tc = threaded_context(_pipe);

   /* This is called from the gallium frontend / application thread. */
   if (token->tc && token->tc == tc) {
      struct tc_batch *last = &tc->batch_slots[tc->last];

      /* Prefer to do the flush in the driver thread if it is already
       * running. That should be better for cache locality.
       */
      if (prefer_async || !util_queue_fence_is_signalled(&last->fence))
         tc_batch_flush(tc);
      else
         tc_sync(token->tc);
   }
}

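/* Buffer ids are hashed into a fixed-size bitset with TC_BUFFER_ID_MASK, so
 * two distinct buffers may map to the same bit. Aliasing can only make the
 * busy check in tc_is_buffer_busy conservatively report "busy"; it never
 * misses a busy buffer.
 */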
static void
tc_add_to_buffer_list(struct tc_buffer_list *next, struct pipe_resource *buf)
{
   uint32_t id = threaded_resource(buf)->buffer_id_unique;
   BITSET_SET(next->buffer_list, id & TC_BUFFER_ID_MASK);
}

/* Set a buffer binding and add it to the buffer list. */
static void
tc_bind_buffer(uint32_t *binding, struct tc_buffer_list *next, struct pipe_resource *buf)
{
   uint32_t id = threaded_resource(buf)->buffer_id_unique;
   *binding = id;
   BITSET_SET(next->buffer_list, id & TC_BUFFER_ID_MASK);
}

/* Reset a buffer binding. */
static void
tc_unbind_buffer(uint32_t *binding)
{
   *binding = 0;
}

/* Reset a range of buffer binding slots. */
static void
tc_unbind_buffers(uint32_t *binding, unsigned count)
{
   if (count)
      memset(binding, 0, sizeof(*binding) * count);
}

static void
tc_add_bindings_to_buffer_list(BITSET_WORD *buffer_list, const uint32_t *bindings,
                               unsigned count)
{
   for (unsigned i = 0; i < count; i++) {
      if (bindings[i])
         BITSET_SET(buffer_list, bindings[i] & TC_BUFFER_ID_MASK);
   }
}

static bool
tc_rebind_bindings(uint32_t old_id, uint32_t new_id, uint32_t *bindings,
                   unsigned count)
{
   unsigned rebind_count = 0;

   for (unsigned i = 0; i < count; i++) {
      if (bindings[i] == old_id) {
         bindings[i] = new_id;
         rebind_count++;
      }
   }
   return rebind_count;
}

static void
tc_add_shader_bindings_to_buffer_list(struct threaded_context *tc,
                                      BITSET_WORD *buffer_list,
                                      enum pipe_shader_type shader)
{
   tc_add_bindings_to_buffer_list(buffer_list, tc->const_buffers[shader],
                                  tc->max_const_buffers);
   if (tc->seen_shader_buffers[shader]) {
      tc_add_bindings_to_buffer_list(buffer_list, tc->shader_buffers[shader],
                                     tc->max_shader_buffers);
   }
   if (tc->seen_image_buffers[shader]) {
      tc_add_bindings_to_buffer_list(buffer_list, tc->image_buffers[shader],
                                     tc->max_images);
   }
   if (tc->seen_sampler_buffers[shader]) {
      tc_add_bindings_to_buffer_list(buffer_list, tc->sampler_buffers[shader],
                                     tc->max_samplers);
   }
}

static unsigned
tc_rebind_shader_bindings(struct threaded_context *tc, uint32_t old_id,
                          uint32_t new_id, enum pipe_shader_type shader, uint32_t *rebind_mask)
{
   unsigned ubo = 0, ssbo = 0, img = 0, sampler = 0;

   ubo = tc_rebind_bindings(old_id, new_id, tc->const_buffers[shader],
                            tc->max_const_buffers);
   if (ubo)
      *rebind_mask |= BITFIELD_BIT(TC_BINDING_UBO_VS) << shader;
   if (tc->seen_shader_buffers[shader]) {
      ssbo = tc_rebind_bindings(old_id, new_id, tc->shader_buffers[shader],
                                tc->max_shader_buffers);
      if (ssbo)
         *rebind_mask |= BITFIELD_BIT(TC_BINDING_SSBO_VS) << shader;
   }
   if (tc->seen_image_buffers[shader]) {
      img = tc_rebind_bindings(old_id, new_id, tc->image_buffers[shader],
                               tc->max_images);
      if (img)
         *rebind_mask |= BITFIELD_BIT(TC_BINDING_IMAGE_VS) << shader;
   }
   if (tc->seen_sampler_buffers[shader]) {
      sampler = tc_rebind_bindings(old_id, new_id, tc->sampler_buffers[shader],
                                   tc->max_samplers);
      if (sampler)
         *rebind_mask |= BITFIELD_BIT(TC_BINDING_SAMPLERVIEW_VS) << shader;
   }
   return ubo + ssbo + img + sampler;
}

/* Add all bound buffers used by VS/TCS/TES/GS/FS to the buffer list.
 * This is called by the first draw call in a batch when we want to inherit
 * all bindings set by the previous batch.
 */
static void
tc_add_all_gfx_bindings_to_buffer_list(struct threaded_context *tc)
{
   BITSET_WORD *buffer_list = tc->buffer_lists[tc->next_buf_list].buffer_list;

   tc_add_bindings_to_buffer_list(buffer_list, tc->vertex_buffers, tc->max_vertex_buffers);
   if (tc->seen_streamout_buffers)
      tc_add_bindings_to_buffer_list(buffer_list, tc->streamout_buffers, PIPE_MAX_SO_BUFFERS);

   tc_add_shader_bindings_to_buffer_list(tc, buffer_list, PIPE_SHADER_VERTEX);
   tc_add_shader_bindings_to_buffer_list(tc, buffer_list, PIPE_SHADER_FRAGMENT);

   if (tc->seen_tcs)
      tc_add_shader_bindings_to_buffer_list(tc, buffer_list, PIPE_SHADER_TESS_CTRL);
   if (tc->seen_tes)
      tc_add_shader_bindings_to_buffer_list(tc, buffer_list, PIPE_SHADER_TESS_EVAL);
   if (tc->seen_gs)
      tc_add_shader_bindings_to_buffer_list(tc, buffer_list, PIPE_SHADER_GEOMETRY);

   tc->add_all_gfx_bindings_to_buffer_list = false;
}

/* Add all bound buffers used by compute to the buffer list.
 * This is called by the first compute call in a batch when we want to inherit
 * all bindings set by the previous batch.
 */
static void
tc_add_all_compute_bindings_to_buffer_list(struct threaded_context *tc)
{
   BITSET_WORD *buffer_list = tc->buffer_lists[tc->next_buf_list].buffer_list;

   tc_add_shader_bindings_to_buffer_list(tc, buffer_list, PIPE_SHADER_COMPUTE);
   tc->add_all_compute_bindings_to_buffer_list = false;
}

static unsigned
tc_rebind_buffer(struct threaded_context *tc, uint32_t old_id, uint32_t new_id, uint32_t *rebind_mask)
{
   unsigned vbo = 0, so = 0;

   vbo = tc_rebind_bindings(old_id, new_id, tc->vertex_buffers,
                            tc->max_vertex_buffers);
   if (vbo)
      *rebind_mask |= BITFIELD_BIT(TC_BINDING_VERTEX_BUFFER);

   if (tc->seen_streamout_buffers) {
      so = tc_rebind_bindings(old_id, new_id, tc->streamout_buffers,
                              PIPE_MAX_SO_BUFFERS);
      if (so)
         *rebind_mask |= BITFIELD_BIT(TC_BINDING_STREAMOUT_BUFFER);
   }
   unsigned rebound = vbo + so;

   rebound += tc_rebind_shader_bindings(tc, old_id, new_id, PIPE_SHADER_VERTEX, rebind_mask);
   rebound += tc_rebind_shader_bindings(tc, old_id, new_id, PIPE_SHADER_FRAGMENT, rebind_mask);

   if (tc->seen_tcs)
      rebound += tc_rebind_shader_bindings(tc, old_id, new_id, PIPE_SHADER_TESS_CTRL, rebind_mask);
   if (tc->seen_tes)
      rebound += tc_rebind_shader_bindings(tc, old_id, new_id, PIPE_SHADER_TESS_EVAL, rebind_mask);
   if (tc->seen_gs)
      rebound += tc_rebind_shader_bindings(tc, old_id, new_id, PIPE_SHADER_GEOMETRY, rebind_mask);

   rebound += tc_rebind_shader_bindings(tc, old_id, new_id, PIPE_SHADER_COMPUTE, rebind_mask);

   if (rebound)
      BITSET_SET(tc->buffer_lists[tc->next_buf_list].buffer_list, new_id & TC_BUFFER_ID_MASK);
   return rebound;
}

static bool
tc_is_buffer_bound_with_mask(uint32_t id, uint32_t *bindings, unsigned binding_mask)
{
   while (binding_mask) {
      if (bindings[u_bit_scan(&binding_mask)] == id)
         return true;
   }
   return false;
}

static bool
tc_is_buffer_shader_bound_for_write(struct threaded_context *tc, uint32_t id,
                                    enum pipe_shader_type shader)
{
   if (tc->seen_shader_buffers[shader] &&
       tc_is_buffer_bound_with_mask(id, tc->shader_buffers[shader],
                                    tc->shader_buffers_writeable_mask[shader]))
      return true;

   if (tc->seen_image_buffers[shader] &&
       tc_is_buffer_bound_with_mask(id, tc->image_buffers[shader],
                                    tc->image_buffers_writeable_mask[shader]))
      return true;

   return false;
}

static bool
tc_is_buffer_bound_for_write(struct threaded_context *tc, uint32_t id)
{
   if (tc->seen_streamout_buffers &&
       tc_is_buffer_bound_with_mask(id, tc->streamout_buffers,
                                    BITFIELD_MASK(PIPE_MAX_SO_BUFFERS)))
      return true;

   if (tc_is_buffer_shader_bound_for_write(tc, id, PIPE_SHADER_VERTEX) ||
       tc_is_buffer_shader_bound_for_write(tc, id, PIPE_SHADER_FRAGMENT) ||
       tc_is_buffer_shader_bound_for_write(tc, id, PIPE_SHADER_COMPUTE))
      return true;

   if (tc->seen_tcs &&
       tc_is_buffer_shader_bound_for_write(tc, id, PIPE_SHADER_TESS_CTRL))
      return true;

   if (tc->seen_tes &&
       tc_is_buffer_shader_bound_for_write(tc, id, PIPE_SHADER_TESS_EVAL))
      return true;

   if (tc->seen_gs &&
       tc_is_buffer_shader_bound_for_write(tc, id, PIPE_SHADER_GEOMETRY))
      return true;

   return false;
}

static bool
tc_is_buffer_busy(struct threaded_context *tc, struct threaded_resource *tbuf,
                  unsigned map_usage)
{
   if (!tc->options.is_resource_busy)
      return true;

   uint32_t id_hash = tbuf->buffer_id_unique & TC_BUFFER_ID_MASK;

   for (unsigned i = 0; i < TC_MAX_BUFFER_LISTS; i++) {
      struct tc_buffer_list *buf_list = &tc->buffer_lists[i];

      /* If the buffer is referenced by a batch that hasn't been flushed (by tc or the driver),
       * then the buffer is considered busy. */
      if (!util_queue_fence_is_signalled(&buf_list->driver_flushed_fence) &&
          BITSET_TEST(buf_list->buffer_list, id_hash))
         return true;
   }

   /* The buffer isn't referenced by any unflushed batch: we can safely ask the driver
    * whether this buffer is busy or not. */
   return tc->options.is_resource_busy(tc->pipe->screen, tbuf->latest, map_usage);
}

void
threaded_resource_init(struct pipe_resource *res)
{
   struct threaded_resource *tres = threaded_resource(res);

   tres->latest = &tres->b;
   util_range_init(&tres->valid_buffer_range);
   tres->is_shared = false;
   tres->is_user_ptr = false;
   tres->buffer_id_unique = 0;
   tres->pending_staging_uploads = 0;
   util_range_init(&tres->pending_staging_uploads_range);
}

void
threaded_resource_deinit(struct pipe_resource *res)
{
   struct threaded_resource *tres = threaded_resource(res);

   if (tres->latest != &tres->b)
      pipe_resource_reference(&tres->latest, NULL);
   util_range_destroy(&tres->valid_buffer_range);
   util_range_destroy(&tres->pending_staging_uploads_range);
}

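/* Sync and return the wrapped driver context. The threaded context keeps the
 * driver context in pipe->priv, so a NULL priv means the context isn't
 * wrapped and is returned unchanged.
 */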
struct pipe_context *
threaded_context_unwrap_sync(struct pipe_context *pipe)
{
   if (!pipe || !pipe->priv)
      return pipe;

   tc_sync(threaded_context(pipe));
   return (struct pipe_context*)pipe->priv;
}


/********************************************************************
 * simple functions
 */

#define TC_FUNC1(func, qualifier, type, deref, addr, ...) \
   struct tc_call_##func { \
      struct tc_call_base base; \
      type state; \
   }; \
   \
   static uint16_t \
   tc_call_##func(struct pipe_context *pipe, void *call, uint64_t *last) \
   { \
      pipe->func(pipe, addr(to_call(call, tc_call_##func)->state)); \
      return call_size(tc_call_##func); \
   } \
   \
   static void \
   tc_##func(struct pipe_context *_pipe, qualifier type deref param) \
   { \
      struct threaded_context *tc = threaded_context(_pipe); \
      struct tc_call_##func *p = (struct tc_call_##func*) \
                     tc_add_call(tc, TC_CALL_##func, tc_call_##func); \
      p->state = deref(param); \
      __VA_ARGS__; \
   }
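
/* As an illustration (a rough expansion, not generated code),
 * TC_FUNC1(set_sample_mask, , unsigned, , ) yields approximately:
 *
 *    struct tc_call_set_sample_mask {
 *       struct tc_call_base base;
 *       unsigned state;
 *    };
 *
 *    static uint16_t
 *    tc_call_set_sample_mask(struct pipe_context *pipe, void *call, uint64_t *last)
 *    {
 *       pipe->set_sample_mask(pipe, to_call(call, tc_call_set_sample_mask)->state);
 *       return call_size(tc_call_set_sample_mask);
 *    }
 *
 *    static void
 *    tc_set_sample_mask(struct pipe_context *_pipe, unsigned param)
 *    {
 *       struct threaded_context *tc = threaded_context(_pipe);
 *       struct tc_call_set_sample_mask *p = (struct tc_call_set_sample_mask*)
 *                      tc_add_call(tc, TC_CALL_set_sample_mask, tc_call_set_sample_mask);
 *       p->state = param;
 *    }
 *
 * The "deref"/"addr" arguments dereference the pointer parameter when storing
 * the state and take its address again when replaying, for struct states such
 * as pipe_blend_color.
 */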

TC_FUNC1(set_active_query_state, , bool, , )

TC_FUNC1(set_blend_color, const, struct pipe_blend_color, *, &)
TC_FUNC1(set_stencil_ref, const, struct pipe_stencil_ref, , )
TC_FUNC1(set_clip_state, const, struct pipe_clip_state, *, &)
TC_FUNC1(set_sample_mask, , unsigned, , )
TC_FUNC1(set_min_samples, , unsigned, , )
TC_FUNC1(set_polygon_stipple, const, struct pipe_poly_stipple, *, &)

TC_FUNC1(texture_barrier, , unsigned, , )
TC_FUNC1(memory_barrier, , unsigned, , )
TC_FUNC1(delete_texture_handle, , uint64_t, , )
TC_FUNC1(delete_image_handle, , uint64_t, , )
TC_FUNC1(set_frontend_noop, , bool, , )


/********************************************************************
 * queries
 */

static struct pipe_query *
tc_create_query(struct pipe_context *_pipe, unsigned query_type,
                unsigned index)
{
   struct threaded_context *tc = threaded_context(_pipe);
   struct pipe_context *pipe = tc->pipe;

   return pipe->create_query(pipe, query_type, index);
}

static struct pipe_query *
tc_create_batch_query(struct pipe_context *_pipe, unsigned num_queries,
                      unsigned *query_types)
{
   struct threaded_context *tc = threaded_context(_pipe);
   struct pipe_context *pipe = tc->pipe;

   return pipe->create_batch_query(pipe, num_queries, query_types);
}

struct tc_query_call {
   struct tc_call_base base;
   struct pipe_query *query;
};

static uint16_t
tc_call_destroy_query(struct pipe_context *pipe, void *call, uint64_t *last)
{
   struct pipe_query *query = to_call(call, tc_query_call)->query;
   struct threaded_query *tq = threaded_query(query);

   if (list_is_linked(&tq->head_unflushed))
      list_del(&tq->head_unflushed);

   pipe->destroy_query(pipe, query);
   return call_size(tc_query_call);
}

static void
tc_destroy_query(struct pipe_context *_pipe, struct pipe_query *query)
{
   struct threaded_context *tc = threaded_context(_pipe);

   tc_add_call(tc, TC_CALL_destroy_query, tc_query_call)->query = query;
}

static uint16_t
tc_call_begin_query(struct pipe_context *pipe, void *call, uint64_t *last)
{
   pipe->begin_query(pipe, to_call(call, tc_query_call)->query);
   return call_size(tc_query_call);
}

static bool
tc_begin_query(struct pipe_context *_pipe, struct pipe_query *query)
{
   struct threaded_context *tc = threaded_context(_pipe);

   tc_add_call(tc, TC_CALL_begin_query, tc_query_call)->query = query;
   return true; /* we don't care about the return value for this call */
}

struct tc_end_query_call {
   struct tc_call_base base;
   struct threaded_context *tc;
   struct pipe_query *query;
};

static uint16_t
tc_call_end_query(struct pipe_context *pipe, void *call, uint64_t *last)
{
   struct tc_end_query_call *p = to_call(call, tc_end_query_call);
   struct threaded_query *tq = threaded_query(p->query);

   if (!list_is_linked(&tq->head_unflushed))
      list_add(&tq->head_unflushed, &p->tc->unflushed_queries);

   pipe->end_query(pipe, p->query);
   return call_size(tc_end_query_call);
}

static bool
tc_end_query(struct pipe_context *_pipe, struct pipe_query *query)
{
   struct threaded_context *tc = threaded_context(_pipe);
   struct threaded_query *tq = threaded_query(query);
   struct tc_end_query_call *call =
      tc_add_call(tc, TC_CALL_end_query, tc_end_query_call);

   call->tc = tc;
   call->query = query;

   tq->flushed = false;

   return true; /* we don't care about the return value for this call */
}

static bool
tc_get_query_result(struct pipe_context *_pipe,
                    struct pipe_query *query, bool wait,
                    union pipe_query_result *result)
{
   struct threaded_context *tc = threaded_context(_pipe);
   struct threaded_query *tq = threaded_query(query);
   struct pipe_context *pipe = tc->pipe;
   bool flushed = tq->flushed;

   if (!flushed) {
      tc_sync_msg(tc, wait ? "wait" : "nowait");
      tc_set_driver_thread(tc);
   }

   bool success = pipe->get_query_result(pipe, query, wait, result);

   if (!flushed)
      tc_clear_driver_thread(tc);

   if (success) {
      tq->flushed = true;
      if (list_is_linked(&tq->head_unflushed)) {
         /* This is safe because it can only happen after we sync'd. */
         list_del(&tq->head_unflushed);
      }
   }
   return success;
}

struct tc_query_result_resource {
   struct tc_call_base base;
   bool wait;
   enum pipe_query_value_type result_type:8;
   int8_t index; /* it can be -1 */
   unsigned offset;
   struct pipe_query *query;
   struct pipe_resource *resource;
};

static uint16_t
tc_call_get_query_result_resource(struct pipe_context *pipe, void *call, uint64_t *last)
{
   struct tc_query_result_resource *p = to_call(call, tc_query_result_resource);

   pipe->get_query_result_resource(pipe, p->query, p->wait, p->result_type,
                                   p->index, p->resource, p->offset);
   tc_drop_resource_reference(p->resource);
   return call_size(tc_query_result_resource);
}

static void
tc_get_query_result_resource(struct pipe_context *_pipe,
                             struct pipe_query *query, bool wait,
                             enum pipe_query_value_type result_type, int index,
                             struct pipe_resource *resource, unsigned offset)
{
   struct threaded_context *tc = threaded_context(_pipe);
   struct tc_query_result_resource *p =
      tc_add_call(tc, TC_CALL_get_query_result_resource,
                  tc_query_result_resource);

   p->query = query;
   p->wait = wait;
   p->result_type = result_type;
   p->index = index;
   tc_set_resource_reference(&p->resource, resource);
   tc_add_to_buffer_list(&tc->buffer_lists[tc->next_buf_list], resource);
   p->offset = offset;
}

struct tc_render_condition {
   struct tc_call_base base;
   bool condition;
   unsigned mode;
   struct pipe_query *query;
};

static uint16_t
tc_call_render_condition(struct pipe_context *pipe, void *call, uint64_t *last)
{
   struct tc_render_condition *p = to_call(call, tc_render_condition);
   pipe->render_condition(pipe, p->query, p->condition, p->mode);
   return call_size(tc_render_condition);
}

static void
tc_render_condition(struct pipe_context *_pipe,
                    struct pipe_query *query, bool condition,
                    enum pipe_render_cond_flag mode)
{
   struct threaded_context *tc = threaded_context(_pipe);
   struct tc_render_condition *p =
      tc_add_call(tc, TC_CALL_render_condition, tc_render_condition);

   p->query = query;
   p->condition = condition;
   p->mode = mode;
}


/********************************************************************
 * constant (immutable) states
 */

#define TC_CSO_CREATE(name, sname) \
   static void * \
   tc_create_##name##_state(struct pipe_context *_pipe, \
                            const struct pipe_##sname##_state *state) \
   { \
      struct pipe_context *pipe = threaded_context(_pipe)->pipe; \
      return pipe->create_##name##_state(pipe, state); \
   }

#define TC_CSO_BIND(name, ...) TC_FUNC1(bind_##name##_state, , void *, , , ##__VA_ARGS__)
#define TC_CSO_DELETE(name) TC_FUNC1(delete_##name##_state, , void *, , )

#define TC_CSO(name, sname, ...) \
   TC_CSO_CREATE(name, sname) \
   TC_CSO_BIND(name, ##__VA_ARGS__) \
   TC_CSO_DELETE(name)

#define TC_CSO_WHOLE(name) TC_CSO(name, name)
#define TC_CSO_SHADER(name) TC_CSO(name, shader)
#define TC_CSO_SHADER_TRACK(name) TC_CSO(name, shader, tc->seen_##name = true;)
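
/* For example, TC_CSO_WHOLE(blend) provides tc_create_blend_state (which
 * calls the driver directly, since CSO creation must return a pointer
 * synchronously) plus queued tc_bind_blend_state and tc_delete_blend_state
 * generated through TC_FUNC1. TC_CSO_SHADER_TRACK additionally sets
 * tc->seen_<name> when the shader is bound.
 */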
977 
978 TC_CSO_WHOLE(blend)
TC_CSO_WHOLE(rasterizer)979 TC_CSO_WHOLE(rasterizer)
980 TC_CSO_WHOLE(depth_stencil_alpha)
981 TC_CSO_WHOLE(compute)
982 TC_CSO_SHADER(fs)
983 TC_CSO_SHADER(vs)
984 TC_CSO_SHADER_TRACK(gs)
985 TC_CSO_SHADER_TRACK(tcs)
986 TC_CSO_SHADER_TRACK(tes)
987 TC_CSO_CREATE(sampler, sampler)
988 TC_CSO_DELETE(sampler)
989 TC_CSO_BIND(vertex_elements)
990 TC_CSO_DELETE(vertex_elements)
991 
992 static void *
993 tc_create_vertex_elements_state(struct pipe_context *_pipe, unsigned count,
994                                 const struct pipe_vertex_element *elems)
995 {
996    struct pipe_context *pipe = threaded_context(_pipe)->pipe;
997 
998    return pipe->create_vertex_elements_state(pipe, count, elems);
999 }
1000 
1001 struct tc_sampler_states {
1002    struct tc_call_base base;
1003    ubyte shader, start, count;
1004    void *slot[0]; /* more will be allocated if needed */
1005 };
1006 
1007 static uint16_t
tc_call_bind_sampler_states(struct pipe_context * pipe,void * call,uint64_t * last)1008 tc_call_bind_sampler_states(struct pipe_context *pipe, void *call, uint64_t *last)
1009 {
1010    struct tc_sampler_states *p = (struct tc_sampler_states *)call;
1011 
1012    pipe->bind_sampler_states(pipe, p->shader, p->start, p->count, p->slot);
1013    return p->base.num_slots;
1014 }
1015 
1016 static void
tc_bind_sampler_states(struct pipe_context * _pipe,enum pipe_shader_type shader,unsigned start,unsigned count,void ** states)1017 tc_bind_sampler_states(struct pipe_context *_pipe,
1018                        enum pipe_shader_type shader,
1019                        unsigned start, unsigned count, void **states)
1020 {
1021    if (!count)
1022       return;
1023 
1024    struct threaded_context *tc = threaded_context(_pipe);
1025    struct tc_sampler_states *p =
1026       tc_add_slot_based_call(tc, TC_CALL_bind_sampler_states, tc_sampler_states, count);
1027 
1028    p->shader = shader;
1029    p->start = start;
1030    p->count = count;
1031    memcpy(p->slot, states, count * sizeof(states[0]));
1032 }
1033 
1034 
1035 /********************************************************************
1036  * immediate states
1037  */
1038 
1039 struct tc_framebuffer {
1040    struct tc_call_base base;
1041    struct pipe_framebuffer_state state;
1042 };
1043 
1044 static uint16_t
tc_call_set_framebuffer_state(struct pipe_context * pipe,void * call,uint64_t * last)1045 tc_call_set_framebuffer_state(struct pipe_context *pipe, void *call, uint64_t *last)
1046 {
1047    struct pipe_framebuffer_state *p = &to_call(call, tc_framebuffer)->state;
1048 
1049    pipe->set_framebuffer_state(pipe, p);
1050 
1051    unsigned nr_cbufs = p->nr_cbufs;
1052    for (unsigned i = 0; i < nr_cbufs; i++)
1053       tc_drop_surface_reference(p->cbufs[i]);
1054    tc_drop_surface_reference(p->zsbuf);
1055    return call_size(tc_framebuffer);
1056 }
1057 
1058 static void
tc_set_framebuffer_state(struct pipe_context * _pipe,const struct pipe_framebuffer_state * fb)1059 tc_set_framebuffer_state(struct pipe_context *_pipe,
1060                          const struct pipe_framebuffer_state *fb)
1061 {
1062    struct threaded_context *tc = threaded_context(_pipe);
1063    struct tc_framebuffer *p =
1064       tc_add_call(tc, TC_CALL_set_framebuffer_state, tc_framebuffer);
1065    unsigned nr_cbufs = fb->nr_cbufs;
1066 
1067    p->state.width = fb->width;
1068    p->state.height = fb->height;
1069    p->state.samples = fb->samples;
1070    p->state.layers = fb->layers;
1071    p->state.nr_cbufs = nr_cbufs;
1072 
1073    for (unsigned i = 0; i < nr_cbufs; i++) {
1074       p->state.cbufs[i] = NULL;
1075       pipe_surface_reference(&p->state.cbufs[i], fb->cbufs[i]);
1076    }
1077    p->state.zsbuf = NULL;
1078    pipe_surface_reference(&p->state.zsbuf, fb->zsbuf);
1079 }
1080 
1081 struct tc_tess_state {
1082    struct tc_call_base base;
1083    float state[6];
1084 };
1085 
1086 static uint16_t
tc_call_set_tess_state(struct pipe_context * pipe,void * call,uint64_t * last)1087 tc_call_set_tess_state(struct pipe_context *pipe, void *call, uint64_t *last)
1088 {
1089    float *p = to_call(call, tc_tess_state)->state;
1090 
1091    pipe->set_tess_state(pipe, p, p + 4);
1092    return call_size(tc_tess_state);
1093 }
1094 
1095 static void
tc_set_tess_state(struct pipe_context * _pipe,const float default_outer_level[4],const float default_inner_level[2])1096 tc_set_tess_state(struct pipe_context *_pipe,
1097                   const float default_outer_level[4],
1098                   const float default_inner_level[2])
1099 {
1100    struct threaded_context *tc = threaded_context(_pipe);
1101    float *p = tc_add_call(tc, TC_CALL_set_tess_state, tc_tess_state)->state;
1102 
1103    memcpy(p, default_outer_level, 4 * sizeof(float));
1104    memcpy(p + 4, default_inner_level, 2 * sizeof(float));
1105 }
1106 
1107 struct tc_patch_vertices {
1108    struct tc_call_base base;
1109    ubyte patch_vertices;
1110 };
1111 
1112 static uint16_t
tc_call_set_patch_vertices(struct pipe_context * pipe,void * call,uint64_t * last)1113 tc_call_set_patch_vertices(struct pipe_context *pipe, void *call, uint64_t *last)
1114 {
1115    uint8_t patch_vertices = to_call(call, tc_patch_vertices)->patch_vertices;
1116 
1117    pipe->set_patch_vertices(pipe, patch_vertices);
1118    return call_size(tc_patch_vertices);
1119 }
1120 
1121 static void
tc_set_patch_vertices(struct pipe_context * _pipe,uint8_t patch_vertices)1122 tc_set_patch_vertices(struct pipe_context *_pipe, uint8_t patch_vertices)
1123 {
1124    struct threaded_context *tc = threaded_context(_pipe);
1125 
1126    tc_add_call(tc, TC_CALL_set_patch_vertices,
1127                tc_patch_vertices)->patch_vertices = patch_vertices;
1128 }
1129 
1130 struct tc_constant_buffer_base {
1131    struct tc_call_base base;
1132    ubyte shader, index;
1133    bool is_null;
1134 };
1135 
1136 struct tc_constant_buffer {
1137    struct tc_constant_buffer_base base;
1138    struct pipe_constant_buffer cb;
1139 };
1140 
1141 static uint16_t
tc_call_set_constant_buffer(struct pipe_context * pipe,void * call,uint64_t * last)1142 tc_call_set_constant_buffer(struct pipe_context *pipe, void *call, uint64_t *last)
1143 {
1144    struct tc_constant_buffer *p = (struct tc_constant_buffer *)call;
1145 
1146    if (unlikely(p->base.is_null)) {
1147       pipe->set_constant_buffer(pipe, p->base.shader, p->base.index, false, NULL);
1148       return call_size(tc_constant_buffer_base);
1149    }
1150 
1151    pipe->set_constant_buffer(pipe, p->base.shader, p->base.index, true, &p->cb);
1152    return call_size(tc_constant_buffer);
1153 }
1154 
1155 static void
tc_set_constant_buffer(struct pipe_context * _pipe,enum pipe_shader_type shader,uint index,bool take_ownership,const struct pipe_constant_buffer * cb)1156 tc_set_constant_buffer(struct pipe_context *_pipe,
1157                        enum pipe_shader_type shader, uint index,
1158                        bool take_ownership,
1159                        const struct pipe_constant_buffer *cb)
1160 {
1161    struct threaded_context *tc = threaded_context(_pipe);
1162 
1163    if (unlikely(!cb || (!cb->buffer && !cb->user_buffer))) {
1164       struct tc_constant_buffer_base *p =
1165          tc_add_call(tc, TC_CALL_set_constant_buffer, tc_constant_buffer_base);
1166       p->shader = shader;
1167       p->index = index;
1168       p->is_null = true;
1169       tc_unbind_buffer(&tc->const_buffers[shader][index]);
1170       return;
1171    }
1172 
1173    struct pipe_resource *buffer;
1174    unsigned offset;
1175 
1176    if (cb->user_buffer) {
1177       /* This must be done before adding set_constant_buffer, because it could
1178        * generate e.g. transfer_unmap and flush partially-uninitialized
1179        * set_constant_buffer to the driver if it was done afterwards.
1180        */
1181       buffer = NULL;
1182       u_upload_data(tc->base.const_uploader, 0, cb->buffer_size,
1183                     tc->ubo_alignment, cb->user_buffer, &offset, &buffer);
1184       u_upload_unmap(tc->base.const_uploader);
1185       take_ownership = true;
1186    } else {
1187       buffer = cb->buffer;
1188       offset = cb->buffer_offset;
1189    }
1190 
1191    struct tc_constant_buffer *p =
1192       tc_add_call(tc, TC_CALL_set_constant_buffer, tc_constant_buffer);
1193    p->base.shader = shader;
1194    p->base.index = index;
1195    p->base.is_null = false;
1196    p->cb.user_buffer = NULL;
1197    p->cb.buffer_offset = offset;
1198    p->cb.buffer_size = cb->buffer_size;
1199 
1200    if (take_ownership)
1201       p->cb.buffer = buffer;
1202    else
1203       tc_set_resource_reference(&p->cb.buffer, buffer);
1204 
1205    if (buffer) {
1206       tc_bind_buffer(&tc->const_buffers[shader][index],
1207                      &tc->buffer_lists[tc->next_buf_list], buffer);
1208    } else {
1209       tc_unbind_buffer(&tc->const_buffers[shader][index]);
1210    }
1211 }
1212 
1213 struct tc_inlinable_constants {
1214    struct tc_call_base base;
1215    ubyte shader;
1216    ubyte num_values;
1217    uint32_t values[MAX_INLINABLE_UNIFORMS];
1218 };
1219 
1220 static uint16_t
tc_call_set_inlinable_constants(struct pipe_context * pipe,void * call,uint64_t * last)1221 tc_call_set_inlinable_constants(struct pipe_context *pipe, void *call, uint64_t *last)
1222 {
1223    struct tc_inlinable_constants *p = to_call(call, tc_inlinable_constants);
1224 
1225    pipe->set_inlinable_constants(pipe, p->shader, p->num_values, p->values);
1226    return call_size(tc_inlinable_constants);
1227 }
1228 
1229 static void
tc_set_inlinable_constants(struct pipe_context * _pipe,enum pipe_shader_type shader,uint num_values,uint32_t * values)1230 tc_set_inlinable_constants(struct pipe_context *_pipe,
1231                            enum pipe_shader_type shader,
1232                            uint num_values, uint32_t *values)
1233 {
1234    struct threaded_context *tc = threaded_context(_pipe);
1235    struct tc_inlinable_constants *p =
1236       tc_add_call(tc, TC_CALL_set_inlinable_constants, tc_inlinable_constants);
1237    p->shader = shader;
1238    p->num_values = num_values;
1239    memcpy(p->values, values, num_values * 4);
1240 }
1241 
1242 struct tc_sample_locations {
1243    struct tc_call_base base;
1244    uint16_t size;
1245    uint8_t slot[0];
1246 };
1247 
1248 
1249 static uint16_t
tc_call_set_sample_locations(struct pipe_context * pipe,void * call,uint64_t * last)1250 tc_call_set_sample_locations(struct pipe_context *pipe, void *call, uint64_t *last)
1251 {
1252    struct tc_sample_locations *p = (struct tc_sample_locations *)call;
1253 
1254    pipe->set_sample_locations(pipe, p->size, p->slot);
1255    return p->base.num_slots;
1256 }
1257 
1258 static void
tc_set_sample_locations(struct pipe_context * _pipe,size_t size,const uint8_t * locations)1259 tc_set_sample_locations(struct pipe_context *_pipe, size_t size, const uint8_t *locations)
1260 {
1261    struct threaded_context *tc = threaded_context(_pipe);
1262    struct tc_sample_locations *p =
1263       tc_add_slot_based_call(tc, TC_CALL_set_sample_locations,
1264                              tc_sample_locations, size);
1265 
1266    p->size = size;
1267    memcpy(p->slot, locations, size);
1268 }
1269 
1270 struct tc_scissors {
1271    struct tc_call_base base;
1272    ubyte start, count;
1273    struct pipe_scissor_state slot[0]; /* more will be allocated if needed */
1274 };
1275 
1276 static uint16_t
tc_call_set_scissor_states(struct pipe_context * pipe,void * call,uint64_t * last)1277 tc_call_set_scissor_states(struct pipe_context *pipe, void *call, uint64_t *last)
1278 {
1279    struct tc_scissors *p = (struct tc_scissors *)call;
1280 
1281    pipe->set_scissor_states(pipe, p->start, p->count, p->slot);
1282    return p->base.num_slots;
1283 }
1284 
1285 static void
tc_set_scissor_states(struct pipe_context * _pipe,unsigned start,unsigned count,const struct pipe_scissor_state * states)1286 tc_set_scissor_states(struct pipe_context *_pipe,
1287                       unsigned start, unsigned count,
1288                       const struct pipe_scissor_state *states)
1289 {
1290    struct threaded_context *tc = threaded_context(_pipe);
1291    struct tc_scissors *p =
1292       tc_add_slot_based_call(tc, TC_CALL_set_scissor_states, tc_scissors, count);
1293 
1294    p->start = start;
1295    p->count = count;
1296    memcpy(&p->slot, states, count * sizeof(states[0]));
1297 }
1298 
1299 struct tc_viewports {
1300    struct tc_call_base base;
1301    ubyte start, count;
1302    struct pipe_viewport_state slot[0]; /* more will be allocated if needed */
1303 };
1304 
1305 static uint16_t
tc_call_set_viewport_states(struct pipe_context * pipe,void * call,uint64_t * last)1306 tc_call_set_viewport_states(struct pipe_context *pipe, void *call, uint64_t *last)
1307 {
1308    struct tc_viewports *p = (struct tc_viewports *)call;
1309 
1310    pipe->set_viewport_states(pipe, p->start, p->count, p->slot);
1311    return p->base.num_slots;
1312 }
1313 
1314 static void
tc_set_viewport_states(struct pipe_context * _pipe,unsigned start,unsigned count,const struct pipe_viewport_state * states)1315 tc_set_viewport_states(struct pipe_context *_pipe,
1316                        unsigned start, unsigned count,
1317                        const struct pipe_viewport_state *states)
1318 {
1319    if (!count)
1320       return;
1321 
1322    struct threaded_context *tc = threaded_context(_pipe);
1323    struct tc_viewports *p =
1324       tc_add_slot_based_call(tc, TC_CALL_set_viewport_states, tc_viewports, count);
1325 
1326    p->start = start;
1327    p->count = count;
1328    memcpy(&p->slot, states, count * sizeof(states[0]));
1329 }
1330 
1331 struct tc_window_rects {
1332    struct tc_call_base base;
1333    bool include;
1334    ubyte count;
1335    struct pipe_scissor_state slot[0]; /* more will be allocated if needed */
1336 };
1337 
1338 static uint16_t
tc_call_set_window_rectangles(struct pipe_context * pipe,void * call,uint64_t * last)1339 tc_call_set_window_rectangles(struct pipe_context *pipe, void *call, uint64_t *last)
1340 {
1341    struct tc_window_rects *p = (struct tc_window_rects *)call;
1342 
1343    pipe->set_window_rectangles(pipe, p->include, p->count, p->slot);
1344    return p->base.num_slots;
1345 }
1346 
1347 static void
tc_set_window_rectangles(struct pipe_context * _pipe,bool include,unsigned count,const struct pipe_scissor_state * rects)1348 tc_set_window_rectangles(struct pipe_context *_pipe, bool include,
1349                          unsigned count,
1350                          const struct pipe_scissor_state *rects)
1351 {
1352    struct threaded_context *tc = threaded_context(_pipe);
1353    struct tc_window_rects *p =
1354       tc_add_slot_based_call(tc, TC_CALL_set_window_rectangles, tc_window_rects, count);
1355 
1356    p->include = include;
1357    p->count = count;
1358    memcpy(p->slot, rects, count * sizeof(rects[0]));
1359 }
1360 
1361 struct tc_sampler_views {
1362    struct tc_call_base base;
1363    ubyte shader, start, count, unbind_num_trailing_slots;
1364    struct pipe_sampler_view *slot[0]; /* more will be allocated if needed */
1365 };
1366 
1367 static uint16_t
tc_call_set_sampler_views(struct pipe_context * pipe,void * call,uint64_t * last)1368 tc_call_set_sampler_views(struct pipe_context *pipe, void *call, uint64_t *last)
1369 {
1370    struct tc_sampler_views *p = (struct tc_sampler_views *)call;
1371 
1372    pipe->set_sampler_views(pipe, p->shader, p->start, p->count,
1373                            p->unbind_num_trailing_slots, true, p->slot);
1374    return p->base.num_slots;
1375 }
1376 
1377 static void
tc_set_sampler_views(struct pipe_context * _pipe,enum pipe_shader_type shader,unsigned start,unsigned count,unsigned unbind_num_trailing_slots,bool take_ownership,struct pipe_sampler_view ** views)1378 tc_set_sampler_views(struct pipe_context *_pipe,
1379                      enum pipe_shader_type shader,
1380                      unsigned start, unsigned count,
1381                      unsigned unbind_num_trailing_slots, bool take_ownership,
1382                      struct pipe_sampler_view **views)
1383 {
1384    if (!count && !unbind_num_trailing_slots)
1385       return;
1386 
1387    struct threaded_context *tc = threaded_context(_pipe);
1388    struct tc_sampler_views *p =
1389       tc_add_slot_based_call(tc, TC_CALL_set_sampler_views, tc_sampler_views,
1390                              views ? count : 0);
1391 
1392    p->shader = shader;
1393    p->start = start;
1394 
1395    if (views) {
1396       struct tc_buffer_list *next = &tc->buffer_lists[tc->next_buf_list];
1397 
1398       p->count = count;
1399       p->unbind_num_trailing_slots = unbind_num_trailing_slots;
1400 
1401       if (take_ownership) {
1402          memcpy(p->slot, views, sizeof(*views) * count);
1403 
1404          for (unsigned i = 0; i < count; i++) {
1405             if (views[i] && views[i]->target == PIPE_BUFFER) {
1406                tc_bind_buffer(&tc->sampler_buffers[shader][start + i], next,
1407                               views[i]->texture);
1408             } else {
1409                tc_unbind_buffer(&tc->sampler_buffers[shader][start + i]);
1410             }
1411          }
1412       } else {
1413          for (unsigned i = 0; i < count; i++) {
1414             p->slot[i] = NULL;
1415             pipe_sampler_view_reference(&p->slot[i], views[i]);
1416 
1417             if (views[i] && views[i]->target == PIPE_BUFFER) {
1418                tc_bind_buffer(&tc->sampler_buffers[shader][start + i], next,
1419                               views[i]->texture);
1420             } else {
1421                tc_unbind_buffer(&tc->sampler_buffers[shader][start + i]);
1422             }
1423          }
1424       }
1425 
1426       tc_unbind_buffers(&tc->sampler_buffers[shader][start + count],
1427                         unbind_num_trailing_slots);
1428       tc->seen_sampler_buffers[shader] = true;
1429    } else {
1430       p->count = 0;
1431       p->unbind_num_trailing_slots = count + unbind_num_trailing_slots;
1432 
1433       tc_unbind_buffers(&tc->sampler_buffers[shader][start],
1434                         count + unbind_num_trailing_slots);
1435    }
1436 }
1437 
1438 struct tc_shader_images {
1439    struct tc_call_base base;
1440    ubyte shader, start, count;
1441    ubyte unbind_num_trailing_slots;
1442    struct pipe_image_view slot[0]; /* more will be allocated if needed */
1443 };
1444 
1445 static uint16_t
1446 tc_call_set_shader_images(struct pipe_context *pipe, void *call, uint64_t *last)
1447 {
1448    struct tc_shader_images *p = (struct tc_shader_images *)call;
1449    unsigned count = p->count;
1450 
1451    if (!p->count) {
1452       pipe->set_shader_images(pipe, p->shader, p->start, 0,
1453                               p->unbind_num_trailing_slots, NULL);
1454       return call_size(tc_shader_images);
1455    }
1456 
1457    pipe->set_shader_images(pipe, p->shader, p->start, p->count,
1458                            p->unbind_num_trailing_slots, p->slot);
1459 
1460    for (unsigned i = 0; i < count; i++)
1461       tc_drop_resource_reference(p->slot[i].resource);
1462 
1463    return p->base.num_slots;
1464 }
1465 
1466 static void
1467 tc_set_shader_images(struct pipe_context *_pipe,
1468                      enum pipe_shader_type shader,
1469                      unsigned start, unsigned count,
1470                      unsigned unbind_num_trailing_slots,
1471                      const struct pipe_image_view *images)
1472 {
1473    if (!count && !unbind_num_trailing_slots)
1474       return;
1475 
1476    struct threaded_context *tc = threaded_context(_pipe);
1477    struct tc_shader_images *p =
1478       tc_add_slot_based_call(tc, TC_CALL_set_shader_images, tc_shader_images,
1479                              images ? count : 0);
1480    unsigned writable_buffers = 0;
1481 
1482    p->shader = shader;
1483    p->start = start;
1484 
1485    if (images) {
1486       p->count = count;
1487       p->unbind_num_trailing_slots = unbind_num_trailing_slots;
1488 
1489       struct tc_buffer_list *next = &tc->buffer_lists[tc->next_buf_list];
1490 
1491       for (unsigned i = 0; i < count; i++) {
1492          struct pipe_resource *resource = images[i].resource;
1493 
1494          tc_set_resource_reference(&p->slot[i].resource, resource);
1495 
1496          if (resource && resource->target == PIPE_BUFFER) {
1497             tc_bind_buffer(&tc->image_buffers[shader][start + i], next, resource);
1498 
1499             if (images[i].access & PIPE_IMAGE_ACCESS_WRITE) {
1500                struct threaded_resource *tres = threaded_resource(resource);
1501 
1502                util_range_add(&tres->b, &tres->valid_buffer_range,
1503                               images[i].u.buf.offset,
1504                               images[i].u.buf.offset + images[i].u.buf.size);
1505                writable_buffers |= BITFIELD_BIT(start + i);
1506             }
1507          } else {
1508             tc_unbind_buffer(&tc->image_buffers[shader][start + i]);
1509          }
1510       }
1511       memcpy(p->slot, images, count * sizeof(images[0]));
1512 
1513       tc_unbind_buffers(&tc->image_buffers[shader][start + count],
1514                         unbind_num_trailing_slots);
1515       tc->seen_image_buffers[shader] = true;
1516    } else {
1517       p->count = 0;
1518       p->unbind_num_trailing_slots = count + unbind_num_trailing_slots;
1519 
1520       tc_unbind_buffers(&tc->image_buffers[shader][start],
1521                         count + unbind_num_trailing_slots);
1522    }
1523 
1524    tc->image_buffers_writeable_mask[shader] &= ~BITFIELD_RANGE(start, count);
1525    tc->image_buffers_writeable_mask[shader] |= writable_buffers;
1526 }
1527 
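/* Shader buffers (SSBOs). Slots set in writable_bitmask extend the buffer's
 * valid range and are tracked in shader_buffers_writeable_mask.
 */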
1528 struct tc_shader_buffers {
1529    struct tc_call_base base;
1530    ubyte shader, start, count;
1531    bool unbind;
1532    unsigned writable_bitmask;
1533    struct pipe_shader_buffer slot[0]; /* more will be allocated if needed */
1534 };
1535 
1536 static uint16_t
1537 tc_call_set_shader_buffers(struct pipe_context *pipe, void *call, uint64_t *last)
1538 {
1539    struct tc_shader_buffers *p = (struct tc_shader_buffers *)call;
1540    unsigned count = p->count;
1541 
1542    if (p->unbind) {
1543       pipe->set_shader_buffers(pipe, p->shader, p->start, p->count, NULL, 0);
1544       return call_size(tc_shader_buffers);
1545    }
1546 
1547    pipe->set_shader_buffers(pipe, p->shader, p->start, p->count, p->slot,
1548                             p->writable_bitmask);
1549 
1550    for (unsigned i = 0; i < count; i++)
1551       tc_drop_resource_reference(p->slot[i].buffer);
1552 
1553    return p->base.num_slots;
1554 }
1555 
1556 static void
1557 tc_set_shader_buffers(struct pipe_context *_pipe,
1558                       enum pipe_shader_type shader,
1559                       unsigned start, unsigned count,
1560                       const struct pipe_shader_buffer *buffers,
1561                       unsigned writable_bitmask)
1562 {
1563    if (!count)
1564       return;
1565 
1566    struct threaded_context *tc = threaded_context(_pipe);
1567    struct tc_shader_buffers *p =
1568       tc_add_slot_based_call(tc, TC_CALL_set_shader_buffers, tc_shader_buffers,
1569                              buffers ? count : 0);
1570 
1571    p->shader = shader;
1572    p->start = start;
1573    p->count = count;
1574    p->unbind = buffers == NULL;
1575    p->writable_bitmask = writable_bitmask;
1576 
1577    if (buffers) {
1578       struct tc_buffer_list *next = &tc->buffer_lists[tc->next_buf_list];
1579 
1580       for (unsigned i = 0; i < count; i++) {
1581          struct pipe_shader_buffer *dst = &p->slot[i];
1582          const struct pipe_shader_buffer *src = buffers + i;
1583 
1584          tc_set_resource_reference(&dst->buffer, src->buffer);
1585          dst->buffer_offset = src->buffer_offset;
1586          dst->buffer_size = src->buffer_size;
1587 
1588          if (src->buffer) {
1589             struct threaded_resource *tres = threaded_resource(src->buffer);
1590 
1591             tc_bind_buffer(&tc->shader_buffers[shader][start + i], next, &tres->b);
1592 
1593             if (writable_bitmask & BITFIELD_BIT(i)) {
1594                util_range_add(&tres->b, &tres->valid_buffer_range,
1595                               src->buffer_offset,
1596                               src->buffer_offset + src->buffer_size);
1597             }
1598          } else {
1599             tc_unbind_buffer(&tc->shader_buffers[shader][start + i]);
1600          }
1601       }
1602       tc->seen_shader_buffers[shader] = true;
1603    } else {
1604       tc_unbind_buffers(&tc->shader_buffers[shader][start], count);
1605    }
1606 
1607    tc->shader_buffers_writeable_mask[shader] &= ~BITFIELD_RANGE(start, count);
1608    tc->shader_buffers_writeable_mask[shader] |= writable_bitmask << start;
1609 }
1610 
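/* Vertex buffers. User vertex buffers must already have been lowered to real
 * buffers at this point; the asserts below enforce !is_user_buffer.
 */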
1611 struct tc_vertex_buffers {
1612    struct tc_call_base base;
1613    ubyte start, count;
1614    ubyte unbind_num_trailing_slots;
1615    struct pipe_vertex_buffer slot[0]; /* more will be allocated if needed */
1616 };
1617 
1618 static uint16_t
1619 tc_call_set_vertex_buffers(struct pipe_context *pipe, void *call, uint64_t *last)
1620 {
1621    struct tc_vertex_buffers *p = (struct tc_vertex_buffers *)call;
1622    unsigned count = p->count;
1623 
1624    if (!count) {
1625       pipe->set_vertex_buffers(pipe, p->start, 0,
1626                                p->unbind_num_trailing_slots, false, NULL);
1627       return call_size(tc_vertex_buffers);
1628    }
1629 
1630    for (unsigned i = 0; i < count; i++)
1631       tc_assert(!p->slot[i].is_user_buffer);
1632 
1633    pipe->set_vertex_buffers(pipe, p->start, count,
1634                             p->unbind_num_trailing_slots, true, p->slot);
1635    return p->base.num_slots;
1636 }
1637 
1638 static void
1639 tc_set_vertex_buffers(struct pipe_context *_pipe,
1640                       unsigned start, unsigned count,
1641                       unsigned unbind_num_trailing_slots,
1642                       bool take_ownership,
1643                       const struct pipe_vertex_buffer *buffers)
1644 {
1645    struct threaded_context *tc = threaded_context(_pipe);
1646 
1647    if (!count && !unbind_num_trailing_slots)
1648       return;
1649 
1650    if (count && buffers) {
1651       struct tc_vertex_buffers *p =
1652          tc_add_slot_based_call(tc, TC_CALL_set_vertex_buffers, tc_vertex_buffers, count);
1653       p->start = start;
1654       p->count = count;
1655       p->unbind_num_trailing_slots = unbind_num_trailing_slots;
1656 
1657       struct tc_buffer_list *next = &tc->buffer_lists[tc->next_buf_list];
1658 
1659       if (take_ownership) {
1660          memcpy(p->slot, buffers, count * sizeof(struct pipe_vertex_buffer));
1661 
1662          for (unsigned i = 0; i < count; i++) {
1663             struct pipe_resource *buf = buffers[i].buffer.resource;
1664 
1665             if (buf) {
1666                tc_bind_buffer(&tc->vertex_buffers[start + i], next, buf);
1667             } else {
1668                tc_unbind_buffer(&tc->vertex_buffers[start + i]);
1669             }
1670          }
1671       } else {
1672          for (unsigned i = 0; i < count; i++) {
1673             struct pipe_vertex_buffer *dst = &p->slot[i];
1674             const struct pipe_vertex_buffer *src = buffers + i;
1675             struct pipe_resource *buf = src->buffer.resource;
1676 
1677             tc_assert(!src->is_user_buffer);
1678             dst->stride = src->stride;
1679             dst->is_user_buffer = false;
1680             tc_set_resource_reference(&dst->buffer.resource, buf);
1681             dst->buffer_offset = src->buffer_offset;
1682 
1683             if (buf) {
1684                tc_bind_buffer(&tc->vertex_buffers[start + i], next, buf);
1685             } else {
1686                tc_unbind_buffer(&tc->vertex_buffers[start + i]);
1687             }
1688          }
1689       }
1690 
1691       tc_unbind_buffers(&tc->vertex_buffers[start + count],
1692                         unbind_num_trailing_slots);
1693    } else {
1694       struct tc_vertex_buffers *p =
1695          tc_add_slot_based_call(tc, TC_CALL_set_vertex_buffers, tc_vertex_buffers, 0);
1696       p->start = start;
1697       p->count = 0;
1698       p->unbind_num_trailing_slots = count + unbind_num_trailing_slots;
1699 
1700       tc_unbind_buffers(&tc->vertex_buffers[start],
1701                         count + unbind_num_trailing_slots);
1702    }
1703 }
1704 
1705 struct tc_stream_outputs {
1706    struct tc_call_base base;
1707    unsigned count;
1708    struct pipe_stream_output_target *targets[PIPE_MAX_SO_BUFFERS];
1709    unsigned offsets[PIPE_MAX_SO_BUFFERS];
1710 };
1711 
1712 static uint16_t
1713 tc_call_set_stream_output_targets(struct pipe_context *pipe, void *call, uint64_t *last)
1714 {
1715    struct tc_stream_outputs *p = to_call(call, tc_stream_outputs);
1716    unsigned count = p->count;
1717 
1718    pipe->set_stream_output_targets(pipe, count, p->targets, p->offsets);
1719    for (unsigned i = 0; i < count; i++)
1720       tc_drop_so_target_reference(p->targets[i]);
1721 
1722    return call_size(tc_stream_outputs);
1723 }
1724 
1725 static void
1726 tc_set_stream_output_targets(struct pipe_context *_pipe,
1727                              unsigned count,
1728                              struct pipe_stream_output_target **tgs,
1729                              const unsigned *offsets)
1730 {
1731    struct threaded_context *tc = threaded_context(_pipe);
1732    struct tc_stream_outputs *p =
1733       tc_add_call(tc, TC_CALL_set_stream_output_targets, tc_stream_outputs);
1734    struct tc_buffer_list *next = &tc->buffer_lists[tc->next_buf_list];
1735 
1736    for (unsigned i = 0; i < count; i++) {
1737       p->targets[i] = NULL;
1738       pipe_so_target_reference(&p->targets[i], tgs[i]);
1739       if (tgs[i]) {
1740          tc_bind_buffer(&tc->streamout_buffers[i], next, tgs[i]->buffer);
1741       } else {
1742          tc_unbind_buffer(&tc->streamout_buffers[i]);
1743       }
1744    }
1745    p->count = count;
1746    memcpy(p->offsets, offsets, count * sizeof(unsigned));
1747 
1748    tc_unbind_buffers(&tc->streamout_buffers[count], PIPE_MAX_SO_BUFFERS - count);
1749    if (count)
1750       tc->seen_streamout_buffers = true;
1751 }
1752 
1753 static void
1754 tc_set_compute_resources(struct pipe_context *_pipe, unsigned start,
1755                          unsigned count, struct pipe_surface **resources)
1756 {
1757    struct threaded_context *tc = threaded_context(_pipe);
1758    struct pipe_context *pipe = tc->pipe;
1759 
1760    tc_sync(tc);
1761    pipe->set_compute_resources(pipe, start, count, resources);
1762 }
1763 
1764 static void
1765 tc_set_global_binding(struct pipe_context *_pipe, unsigned first,
1766                       unsigned count, struct pipe_resource **resources,
1767                       uint32_t **handles)
1768 {
1769    struct threaded_context *tc = threaded_context(_pipe);
1770    struct pipe_context *pipe = tc->pipe;
1771 
1772    tc_sync(tc);
1773    pipe->set_global_binding(pipe, first, count, resources, handles);
1774 }
1775 
1776 
1777 /********************************************************************
1778  * views
1779  */
1780 
1781 static struct pipe_surface *
1782 tc_create_surface(struct pipe_context *_pipe,
1783                   struct pipe_resource *resource,
1784                   const struct pipe_surface *surf_tmpl)
1785 {
1786    struct pipe_context *pipe = threaded_context(_pipe)->pipe;
1787    struct pipe_surface *view =
1788          pipe->create_surface(pipe, resource, surf_tmpl);
1789 
1790    if (view)
1791       view->context = _pipe;
1792    return view;
1793 }
1794 
1795 static void
1796 tc_surface_destroy(struct pipe_context *_pipe,
1797                    struct pipe_surface *surf)
1798 {
1799    struct pipe_context *pipe = threaded_context(_pipe)->pipe;
1800 
1801    pipe->surface_destroy(pipe, surf);
1802 }
1803 
1804 static struct pipe_sampler_view *
1805 tc_create_sampler_view(struct pipe_context *_pipe,
1806                        struct pipe_resource *resource,
1807                        const struct pipe_sampler_view *templ)
1808 {
1809    struct pipe_context *pipe = threaded_context(_pipe)->pipe;
1810    struct pipe_sampler_view *view =
1811          pipe->create_sampler_view(pipe, resource, templ);
1812 
1813    if (view)
1814       view->context = _pipe;
1815    return view;
1816 }
1817 
1818 static void
1819 tc_sampler_view_destroy(struct pipe_context *_pipe,
1820                         struct pipe_sampler_view *view)
1821 {
1822    struct pipe_context *pipe = threaded_context(_pipe)->pipe;
1823 
1824    pipe->sampler_view_destroy(pipe, view);
1825 }
1826 
1827 static struct pipe_stream_output_target *
1828 tc_create_stream_output_target(struct pipe_context *_pipe,
1829                                struct pipe_resource *res,
1830                                unsigned buffer_offset,
1831                                unsigned buffer_size)
1832 {
1833    struct pipe_context *pipe = threaded_context(_pipe)->pipe;
1834    struct threaded_resource *tres = threaded_resource(res);
1835    struct pipe_stream_output_target *view;
1836 
1837    util_range_add(&tres->b, &tres->valid_buffer_range, buffer_offset,
1838                   buffer_offset + buffer_size);
1839 
1840    view = pipe->create_stream_output_target(pipe, res, buffer_offset,
1841                                             buffer_size);
1842    if (view)
1843       view->context = _pipe;
1844    return view;
1845 }
1846 
1847 static void
1848 tc_stream_output_target_destroy(struct pipe_context *_pipe,
1849                                 struct pipe_stream_output_target *target)
1850 {
1851    struct pipe_context *pipe = threaded_context(_pipe)->pipe;
1852 
1853    pipe->stream_output_target_destroy(pipe, target);
1854 }
1855 
1856 
1857 /********************************************************************
1858  * bindless
1859  */
1860 
1861 static uint64_t
1862 tc_create_texture_handle(struct pipe_context *_pipe,
1863                          struct pipe_sampler_view *view,
1864                          const struct pipe_sampler_state *state)
1865 {
1866    struct threaded_context *tc = threaded_context(_pipe);
1867    struct pipe_context *pipe = tc->pipe;
1868 
1869    tc_sync(tc);
1870    return pipe->create_texture_handle(pipe, view, state);
1871 }
1872 
1873 struct tc_make_texture_handle_resident {
1874    struct tc_call_base base;
1875    bool resident;
1876    uint64_t handle;
1877 };
1878 
1879 static uint16_t
1880 tc_call_make_texture_handle_resident(struct pipe_context *pipe, void *call, uint64_t *last)
1881 {
1882    struct tc_make_texture_handle_resident *p =
1883       to_call(call, tc_make_texture_handle_resident);
1884 
1885    pipe->make_texture_handle_resident(pipe, p->handle, p->resident);
1886    return call_size(tc_make_texture_handle_resident);
1887 }
1888 
1889 static void
1890 tc_make_texture_handle_resident(struct pipe_context *_pipe, uint64_t handle,
1891                                 bool resident)
1892 {
1893    struct threaded_context *tc = threaded_context(_pipe);
1894    struct tc_make_texture_handle_resident *p =
1895       tc_add_call(tc, TC_CALL_make_texture_handle_resident,
1896                   tc_make_texture_handle_resident);
1897 
1898    p->handle = handle;
1899    p->resident = resident;
1900 }
1901 
1902 static uint64_t
1903 tc_create_image_handle(struct pipe_context *_pipe,
1904                        const struct pipe_image_view *image)
1905 {
1906    struct threaded_context *tc = threaded_context(_pipe);
1907    struct pipe_context *pipe = tc->pipe;
1908 
1909    tc_sync(tc);
1910    return pipe->create_image_handle(pipe, image);
1911 }
1912 
1913 struct tc_make_image_handle_resident {
1914    struct tc_call_base base;
1915    bool resident;
1916    unsigned access;
1917    uint64_t handle;
1918 };
1919 
1920 static uint16_t
1921 tc_call_make_image_handle_resident(struct pipe_context *pipe, void *call, uint64_t *last)
1922 {
1923    struct tc_make_image_handle_resident *p =
1924       to_call(call, tc_make_image_handle_resident);
1925 
1926    pipe->make_image_handle_resident(pipe, p->handle, p->access, p->resident);
1927    return call_size(tc_make_image_handle_resident);
1928 }
1929 
1930 static void
1931 tc_make_image_handle_resident(struct pipe_context *_pipe, uint64_t handle,
1932                               unsigned access, bool resident)
1933 {
1934    struct threaded_context *tc = threaded_context(_pipe);
1935    struct tc_make_image_handle_resident *p =
1936       tc_add_call(tc, TC_CALL_make_image_handle_resident,
1937                   tc_make_image_handle_resident);
1938 
1939    p->handle = handle;
1940    p->access = access;
1941    p->resident = resident;
1942 }
1943 
1944 
1945 /********************************************************************
1946  * transfer
1947  */
1948 
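/* Buffer invalidation: the front-end allocates fresh storage, rebinds the
 * buffer, and enqueues tc_call_replace_buffer_storage so the driver-provided
 * callback can swap the storage and delete the old buffer ID on its thread.
 */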
1949 struct tc_replace_buffer_storage {
1950    struct tc_call_base base;
1951    uint16_t num_rebinds;
1952    uint32_t rebind_mask;
1953    uint32_t delete_buffer_id;
1954    struct pipe_resource *dst;
1955    struct pipe_resource *src;
1956    tc_replace_buffer_storage_func func;
1957 };
1958 
1959 static uint16_t
1960 tc_call_replace_buffer_storage(struct pipe_context *pipe, void *call, uint64_t *last)
1961 {
1962    struct tc_replace_buffer_storage *p = to_call(call, tc_replace_buffer_storage);
1963 
1964    p->func(pipe, p->dst, p->src, p->num_rebinds, p->rebind_mask, p->delete_buffer_id);
1965 
1966    tc_drop_resource_reference(p->dst);
1967    tc_drop_resource_reference(p->src);
1968    return call_size(tc_replace_buffer_storage);
1969 }
1970 
1971 /* Return true if the buffer has been invalidated or is idle. */
1972 static bool
1973 tc_invalidate_buffer(struct threaded_context *tc,
1974                      struct threaded_resource *tbuf)
1975 {
1976    if (!tc_is_buffer_busy(tc, tbuf, PIPE_MAP_READ_WRITE)) {
1977       /* It's idle, so an invalidation would be a no-op, but we can still
1978        * clear the valid range, since we are conceptually doing the
1979        * invalidation and merely skipping the useless reallocation.
1980        *
1981        * If the buffer is bound for write, we can't clear the valid range.
1982        */
1983       if (!tc_is_buffer_bound_for_write(tc, tbuf->buffer_id_unique))
1984          util_range_set_empty(&tbuf->valid_buffer_range);
1985       return true;
1986    }
1987 
1988    struct pipe_screen *screen = tc->base.screen;
1989    struct pipe_resource *new_buf;
1990 
1991    /* Shared, pinned, and sparse buffers can't be reallocated. */
1992    if (tbuf->is_shared ||
1993        tbuf->is_user_ptr ||
1994        tbuf->b.flags & PIPE_RESOURCE_FLAG_SPARSE)
1995       return false;
1996 
1997    /* Allocate a new one. */
1998    new_buf = screen->resource_create(screen, &tbuf->b);
1999    if (!new_buf)
2000       return false;
2001 
2002    /* Replace the "latest" pointer. */
2003    if (tbuf->latest != &tbuf->b)
2004       pipe_resource_reference(&tbuf->latest, NULL);
2005 
2006    tbuf->latest = new_buf;
2007 
2008    uint32_t delete_buffer_id = tbuf->buffer_id_unique;
2009 
2010    /* Enqueue storage replacement of the original buffer. */
2011    struct tc_replace_buffer_storage *p =
2012       tc_add_call(tc, TC_CALL_replace_buffer_storage,
2013                   tc_replace_buffer_storage);
2014 
2015    p->func = tc->replace_buffer_storage;
2016    tc_set_resource_reference(&p->dst, &tbuf->b);
2017    tc_set_resource_reference(&p->src, new_buf);
2018    p->delete_buffer_id = delete_buffer_id;
2019    p->rebind_mask = 0;
2020 
2021    /* Treat the current buffer as the new buffer. */
2022    bool bound_for_write = tc_is_buffer_bound_for_write(tc, tbuf->buffer_id_unique);
2023    p->num_rebinds = tc_rebind_buffer(tc, tbuf->buffer_id_unique,
2024                                      threaded_resource(new_buf)->buffer_id_unique,
2025                                      &p->rebind_mask);
2026 
2027    /* If the buffer is not bound for write, clear the valid range. */
2028    if (!bound_for_write)
2029       util_range_set_empty(&tbuf->valid_buffer_range);
2030 
2031    tbuf->buffer_id_unique = threaded_resource(new_buf)->buffer_id_unique;
2032    threaded_resource(new_buf)->buffer_id_unique = 0;
2033 
2034    return true;
2035 }
2036 
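/* Adjust map flags to avoid synchronization where possible: add the
 * TC-internal flags, promote a whole-buffer DISCARD_RANGE to
 * DISCARD_WHOLE_RESOURCE, invalidate busy buffers when allowed, and mark
 * idle or uninitialized ranges as UNSYNCHRONIZED.
 */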
2037 static unsigned
2038 tc_improve_map_buffer_flags(struct threaded_context *tc,
2039                             struct threaded_resource *tres, unsigned usage,
2040                             unsigned offset, unsigned size)
2041 {
2042    /* Never invalidate inside the driver and never infer "unsynchronized". */
2043    unsigned tc_flags = TC_TRANSFER_MAP_NO_INVALIDATE |
2044                        TC_TRANSFER_MAP_NO_INFER_UNSYNCHRONIZED;
2045 
2046    /* Prevent a reentry. */
2047    if (usage & tc_flags)
2048       return usage;
2049 
2050    /* Use the staging upload if it's preferred. */
2051    if (usage & (PIPE_MAP_DISCARD_RANGE |
2052                 PIPE_MAP_DISCARD_WHOLE_RESOURCE) &&
2053        !(usage & PIPE_MAP_PERSISTENT) &&
2054        tres->b.flags & PIPE_RESOURCE_FLAG_DONT_MAP_DIRECTLY &&
2055        tc->use_forced_staging_uploads) {
2056       usage &= ~(PIPE_MAP_DISCARD_WHOLE_RESOURCE |
2057                  PIPE_MAP_UNSYNCHRONIZED);
2058 
2059       return usage | tc_flags | PIPE_MAP_DISCARD_RANGE;
2060    }
2061 
2062    /* Sparse buffers can't be mapped directly and can't be reallocated
2063     * (fully invalidated). That may just be a radeonsi limitation, but
2064     * the threaded context must obey it with radeonsi.
2065     */
2066    if (tres->b.flags & PIPE_RESOURCE_FLAG_SPARSE) {
2067       /* We can use DISCARD_RANGE instead of full discard. This is the only
2068        * fast path for sparse buffers that doesn't need thread synchronization.
2069        */
2070       if (usage & PIPE_MAP_DISCARD_WHOLE_RESOURCE)
2071          usage |= PIPE_MAP_DISCARD_RANGE;
2072 
2073       /* Allow DISCARD_WHOLE_RESOURCE and inferring UNSYNCHRONIZED in drivers.
2074        * The threaded context doesn't do unsynchronized mappings and
2075        * invalidations of sparse buffers, therefore correct driver behavior
2076        * won't result in incorrect behavior with the threaded context.
2077        */
2078       return usage;
2079    }
2080 
2081    usage |= tc_flags;
2082 
2083    /* Handle CPU reads trivially. */
2084    if (usage & PIPE_MAP_READ) {
2085       if (usage & PIPE_MAP_UNSYNCHRONIZED)
2086          usage |= TC_TRANSFER_MAP_THREADED_UNSYNC; /* don't sync */
2087 
2088       /* Drivers aren't allowed to do buffer invalidations. */
2089       return usage & ~PIPE_MAP_DISCARD_WHOLE_RESOURCE;
2090    }
2091 
2092    /* See if the buffer range being mapped has never been initialized or
2093     * the buffer is idle, in which case it can be mapped unsynchronized. */
2094    if (!(usage & PIPE_MAP_UNSYNCHRONIZED) &&
2095        ((!tres->is_shared &&
2096          !util_ranges_intersect(&tres->valid_buffer_range, offset, offset + size)) ||
2097         !tc_is_buffer_busy(tc, tres, usage)))
2098       usage |= PIPE_MAP_UNSYNCHRONIZED;
2099 
2100    if (!(usage & PIPE_MAP_UNSYNCHRONIZED)) {
2101       /* If discarding the entire range, discard the whole resource instead. */
2102       if (usage & PIPE_MAP_DISCARD_RANGE &&
2103           offset == 0 && size == tres->b.width0)
2104          usage |= PIPE_MAP_DISCARD_WHOLE_RESOURCE;
2105 
2106       /* Discard the whole resource if needed. */
2107       if (usage & PIPE_MAP_DISCARD_WHOLE_RESOURCE) {
2108          if (tc_invalidate_buffer(tc, tres))
2109             usage |= PIPE_MAP_UNSYNCHRONIZED;
2110          else
2111             usage |= PIPE_MAP_DISCARD_RANGE; /* fallback */
2112       }
2113    }
2114 
2115    /* We won't need this flag anymore. */
2116    /* TODO: We might not need TC_TRANSFER_MAP_NO_INVALIDATE with this. */
2117    usage &= ~PIPE_MAP_DISCARD_WHOLE_RESOURCE;
2118 
2119    /* GL_AMD_pinned_memory and persistent mappings can't use staging
2120     * buffers. */
2121    if (usage & (PIPE_MAP_UNSYNCHRONIZED |
2122                 PIPE_MAP_PERSISTENT) ||
2123        tres->is_user_ptr)
2124       usage &= ~PIPE_MAP_DISCARD_RANGE;
2125 
2126    /* Unsynchronized buffer mappings don't have to synchronize the thread. */
2127    if (usage & PIPE_MAP_UNSYNCHRONIZED) {
2128       usage &= ~PIPE_MAP_DISCARD_RANGE;
2129       usage |= TC_TRANSFER_MAP_THREADED_UNSYNC; /* notify the driver */
2130    }
2131 
2132    return usage;
2133 }
2134 
2135 static void *
2136 tc_buffer_map(struct pipe_context *_pipe,
2137               struct pipe_resource *resource, unsigned level,
2138               unsigned usage, const struct pipe_box *box,
2139               struct pipe_transfer **transfer)
2140 {
2141    struct threaded_context *tc = threaded_context(_pipe);
2142    struct threaded_resource *tres = threaded_resource(resource);
2143    struct pipe_context *pipe = tc->pipe;
2144 
2145    usage = tc_improve_map_buffer_flags(tc, tres, usage, box->x, box->width);
2146 
2147    /* Do a staging transfer within the threaded context. The driver should
2148     * only get resource_copy_region.
2149     */
2150    if (usage & PIPE_MAP_DISCARD_RANGE) {
2151       struct threaded_transfer *ttrans = slab_alloc(&tc->pool_transfers);
2152       uint8_t *map;
2153 
2154       ttrans->staging = NULL;
2155 
2156       u_upload_alloc(tc->base.stream_uploader, 0,
2157                      box->width + (box->x % tc->map_buffer_alignment),
2158                      tc->map_buffer_alignment, &ttrans->b.offset,
2159                      &ttrans->staging, (void**)&map);
2160       if (!map) {
2161          slab_free(&tc->pool_transfers, ttrans);
2162          return NULL;
2163       }
2164 
2165       ttrans->b.resource = resource;
2166       ttrans->b.level = 0;
2167       ttrans->b.usage = usage;
2168       ttrans->b.box = *box;
2169       ttrans->b.stride = 0;
2170       ttrans->b.layer_stride = 0;
2171       ttrans->valid_buffer_range = &tres->valid_buffer_range;
2172       *transfer = &ttrans->b;
2173 
2174       p_atomic_inc(&tres->pending_staging_uploads);
2175       util_range_add(resource, &tres->pending_staging_uploads_range,
2176                      box->x, box->x + box->width);
2177 
2178       return map + (box->x % tc->map_buffer_alignment);
2179    }
2180 
2181    if (usage & PIPE_MAP_UNSYNCHRONIZED &&
2182        p_atomic_read(&tres->pending_staging_uploads) &&
2183        util_ranges_intersect(&tres->pending_staging_uploads_range, box->x, box->x + box->width)) {
2184       /* Write conflict detected between a staging transfer and the direct mapping we're
2185        * going to do. Resolve the conflict by ignoring UNSYNCHRONIZED so the direct mapping
2186        * will have to wait for the staging transfer completion.
2187        * Note: The conflict detection is only based on the mapped range, not on the actual
2188        * written range(s).
2189        */
2190       usage &= ~PIPE_MAP_UNSYNCHRONIZED & ~TC_TRANSFER_MAP_THREADED_UNSYNC;
2191       tc->use_forced_staging_uploads = false;
2192    }
2193 
2194    /* Unsynchronized buffer mappings don't have to synchronize the thread. */
2195    if (!(usage & TC_TRANSFER_MAP_THREADED_UNSYNC)) {
2196       tc_sync_msg(tc, usage & PIPE_MAP_DISCARD_RANGE ? "  discard_range" :
2197                       usage & PIPE_MAP_READ ? "  read" : "  staging conflict");
2198       tc_set_driver_thread(tc);
2199    }
2200 
2201    tc->bytes_mapped_estimate += box->width;
2202 
2203    void *ret = pipe->buffer_map(pipe, tres->latest ? tres->latest : resource,
2204                                 level, usage, box, transfer);
2205    threaded_transfer(*transfer)->valid_buffer_range = &tres->valid_buffer_range;
2206 
2207    if (!(usage & TC_TRANSFER_MAP_THREADED_UNSYNC))
2208       tc_clear_driver_thread(tc);
2209 
2210    return ret;
2211 }
2212 
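/* Texture mapping always synchronizes with the driver thread; only buffer
 * mappings get the staging and unsynchronized fast paths above.
 */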
2213 static void *
2214 tc_texture_map(struct pipe_context *_pipe,
2215                struct pipe_resource *resource, unsigned level,
2216                unsigned usage, const struct pipe_box *box,
2217                struct pipe_transfer **transfer)
2218 {
2219    struct threaded_context *tc = threaded_context(_pipe);
2220    struct threaded_resource *tres = threaded_resource(resource);
2221    struct pipe_context *pipe = tc->pipe;
2222 
2223    tc_sync_msg(tc, "texture");
2224    tc_set_driver_thread(tc);
2225 
2226    tc->bytes_mapped_estimate += box->width;
2227 
2228    void *ret = pipe->texture_map(pipe, tres->latest ? tres->latest : resource,
2229                                  level, usage, box, transfer);
2230 
2231    if (!(usage & TC_TRANSFER_MAP_THREADED_UNSYNC))
2232       tc_clear_driver_thread(tc);
2233 
2234    return ret;
2235 }
2236 
2237 struct tc_transfer_flush_region {
2238    struct tc_call_base base;
2239    struct pipe_box box;
2240    struct pipe_transfer *transfer;
2241 };
2242 
2243 static uint16_t
2244 tc_call_transfer_flush_region(struct pipe_context *pipe, void *call, uint64_t *last)
2245 {
2246    struct tc_transfer_flush_region *p = to_call(call, tc_transfer_flush_region);
2247 
2248    pipe->transfer_flush_region(pipe, p->transfer, &p->box);
2249    return call_size(tc_transfer_flush_region);
2250 }
2251 
2252 struct tc_resource_copy_region {
2253    struct tc_call_base base;
2254    unsigned dst_level;
2255    unsigned dstx, dsty, dstz;
2256    unsigned src_level;
2257    struct pipe_box src_box;
2258    struct pipe_resource *dst;
2259    struct pipe_resource *src;
2260 };
2261 
2262 static void
2263 tc_resource_copy_region(struct pipe_context *_pipe,
2264                         struct pipe_resource *dst, unsigned dst_level,
2265                         unsigned dstx, unsigned dsty, unsigned dstz,
2266                         struct pipe_resource *src, unsigned src_level,
2267                         const struct pipe_box *src_box);
2268 
2269 static void
2270 tc_buffer_do_flush_region(struct threaded_context *tc,
2271                           struct threaded_transfer *ttrans,
2272                           const struct pipe_box *box)
2273 {
2274    struct threaded_resource *tres = threaded_resource(ttrans->b.resource);
2275 
2276    if (ttrans->staging) {
2277       struct pipe_box src_box;
2278 
2279       u_box_1d(ttrans->b.offset + ttrans->b.box.x % tc->map_buffer_alignment +
2280                (box->x - ttrans->b.box.x),
2281                box->width, &src_box);
2282 
2283       /* Copy the staging buffer into the original one. */
2284       tc_resource_copy_region(&tc->base, ttrans->b.resource, 0, box->x, 0, 0,
2285                               ttrans->staging, 0, &src_box);
2286    }
2287 
2288    util_range_add(&tres->b, ttrans->valid_buffer_range,
2289                   box->x, box->x + box->width);
2290 }
2291 
2292 static void
2293 tc_transfer_flush_region(struct pipe_context *_pipe,
2294                          struct pipe_transfer *transfer,
2295                          const struct pipe_box *rel_box)
2296 {
2297    struct threaded_context *tc = threaded_context(_pipe);
2298    struct threaded_transfer *ttrans = threaded_transfer(transfer);
2299    struct threaded_resource *tres = threaded_resource(transfer->resource);
2300    unsigned required_usage = PIPE_MAP_WRITE |
2301                              PIPE_MAP_FLUSH_EXPLICIT;
2302 
2303    if (tres->b.target == PIPE_BUFFER) {
2304       if ((transfer->usage & required_usage) == required_usage) {
2305          struct pipe_box box;
2306 
2307          u_box_1d(transfer->box.x + rel_box->x, rel_box->width, &box);
2308          tc_buffer_do_flush_region(tc, ttrans, &box);
2309       }
2310 
2311       /* Staging transfers don't send the call to the driver. */
2312       if (ttrans->staging)
2313          return;
2314    }
2315 
2316    struct tc_transfer_flush_region *p =
2317       tc_add_call(tc, TC_CALL_transfer_flush_region, tc_transfer_flush_region);
2318    p->transfer = transfer;
2319    p->box = *rel_box;
2320 }
2321 
2322 static void
2323 tc_flush(struct pipe_context *_pipe, struct pipe_fence_handle **fence,
2324          unsigned flags);
2325 
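/* Buffer unmapping. For staging transfers, the driver call is skipped; the
 * batch only decrements pending_staging_uploads and drops the reference.
 */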
2326 struct tc_buffer_unmap {
2327    struct tc_call_base base;
2328    bool was_staging_transfer;
2329    union {
2330       struct pipe_transfer *transfer;
2331       struct pipe_resource *resource;
2332    };
2333 };
2334 
2335 static uint16_t
2336 tc_call_buffer_unmap(struct pipe_context *pipe, void *call, uint64_t *last)
2337 {
2338    struct tc_buffer_unmap *p = to_call(call, tc_buffer_unmap);
2339 
2340    if (p->was_staging_transfer) {
2341       struct threaded_resource *tres = threaded_resource(p->resource);
2342       /* Nothing to do except keeping track of staging uploads */
2343       assert(tres->pending_staging_uploads > 0);
2344       p_atomic_dec(&tres->pending_staging_uploads);
2345       tc_drop_resource_reference(p->resource);
2346    } else {
2347       pipe->buffer_unmap(pipe, p->transfer);
2348    }
2349 
2350    return call_size(tc_buffer_unmap);
2351 }
2352 
2353 static void
2354 tc_buffer_unmap(struct pipe_context *_pipe, struct pipe_transfer *transfer)
2355 {
2356    struct threaded_context *tc = threaded_context(_pipe);
2357    struct threaded_transfer *ttrans = threaded_transfer(transfer);
2358    struct threaded_resource *tres = threaded_resource(transfer->resource);
2359 
2360    /* PIPE_MAP_THREAD_SAFE is only valid with UNSYNCHRONIZED. It can be
2361     * called from any thread and bypasses all multithreaded queues.
2362     */
2363    if (transfer->usage & PIPE_MAP_THREAD_SAFE) {
2364       assert(transfer->usage & PIPE_MAP_UNSYNCHRONIZED);
2365       assert(!(transfer->usage & (PIPE_MAP_FLUSH_EXPLICIT |
2366                                   PIPE_MAP_DISCARD_RANGE)));
2367 
2368       struct pipe_context *pipe = tc->pipe;
2369       util_range_add(&tres->b, ttrans->valid_buffer_range,
2370                       transfer->box.x, transfer->box.x + transfer->box.width);
2371 
2372       pipe->buffer_unmap(pipe, transfer);
2373       return;
2374    }
2375 
2376    bool was_staging_transfer = false;
2377 
2378    if (transfer->usage & PIPE_MAP_WRITE &&
2379        !(transfer->usage & PIPE_MAP_FLUSH_EXPLICIT))
2380       tc_buffer_do_flush_region(tc, ttrans, &transfer->box);
2381 
2382    if (ttrans->staging) {
2383       was_staging_transfer = true;
2384 
2385       tc_drop_resource_reference(ttrans->staging);
2386       slab_free(&tc->pool_transfers, ttrans);
2387    }
2388 
2389    struct tc_buffer_unmap *p = tc_add_call(tc, TC_CALL_buffer_unmap,
2390                                            tc_buffer_unmap);
2391    if (was_staging_transfer) {
2392       tc_set_resource_reference(&p->resource, &tres->b);
2393       p->was_staging_transfer = true;
2394    } else {
2395       p->transfer = transfer;
2396       p->was_staging_transfer = false;
2397    }
2398 
2399    /* tc_buffer_map directly maps the buffers, but tc_buffer_unmap
2400     * defers the unmap operation to batch execution.
2401     * bytes_mapped_estimate estimates the map/unmap byte delta; if it
2402     * exceeds the optional limit, the current batch is flushed to
2403     * reclaim some RAM. */
2404    if (!ttrans->staging && tc->bytes_mapped_limit &&
2405        tc->bytes_mapped_estimate > tc->bytes_mapped_limit) {
2406       tc_flush(_pipe, NULL, PIPE_FLUSH_ASYNC);
2407    }
2408 }
2409 
2410 struct tc_texture_unmap {
2411    struct tc_call_base base;
2412    struct pipe_transfer *transfer;
2413 };
2414 
2415 static uint16_t
2416 tc_call_texture_unmap(struct pipe_context *pipe, void *call, uint64_t *last)
2417 {
2418    struct tc_texture_unmap *p = (struct tc_texture_unmap *) call;
2419 
2420    pipe->texture_unmap(pipe, p->transfer);
2421    return call_size(tc_texture_unmap);
2422 }
2423 
2424 static void
2425 tc_texture_unmap(struct pipe_context *_pipe, struct pipe_transfer *transfer)
2426 {
2427    struct threaded_context *tc = threaded_context(_pipe);
2428    struct threaded_transfer *ttrans = threaded_transfer(transfer);
2429 
2430    tc_add_call(tc, TC_CALL_texture_unmap, tc_texture_unmap)->transfer = transfer;
2431 
2432    /* tc_texture_map directly maps the textures, but tc_texture_unmap
2433     * defers the unmap operation to batch execution.
2434     * bytes_mapped_estimate estimates the map/unmap byte delta; if it
2435     * exceeds the optional limit, the current batch is flushed to
2436     * reclaim some RAM. */
2437    if (!ttrans->staging && tc->bytes_mapped_limit &&
2438        tc->bytes_mapped_estimate > tc->bytes_mapped_limit) {
2439       tc_flush(_pipe, NULL, PIPE_FLUSH_ASYNC);
2440    }
2441 }
2442 
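/* Small buffer uploads are copied into the batch and replayed as
 * buffer_subdata; unsynchronized, invalidating, or large uploads go through
 * tc_buffer_map/memcpy/tc_buffer_unmap instead.
 */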
2443 struct tc_buffer_subdata {
2444    struct tc_call_base base;
2445    unsigned usage, offset, size;
2446    struct pipe_resource *resource;
2447    char slot[0]; /* more will be allocated if needed */
2448 };
2449 
2450 static uint16_t
2451 tc_call_buffer_subdata(struct pipe_context *pipe, void *call, uint64_t *last)
2452 {
2453    struct tc_buffer_subdata *p = (struct tc_buffer_subdata *)call;
2454 
2455    pipe->buffer_subdata(pipe, p->resource, p->usage, p->offset, p->size,
2456                         p->slot);
2457    tc_drop_resource_reference(p->resource);
2458    return p->base.num_slots;
2459 }
2460 
2461 static void
2462 tc_buffer_subdata(struct pipe_context *_pipe,
2463                   struct pipe_resource *resource,
2464                   unsigned usage, unsigned offset,
2465                   unsigned size, const void *data)
2466 {
2467    struct threaded_context *tc = threaded_context(_pipe);
2468    struct threaded_resource *tres = threaded_resource(resource);
2469 
2470    if (!size)
2471       return;
2472 
2473    usage |= PIPE_MAP_WRITE;
2474 
2475    /* PIPE_MAP_DIRECTLY suppresses implicit DISCARD_RANGE. */
2476    if (!(usage & PIPE_MAP_DIRECTLY))
2477       usage |= PIPE_MAP_DISCARD_RANGE;
2478 
2479    usage = tc_improve_map_buffer_flags(tc, tres, usage, offset, size);
2480 
2481    /* Unsynchronized and big transfers should use transfer_map. Also handle
2482     * full invalidations, because drivers aren't allowed to do them.
2483     */
2484    if (usage & (PIPE_MAP_UNSYNCHRONIZED |
2485                 PIPE_MAP_DISCARD_WHOLE_RESOURCE) ||
2486        size > TC_MAX_SUBDATA_BYTES) {
2487       struct pipe_transfer *transfer;
2488       struct pipe_box box;
2489       uint8_t *map = NULL;
2490 
2491       u_box_1d(offset, size, &box);
2492 
2493       map = tc_buffer_map(_pipe, resource, 0, usage, &box, &transfer);
2494       if (map) {
2495          memcpy(map, data, size);
2496          tc_buffer_unmap(_pipe, transfer);
2497       }
2498       return;
2499    }
2500 
2501    util_range_add(&tres->b, &tres->valid_buffer_range, offset, offset + size);
2502 
2503    /* The upload is small. Enqueue it. */
2504    struct tc_buffer_subdata *p =
2505       tc_add_slot_based_call(tc, TC_CALL_buffer_subdata, tc_buffer_subdata, size);
2506 
2507    tc_set_resource_reference(&p->resource, resource);
2508    /* The buffer will always be busy here, because if it weren't,
2509     * tc_improve_map_buffer_flags would set UNSYNCHRONIZED and we
2510     * wouldn't get here. */
2511    tc_add_to_buffer_list(&tc->buffer_lists[tc->next_buf_list], resource);
2512    p->usage = usage;
2513    p->offset = offset;
2514    p->size = size;
2515    memcpy(p->slot, data, size);
2516 }
2517 
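/* Small texture uploads are copied into the batch; larger ones synchronize
 * and call texture_subdata directly on the driver thread.
 */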
2518 struct tc_texture_subdata {
2519    struct tc_call_base base;
2520    unsigned level, usage, stride, layer_stride;
2521    struct pipe_box box;
2522    struct pipe_resource *resource;
2523    char slot[0]; /* more will be allocated if needed */
2524 };
2525 
2526 static uint16_t
2527 tc_call_texture_subdata(struct pipe_context *pipe, void *call, uint64_t *last)
2528 {
2529    struct tc_texture_subdata *p = (struct tc_texture_subdata *)call;
2530 
2531    pipe->texture_subdata(pipe, p->resource, p->level, p->usage, &p->box,
2532                          p->slot, p->stride, p->layer_stride);
2533    tc_drop_resource_reference(p->resource);
2534    return p->base.num_slots;
2535 }
2536 
2537 static void
2538 tc_texture_subdata(struct pipe_context *_pipe,
2539                    struct pipe_resource *resource,
2540                    unsigned level, unsigned usage,
2541                    const struct pipe_box *box,
2542                    const void *data, unsigned stride,
2543                    unsigned layer_stride)
2544 {
2545    struct threaded_context *tc = threaded_context(_pipe);
2546    unsigned size;
2547 
2548    assert(box->height >= 1);
2549    assert(box->depth >= 1);
2550 
2551    size = (box->depth - 1) * layer_stride +
2552           (box->height - 1) * stride +
2553           box->width * util_format_get_blocksize(resource->format);
2554    if (!size)
2555       return;
2556 
2557    /* Small uploads can be enqueued, big uploads must sync. */
2558    if (size <= TC_MAX_SUBDATA_BYTES) {
2559       struct tc_texture_subdata *p =
2560          tc_add_slot_based_call(tc, TC_CALL_texture_subdata, tc_texture_subdata, size);
2561 
2562       tc_set_resource_reference(&p->resource, resource);
2563       p->level = level;
2564       p->usage = usage;
2565       p->box = *box;
2566       p->stride = stride;
2567       p->layer_stride = layer_stride;
2568       memcpy(p->slot, data, size);
2569    } else {
2570       struct pipe_context *pipe = tc->pipe;
2571 
2572       tc_sync(tc);
2573       tc_set_driver_thread(tc);
2574       pipe->texture_subdata(pipe, resource, level, usage, box, data,
2575                             stride, layer_stride);
2576       tc_clear_driver_thread(tc);
2577    }
2578 }
2579 
2580 
2581 /********************************************************************
2582  * miscellaneous
2583  */
2584 
2585 #define TC_FUNC_SYNC_RET0(ret_type, func) \
2586    static ret_type \
2587    tc_##func(struct pipe_context *_pipe) \
2588    { \
2589       struct threaded_context *tc = threaded_context(_pipe); \
2590       struct pipe_context *pipe = tc->pipe; \
2591       tc_sync(tc); \
2592       return pipe->func(pipe); \
2593    }
2594 
2595 TC_FUNC_SYNC_RET0(uint64_t, get_timestamp)
2596 
2597 static void
2598 tc_get_sample_position(struct pipe_context *_pipe,
2599                        unsigned sample_count, unsigned sample_index,
2600                        float *out_value)
2601 {
2602    struct threaded_context *tc = threaded_context(_pipe);
2603    struct pipe_context *pipe = tc->pipe;
2604 
2605    tc_sync(tc);
2606    pipe->get_sample_position(pipe, sample_count, sample_index,
2607                              out_value);
2608 }
2609 
2610 static enum pipe_reset_status
2611 tc_get_device_reset_status(struct pipe_context *_pipe)
2612 {
2613    struct threaded_context *tc = threaded_context(_pipe);
2614    struct pipe_context *pipe = tc->pipe;
2615 
2616    if (!tc->options.unsynchronized_get_device_reset_status)
2617       tc_sync(tc);
2618 
2619    return pipe->get_device_reset_status(pipe);
2620 }
2621 
2622 static void
2623 tc_set_device_reset_callback(struct pipe_context *_pipe,
2624                              const struct pipe_device_reset_callback *cb)
2625 {
2626    struct threaded_context *tc = threaded_context(_pipe);
2627    struct pipe_context *pipe = tc->pipe;
2628 
2629    tc_sync(tc);
2630    pipe->set_device_reset_callback(pipe, cb);
2631 }
2632 
2633 struct tc_string_marker {
2634    struct tc_call_base base;
2635    int len;
2636    char slot[0]; /* more will be allocated if needed */
2637 };
2638 
2639 static uint16_t
2640 tc_call_emit_string_marker(struct pipe_context *pipe, void *call, uint64_t *last)
2641 {
2642    struct tc_string_marker *p = (struct tc_string_marker *)call;
2643    pipe->emit_string_marker(pipe, p->slot, p->len);
2644    return p->base.num_slots;
2645 }
2646 
2647 static void
2648 tc_emit_string_marker(struct pipe_context *_pipe,
2649                       const char *string, int len)
2650 {
2651    struct threaded_context *tc = threaded_context(_pipe);
2652 
2653    if (len <= TC_MAX_STRING_MARKER_BYTES) {
2654       struct tc_string_marker *p =
2655          tc_add_slot_based_call(tc, TC_CALL_emit_string_marker, tc_string_marker, len);
2656 
2657       memcpy(p->slot, string, len);
2658       p->len = len;
2659    } else {
2660       struct pipe_context *pipe = tc->pipe;
2661 
2662       tc_sync(tc);
2663       tc_set_driver_thread(tc);
2664       pipe->emit_string_marker(pipe, string, len);
2665       tc_clear_driver_thread(tc);
2666    }
2667 }
2668 
2669 static void
2670 tc_dump_debug_state(struct pipe_context *_pipe, FILE *stream,
2671                     unsigned flags)
2672 {
2673    struct threaded_context *tc = threaded_context(_pipe);
2674    struct pipe_context *pipe = tc->pipe;
2675 
2676    tc_sync(tc);
2677    pipe->dump_debug_state(pipe, stream, flags);
2678 }
2679 
2680 static void
2681 tc_set_debug_callback(struct pipe_context *_pipe,
2682                       const struct pipe_debug_callback *cb)
2683 {
2684    struct threaded_context *tc = threaded_context(_pipe);
2685    struct pipe_context *pipe = tc->pipe;
2686 
2687    /* Drop all synchronous debug callbacks. Drivers are expected to be OK
2688     * with this. shader-db will use an environment variable to disable
2689     * the threaded context.
2690     */
2691    if (cb && cb->debug_message && !cb->async)
2692       return;
2693 
2694    tc_sync(tc);
2695    pipe->set_debug_callback(pipe, cb);
2696 }
2697 
2698 static void
2699 tc_set_log_context(struct pipe_context *_pipe, struct u_log_context *log)
2700 {
2701    struct threaded_context *tc = threaded_context(_pipe);
2702    struct pipe_context *pipe = tc->pipe;
2703 
2704    tc_sync(tc);
2705    pipe->set_log_context(pipe, log);
2706 }
2707 
2708 static void
2709 tc_create_fence_fd(struct pipe_context *_pipe,
2710                    struct pipe_fence_handle **fence, int fd,
2711                    enum pipe_fd_type type)
2712 {
2713    struct threaded_context *tc = threaded_context(_pipe);
2714    struct pipe_context *pipe = tc->pipe;
2715 
2716    tc_sync(tc);
2717    pipe->create_fence_fd(pipe, fence, fd, type);
2718 }
2719 
2720 struct tc_fence_call {
2721    struct tc_call_base base;
2722    struct pipe_fence_handle *fence;
2723 };
2724 
2725 static uint16_t
2726 tc_call_fence_server_sync(struct pipe_context *pipe, void *call, uint64_t *last)
2727 {
2728    struct pipe_fence_handle *fence = to_call(call, tc_fence_call)->fence;
2729 
2730    pipe->fence_server_sync(pipe, fence);
2731    pipe->screen->fence_reference(pipe->screen, &fence, NULL);
2732    return call_size(tc_fence_call);
2733 }
2734 
2735 static void
2736 tc_fence_server_sync(struct pipe_context *_pipe,
2737                      struct pipe_fence_handle *fence)
2738 {
2739    struct threaded_context *tc = threaded_context(_pipe);
2740    struct pipe_screen *screen = tc->pipe->screen;
2741    struct tc_fence_call *call = tc_add_call(tc, TC_CALL_fence_server_sync,
2742                                             tc_fence_call);
2743 
2744    call->fence = NULL;
2745    screen->fence_reference(screen, &call->fence, fence);
2746 }
2747 
2748 static uint16_t
2749 tc_call_fence_server_signal(struct pipe_context *pipe, void *call, uint64_t *last)
2750 {
2751    struct pipe_fence_handle *fence = to_call(call, tc_fence_call)->fence;
2752 
2753    pipe->fence_server_signal(pipe, fence);
2754    pipe->screen->fence_reference(pipe->screen, &fence, NULL);
2755    return call_size(tc_fence_call);
2756 }
2757 
2758 static void
2759 tc_fence_server_signal(struct pipe_context *_pipe,
2760                            struct pipe_fence_handle *fence)
2761 {
2762    struct threaded_context *tc = threaded_context(_pipe);
2763    struct pipe_screen *screen = tc->pipe->screen;
2764    struct tc_fence_call *call = tc_add_call(tc, TC_CALL_fence_server_signal,
2765                                             tc_fence_call);
2766 
2767    call->fence = NULL;
2768    screen->fence_reference(screen, &call->fence, fence);
2769 }
2770 
2771 static struct pipe_video_codec *
2772 tc_create_video_codec(UNUSED struct pipe_context *_pipe,
2773                       UNUSED const struct pipe_video_codec *templ)
2774 {
2775    unreachable("Threaded context should not be enabled for video APIs");
2776    return NULL;
2777 }
2778 
2779 static struct pipe_video_buffer *
2780 tc_create_video_buffer(UNUSED struct pipe_context *_pipe,
2781                        UNUSED const struct pipe_video_buffer *templ)
2782 {
2783    unreachable("Threaded context should not be enabled for video APIs");
2784    return NULL;
2785 }
2786 
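/* Context parameters. Thread pinning is applied to the gallium thread and
 * forwarded to the driver immediately; other parameters are enqueued.
 */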
2787 struct tc_context_param {
2788    struct tc_call_base base;
2789    enum pipe_context_param param;
2790    unsigned value;
2791 };
2792 
2793 static uint16_t
2794 tc_call_set_context_param(struct pipe_context *pipe, void *call, uint64_t *last)
2795 {
2796    struct tc_context_param *p = to_call(call, tc_context_param);
2797 
2798    if (pipe->set_context_param)
2799       pipe->set_context_param(pipe, p->param, p->value);
2800 
2801    return call_size(tc_context_param);
2802 }
2803 
2804 static void
2805 tc_set_context_param(struct pipe_context *_pipe,
2806                            enum pipe_context_param param,
2807                            unsigned value)
2808 {
2809    struct threaded_context *tc = threaded_context(_pipe);
2810 
2811    if (param == PIPE_CONTEXT_PARAM_PIN_THREADS_TO_L3_CACHE) {
2812       /* Pin the gallium thread as requested. */
2813       util_set_thread_affinity(tc->queue.threads[0],
2814                                util_get_cpu_caps()->L3_affinity_mask[value],
2815                                NULL, util_get_cpu_caps()->num_cpu_mask_bits);
2816 
2817       /* Execute this immediately (without enqueuing); since this runs on
2818        * the application thread, the driver hook must be thread-safe.
2819        */
2820       struct pipe_context *pipe = tc->pipe;
2821       if (pipe->set_context_param)
2822          pipe->set_context_param(pipe, param, value);
2823       return;
2824    }
2825 
2826    if (tc->pipe->set_context_param) {
2827       struct tc_context_param *call =
2828          tc_add_call(tc, TC_CALL_set_context_param, tc_context_param);
2829 
2830       call->param = param;
2831       call->value = value;
2832    }
2833 }
2834 
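/* Illustrative use of the pinning path above (a sketch, not taken from a
 * specific driver): a state tracker that wants the TC worker thread near a
 * given L3 cache could call
 *
 *    pipe->set_context_param(pipe, PIPE_CONTEXT_PARAM_PIN_THREADS_TO_L3_CACHE,
 *                            l3_cache_index);
 *
 * where l3_cache_index is a hypothetical value chosen by the caller.
 */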
2835 
2836 /********************************************************************
2837  * draw, launch, clear, blit, copy, flush
2838  */
2839 
2840 struct tc_flush_call {
2841    struct tc_call_base base;
2842    unsigned flags;
2843    struct threaded_context *tc;
2844    struct pipe_fence_handle *fence;
2845 };
2846 
2847 static void
2848 tc_flush_queries(struct threaded_context *tc)
2849 {
2850    struct threaded_query *tq, *tmp;
2851    LIST_FOR_EACH_ENTRY_SAFE(tq, tmp, &tc->unflushed_queries, head_unflushed) {
2852       list_del(&tq->head_unflushed);
2853 
2854       /* Memory release semantics: due to a possible race with
2855        * tc_get_query_result, we must ensure that the linked list changes
2856        * are visible before setting tq->flushed.
2857        */
2858       p_atomic_set(&tq->flushed, true);
2859    }
2860 }
2861 
2862 static uint16_t
2863 tc_call_flush(struct pipe_context *pipe, void *call, uint64_t *last)
2864 {
2865    struct tc_flush_call *p = to_call(call, tc_flush_call);
2866    struct pipe_screen *screen = pipe->screen;
2867 
2868    pipe->flush(pipe, p->fence ? &p->fence : NULL, p->flags);
2869    screen->fence_reference(screen, &p->fence, NULL);
2870 
2871    if (!(p->flags & PIPE_FLUSH_DEFERRED))
2872       tc_flush_queries(p->tc);
2873 
2874    return call_size(tc_flush_call);
2875 }
2876 
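/* tc_flush has two paths: if the flush is deferred or async and the driver
 * provided options.create_fence, a fence is created from the batch token and
 * the flush itself is only queued (with TC_FLUSH_ASYNC); otherwise it falls
 * back to synchronizing and flushing directly on the calling thread.
 */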
2877 static void
2878 tc_flush(struct pipe_context *_pipe, struct pipe_fence_handle **fence,
2879          unsigned flags)
2880 {
2881    struct threaded_context *tc = threaded_context(_pipe);
2882    struct pipe_context *pipe = tc->pipe;
2883    struct pipe_screen *screen = pipe->screen;
2884    bool async = flags & (PIPE_FLUSH_DEFERRED | PIPE_FLUSH_ASYNC);
2885 
2886    if (async && tc->options.create_fence) {
2887       if (fence) {
2888          struct tc_batch *next = &tc->batch_slots[tc->next];
2889 
2890          if (!next->token) {
2891             next->token = malloc(sizeof(*next->token));
2892             if (!next->token)
2893                goto out_of_memory;
2894 
2895             pipe_reference_init(&next->token->ref, 1);
2896             next->token->tc = tc;
2897          }
2898 
2899          screen->fence_reference(screen, fence,
2900                                  tc->options.create_fence(pipe, next->token));
2901          if (!*fence)
2902             goto out_of_memory;
2903       }
2904 
2905       struct tc_flush_call *p = tc_add_call(tc, TC_CALL_flush, tc_flush_call);
2906       p->tc = tc;
2907       p->fence = fence ? *fence : NULL;
2908       p->flags = flags | TC_FLUSH_ASYNC;
2909 
2910       if (!(flags & PIPE_FLUSH_DEFERRED))
2911          tc_batch_flush(tc);
2912       return;
2913    }
2914 
2915 out_of_memory:
2916    tc_sync_msg(tc, flags & PIPE_FLUSH_END_OF_FRAME ? "end of frame" :
2917                    flags & PIPE_FLUSH_DEFERRED ? "deferred fence" : "normal");
2918 
2919    if (!(flags & PIPE_FLUSH_DEFERRED))
2920       tc_flush_queries(tc);
2921    tc_set_driver_thread(tc);
2922    pipe->flush(pipe, fence, flags);
2923    tc_clear_driver_thread(tc);
2924 }
2925 
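/* For single draws, start/count are packed into info.min_index/max_index so
 * that tc_draw_single needs no separate pipe_draw_start_count_bias and stays
 * small enough for draw merging; the tc_call_* handlers unpack them and clear
 * index_bounds_valid before calling the driver.
 */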
2926 struct tc_draw_single {
2927    struct tc_call_base base;
2928    unsigned index_bias;
2929    struct pipe_draw_info info;
2930 };
2931 
2932 struct tc_draw_single_drawid {
2933    struct tc_draw_single base;
2934    unsigned drawid_offset;
2935 };
2936 
2937 static uint16_t
2938 tc_call_draw_single_drawid(struct pipe_context *pipe, void *call, uint64_t *last)
2939 {
2940    struct tc_draw_single_drawid *info_drawid = to_call(call, tc_draw_single_drawid);
2941    struct tc_draw_single *info = &info_drawid->base;
2942 
2943    /* u_threaded_context stores start/count in min/max_index for single draws. */
2944    /* Drivers using u_threaded_context shouldn't use min/max_index. */
2945    struct pipe_draw_start_count_bias draw;
2946 
2947    draw.start = info->info.min_index;
2948    draw.count = info->info.max_index;
2949    draw.index_bias = info->index_bias;
2950 
2951    info->info.index_bounds_valid = false;
2952    info->info.has_user_indices = false;
2953    info->info.take_index_buffer_ownership = false;
2954 
2955    pipe->draw_vbo(pipe, &info->info, info_drawid->drawid_offset, NULL, &draw, 1);
2956    if (info->info.index_size)
2957       tc_drop_resource_reference(info->info.index.resource);
2958 
2959    return call_size(tc_draw_single_drawid);
2960 }
2961 
2962 static void
2963 simplify_draw_info(struct pipe_draw_info *info)
2964 {
2965    /* Clear these fields to facilitate draw merging.
2966     * Drivers shouldn't use them.
2967     */
2968    info->has_user_indices = false;
2969    info->index_bounds_valid = false;
2970    info->take_index_buffer_ownership = false;
2971    info->index_bias_varies = false;
2972    info->_pad = 0;
2973 
2974    /* This shouldn't be set when merging single draws. */
2975    info->increment_draw_id = false;
2976 
2977    if (info->index_size) {
2978       if (!info->primitive_restart)
2979          info->restart_index = 0;
2980    } else {
2981       assert(!info->primitive_restart);
2982       info->primitive_restart = false;
2983       info->restart_index = 0;
2984       info->index.resource = NULL;
2985    }
2986 }
2987 
2988 static bool
2989 is_next_call_a_mergeable_draw(struct tc_draw_single *first,
2990                               struct tc_draw_single *next)
2991 {
2992    if (next->base.call_id != TC_CALL_draw_single)
2993       return false;
2994 
2995    simplify_draw_info(&next->info);
2996 
2997    STATIC_ASSERT(offsetof(struct pipe_draw_info, min_index) ==
2998                  sizeof(struct pipe_draw_info) - 8);
2999    STATIC_ASSERT(offsetof(struct pipe_draw_info, max_index) ==
3000                  sizeof(struct pipe_draw_info) - 4);
3001    /* All fields must be the same except start and count. */
3002    /* u_threaded_context stores start/count in min/max_index for single draws. */
3003    return memcmp((uint32_t*)&first->info, (uint32_t*)&next->info,
3004                  DRAW_INFO_SIZE_WITHOUT_MIN_MAX_INDEX) == 0;
3005 }
3006 
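/* Draw merging: consecutive TC_CALL_draw_single entries whose pipe_draw_info
 * matches in everything except min/max_index (which hold start/count) are
 * collapsed below into a single multi-draw call, and the shared index buffer
 * reference is dropped once per merged draw.
 */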
3007 static uint16_t
3008 tc_call_draw_single(struct pipe_context *pipe, void *call, uint64_t *last_ptr)
3009 {
3010    /* Draw call merging. */
3011    struct tc_draw_single *first = to_call(call, tc_draw_single);
3012    struct tc_draw_single *last = (struct tc_draw_single *)last_ptr;
3013    struct tc_draw_single *next = get_next_call(first, tc_draw_single);
3014 
3015    /* If at least 2 consecutive draw calls can be merged... */
3016    if (next != last &&
3017        next->base.call_id == TC_CALL_draw_single) {
3018       simplify_draw_info(&first->info);
3019 
3020       if (is_next_call_a_mergeable_draw(first, next)) {
3021          /* The maximum number of merged draws is given by the batch size. */
3022          struct pipe_draw_start_count_bias multi[TC_SLOTS_PER_BATCH / call_size(tc_draw_single)];
3023          unsigned num_draws = 2;
3024          bool index_bias_varies = first->index_bias != next->index_bias;
3025 
3026          /* u_threaded_context stores start/count in min/max_index for single draws. */
3027          multi[0].start = first->info.min_index;
3028          multi[0].count = first->info.max_index;
3029          multi[0].index_bias = first->index_bias;
3030          multi[1].start = next->info.min_index;
3031          multi[1].count = next->info.max_index;
3032          multi[1].index_bias = next->index_bias;
3033 
3034          /* Find how many other draws can be merged. */
3035          next = get_next_call(next, tc_draw_single);
3036          for (; next != last && is_next_call_a_mergeable_draw(first, next);
3037               next = get_next_call(next, tc_draw_single), num_draws++) {
3038             /* u_threaded_context stores start/count in min/max_index for single draws. */
3039             multi[num_draws].start = next->info.min_index;
3040             multi[num_draws].count = next->info.max_index;
3041             multi[num_draws].index_bias = next->index_bias;
3042             index_bias_varies |= first->index_bias != next->index_bias;
3043          }
3044 
3045          first->info.index_bias_varies = index_bias_varies;
3046          pipe->draw_vbo(pipe, &first->info, 0, NULL, multi, num_draws);
3047 
3048          /* Since all draws use the same index buffer, drop all references at once. */
3049          if (first->info.index_size)
3050             pipe_drop_resource_references(first->info.index.resource, num_draws);
3051 
3052          return call_size(tc_draw_single) * num_draws;
3053       }
3054    }
3055 
3056    /* u_threaded_context stores start/count in min/max_index for single draws. */
3057    /* Drivers using u_threaded_context shouldn't use min/max_index. */
3058    struct pipe_draw_start_count_bias draw;
3059 
3060    draw.start = first->info.min_index;
3061    draw.count = first->info.max_index;
3062    draw.index_bias = first->index_bias;
3063 
3064    first->info.index_bounds_valid = false;
3065    first->info.has_user_indices = false;
3066    first->info.take_index_buffer_ownership = false;
3067 
3068    pipe->draw_vbo(pipe, &first->info, 0, NULL, &draw, 1);
3069    if (first->info.index_size)
3070       tc_drop_resource_reference(first->info.index.resource);
3071 
3072    return call_size(tc_draw_single);
3073 }
3074 
3075 struct tc_draw_indirect {
3076    struct tc_call_base base;
3077    struct pipe_draw_start_count_bias draw;
3078    struct pipe_draw_info info;
3079    struct pipe_draw_indirect_info indirect;
3080 };
3081 
3082 static uint16_t
3083 tc_call_draw_indirect(struct pipe_context *pipe, void *call, uint64_t *last)
3084 {
3085    struct tc_draw_indirect *info = to_call(call, tc_draw_indirect);
3086 
3087    info->info.index_bounds_valid = false;
3088    info->info.take_index_buffer_ownership = false;
3089 
3090    pipe->draw_vbo(pipe, &info->info, 0, &info->indirect, &info->draw, 1);
3091    if (info->info.index_size)
3092       tc_drop_resource_reference(info->info.index.resource);
3093 
3094    tc_drop_resource_reference(info->indirect.buffer);
3095    tc_drop_resource_reference(info->indirect.indirect_draw_count);
3096    tc_drop_so_target_reference(info->indirect.count_from_stream_output);
3097    return call_size(tc_draw_indirect);
3098 }
3099 
3100 struct tc_draw_multi {
3101    struct tc_call_base base;
3102    unsigned num_draws;
3103    struct pipe_draw_info info;
3104    struct pipe_draw_start_count_bias slot[]; /* variable-sized array */
3105 };
3106 
3107 static uint16_t
3108 tc_call_draw_multi(struct pipe_context *pipe, void *call, uint64_t *last)
3109 {
3110    struct tc_draw_multi *info = (struct tc_draw_multi*)call;
3111 
3112    info->info.has_user_indices = false;
3113    info->info.index_bounds_valid = false;
3114    info->info.take_index_buffer_ownership = false;
3115 
3116    pipe->draw_vbo(pipe, &info->info, 0, NULL, info->slot, info->num_draws);
3117    if (info->info.index_size)
3118       tc_drop_resource_reference(info->info.index.resource);
3119 
3120    return info->base.num_slots;
3121 }
3122 
3123 #define DRAW_INFO_SIZE_WITHOUT_INDEXBUF_AND_MIN_MAX_INDEX \
3124    offsetof(struct pipe_draw_info, index)
3125 
3126 void
3127 tc_draw_vbo(struct pipe_context *_pipe, const struct pipe_draw_info *info,
3128             unsigned drawid_offset,
3129             const struct pipe_draw_indirect_info *indirect,
3130             const struct pipe_draw_start_count_bias *draws,
3131             unsigned num_draws)
3132 {
3133    STATIC_ASSERT(DRAW_INFO_SIZE_WITHOUT_INDEXBUF_AND_MIN_MAX_INDEX +
3134                  sizeof(intptr_t) == offsetof(struct pipe_draw_info, min_index));
3135 
3136    struct threaded_context *tc = threaded_context(_pipe);
3137    unsigned index_size = info->index_size;
3138    bool has_user_indices = info->has_user_indices;
3139 
3140    if (unlikely(tc->add_all_gfx_bindings_to_buffer_list))
3141       tc_add_all_gfx_bindings_to_buffer_list(tc);
3142 
3143    if (unlikely(indirect)) {
3144       assert(!has_user_indices);
3145       assert(num_draws == 1);
3146 
3147       struct tc_draw_indirect *p =
3148          tc_add_call(tc, TC_CALL_draw_indirect, tc_draw_indirect);
3149       struct tc_buffer_list *next = &tc->buffer_lists[tc->next_buf_list];
3150 
3151       if (index_size) {
3152          if (!info->take_index_buffer_ownership) {
3153             tc_set_resource_reference(&p->info.index.resource,
3154                                       info->index.resource);
3155          }
3156          tc_add_to_buffer_list(next, info->index.resource);
3157       }
3158       memcpy(&p->info, info, DRAW_INFO_SIZE_WITHOUT_MIN_MAX_INDEX);
3159 
3160       tc_set_resource_reference(&p->indirect.buffer, indirect->buffer);
3161       tc_set_resource_reference(&p->indirect.indirect_draw_count,
3162                                 indirect->indirect_draw_count);
3163       p->indirect.count_from_stream_output = NULL;
3164       pipe_so_target_reference(&p->indirect.count_from_stream_output,
3165                                indirect->count_from_stream_output);
3166 
3167       if (indirect->buffer)
3168          tc_add_to_buffer_list(next, indirect->buffer);
3169       if (indirect->indirect_draw_count)
3170          tc_add_to_buffer_list(next, indirect->indirect_draw_count);
3171       if (indirect->count_from_stream_output)
3172          tc_add_to_buffer_list(next, indirect->count_from_stream_output->buffer);
3173 
3174       memcpy(&p->indirect, indirect, sizeof(*indirect));
3175       p->draw.start = draws[0].start;
3176       return;
3177    }
3178 
3179    if (num_draws == 1) {
3180       /* Single draw. */
3181       if (index_size && has_user_indices) {
3182          unsigned size = draws[0].count * index_size;
3183          struct pipe_resource *buffer = NULL;
3184          unsigned offset;
3185 
3186          if (!size)
3187             return;
3188 
3189          /* This must be done before adding draw_vbo, because the upload could
3190           * trigger e.g. transfer_unmap and flush a partially-initialized
3191           * draw_vbo call to the driver if it were done afterwards.
3192           */
3193          u_upload_data(tc->base.stream_uploader, 0, size, 4,
3194                        (uint8_t*)info->index.user + draws[0].start * index_size,
3195                        &offset, &buffer);
3196          if (unlikely(!buffer))
3197             return;
3198 
3199          struct tc_draw_single *p = drawid_offset > 0 ?
3200             &tc_add_call(tc, TC_CALL_draw_single_drawid, tc_draw_single_drawid)->base :
3201             tc_add_call(tc, TC_CALL_draw_single, tc_draw_single);
3202          memcpy(&p->info, info, DRAW_INFO_SIZE_WITHOUT_INDEXBUF_AND_MIN_MAX_INDEX);
3203          p->info.index.resource = buffer;
3204          if (drawid_offset > 0)
3205             ((struct tc_draw_single_drawid*)p)->drawid_offset = drawid_offset;
3206          /* u_threaded_context stores start/count in min/max_index for single draws. */
3207          p->info.min_index = offset >> util_logbase2(index_size);
3208          p->info.max_index = draws[0].count;
3209          p->index_bias = draws[0].index_bias;
3210       } else {
3211          /* Non-indexed call or indexed with a real index buffer. */
3212          struct tc_draw_single *p = drawid_offset > 0 ?
3213             &tc_add_call(tc, TC_CALL_draw_single_drawid, tc_draw_single_drawid)->base :
3214             tc_add_call(tc, TC_CALL_draw_single, tc_draw_single);
3215          if (index_size) {
3216             if (!info->take_index_buffer_ownership) {
3217                tc_set_resource_reference(&p->info.index.resource,
3218                                          info->index.resource);
3219             }
3220             tc_add_to_buffer_list(&tc->buffer_lists[tc->next_buf_list], info->index.resource);
3221          }
3222          if (drawid_offset > 0)
3223             ((struct tc_draw_single_drawid*)p)->drawid_offset = drawid_offset;
3224          memcpy(&p->info, info, DRAW_INFO_SIZE_WITHOUT_MIN_MAX_INDEX);
3225          /* u_threaded_context stores start/count in min/max_index for single draws. */
3226          p->info.min_index = draws[0].start;
3227          p->info.max_index = draws[0].count;
3228          p->index_bias = draws[0].index_bias;
3229       }
3230       return;
3231    }
3232 
3233    const int draw_overhead_bytes = sizeof(struct tc_draw_multi);
3234    const int one_draw_slot_bytes = sizeof(((struct tc_draw_multi*)NULL)->slot[0]);
3235    const int slots_for_one_draw = DIV_ROUND_UP(draw_overhead_bytes + one_draw_slot_bytes,
3236                                                sizeof(struct tc_call_base));
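   /* Illustrative numbers only (real sizes are build-dependent): with
    * sizeof(struct tc_call_base) == 8, sizeof(struct tc_draw_multi) == 72 and
    * 16-byte slots, slots_for_one_draw = DIV_ROUND_UP(72 + 16, 8) = 11, and a
    * batch with 100 free slots (800 bytes) fits dr = (800 - 72) / 16 = 45
    * draws in one tc_draw_multi call.
    */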
3237    /* Multi draw. */
3238    if (index_size && has_user_indices) {
3239       struct pipe_resource *buffer = NULL;
3240       unsigned buffer_offset, total_count = 0;
3241       unsigned index_size_shift = util_logbase2(index_size);
3242       uint8_t *ptr = NULL;
3243 
3244       /* Get the total count. */
3245       for (unsigned i = 0; i < num_draws; i++)
3246          total_count += draws[i].count;
3247 
3248       if (!total_count)
3249          return;
3250 
3251       /* Allocate space for all index buffers.
3252        *
3253        * This must be done before adding draw_vbo, because the allocation could
3254        * trigger e.g. transfer_unmap and flush a partially-initialized
3255        * draw_vbo call to the driver if it were done afterwards.
3256        */
3257       u_upload_alloc(tc->base.stream_uploader, 0,
3258                      total_count << index_size_shift, 4,
3259                      &buffer_offset, &buffer, (void**)&ptr);
3260       if (unlikely(!buffer))
3261          return;
3262 
3263       int total_offset = 0;
3264       while (num_draws) {
3265          struct tc_batch *next = &tc->batch_slots[tc->next];
3266 
3267          int nb_slots_left = TC_SLOTS_PER_BATCH - next->num_total_slots;
3268          /* If there isn't enough room for even one draw, assume a new full batch will be used. */
3269          if (nb_slots_left < slots_for_one_draw)
3270             nb_slots_left = TC_SLOTS_PER_BATCH;
3271          const int size_left_bytes = nb_slots_left * sizeof(struct tc_call_base);
3272 
3273          /* How many draws can we fit in the current batch */
3274          const int dr = MIN2(num_draws, (size_left_bytes - draw_overhead_bytes) / one_draw_slot_bytes);
3275 
3276          struct tc_draw_multi *p =
3277             tc_add_slot_based_call(tc, TC_CALL_draw_multi, tc_draw_multi,
3278                                    dr);
3279          memcpy(&p->info, info, DRAW_INFO_SIZE_WITHOUT_INDEXBUF_AND_MIN_MAX_INDEX);
3280          p->info.index.resource = buffer;
3281          p->num_draws = dr;
3282 
3283          /* Upload index buffers. */
3284          for (unsigned i = 0, offset = 0; i < dr; i++) {
3285             unsigned count = draws[i + total_offset].count;
3286 
3287             if (!count) {
3288                p->slot[i].start = 0;
3289                p->slot[i].count = 0;
3290                p->slot[i].index_bias = 0;
3291                continue;
3292             }
3293 
3294             unsigned size = count << index_size_shift;
3295             memcpy(ptr + offset,
3296                    (uint8_t*)info->index.user +
3297                    (draws[i + total_offset].start << index_size_shift), size);
3298             p->slot[i].start = (buffer_offset + offset) >> index_size_shift;
3299             p->slot[i].count = count;
3300             p->slot[i].index_bias = draws[i + total_offset].index_bias;
3301             offset += size;
3302          }
3303 
3304          total_offset += dr;
3305          num_draws -= dr;
3306       }
3307    } else {
3308       int total_offset = 0;
3309       bool take_index_buffer_ownership = info->take_index_buffer_ownership;
3310       while (num_draws) {
3311          struct tc_batch *next = &tc->batch_slots[tc->next];
3312 
3313          int nb_slots_left = TC_SLOTS_PER_BATCH - next->num_total_slots;
3314          /* If there isn't enough room for even one draw, assume a new full batch will be used. */
3315          if (nb_slots_left < slots_for_one_draw)
3316             nb_slots_left = TC_SLOTS_PER_BATCH;
3317          const int size_left_bytes = nb_slots_left * sizeof(struct tc_call_base);
3318 
3319          /* How many draws can we fit in the current batch */
3320          const int dr = MIN2(num_draws, (size_left_bytes - draw_overhead_bytes) / one_draw_slot_bytes);
3321 
3322          /* Non-indexed call or indexed with a real index buffer. */
3323          struct tc_draw_multi *p =
3324             tc_add_slot_based_call(tc, TC_CALL_draw_multi, tc_draw_multi,
3325                                    dr);
3326          if (index_size) {
3327             if (!take_index_buffer_ownership) {
3328                tc_set_resource_reference(&p->info.index.resource,
3329                                          info->index.resource);
3330             }
3331             tc_add_to_buffer_list(&tc->buffer_lists[tc->next_buf_list], info->index.resource);
3332          }
3333          take_index_buffer_ownership = false;
3334          memcpy(&p->info, info, DRAW_INFO_SIZE_WITHOUT_MIN_MAX_INDEX);
3335          p->num_draws = dr;
3336          memcpy(p->slot, &draws[total_offset], sizeof(draws[0]) * dr);
3337          num_draws -= dr;
3338 
3339          total_offset += dr;
3340       }
3341    }
3342 }
3343 
3344 struct tc_draw_vstate_single {
3345    struct tc_call_base base;
3346    struct pipe_draw_start_count_bias draw;
3347 
3348    /* The following states must be together without holes because they are
3349     * compared by draw merging.
3350     */
3351    struct pipe_vertex_state *state;
3352    uint32_t partial_velem_mask;
3353    struct pipe_draw_vertex_state_info info;
3354 };
3355 
3356 static bool
3357 is_next_call_a_mergeable_draw_vstate(struct tc_draw_vstate_single *first,
3358                                      struct tc_draw_vstate_single *next)
3359 {
3360    if (next->base.call_id != TC_CALL_draw_vstate_single)
3361       return false;
3362 
3363    return !memcmp(&first->state, &next->state,
3364                   offsetof(struct tc_draw_vstate_single, info) +
3365                   sizeof(struct pipe_draw_vertex_state_info) -
3366                   offsetof(struct tc_draw_vstate_single, state));
3367 }
3368 
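/* draw_vertex_state merging works like the draw_single merging above: the
 * single memcmp relies on state, partial_velem_mask and info being laid out
 * contiguously in tc_draw_vstate_single (see the struct comment).
 */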
3369 static uint16_t
3370 tc_call_draw_vstate_single(struct pipe_context *pipe, void *call, uint64_t *last_ptr)
3371 {
3372    /* Draw call merging. */
3373    struct tc_draw_vstate_single *first = to_call(call, tc_draw_vstate_single);
3374    struct tc_draw_vstate_single *last = (struct tc_draw_vstate_single *)last_ptr;
3375    struct tc_draw_vstate_single *next = get_next_call(first, tc_draw_vstate_single);
3376 
3377    /* If at least 2 consecutive draw calls can be merged... */
3378    if (next != last &&
3379        is_next_call_a_mergeable_draw_vstate(first, next)) {
3380       /* The maximum number of merged draws is given by the batch size. */
3381       struct pipe_draw_start_count_bias draws[TC_SLOTS_PER_BATCH /
3382                                               call_size(tc_draw_vstate_single)];
3383       unsigned num_draws = 2;
3384 
3385       draws[0] = first->draw;
3386       draws[1] = next->draw;
3387 
3388       /* Find how many other draws can be merged. */
3389       next = get_next_call(next, tc_draw_vstate_single);
3390       for (; next != last &&
3391            is_next_call_a_mergeable_draw_vstate(first, next);
3392            next = get_next_call(next, tc_draw_vstate_single),
3393            num_draws++)
3394          draws[num_draws] = next->draw;
3395 
3396       pipe->draw_vertex_state(pipe, first->state, first->partial_velem_mask,
3397                               first->info, draws, num_draws);
3398       /* Since all draws use the same state, drop all references at once. */
3399       tc_drop_vertex_state_references(first->state, num_draws);
3400 
3401       return call_size(tc_draw_vstate_single) * num_draws;
3402    }
3403 
3404    pipe->draw_vertex_state(pipe, first->state, first->partial_velem_mask,
3405                            first->info, &first->draw, 1);
3406    tc_drop_vertex_state_references(first->state, 1);
3407    return call_size(tc_draw_vstate_single);
3408 }
3409 
3410 struct tc_draw_vstate_multi {
3411    struct tc_call_base base;
3412    uint32_t partial_velem_mask;
3413    struct pipe_draw_vertex_state_info info;
3414    unsigned num_draws;
3415    struct pipe_vertex_state *state;
3416    struct pipe_draw_start_count_bias slot[]; /* variable-sized array */
3417 };
3418 
3419 static uint16_t
3420 tc_call_draw_vstate_multi(struct pipe_context *pipe, void *call, uint64_t *last)
3421 {
3422    struct tc_draw_vstate_multi *info = (struct tc_draw_vstate_multi*)call;
3423 
3424    pipe->draw_vertex_state(pipe, info->state, info->partial_velem_mask,
3425                            info->info, info->slot, info->num_draws);
3426    tc_drop_vertex_state_references(info->state, 1);
3427    return info->base.num_slots;
3428 }
3429 
3430 static void
3431 tc_draw_vertex_state(struct pipe_context *_pipe,
3432                      struct pipe_vertex_state *state,
3433                      uint32_t partial_velem_mask,
3434                      struct pipe_draw_vertex_state_info info,
3435                      const struct pipe_draw_start_count_bias *draws,
3436                      unsigned num_draws)
3437 {
3438    struct threaded_context *tc = threaded_context(_pipe);
3439 
3440    if (unlikely(tc->add_all_gfx_bindings_to_buffer_list))
3441       tc_add_all_gfx_bindings_to_buffer_list(tc);
3442 
3443    if (num_draws == 1) {
3444       /* Single draw. */
3445       struct tc_draw_vstate_single *p =
3446          tc_add_call(tc, TC_CALL_draw_vstate_single, tc_draw_vstate_single);
3447       p->partial_velem_mask = partial_velem_mask;
3448       p->draw = draws[0];
3449       p->info.mode = info.mode;
3450       p->info.take_vertex_state_ownership = false;
3451 
3452       /* This should always be 0 for simplicity because we assume that
3453        * index_bias doesn't vary.
3454        */
3455       assert(draws[0].index_bias == 0);
3456 
3457       if (!info.take_vertex_state_ownership)
3458          tc_set_vertex_state_reference(&p->state, state);
3459       else
3460          p->state = state;
3461       return;
3462    }
3463 
3464    const int draw_overhead_bytes = sizeof(struct tc_draw_vstate_multi);
3465    const int one_draw_slot_bytes = sizeof(((struct tc_draw_vstate_multi*)NULL)->slot[0]);
3466    const int slots_for_one_draw = DIV_ROUND_UP(draw_overhead_bytes + one_draw_slot_bytes,
3467                                                sizeof(struct tc_call_base));
3468    /* Multi draw. */
3469    int total_offset = 0;
3470    bool take_vertex_state_ownership = info.take_vertex_state_ownership;
3471    while (num_draws) {
3472       struct tc_batch *next = &tc->batch_slots[tc->next];
3473 
3474       int nb_slots_left = TC_SLOTS_PER_BATCH - next->num_total_slots;
3475       /* If there isn't enough room for even one draw, assume a new full batch will be used. */
3476       if (nb_slots_left < slots_for_one_draw)
3477          nb_slots_left = TC_SLOTS_PER_BATCH;
3478       const int size_left_bytes = nb_slots_left * sizeof(struct tc_call_base);
3479 
3480       /* How many draws can we fit in the current batch */
3481       const int dr = MIN2(num_draws, (size_left_bytes - draw_overhead_bytes) / one_draw_slot_bytes);
3482 
3483       /* Non-indexed call or indexed with a real index buffer. */
3484       struct tc_draw_vstate_multi *p =
3485          tc_add_slot_based_call(tc, TC_CALL_draw_vstate_multi, tc_draw_vstate_multi, dr);
3486 
3487       if (!take_vertex_state_ownership)
3488          tc_set_vertex_state_reference(&p->state, state);
3489       else
3490          p->state = state;
3491 
3492       take_vertex_state_ownership = false;
3493       p->partial_velem_mask = partial_velem_mask;
3494       p->info.mode = info.mode;
3495       p->info.take_vertex_state_ownership = false;
3496       p->num_draws = dr;
3497       memcpy(p->slot, &draws[total_offset], sizeof(draws[0]) * dr);
3498       num_draws -= dr;
3499 
3500       total_offset += dr;
3501    }
3502 }
3503 
3504 struct tc_launch_grid_call {
3505    struct tc_call_base base;
3506    struct pipe_grid_info info;
3507 };
3508 
3509 static uint16_t
3510 tc_call_launch_grid(struct pipe_context *pipe, void *call, uint64_t *last)
3511 {
3512    struct pipe_grid_info *p = &to_call(call, tc_launch_grid_call)->info;
3513 
3514    pipe->launch_grid(pipe, p);
3515    tc_drop_resource_reference(p->indirect);
3516    return call_size(tc_launch_grid_call);
3517 }
3518 
3519 static void
3520 tc_launch_grid(struct pipe_context *_pipe,
3521                const struct pipe_grid_info *info)
3522 {
3523    struct threaded_context *tc = threaded_context(_pipe);
3524    struct tc_launch_grid_call *p = tc_add_call(tc, TC_CALL_launch_grid,
3525                                                tc_launch_grid_call);
3526    assert(info->input == NULL);
3527 
3528    if (unlikely(tc->add_all_compute_bindings_to_buffer_list))
3529       tc_add_all_compute_bindings_to_buffer_list(tc);
3530 
3531    tc_set_resource_reference(&p->info.indirect, info->indirect);
3532    memcpy(&p->info, info, sizeof(*info));
3533 
3534    if (info->indirect)
3535       tc_add_to_buffer_list(&tc->buffer_lists[tc->next_buf_list], info->indirect);
3536 }
3537 
3538 static uint16_t
3539 tc_call_resource_copy_region(struct pipe_context *pipe, void *call, uint64_t *last)
3540 {
3541    struct tc_resource_copy_region *p = to_call(call, tc_resource_copy_region);
3542 
3543    pipe->resource_copy_region(pipe, p->dst, p->dst_level, p->dstx, p->dsty,
3544                               p->dstz, p->src, p->src_level, &p->src_box);
3545    tc_drop_resource_reference(p->dst);
3546    tc_drop_resource_reference(p->src);
3547    return call_size(tc_resource_copy_region);
3548 }
3549 
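/* For buffer targets, tc_resource_copy_region below adds both buffers to the
 * current buffer list and grows the destination's valid_buffer_range; other
 * u_threaded_context code consults these when deciding how buffer maps must
 * synchronize.
 */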
3550 static void
3551 tc_resource_copy_region(struct pipe_context *_pipe,
3552                         struct pipe_resource *dst, unsigned dst_level,
3553                         unsigned dstx, unsigned dsty, unsigned dstz,
3554                         struct pipe_resource *src, unsigned src_level,
3555                         const struct pipe_box *src_box)
3556 {
3557    struct threaded_context *tc = threaded_context(_pipe);
3558    struct threaded_resource *tdst = threaded_resource(dst);
3559    struct tc_resource_copy_region *p =
3560       tc_add_call(tc, TC_CALL_resource_copy_region,
3561                   tc_resource_copy_region);
3562 
3563    tc_set_resource_reference(&p->dst, dst);
3564    p->dst_level = dst_level;
3565    p->dstx = dstx;
3566    p->dsty = dsty;
3567    p->dstz = dstz;
3568    tc_set_resource_reference(&p->src, src);
3569    p->src_level = src_level;
3570    p->src_box = *src_box;
3571 
3572    if (dst->target == PIPE_BUFFER) {
3573       struct tc_buffer_list *next = &tc->buffer_lists[tc->next_buf_list];
3574 
3575       tc_add_to_buffer_list(next, src);
3576       tc_add_to_buffer_list(next, dst);
3577 
3578       util_range_add(&tdst->b, &tdst->valid_buffer_range,
3579                      dstx, dstx + src_box->width);
3580    }
3581 }
3582 
3583 struct tc_blit_call {
3584    struct tc_call_base base;
3585    struct pipe_blit_info info;
3586 };
3587 
3588 static uint16_t
3589 tc_call_blit(struct pipe_context *pipe, void *call, uint64_t *last)
3590 {
3591    struct pipe_blit_info *blit = &to_call(call, tc_blit_call)->info;
3592 
3593    pipe->blit(pipe, blit);
3594    tc_drop_resource_reference(blit->dst.resource);
3595    tc_drop_resource_reference(blit->src.resource);
3596    return call_size(tc_blit_call);
3597 }
3598 
3599 static void
3600 tc_blit(struct pipe_context *_pipe, const struct pipe_blit_info *info)
3601 {
3602    struct threaded_context *tc = threaded_context(_pipe);
3603    struct tc_blit_call *blit = tc_add_call(tc, TC_CALL_blit, tc_blit_call);
3604 
3605    tc_set_resource_reference(&blit->info.dst.resource, info->dst.resource);
3606    tc_set_resource_reference(&blit->info.src.resource, info->src.resource);
3607    memcpy(&blit->info, info, sizeof(*info));
3608 }
3609 
3610 struct tc_generate_mipmap {
3611    struct tc_call_base base;
3612    enum pipe_format format;
3613    unsigned base_level;
3614    unsigned last_level;
3615    unsigned first_layer;
3616    unsigned last_layer;
3617    struct pipe_resource *res;
3618 };
3619 
3620 static uint16_t
3621 tc_call_generate_mipmap(struct pipe_context *pipe, void *call, uint64_t *last)
3622 {
3623    struct tc_generate_mipmap *p = to_call(call, tc_generate_mipmap);
3624    ASSERTED bool result = pipe->generate_mipmap(pipe, p->res, p->format,
3625                                                     p->base_level,
3626                                                     p->last_level,
3627                                                     p->first_layer,
3628                                                     p->last_layer);
3629    assert(result);
3630    tc_drop_resource_reference(p->res);
3631    return call_size(tc_generate_mipmap);
3632 }
3633 
3634 static bool
3635 tc_generate_mipmap(struct pipe_context *_pipe,
3636                    struct pipe_resource *res,
3637                    enum pipe_format format,
3638                    unsigned base_level,
3639                    unsigned last_level,
3640                    unsigned first_layer,
3641                    unsigned last_layer)
3642 {
3643    struct threaded_context *tc = threaded_context(_pipe);
3644    struct pipe_context *pipe = tc->pipe;
3645    struct pipe_screen *screen = pipe->screen;
3646    unsigned bind = PIPE_BIND_SAMPLER_VIEW;
3647 
3648    if (util_format_is_depth_or_stencil(format))
3649       bind = PIPE_BIND_DEPTH_STENCIL;
3650    else
3651       bind = PIPE_BIND_RENDER_TARGET;
3652 
3653    if (!screen->is_format_supported(screen, format, res->target,
3654                                     res->nr_samples, res->nr_storage_samples,
3655                                     bind))
3656       return false;
3657 
3658    struct tc_generate_mipmap *p =
3659       tc_add_call(tc, TC_CALL_generate_mipmap, tc_generate_mipmap);
3660 
3661    tc_set_resource_reference(&p->res, res);
3662    p->format = format;
3663    p->base_level = base_level;
3664    p->last_level = last_level;
3665    p->first_layer = first_layer;
3666    p->last_layer = last_layer;
3667    return true;
3668 }
3669 
3670 struct tc_resource_call {
3671    struct tc_call_base base;
3672    struct pipe_resource *resource;
3673 };
3674 
3675 static uint16_t
3676 tc_call_flush_resource(struct pipe_context *pipe, void *call, uint64_t *last)
3677 {
3678    struct pipe_resource *resource = to_call(call, tc_resource_call)->resource;
3679 
3680    pipe->flush_resource(pipe, resource);
3681    tc_drop_resource_reference(resource);
3682    return call_size(tc_resource_call);
3683 }
3684 
3685 static void
3686 tc_flush_resource(struct pipe_context *_pipe, struct pipe_resource *resource)
3687 {
3688    struct threaded_context *tc = threaded_context(_pipe);
3689    struct tc_resource_call *call = tc_add_call(tc, TC_CALL_flush_resource,
3690                                                tc_resource_call);
3691 
3692    tc_set_resource_reference(&call->resource, resource);
3693 }
3694 
3695 static uint16_t
3696 tc_call_invalidate_resource(struct pipe_context *pipe, void *call, uint64_t *last)
3697 {
3698    struct pipe_resource *resource = to_call(call, tc_resource_call)->resource;
3699 
3700    pipe->invalidate_resource(pipe, resource);
3701    tc_drop_resource_reference(resource);
3702    return call_size(tc_resource_call);
3703 }
3704 
3705 static void
3706 tc_invalidate_resource(struct pipe_context *_pipe,
3707                        struct pipe_resource *resource)
3708 {
3709    struct threaded_context *tc = threaded_context(_pipe);
3710 
3711    if (resource->target == PIPE_BUFFER) {
3712       tc_invalidate_buffer(tc, threaded_resource(resource));
3713       return;
3714    }
3715 
3716    struct tc_resource_call *call = tc_add_call(tc, TC_CALL_invalidate_resource,
3717                                                tc_resource_call);
3718    tc_set_resource_reference(&call->resource, resource);
3719 }
3720 
3721 struct tc_clear {
3722    struct tc_call_base base;
3723    bool scissor_state_set;
3724    uint8_t stencil;
3725    uint16_t buffers;
3726    float depth;
3727    struct pipe_scissor_state scissor_state;
3728    union pipe_color_union color;
3729 };
3730 
3731 static uint16_t
3732 tc_call_clear(struct pipe_context *pipe, void *call, uint64_t *last)
3733 {
3734    struct tc_clear *p = to_call(call, tc_clear);
3735 
3736    pipe->clear(pipe, p->buffers, p->scissor_state_set ? &p->scissor_state : NULL, &p->color, p->depth, p->stencil);
3737    return call_size(tc_clear);
3738 }
3739 
3740 static void
3741 tc_clear(struct pipe_context *_pipe, unsigned buffers, const struct pipe_scissor_state *scissor_state,
3742          const union pipe_color_union *color, double depth,
3743          unsigned stencil)
3744 {
3745    struct threaded_context *tc = threaded_context(_pipe);
3746    struct tc_clear *p = tc_add_call(tc, TC_CALL_clear, tc_clear);
3747 
3748    p->buffers = buffers;
3749    if (scissor_state)
3750       p->scissor_state = *scissor_state;
3751    p->scissor_state_set = !!scissor_state;
3752    p->color = *color;
3753    p->depth = depth;
3754    p->stencil = stencil;
3755 }
3756 
3757 struct tc_clear_render_target {
3758    struct tc_call_base base;
3759    bool render_condition_enabled;
3760    unsigned dstx;
3761    unsigned dsty;
3762    unsigned width;
3763    unsigned height;
3764    union pipe_color_union color;
3765    struct pipe_surface *dst;
3766 };
3767 
3768 static uint16_t
3769 tc_call_clear_render_target(struct pipe_context *pipe, void *call, uint64_t *last)
3770 {
3771    struct tc_clear_render_target *p = to_call(call, tc_clear_render_target);
3772 
3773    pipe->clear_render_target(pipe, p->dst, &p->color, p->dstx, p->dsty, p->width, p->height,
3774                              p->render_condition_enabled);
3775    tc_drop_surface_reference(p->dst);
3776    return call_size(tc_clear_render_target);
3777 }
3778 
3779 static void
3780 tc_clear_render_target(struct pipe_context *_pipe,
3781                        struct pipe_surface *dst,
3782                        const union pipe_color_union *color,
3783                        unsigned dstx, unsigned dsty,
3784                        unsigned width, unsigned height,
3785                        bool render_condition_enabled)
3786 {
3787    struct threaded_context *tc = threaded_context(_pipe);
3788    struct tc_clear_render_target *p = tc_add_call(tc, TC_CALL_clear_render_target, tc_clear_render_target);
3789    p->dst = NULL;
3790    pipe_surface_reference(&p->dst, dst);
3791    p->color = *color;
3792    p->dstx = dstx;
3793    p->dsty = dsty;
3794    p->width = width;
3795    p->height = height;
3796    p->render_condition_enabled = render_condition_enabled;
3797 }
3798 
3799 
3800 struct tc_clear_depth_stencil {
3801    struct tc_call_base base;
3802    bool render_condition_enabled;
3803    float depth;
3804    unsigned clear_flags;
3805    unsigned stencil;
3806    unsigned dstx;
3807    unsigned dsty;
3808    unsigned width;
3809    unsigned height;
3810    struct pipe_surface *dst;
3811 };
3812 
3813 
3814 static uint16_t
3815 tc_call_clear_depth_stencil(struct pipe_context *pipe, void *call, uint64_t *last)
3816 {
3817    struct tc_clear_depth_stencil *p = to_call(call, tc_clear_depth_stencil);
3818 
3819    pipe->clear_depth_stencil(pipe, p->dst, p->clear_flags, p->depth, p->stencil,
3820                              p->dstx, p->dsty, p->width, p->height,
3821                              p->render_condition_enabled);
3822    tc_drop_surface_reference(p->dst);
3823    return call_size(tc_clear_depth_stencil);
3824 }
3825 
3826 static void
3827 tc_clear_depth_stencil(struct pipe_context *_pipe,
3828                        struct pipe_surface *dst, unsigned clear_flags,
3829                        double depth, unsigned stencil, unsigned dstx,
3830                        unsigned dsty, unsigned width, unsigned height,
3831                        bool render_condition_enabled)
3832 {
3833    struct threaded_context *tc = threaded_context(_pipe);
3834    struct tc_clear_depth_stencil *p = tc_add_call(tc, TC_CALL_clear_depth_stencil, tc_clear_depth_stencil);
3835    p->dst = NULL;
3836    pipe_surface_reference(&p->dst, dst);
3837    p->clear_flags = clear_flags;
3838    p->depth = depth;
3839    p->stencil = stencil;
3840    p->dstx = dstx;
3841    p->dsty = dsty;
3842    p->width = width;
3843    p->height = height;
3844    p->render_condition_enabled = render_condition_enabled;
3845 }
3846 
3847 struct tc_clear_buffer {
3848    struct tc_call_base base;
3849    uint8_t clear_value_size;
3850    unsigned offset;
3851    unsigned size;
3852    char clear_value[16];
3853    struct pipe_resource *res;
3854 };
3855 
3856 static uint16_t
3857 tc_call_clear_buffer(struct pipe_context *pipe, void *call, uint64_t *last)
3858 {
3859    struct tc_clear_buffer *p = to_call(call, tc_clear_buffer);
3860 
3861    pipe->clear_buffer(pipe, p->res, p->offset, p->size, p->clear_value,
3862                       p->clear_value_size);
3863    tc_drop_resource_reference(p->res);
3864    return call_size(tc_clear_buffer);
3865 }
3866 
3867 static void
3868 tc_clear_buffer(struct pipe_context *_pipe, struct pipe_resource *res,
3869                 unsigned offset, unsigned size,
3870                 const void *clear_value, int clear_value_size)
3871 {
3872    struct threaded_context *tc = threaded_context(_pipe);
3873    struct threaded_resource *tres = threaded_resource(res);
3874    struct tc_clear_buffer *p =
3875       tc_add_call(tc, TC_CALL_clear_buffer, tc_clear_buffer);
3876 
3877    tc_set_resource_reference(&p->res, res);
3878    tc_add_to_buffer_list(&tc->buffer_lists[tc->next_buf_list], res);
3879    p->offset = offset;
3880    p->size = size;
3881    memcpy(p->clear_value, clear_value, clear_value_size);
3882    p->clear_value_size = clear_value_size;
3883 
3884    util_range_add(&tres->b, &tres->valid_buffer_range, offset, offset + size);
3885 }
3886 
3887 struct tc_clear_texture {
3888    struct tc_call_base base;
3889    unsigned level;
3890    struct pipe_box box;
3891    char data[16];
3892    struct pipe_resource *res;
3893 };
3894 
3895 static uint16_t
3896 tc_call_clear_texture(struct pipe_context *pipe, void *call, uint64_t *last)
3897 {
3898    struct tc_clear_texture *p = to_call(call, tc_clear_texture);
3899 
3900    pipe->clear_texture(pipe, p->res, p->level, &p->box, p->data);
3901    tc_drop_resource_reference(p->res);
3902    return call_size(tc_clear_texture);
3903 }
3904 
3905 static void
3906 tc_clear_texture(struct pipe_context *_pipe, struct pipe_resource *res,
3907                  unsigned level, const struct pipe_box *box, const void *data)
3908 {
3909    struct threaded_context *tc = threaded_context(_pipe);
3910    struct tc_clear_texture *p =
3911       tc_add_call(tc, TC_CALL_clear_texture, tc_clear_texture);
3912 
3913    tc_set_resource_reference(&p->res, res);
3914    p->level = level;
3915    p->box = *box;
3916    memcpy(p->data, data,
3917           util_format_get_blocksize(res->format));
3918 }
3919 
3920 struct tc_resource_commit {
3921    struct tc_call_base base;
3922    bool commit;
3923    unsigned level;
3924    struct pipe_box box;
3925    struct pipe_resource *res;
3926 };
3927 
3928 static uint16_t
3929 tc_call_resource_commit(struct pipe_context *pipe, void *call, uint64_t *last)
3930 {
3931    struct tc_resource_commit *p = to_call(call, tc_resource_commit);
3932 
3933    pipe->resource_commit(pipe, p->res, p->level, &p->box, p->commit);
3934    tc_drop_resource_reference(p->res);
3935    return call_size(tc_resource_commit);
3936 }
3937 
3938 static bool
3939 tc_resource_commit(struct pipe_context *_pipe, struct pipe_resource *res,
3940                    unsigned level, struct pipe_box *box, bool commit)
3941 {
3942    struct threaded_context *tc = threaded_context(_pipe);
3943    struct tc_resource_commit *p =
3944       tc_add_call(tc, TC_CALL_resource_commit, tc_resource_commit);
3945 
3946    tc_set_resource_reference(&p->res, res);
3947    p->level = level;
3948    p->box = *box;
3949    p->commit = commit;
3950    return true; /* we don't care about the return value for this call */
3951 }
3952 
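/* Most of the Intel perf-query entrypoints below either call straight into
 * the driver or tc_sync() first because they return results to the caller;
 * only begin/end are recorded as queued calls.
 */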
3953 static unsigned
3954 tc_init_intel_perf_query_info(struct pipe_context *_pipe)
3955 {
3956    struct threaded_context *tc = threaded_context(_pipe);
3957    struct pipe_context *pipe = tc->pipe;
3958 
3959    return pipe->init_intel_perf_query_info(pipe);
3960 }
3961 
3962 static void
3963 tc_get_intel_perf_query_info(struct pipe_context *_pipe,
3964                              unsigned query_index,
3965                              const char **name,
3966                              uint32_t *data_size,
3967                              uint32_t *n_counters,
3968                              uint32_t *n_active)
3969 {
3970    struct threaded_context *tc = threaded_context(_pipe);
3971    struct pipe_context *pipe = tc->pipe;
3972 
3973    tc_sync(tc); /* n_active vs begin/end_intel_perf_query */
3974    pipe->get_intel_perf_query_info(pipe, query_index, name, data_size,
3975          n_counters, n_active);
3976 }
3977 
3978 static void
3979 tc_get_intel_perf_query_counter_info(struct pipe_context *_pipe,
3980                                      unsigned query_index,
3981                                      unsigned counter_index,
3982                                      const char **name,
3983                                      const char **desc,
3984                                      uint32_t *offset,
3985                                      uint32_t *data_size,
3986                                      uint32_t *type_enum,
3987                                      uint32_t *data_type_enum,
3988                                      uint64_t *raw_max)
3989 {
3990    struct threaded_context *tc = threaded_context(_pipe);
3991    struct pipe_context *pipe = tc->pipe;
3992 
3993    pipe->get_intel_perf_query_counter_info(pipe, query_index, counter_index,
3994          name, desc, offset, data_size, type_enum, data_type_enum, raw_max);
3995 }
3996 
3997 static struct pipe_query *
3998 tc_new_intel_perf_query_obj(struct pipe_context *_pipe, unsigned query_index)
3999 {
4000    struct threaded_context *tc = threaded_context(_pipe);
4001    struct pipe_context *pipe = tc->pipe;
4002 
4003    return pipe->new_intel_perf_query_obj(pipe, query_index);
4004 }
4005 
4006 static uint16_t
4007 tc_call_begin_intel_perf_query(struct pipe_context *pipe, void *call, uint64_t *last)
4008 {
4009    (void)pipe->begin_intel_perf_query(pipe, to_call(call, tc_query_call)->query);
4010    return call_size(tc_query_call);
4011 }
4012 
4013 static bool
4014 tc_begin_intel_perf_query(struct pipe_context *_pipe, struct pipe_query *q)
4015 {
4016    struct threaded_context *tc = threaded_context(_pipe);
4017 
4018    tc_add_call(tc, TC_CALL_begin_intel_perf_query, tc_query_call)->query = q;
4019 
4020    /* Assume success; a begin failure can be signaled later from get_intel_perf_query_data. */
4021    return true;
4022 }
4023 
4024 static uint16_t
4025 tc_call_end_intel_perf_query(struct pipe_context *pipe, void *call, uint64_t *last)
4026 {
4027    pipe->end_intel_perf_query(pipe, to_call(call, tc_query_call)->query);
4028    return call_size(tc_query_call);
4029 }
4030 
4031 static void
4032 tc_end_intel_perf_query(struct pipe_context *_pipe, struct pipe_query *q)
4033 {
4034    struct threaded_context *tc = threaded_context(_pipe);
4035 
4036    tc_add_call(tc, TC_CALL_end_intel_perf_query, tc_query_call)->query = q;
4037 }
4038 
4039 static void
4040 tc_delete_intel_perf_query(struct pipe_context *_pipe, struct pipe_query *q)
4041 {
4042    struct threaded_context *tc = threaded_context(_pipe);
4043    struct pipe_context *pipe = tc->pipe;
4044 
4045    tc_sync(tc); /* flush potentially pending begin/end_intel_perf_queries */
4046    pipe->delete_intel_perf_query(pipe, q);
4047 }
4048 
4049 static void
4050 tc_wait_intel_perf_query(struct pipe_context *_pipe, struct pipe_query *q)
4051 {
4052    struct threaded_context *tc = threaded_context(_pipe);
4053    struct pipe_context *pipe = tc->pipe;
4054 
4055    tc_sync(tc); /* flush potentially pending begin/end_intel_perf_queries */
4056    pipe->wait_intel_perf_query(pipe, q);
4057 }
4058 
4059 static bool
4060 tc_is_intel_perf_query_ready(struct pipe_context *_pipe, struct pipe_query *q)
4061 {
4062    struct threaded_context *tc = threaded_context(_pipe);
4063    struct pipe_context *pipe = tc->pipe;
4064 
4065    tc_sync(tc); /* flush potentially pending begin/end_intel_perf_queries */
4066    return pipe->is_intel_perf_query_ready(pipe, q);
4067 }
4068 
4069 static bool
4070 tc_get_intel_perf_query_data(struct pipe_context *_pipe,
4071                              struct pipe_query *q,
4072                              size_t data_size,
4073                              uint32_t *data,
4074                              uint32_t *bytes_written)
4075 {
4076    struct threaded_context *tc = threaded_context(_pipe);
4077    struct pipe_context *pipe = tc->pipe;
4078 
4079    tc_sync(tc); /* flush potentially pending begin/end_intel_perf_queries */
4080    return pipe->get_intel_perf_query_data(pipe, q, data_size, data, bytes_written);
4081 }
4082 
4083 /********************************************************************
4084  * callback
4085  */
4086 
4087 struct tc_callback_call {
4088    struct tc_call_base base;
4089    void (*fn)(void *data);
4090    void *data;
4091 };
4092 
4093 static uint16_t
4094 tc_call_callback(UNUSED struct pipe_context *pipe, void *call, uint64_t *last)
4095 {
4096    struct tc_callback_call *p = to_call(call, tc_callback_call);
4097 
4098    p->fn(p->data);
4099    return call_size(tc_callback_call);
4100 }
4101 
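/* tc_callback either runs the callback immediately (asap is set and the
 * context is currently synchronized, per tc_is_sync) or records it so the
 * driver thread invokes it in order with the other queued calls.
 */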
4102 static void
4103 tc_callback(struct pipe_context *_pipe, void (*fn)(void *), void *data,
4104             bool asap)
4105 {
4106    struct threaded_context *tc = threaded_context(_pipe);
4107 
4108    if (asap && tc_is_sync(tc)) {
4109       fn(data);
4110       return;
4111    }
4112 
4113    struct tc_callback_call *p =
4114       tc_add_call(tc, TC_CALL_callback, tc_callback_call);
4115    p->fn = fn;
4116    p->data = data;
4117 }
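/* Illustrative sketch (not part of the original file): how a frontend or
 * driver might use the callback hook above.  "free_job" and "job" are
 * hypothetical names.
 *
 *    static void free_job(void *data) { FREE(data); }
 *    ...
 *    // Run free_job(job) once the driver thread reaches this point in the
 *    // command stream; with asap=true and a synchronous tc it runs right away.
 *    pipe->callback(pipe, free_job, job, true);
 */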
4118 
4119 
4120 /********************************************************************
4121  * create & destroy
4122  */
4123 
4124 static void
4125 tc_destroy(struct pipe_context *_pipe)
4126 {
4127    struct threaded_context *tc = threaded_context(_pipe);
4128    struct pipe_context *pipe = tc->pipe;
4129 
4130    if (tc->base.const_uploader &&
4131        tc->base.stream_uploader != tc->base.const_uploader)
4132       u_upload_destroy(tc->base.const_uploader);
4133 
4134    if (tc->base.stream_uploader)
4135       u_upload_destroy(tc->base.stream_uploader);
4136 
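   /* Wait for the driver thread to finish all queued batches before the queue
    * and the wrapped driver context are torn down below.
    */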
4137    tc_sync(tc);
4138 
4139    if (util_queue_is_initialized(&tc->queue)) {
4140       util_queue_destroy(&tc->queue);
4141 
4142       for (unsigned i = 0; i < TC_MAX_BATCHES; i++) {
4143          util_queue_fence_destroy(&tc->batch_slots[i].fence);
4144          assert(!tc->batch_slots[i].token);
4145       }
4146    }
4147 
4148    slab_destroy_child(&tc->pool_transfers);
4149    assert(tc->batch_slots[tc->next].num_total_slots == 0);
4150    pipe->destroy(pipe);
4151 
4152    for (unsigned i = 0; i < TC_MAX_BUFFER_LISTS; i++) {
4153       if (!util_queue_fence_is_signalled(&tc->buffer_lists[i].driver_flushed_fence))
4154          util_queue_fence_signal(&tc->buffer_lists[i].driver_flushed_fence);
4155       util_queue_fence_destroy(&tc->buffer_lists[i].driver_flushed_fence);
4156    }
4157 
4158    FREE(tc);
4159 }
4160 
4161 static const tc_execute execute_func[TC_NUM_CALLS] = {
4162 #define CALL(name) tc_call_##name,
4163 #include "u_threaded_context_calls.h"
4164 #undef CALL
4165 };
4166 
4167 void tc_driver_internal_flush_notify(struct threaded_context *tc)
4168 {
4169    /* Allow drivers to call this function even for internal contexts that
4170     * don't have tc. It simplifies drivers.
4171     */
4172    if (!tc)
4173       return;
4174 
4175    /* Signal fences set by tc_batch_execute. */
4176    for (unsigned i = 0; i < tc->num_signal_fences_next_flush; i++)
4177       util_queue_fence_signal(tc->signal_fences_next_flush[i]);
4178 
4179    tc->num_signal_fences_next_flush = 0;
4180 }
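/* Illustrative sketch (not part of the original file): a driver that flushes
 * internally (e.g. when its command buffer fills up) calls the notify hook
 * right after submitting, so that fences recorded for the next flush get
 * signaled.  The "mydrv_*" names are hypothetical:
 *
 *    static void mydrv_flush_internal(struct mydrv_context *ctx)
 *    {
 *       mydrv_submit_cmdbuf(ctx);
 *       tc_driver_internal_flush_notify(ctx->tc);  // safe even if ctx->tc is NULL
 *    }
 */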
4181 
4182 /**
4183  * Wrap an existing pipe_context into a threaded_context.
4184  *
4185  * \param pipe                 pipe_context to wrap
4186  * \param parent_transfer_pool parent slab pool set up for creating pipe_-
4187  *                             transfer objects; the driver should have one
4188  *                             in pipe_screen.
4189  * \param replace_buffer  callback for replacing a pipe_resource's storage
4190  *                        with another pipe_resource's storage.
4191  * \param options         optional TC options/callbacks
4192  * \param out  if "out" is non-NULL and creation succeeds, the new
4193  *             threaded_context is also stored here
4194  */
4195 struct pipe_context *
4196 threaded_context_create(struct pipe_context *pipe,
4197                         struct slab_parent_pool *parent_transfer_pool,
4198                         tc_replace_buffer_storage_func replace_buffer,
4199                         const struct threaded_context_options *options,
4200                         struct threaded_context **out)
4201 {
4202    struct threaded_context *tc;
4203 
4204    if (!pipe)
4205       return NULL;
4206 
4207    util_cpu_detect();
4208 
4209    if (!debug_get_bool_option("GALLIUM_THREAD", util_get_cpu_caps()->nr_cpus > 1))
4210       return pipe;
4211 
4212    tc = CALLOC_STRUCT(threaded_context);
4213    if (!tc) {
4214       pipe->destroy(pipe);
4215       return NULL;
4216    }
4217 
4218    if (options)
4219       tc->options = *options;
4220 
4221    pipe = trace_context_create_threaded(pipe->screen, pipe, &replace_buffer, &tc->options);
4222 
4223    /* The driver context isn't wrapped, so set its "priv" to NULL. */
4224    pipe->priv = NULL;
4225 
4226    tc->pipe = pipe;
4227    tc->replace_buffer_storage = replace_buffer;
4228    tc->map_buffer_alignment =
4229       pipe->screen->get_param(pipe->screen, PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT);
4230    tc->ubo_alignment =
4231       MAX2(pipe->screen->get_param(pipe->screen, PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT), 64);
4232    tc->base.priv = pipe; /* priv points to the wrapped driver context */
4233    tc->base.screen = pipe->screen;
4234    tc->base.destroy = tc_destroy;
4235    tc->base.callback = tc_callback;
4236 
4237    tc->base.stream_uploader = u_upload_clone(&tc->base, pipe->stream_uploader);
4238    if (pipe->stream_uploader == pipe->const_uploader)
4239       tc->base.const_uploader = tc->base.stream_uploader;
4240    else
4241       tc->base.const_uploader = u_upload_clone(&tc->base, pipe->const_uploader);
4242 
4243    if (!tc->base.stream_uploader || !tc->base.const_uploader)
4244       goto fail;
4245 
4246    tc->use_forced_staging_uploads = true;
4247 
4248    /* The queue size is the number of batches "waiting". Batches are removed
4249     * from the queue before being executed, so keep one tc_batch slot for that
4250     * execution. Also, keep one unused slot for an unflushed batch.
4251     */
4252    if (!util_queue_init(&tc->queue, "gdrv", TC_MAX_BATCHES - 2, 1, 0, NULL))
4253       goto fail;
4254 
4255    for (unsigned i = 0; i < TC_MAX_BATCHES; i++) {
4256 #if !defined(NDEBUG) && TC_DEBUG >= 1
4257       tc->batch_slots[i].sentinel = TC_SENTINEL;
4258 #endif
4259       tc->batch_slots[i].tc = tc;
4260       util_queue_fence_init(&tc->batch_slots[i].fence);
4261    }
4262    for (unsigned i = 0; i < TC_MAX_BUFFER_LISTS; i++)
4263       util_queue_fence_init(&tc->buffer_lists[i].driver_flushed_fence);
4264 
4265    list_inithead(&tc->unflushed_queries);
4266 
4267    slab_create_child(&tc->pool_transfers, parent_transfer_pool);
4268 
4269    /* If you have different limits in each shader stage, set the maximum. */
4270    struct pipe_screen *screen = pipe->screen;
4271    tc->max_vertex_buffers =
4272       screen->get_param(screen, PIPE_CAP_MAX_VERTEX_BUFFERS);
4273    tc->max_const_buffers =
4274       screen->get_shader_param(screen, PIPE_SHADER_FRAGMENT,
4275                                PIPE_SHADER_CAP_MAX_CONST_BUFFERS);
4276    tc->max_shader_buffers =
4277       screen->get_shader_param(screen, PIPE_SHADER_FRAGMENT,
4278                                PIPE_SHADER_CAP_MAX_SHADER_BUFFERS);
4279    tc->max_images =
4280       screen->get_shader_param(screen, PIPE_SHADER_FRAGMENT,
4281                                PIPE_SHADER_CAP_MAX_SHADER_IMAGES);
4282    tc->max_samplers =
4283       screen->get_shader_param(screen, PIPE_SHADER_FRAGMENT,
4284                                PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS);
4285 
4286    tc->base.set_context_param = tc_set_context_param; /* always set this */
4287 
4288 #define CTX_INIT(_member) \
4289    tc->base._member = tc->pipe->_member ? tc_##_member : NULL
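/* For example, CTX_INIT(flush) expands to
 *    tc->base.flush = tc->pipe->flush ? tc_flush : NULL;
 * i.e. each entry point is forwarded only if the wrapped driver implements it.
 */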
4290 
4291    CTX_INIT(flush);
4292    CTX_INIT(draw_vbo);
4293    CTX_INIT(draw_vertex_state);
4294    CTX_INIT(launch_grid);
4295    CTX_INIT(resource_copy_region);
4296    CTX_INIT(blit);
4297    CTX_INIT(clear);
4298    CTX_INIT(clear_render_target);
4299    CTX_INIT(clear_depth_stencil);
4300    CTX_INIT(clear_buffer);
4301    CTX_INIT(clear_texture);
4302    CTX_INIT(flush_resource);
4303    CTX_INIT(generate_mipmap);
4304    CTX_INIT(render_condition);
4305    CTX_INIT(create_query);
4306    CTX_INIT(create_batch_query);
4307    CTX_INIT(destroy_query);
4308    CTX_INIT(begin_query);
4309    CTX_INIT(end_query);
4310    CTX_INIT(get_query_result);
4311    CTX_INIT(get_query_result_resource);
4312    CTX_INIT(set_active_query_state);
4313    CTX_INIT(create_blend_state);
4314    CTX_INIT(bind_blend_state);
4315    CTX_INIT(delete_blend_state);
4316    CTX_INIT(create_sampler_state);
4317    CTX_INIT(bind_sampler_states);
4318    CTX_INIT(delete_sampler_state);
4319    CTX_INIT(create_rasterizer_state);
4320    CTX_INIT(bind_rasterizer_state);
4321    CTX_INIT(delete_rasterizer_state);
4322    CTX_INIT(create_depth_stencil_alpha_state);
4323    CTX_INIT(bind_depth_stencil_alpha_state);
4324    CTX_INIT(delete_depth_stencil_alpha_state);
4325    CTX_INIT(create_fs_state);
4326    CTX_INIT(bind_fs_state);
4327    CTX_INIT(delete_fs_state);
4328    CTX_INIT(create_vs_state);
4329    CTX_INIT(bind_vs_state);
4330    CTX_INIT(delete_vs_state);
4331    CTX_INIT(create_gs_state);
4332    CTX_INIT(bind_gs_state);
4333    CTX_INIT(delete_gs_state);
4334    CTX_INIT(create_tcs_state);
4335    CTX_INIT(bind_tcs_state);
4336    CTX_INIT(delete_tcs_state);
4337    CTX_INIT(create_tes_state);
4338    CTX_INIT(bind_tes_state);
4339    CTX_INIT(delete_tes_state);
4340    CTX_INIT(create_compute_state);
4341    CTX_INIT(bind_compute_state);
4342    CTX_INIT(delete_compute_state);
4343    CTX_INIT(create_vertex_elements_state);
4344    CTX_INIT(bind_vertex_elements_state);
4345    CTX_INIT(delete_vertex_elements_state);
4346    CTX_INIT(set_blend_color);
4347    CTX_INIT(set_stencil_ref);
4348    CTX_INIT(set_sample_mask);
4349    CTX_INIT(set_min_samples);
4350    CTX_INIT(set_clip_state);
4351    CTX_INIT(set_constant_buffer);
4352    CTX_INIT(set_inlinable_constants);
4353    CTX_INIT(set_framebuffer_state);
4354    CTX_INIT(set_polygon_stipple);
4355    CTX_INIT(set_sample_locations);
4356    CTX_INIT(set_scissor_states);
4357    CTX_INIT(set_viewport_states);
4358    CTX_INIT(set_window_rectangles);
4359    CTX_INIT(set_sampler_views);
4360    CTX_INIT(set_tess_state);
4361    CTX_INIT(set_patch_vertices);
4362    CTX_INIT(set_shader_buffers);
4363    CTX_INIT(set_shader_images);
4364    CTX_INIT(set_vertex_buffers);
4365    CTX_INIT(create_stream_output_target);
4366    CTX_INIT(stream_output_target_destroy);
4367    CTX_INIT(set_stream_output_targets);
4368    CTX_INIT(create_sampler_view);
4369    CTX_INIT(sampler_view_destroy);
4370    CTX_INIT(create_surface);
4371    CTX_INIT(surface_destroy);
4372    CTX_INIT(buffer_map);
4373    CTX_INIT(texture_map);
4374    CTX_INIT(transfer_flush_region);
4375    CTX_INIT(buffer_unmap);
4376    CTX_INIT(texture_unmap);
4377    CTX_INIT(buffer_subdata);
4378    CTX_INIT(texture_subdata);
4379    CTX_INIT(texture_barrier);
4380    CTX_INIT(memory_barrier);
4381    CTX_INIT(resource_commit);
4382    CTX_INIT(create_video_codec);
4383    CTX_INIT(create_video_buffer);
4384    CTX_INIT(set_compute_resources);
4385    CTX_INIT(set_global_binding);
4386    CTX_INIT(get_sample_position);
4387    CTX_INIT(invalidate_resource);
4388    CTX_INIT(get_device_reset_status);
4389    CTX_INIT(set_device_reset_callback);
4390    CTX_INIT(dump_debug_state);
4391    CTX_INIT(set_log_context);
4392    CTX_INIT(emit_string_marker);
4393    CTX_INIT(set_debug_callback);
4394    CTX_INIT(create_fence_fd);
4395    CTX_INIT(fence_server_sync);
4396    CTX_INIT(fence_server_signal);
4397    CTX_INIT(get_timestamp);
4398    CTX_INIT(create_texture_handle);
4399    CTX_INIT(delete_texture_handle);
4400    CTX_INIT(make_texture_handle_resident);
4401    CTX_INIT(create_image_handle);
4402    CTX_INIT(delete_image_handle);
4403    CTX_INIT(make_image_handle_resident);
4404    CTX_INIT(set_frontend_noop);
4405    CTX_INIT(init_intel_perf_query_info);
4406    CTX_INIT(get_intel_perf_query_info);
4407    CTX_INIT(get_intel_perf_query_counter_info);
4408    CTX_INIT(new_intel_perf_query_obj);
4409    CTX_INIT(begin_intel_perf_query);
4410    CTX_INIT(end_intel_perf_query);
4411    CTX_INIT(delete_intel_perf_query);
4412    CTX_INIT(wait_intel_perf_query);
4413    CTX_INIT(is_intel_perf_query_ready);
4414    CTX_INIT(get_intel_perf_query_data);
4415 #undef CTX_INIT
4416 
4417    if (out)
4418       *out = tc;
4419 
4420    tc_begin_next_buffer_list(tc);
4421    return &tc->base;
4422 
4423 fail:
4424    tc_destroy(&tc->base);
4425    return NULL;
4426 }
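/* Illustrative sketch (not part of the original file): how a driver's
 * context-creation hook might wrap its real context.  The "mydrv_*" names and
 * the replace-buffer-storage callback are hypothetical; the parent transfer
 * pool is the one the screen created with slab_create_parent().
 *
 *    struct pipe_context *
 *    mydrv_context_create(struct pipe_screen *screen, void *priv, unsigned flags)
 *    {
 *       struct pipe_context *pipe = mydrv_create_raw_context(screen, priv, flags);
 *       struct threaded_context *tc;
 *
 *       // May return "pipe" unchanged if threading is disabled.
 *       return threaded_context_create(pipe,
 *                                      &mydrv_screen(screen)->pool_transfers,
 *                                      mydrv_replace_buffer_storage,
 *                                      NULL,  // default options
 *                                      &tc);
 *    }
 */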
4427 
4428 void
4429 threaded_context_init_bytes_mapped_limit(struct threaded_context *tc, unsigned divisor)
4430 {
4431    uint64_t total_ram;
4432    if (os_get_total_physical_memory(&total_ram)) {
4433       tc->bytes_mapped_limit = total_ram / divisor;
4434       if (sizeof(void*) == 4)
4435          tc->bytes_mapped_limit = MIN2(tc->bytes_mapped_limit, 512*1024*1024UL);
4436    }
4437 }
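/* Illustrative sketch (not part of the original file): a driver would
 * typically call this once right after wrapping its context, e.g. with a
 * divisor of 4 to cap outstanding mapped-buffer memory at a quarter of
 * system RAM (further clamped to 512 MB on 32-bit builds by the code above).
 *
 *    if (tc)
 *       threaded_context_init_bytes_mapped_limit(tc, 4);
 */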
4438