/**************************************************************************
 *
 * Copyright 2012 Marek Olšák <maraeo@gmail.com>
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS AND/OR THEIR SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

#include "util/u_cpu_detect.h"
#include "util/u_helpers.h"
#include "util/u_inlines.h"
#include "util/u_upload_mgr.h"
#include "util/u_thread.h"
#include "util/os_time.h"
#include <inttypes.h>

/**
 * This function is used to copy an array of pipe_vertex_buffer structures,
 * while properly referencing the pipe_vertex_buffer::buffer member.
 *
 * enabled_buffers is updated such that the bits corresponding to the indices
 * of disabled buffers are set to 0 and the enabled ones are set to 1.
 *
 * \sa util_copy_framebuffer_state
 */
void util_set_vertex_buffers_mask(struct pipe_vertex_buffer *dst,
                                  uint32_t *enabled_buffers,
                                  const struct pipe_vertex_buffer *src,
                                  unsigned start_slot, unsigned count,
                                  unsigned unbind_num_trailing_slots,
                                  bool take_ownership)
{
   unsigned i;
   uint32_t bitmask = 0;

   dst += start_slot;

   *enabled_buffers &= ~u_bit_consecutive(start_slot, count);

   if (src) {
      for (i = 0; i < count; i++) {
         if (src[i].buffer.resource)
            bitmask |= 1 << i;

         pipe_vertex_buffer_unreference(&dst[i]);

         /* When take_ownership is set, the memcpy below transfers the
          * caller's reference; user buffers carry no resource reference.
          */
         if (!take_ownership && !src[i].is_user_buffer)
            pipe_resource_reference(&dst[i].buffer.resource, src[i].buffer.resource);
      }

      /* Copy over the other members of pipe_vertex_buffer. */
      memcpy(dst, src, count * sizeof(struct pipe_vertex_buffer));

      *enabled_buffers |= bitmask << start_slot;
   }
   else {
      /* Unreference the buffers. */
      for (i = 0; i < count; i++)
         pipe_vertex_buffer_unreference(&dst[i]);
   }

   for (i = 0; i < unbind_num_trailing_slots; i++)
      pipe_vertex_buffer_unreference(&dst[count + i]);
}
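
/* Usage sketch (not part of this file's API): a driver's set_vertex_buffers
 * hook can be implemented directly on top of this helper. "my_context",
 * "vertex_buffers", and "enabled_vb_mask" are hypothetical driver state.
 *
 *    static void
 *    my_set_vertex_buffers(struct pipe_context *pipe, unsigned start_slot,
 *                          unsigned count, unsigned unbind_num_trailing_slots,
 *                          bool take_ownership,
 *                          const struct pipe_vertex_buffer *buffers)
 *    {
 *       struct my_context *ctx = (struct my_context *)pipe;
 *
 *       util_set_vertex_buffers_mask(ctx->vertex_buffers,
 *                                    &ctx->enabled_vb_mask, buffers,
 *                                    start_slot, count,
 *                                    unbind_num_trailing_slots,
 *                                    take_ownership);
 *    }
 */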

/**
 * Same as util_set_vertex_buffers_mask, except that it takes and updates
 * the number of bound buffers instead of an enabled-buffers bitmask.
 */
void util_set_vertex_buffers_count(struct pipe_vertex_buffer *dst,
                                   unsigned *dst_count,
                                   const struct pipe_vertex_buffer *src,
                                   unsigned start_slot, unsigned count,
                                   unsigned unbind_num_trailing_slots,
                                   bool take_ownership)
{
   unsigned i;
   uint32_t enabled_buffers = 0;

   /* Rebuild the bitmask from the currently bound buffers. */
   for (i = 0; i < *dst_count; i++) {
      if (dst[i].buffer.resource)
         enabled_buffers |= (1ull << i);
   }

   util_set_vertex_buffers_mask(dst, &enabled_buffers, src, start_slot,
                                count, unbind_num_trailing_slots,
                                take_ownership);

   *dst_count = util_last_bit(enabled_buffers);
}

/**
 * This function is used to copy an array of pipe_shader_buffer structures,
 * while properly referencing the pipe_shader_buffer::buffer member.
 *
 * \sa util_set_vertex_buffers_mask
 */
void util_set_shader_buffers_mask(struct pipe_shader_buffer *dst,
                                  uint32_t *enabled_buffers,
                                  const struct pipe_shader_buffer *src,
                                  unsigned start_slot, unsigned count)
{
   unsigned i;

   dst += start_slot;

   if (src) {
      for (i = 0; i < count; i++) {
         pipe_resource_reference(&dst[i].buffer, src[i].buffer);

         if (src[i].buffer)
            *enabled_buffers |= (1ull << (start_slot + i));
         else
            *enabled_buffers &= ~(1ull << (start_slot + i));
      }

      /* Copy over the other members of pipe_shader_buffer. */
      memcpy(dst, src, count * sizeof(struct pipe_shader_buffer));
   }
   else {
      /* Unreference the buffers. */
      for (i = 0; i < count; i++)
         pipe_resource_reference(&dst[i].buffer, NULL);

      *enabled_buffers &= ~(((1ull << count) - 1) << start_slot);
   }
}
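
/* Usage sketch: a per-stage set_shader_buffers hook. "my_context", "ssbo",
 * and "ssbo_mask" are hypothetical driver state; writable_bitmask handling
 * is omitted.
 *
 *    static void
 *    my_set_shader_buffers(struct pipe_context *pipe,
 *                          enum pipe_shader_type shader,
 *                          unsigned start_slot, unsigned count,
 *                          const struct pipe_shader_buffer *buffers,
 *                          unsigned writable_bitmask)
 *    {
 *       struct my_context *ctx = (struct my_context *)pipe;
 *
 *       util_set_shader_buffers_mask(ctx->ssbo[shader],
 *                                    &ctx->ssbo_mask[shader],
 *                                    buffers, start_slot, count);
 *    }
 */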

/**
 * Upload the used range of a user index buffer into a hardware buffer,
 * returning the new buffer and an offset biased so that existing draw
 * "start" values keep working.
 */
bool
util_upload_index_buffer(struct pipe_context *pipe,
                         const struct pipe_draw_info *info,
                         const struct pipe_draw_start_count_bias *draw,
                         struct pipe_resource **out_buffer,
                         unsigned *out_offset, unsigned alignment)
{
   unsigned start_offset = draw->start * info->index_size;

   u_upload_data(pipe->stream_uploader, start_offset,
                 draw->count * info->index_size, alignment,
                 (char*)info->index.user + start_offset,
                 out_offset, out_buffer);
   u_upload_unmap(pipe->stream_uploader);

   /* Bias the offset so that draw->start can be used unchanged with the
    * uploaded buffer.
    */
   *out_offset -= start_offset;
   return *out_buffer != NULL;
}
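
/* Usage sketch (hypothetical draw path): upload user indices once per draw
 * and release the temporary buffer afterwards. The alignment of 4 is an
 * arbitrary example value.
 *
 *    if (info->index_size && info->has_user_indices) {
 *       struct pipe_resource *ibuf = NULL;
 *       unsigned offset;
 *
 *       if (!util_upload_index_buffer(pipe, info, draw, &ibuf, &offset, 4))
 *          return; // out of memory
 *
 *       // Draw using ibuf/offset; draw->start stays valid because the
 *       // returned offset is already biased by this helper.
 *
 *       pipe_resource_reference(&ibuf, NULL);
 *    }
 */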

/**
 * Lower each UINT64 vertex element to 1 or 2 UINT32 vertex elements.
 * 3 and 4 component formats are expanded into 2 slots.
 *
 * @param velems        Original vertex elements, will be updated to contain
 *                      the lowered vertex elements.
 * @param velem_count   Original count, will be updated to contain the count
 *                      after lowering.
 * @param tmp           Temporary array of PIPE_MAX_ATTRIBS vertex elements.
 */
void
util_lower_uint64_vertex_elements(const struct pipe_vertex_element **velems,
                                  unsigned *velem_count,
                                  struct pipe_vertex_element tmp[PIPE_MAX_ATTRIBS])
{
   const struct pipe_vertex_element *input = *velems;
   unsigned count = *velem_count;
   bool has_64bit = false;

   for (unsigned i = 0; i < count; i++) {
      has_64bit |= input[i].src_format >= PIPE_FORMAT_R64_UINT &&
                   input[i].src_format <= PIPE_FORMAT_R64G64B64A64_UINT;
   }

   /* Return the original vertex elements if there is nothing to do. */
   if (!has_64bit)
      return;

   /* Lower 64_UINT to 32_UINT. */
   unsigned new_count = 0;

   for (unsigned i = 0; i < count; i++) {
      enum pipe_format format = input[i].src_format;

      /* If the shader input is dvec2 or smaller, reduce the number of
       * components to 2 at most. If the shader input is dvec3 or larger,
       * expand the number of components to 3 at least. If the 3rd component
       * is out of bounds, the hardware shouldn't skip loading the first
       * 2 components.
       */
      if (format >= PIPE_FORMAT_R64_UINT &&
          format <= PIPE_FORMAT_R64G64B64A64_UINT) {
         if (input[i].dual_slot)
            format = MAX2(format, PIPE_FORMAT_R64G64B64_UINT);
         else
            format = MIN2(format, PIPE_FORMAT_R64G64_UINT);
      }

      switch (format) {
      case PIPE_FORMAT_R64_UINT:
         tmp[new_count] = input[i];
         tmp[new_count].src_format = PIPE_FORMAT_R32G32_UINT;
         new_count++;
         break;

      case PIPE_FORMAT_R64G64_UINT:
         tmp[new_count] = input[i];
         tmp[new_count].src_format = PIPE_FORMAT_R32G32B32A32_UINT;
         new_count++;
         break;

      case PIPE_FORMAT_R64G64B64_UINT:
      case PIPE_FORMAT_R64G64B64A64_UINT:
         assert(new_count + 2 <= PIPE_MAX_ATTRIBS);
         tmp[new_count] = tmp[new_count + 1] = input[i];
         tmp[new_count].src_format = PIPE_FORMAT_R32G32B32A32_UINT;
         tmp[new_count + 1].src_format =
            format == PIPE_FORMAT_R64G64B64_UINT ?
                  PIPE_FORMAT_R32G32_UINT :
                  PIPE_FORMAT_R32G32B32A32_UINT;
         tmp[new_count + 1].src_offset += 16;
         new_count += 2;
         break;

      default:
         tmp[new_count++] = input[i];
         break;
      }
   }

   *velem_count = new_count;
   *velems = tmp;
}
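
/* Usage sketch: call this at the top of a hypothetical create_vertex_elements
 * hook; "tmp" must outlive every use of the returned pointer.
 *
 *    struct pipe_vertex_element tmp[PIPE_MAX_ATTRIBS];
 *
 *    util_lower_uint64_vertex_elements(&velems, &count, tmp);
 *    // From here on, velems/count describe only non-64-bit formats.
 */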

/* This is a helper for hardware bring-up. Don't remove. */
struct pipe_query *
util_begin_pipestat_query(struct pipe_context *ctx)
{
   struct pipe_query *q =
      ctx->create_query(ctx, PIPE_QUERY_PIPELINE_STATISTICS, 0);
   if (!q)
      return NULL;

   ctx->begin_query(ctx, q);
   return q;
}

/* This is a helper for hardware bring-up. Don't remove. */
void
util_end_pipestat_query(struct pipe_context *ctx, struct pipe_query *q,
                        FILE *f)
{
   static unsigned counter;
   struct pipe_query_data_pipeline_statistics stats;

   ctx->end_query(ctx, q);
   ctx->get_query_result(ctx, q, true, (void*)&stats);
   ctx->destroy_query(ctx, q);

   fprintf(f,
           "Draw call %u:\n"
           "    ia_vertices    = %"PRIu64"\n"
           "    ia_primitives  = %"PRIu64"\n"
           "    vs_invocations = %"PRIu64"\n"
           "    gs_invocations = %"PRIu64"\n"
           "    gs_primitives  = %"PRIu64"\n"
           "    c_invocations  = %"PRIu64"\n"
           "    c_primitives   = %"PRIu64"\n"
           "    ps_invocations = %"PRIu64"\n"
           "    hs_invocations = %"PRIu64"\n"
           "    ds_invocations = %"PRIu64"\n"
           "    cs_invocations = %"PRIu64"\n",
           (unsigned)p_atomic_inc_return(&counter),
           stats.ia_vertices,
           stats.ia_primitives,
           stats.vs_invocations,
           stats.gs_invocations,
           stats.gs_primitives,
           stats.c_invocations,
           stats.c_primitives,
           stats.ps_invocations,
           stats.hs_invocations,
           stats.ds_invocations,
           stats.cs_invocations);
}
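
/* Usage sketch for bring-up: wrap a suspicious draw call and dump the
 * pipeline statistics. The draw_vbo call shown is illustrative.
 *
 *    struct pipe_query *q = util_begin_pipestat_query(ctx);
 *
 *    ctx->draw_vbo(ctx, info, drawid_offset, indirect, draws, num_draws);
 *
 *    if (q)
 *       util_end_pipestat_query(ctx, q, stderr);
 */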

/* This is a helper for profiling. Don't remove. */
struct pipe_query *
util_begin_time_query(struct pipe_context *ctx)
{
   struct pipe_query *q =
      ctx->create_query(ctx, PIPE_QUERY_TIME_ELAPSED, 0);
   if (!q)
      return NULL;

   ctx->begin_query(ctx, q);
   return q;
}

/* This is a helper for profiling. Don't remove. */
void
util_end_time_query(struct pipe_context *ctx, struct pipe_query *q, FILE *f,
                    const char *name)
{
   union pipe_query_result result;

   ctx->end_query(ctx, q);
   ctx->get_query_result(ctx, q, true, &result);
   ctx->destroy_query(ctx, q);

   /* The result is in nanoseconds; print microseconds with one decimal. */
   fprintf(f, "Time elapsed: %s - %"PRIu64".%u us\n", name,
           result.u64 / 1000, (unsigned)(result.u64 % 1000) / 100);
}
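
/* Usage sketch for profiling a span of GPU work; "my pass" is an arbitrary
 * label.
 *
 *    struct pipe_query *q = util_begin_time_query(ctx);
 *
 *    // ... emit the work to measure ...
 *
 *    if (q)
 *       util_end_time_query(ctx, q, stderr, "my pass");
 */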

/* This is a helper for hardware bring-up. Don't remove. */
void
util_wait_for_idle(struct pipe_context *ctx)
{
   struct pipe_fence_handle *fence = NULL;

   ctx->flush(ctx, &fence, 0);
   ctx->screen->fence_finish(ctx->screen, NULL, fence, PIPE_TIMEOUT_INFINITE);
}

void
util_throttle_init(struct util_throttle *t, uint64_t max_mem_usage)
{
   t->max_mem_usage = max_mem_usage;
}

void
util_throttle_deinit(struct pipe_screen *screen, struct util_throttle *t)
{
   for (unsigned i = 0; i < ARRAY_SIZE(t->ring); i++)
      screen->fence_reference(screen, &t->ring[i].fence, NULL);
}

static uint64_t
util_get_throttle_total_memory_usage(struct util_throttle *t)
{
   uint64_t total_usage = 0;

   for (unsigned i = 0; i < ARRAY_SIZE(t->ring); i++)
      total_usage += t->ring[i].mem_usage;
   return total_usage;
}

static void util_dump_throttle_ring(struct util_throttle *t)
{
   printf("Throttle:\n");
   for (unsigned i = 0; i < ARRAY_SIZE(t->ring); i++) {
      printf("  ring[%u]: fence = %s, mem_usage = %"PRIu64"%s%s\n",
             i, t->ring[i].fence ? "yes" : " no",
             t->ring[i].mem_usage,
             t->flush_index == i ? " [flush]" : "",
             t->wait_index == i ? " [wait]" : "");
   }
}

/**
 * Notify util_throttle that the next operation allocates memory.
 * util_throttle tracks memory usage and waits for fences until its tracked
 * memory usage decreases.
 *
 * Example:
 *   util_throttle_memory_usage(..., w*h*d*Bpp);
 *   TexSubImage(..., w, h, d, ...);
 *
 * This means that TexSubImage can't allocate more memory than the maximum
 * limit set at initialization.
 */
void
util_throttle_memory_usage(struct pipe_context *pipe,
                           struct util_throttle *t, uint64_t memory_size)
{
   (void)util_dump_throttle_ring; /* silence warning */

   if (!t->max_mem_usage)
      return;

   struct pipe_screen *screen = pipe->screen;
   struct pipe_fence_handle **fence = NULL;
   unsigned ring_size = ARRAY_SIZE(t->ring);
   uint64_t total = util_get_throttle_total_memory_usage(t);

   /* If there is not enough memory, walk the list of fences and find
    * the latest one that we need to wait for.
    */
   while (t->wait_index != t->flush_index &&
          total && total + memory_size > t->max_mem_usage) {
      assert(t->ring[t->wait_index].fence);

      /* Release an older fence if we need to wait for a newer one. */
      if (fence)
         screen->fence_reference(screen, fence, NULL);

      fence = &t->ring[t->wait_index].fence;
      t->ring[t->wait_index].mem_usage = 0;
      t->wait_index = (t->wait_index + 1) % ring_size;

      total = util_get_throttle_total_memory_usage(t);
   }

   /* Wait for the fence to decrease memory usage. */
   if (fence) {
      screen->fence_finish(screen, pipe, *fence, PIPE_TIMEOUT_INFINITE);
      screen->fence_reference(screen, fence, NULL);
   }

   /* Flush and get a fence if we've exhausted memory usage for the current
    * slot.
    */
   if (t->ring[t->flush_index].mem_usage &&
       t->ring[t->flush_index].mem_usage + memory_size >
       t->max_mem_usage / (ring_size / 2)) {
      struct pipe_fence_handle **fence =
         &t->ring[t->flush_index].fence;

      /* Expect that the current flush slot doesn't have a fence yet. */
      assert(!*fence);

      pipe->flush(pipe, fence, PIPE_FLUSH_ASYNC);
      t->flush_index = (t->flush_index + 1) % ring_size;

      /* Vacate the next slot if it's occupied. This should be rare. */
      if (t->flush_index == t->wait_index) {
         struct pipe_fence_handle **fence =
            &t->ring[t->wait_index].fence;

         t->ring[t->wait_index].mem_usage = 0;
         t->wait_index = (t->wait_index + 1) % ring_size;

         assert(*fence);
         screen->fence_finish(screen, pipe, *fence, PIPE_TIMEOUT_INFINITE);
         screen->fence_reference(screen, fence, NULL);
      }

      assert(!t->ring[t->flush_index].mem_usage);
      assert(!t->ring[t->flush_index].fence);
   }

   t->ring[t->flush_index].mem_usage += memory_size;
}
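
/* Usage sketch (hypothetical texture-upload path; the 256 MB limit and the
 * helper names are examples, not part of this API):
 *
 *    util_throttle_init(&ctx->throttle, 256 * 1024 * 1024);  // context create
 *    ...
 *    util_throttle_memory_usage(pipe, &ctx->throttle,
 *                               (uint64_t)width * height * depth * Bpp);
 *    upload_texture_data(...);  // the allocation accounted for above
 *    ...
 *    util_throttle_deinit(pipe->screen, &ctx->throttle);  // context destroy
 */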

bool
util_lower_clearsize_to_dword(const void *clearValue, int *clearValueSize,
                              uint32_t *clamped)
{
   /* Reduce a large clear value size if possible. */
   if (*clearValueSize > 4) {
      bool clear_dword_duplicated = true;
      const uint32_t *clear_value = clearValue;

      /* See if we can lower large fills to dword fills. */
      for (unsigned i = 1; i < *clearValueSize / 4; i++) {
         if (clear_value[0] != clear_value[i]) {
            clear_dword_duplicated = false;
            break;
         }
      }
      if (clear_dword_duplicated) {
         *clamped = *clear_value;
         *clearValueSize = 4;
      }
      return clear_dword_duplicated;
   }

   /* Expand a small clear value size. */
   if (*clearValueSize <= 2) {
      if (*clearValueSize == 1) {
         /* Replicate the byte across all 4 bytes of the dword. */
         *clamped = *(uint8_t *)clearValue;
         *clamped |=
            (*clamped << 8) | (*clamped << 16) | (*clamped << 24);
      } else {
         /* Replicate the 16-bit value across both halves of the dword. */
         *clamped = *(uint16_t *)clearValue;
         *clamped |= *clamped << 16;
      }
      *clearValueSize = 4;
      return true;
   }
   return false;
}
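
/* Usage sketch (hypothetical clear_buffer path): try to shrink or widen the
 * clear value to a single dword before falling back to the original value.
 *
 *    uint32_t dword;
 *
 *    if (util_lower_clearsize_to_dword(clear_value, &clear_value_size, &dword))
 *       clear_value = &dword;  // clear_value_size is now 4
 */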

void
util_init_pipe_vertex_state(struct pipe_screen *screen,
                            struct pipe_vertex_buffer *buffer,
                            const struct pipe_vertex_element *elements,
                            unsigned num_elements,
                            struct pipe_resource *indexbuf,
                            uint32_t full_velem_mask,
                            struct pipe_vertex_state *state)
{
   assert(num_elements == util_bitcount(full_velem_mask));

   pipe_reference_init(&state->reference, 1);
   state->screen = screen;

   pipe_vertex_buffer_reference(&state->input.vbuffer, buffer);
   pipe_resource_reference(&state->input.indexbuf, indexbuf);
   state->input.num_elements = num_elements;
   for (unsigned i = 0; i < num_elements; i++)
      state->input.elements[i] = elements[i];
   state->input.full_velem_mask = full_velem_mask;
}
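
/* Usage sketch: a screen's create_vertex_state hook, where the hypothetical
 * "struct my_vertex_state" embeds pipe_vertex_state as member "b".
 *
 *    static struct pipe_vertex_state *
 *    my_create_vertex_state(struct pipe_screen *screen,
 *                           struct pipe_vertex_buffer *buffer,
 *                           const struct pipe_vertex_element *elements,
 *                           unsigned num_elements,
 *                           struct pipe_resource *indexbuf,
 *                           uint32_t full_velem_mask)
 *    {
 *       struct my_vertex_state *state = CALLOC_STRUCT(my_vertex_state);
 *
 *       if (!state)
 *          return NULL;
 *
 *       util_init_pipe_vertex_state(screen, buffer, elements, num_elements,
 *                                   indexbuf, full_velem_mask, &state->b);
 *       return &state->b;
 *    }
 */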