1 /*
2  * Mesa 3-D graphics library
3  *
4  * Copyright 2003 VMware, Inc.
5  * Copyright 2009 VMware, Inc.
6  * All Rights Reserved.
7  * Copyright (C) 2016 Advanced Micro Devices, Inc.
8  *
9  * Permission is hereby granted, free of charge, to any person obtaining a
10  * copy of this software and associated documentation files (the "Software"),
11  * to deal in the Software without restriction, including without limitation
12  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
13  * and/or sell copies of the Software, and to permit persons to whom the
14  * Software is furnished to do so, subject to the following conditions:
15  *
16  * The above copyright notice and this permission notice (including the next
17  * paragraph) shall be included in all copies or substantial portions of the
18  * Software.
19  *
20  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
23  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
24  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
25  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
26  * USE OR OTHER DEALINGS IN THE SOFTWARE.
27  */
28 
29 #include "main/glheader.h"
30 #include "main/context.h"
31 #include "main/varray.h"
32 #include "main/macros.h"
33 #include "main/sse_minmax.h"
34 #include "x86/common_x86_asm.h"
35 #include "util/hash_table.h"
36 #include "util/u_memory.h"
37 #include "pipe/p_state.h"
38 
39 struct minmax_cache_key {
40    GLintptr offset;
41    GLuint count;
42    unsigned index_size;
43 };
44 
45 
46 struct minmax_cache_entry {
47    struct minmax_cache_key key;
48    GLuint min;
49    GLuint max;
50 };
51 
52 
53 static uint32_t
vbo_minmax_cache_hash(const struct minmax_cache_key * key)54 vbo_minmax_cache_hash(const struct minmax_cache_key *key)
55 {
56    return _mesa_hash_data(key, sizeof(*key));
57 }
58 
59 
60 static bool
vbo_minmax_cache_key_equal(const struct minmax_cache_key * a,const struct minmax_cache_key * b)61 vbo_minmax_cache_key_equal(const struct minmax_cache_key *a,
62                            const struct minmax_cache_key *b)
63 {
64    return (a->offset == b->offset) && (a->count == b->count) &&
65           (a->index_size == b->index_size);
66 }
67 
68 
69 static void
vbo_minmax_cache_delete_entry(struct hash_entry * entry)70 vbo_minmax_cache_delete_entry(struct hash_entry *entry)
71 {
72    free(entry->data);
73 }
74 
75 
76 static GLboolean
vbo_use_minmax_cache(struct gl_buffer_object * bufferObj)77 vbo_use_minmax_cache(struct gl_buffer_object *bufferObj)
78 {
79    if (bufferObj->UsageHistory & (USAGE_TEXTURE_BUFFER |
80                                   USAGE_ATOMIC_COUNTER_BUFFER |
81                                   USAGE_SHADER_STORAGE_BUFFER |
82                                   USAGE_TRANSFORM_FEEDBACK_BUFFER |
83                                   USAGE_PIXEL_PACK_BUFFER |
84                                   USAGE_DISABLE_MINMAX_CACHE))
85       return GL_FALSE;
86 
87    if ((bufferObj->Mappings[MAP_USER].AccessFlags &
88         (GL_MAP_PERSISTENT_BIT | GL_MAP_WRITE_BIT)) ==
89        (GL_MAP_PERSISTENT_BIT | GL_MAP_WRITE_BIT))
90       return GL_FALSE;
91 
92    return GL_TRUE;
93 }
94 
95 
96 void
vbo_delete_minmax_cache(struct gl_buffer_object * bufferObj)97 vbo_delete_minmax_cache(struct gl_buffer_object *bufferObj)
98 {
99    _mesa_hash_table_destroy(bufferObj->MinMaxCache, vbo_minmax_cache_delete_entry);
100    bufferObj->MinMaxCache = NULL;
101 }
102 
103 
104 static GLboolean
vbo_get_minmax_cached(struct gl_buffer_object * bufferObj,unsigned index_size,GLintptr offset,GLuint count,GLuint * min_index,GLuint * max_index)105 vbo_get_minmax_cached(struct gl_buffer_object *bufferObj,
106                       unsigned index_size, GLintptr offset, GLuint count,
107                       GLuint *min_index, GLuint *max_index)
108 {
109    GLboolean found = GL_FALSE;
110    struct minmax_cache_key key;
111    uint32_t hash;
112    struct hash_entry *result;
113 
114    if (!bufferObj->MinMaxCache)
115       return GL_FALSE;
116    if (!vbo_use_minmax_cache(bufferObj))
117       return GL_FALSE;
118 
119    simple_mtx_lock(&bufferObj->MinMaxCacheMutex);
120 
121    if (bufferObj->MinMaxCacheDirty) {
122       /* Disable the cache permanently for this BO if the number of hits
123        * is asymptotically less than the number of misses. This happens when
124        * applications use the BO for streaming.
125        *
126        * However, some initial optimism allows applications that interleave
127        * draw calls with glBufferSubData during warmup.
128        */
129       unsigned optimism = bufferObj->Size;
130       if (bufferObj->MinMaxCacheMissIndices > optimism &&
131           bufferObj->MinMaxCacheHitIndices < bufferObj->MinMaxCacheMissIndices - optimism) {
132          bufferObj->UsageHistory |= USAGE_DISABLE_MINMAX_CACHE;
133          vbo_delete_minmax_cache(bufferObj);
134          goto out_disable;
135       }
136 
137       _mesa_hash_table_clear(bufferObj->MinMaxCache, vbo_minmax_cache_delete_entry);
138       bufferObj->MinMaxCacheDirty = false;
139       goto out_invalidate;
140    }
141 
142    key.index_size = index_size;
143    key.offset = offset;
144    key.count = count;
145    hash = vbo_minmax_cache_hash(&key);
146    result = _mesa_hash_table_search_pre_hashed(bufferObj->MinMaxCache, hash, &key);
147    if (result) {
148       struct minmax_cache_entry *entry = result->data;
149       *min_index = entry->min;
150       *max_index = entry->max;
151       found = GL_TRUE;
152    }
153 
154 out_invalidate:
155    if (found) {
156       /* The hit counter saturates so that we don't accidently disable the
157        * cache in a long-running program.
158        */
159       unsigned new_hit_count = bufferObj->MinMaxCacheHitIndices + count;
160 
161       if (new_hit_count >= bufferObj->MinMaxCacheHitIndices)
162          bufferObj->MinMaxCacheHitIndices = new_hit_count;
163       else
164          bufferObj->MinMaxCacheHitIndices = ~(unsigned)0;
165    } else {
166       bufferObj->MinMaxCacheMissIndices += count;
167    }
168 
169 out_disable:
170    simple_mtx_unlock(&bufferObj->MinMaxCacheMutex);
171    return found;
172 }
173 
174 
175 static void
vbo_minmax_cache_store(struct gl_context * ctx,struct gl_buffer_object * bufferObj,unsigned index_size,GLintptr offset,GLuint count,GLuint min,GLuint max)176 vbo_minmax_cache_store(struct gl_context *ctx,
177                        struct gl_buffer_object *bufferObj,
178                        unsigned index_size, GLintptr offset, GLuint count,
179                        GLuint min, GLuint max)
180 {
181    struct minmax_cache_entry *entry;
182    struct hash_entry *table_entry;
183    uint32_t hash;
184 
185    if (!vbo_use_minmax_cache(bufferObj))
186       return;
187 
188    simple_mtx_lock(&bufferObj->MinMaxCacheMutex);
189 
190    if (!bufferObj->MinMaxCache) {
191       bufferObj->MinMaxCache =
192          _mesa_hash_table_create(NULL,
193                                  (uint32_t (*)(const void *))vbo_minmax_cache_hash,
194                                  (bool (*)(const void *, const void *))vbo_minmax_cache_key_equal);
195       if (!bufferObj->MinMaxCache)
196          goto out;
197    }
198 
199    entry = MALLOC_STRUCT(minmax_cache_entry);
200    if (!entry)
201       goto out;
202 
203    entry->key.offset = offset;
204    entry->key.count = count;
205    entry->key.index_size = index_size;
206    entry->min = min;
207    entry->max = max;
208    hash = vbo_minmax_cache_hash(&entry->key);
209 
210    table_entry = _mesa_hash_table_search_pre_hashed(bufferObj->MinMaxCache,
211                                                     hash, &entry->key);
212    if (table_entry) {
213       /* It seems like this could happen when two contexts are rendering using
214        * the same buffer object from multiple threads.
215        */
216       _mesa_debug(ctx, "duplicate entry in minmax cache\n");
217       free(entry);
218       goto out;
219    }
220 
221    table_entry = _mesa_hash_table_insert_pre_hashed(bufferObj->MinMaxCache,
222                                                     hash, &entry->key, entry);
223    if (!table_entry)
224       free(entry);
225 
226 out:
227    simple_mtx_unlock(&bufferObj->MinMaxCacheMutex);
228 }
229 
230 
231 void
vbo_get_minmax_index_mapped(unsigned count,unsigned index_size,unsigned restartIndex,bool restart,const void * indices,unsigned * min_index,unsigned * max_index)232 vbo_get_minmax_index_mapped(unsigned count, unsigned index_size,
233                             unsigned restartIndex, bool restart,
234                             const void *indices,
235                             unsigned *min_index, unsigned *max_index)
236 {
237    switch (index_size) {
238    case 4: {
239       const GLuint *ui_indices = (const GLuint *)indices;
240       GLuint max_ui = 0;
241       GLuint min_ui = ~0U;
242       if (restart) {
243          for (unsigned i = 0; i < count; i++) {
244             if (ui_indices[i] != restartIndex) {
245                if (ui_indices[i] > max_ui) max_ui = ui_indices[i];
246                if (ui_indices[i] < min_ui) min_ui = ui_indices[i];
247             }
248          }
249       }
250       else {
251 #if defined(USE_SSE41)
252          if (cpu_has_sse4_1) {
253             _mesa_uint_array_min_max(ui_indices, &min_ui, &max_ui, count);
254          }
255          else
256 #endif
257             for (unsigned i = 0; i < count; i++) {
258                if (ui_indices[i] > max_ui) max_ui = ui_indices[i];
259                if (ui_indices[i] < min_ui) min_ui = ui_indices[i];
260             }
261       }
262       *min_index = min_ui;
263       *max_index = max_ui;
264       break;
265    }
266    case 2: {
267       const GLushort *us_indices = (const GLushort *)indices;
268       GLuint max_us = 0;
269       GLuint min_us = ~0U;
270       if (restart) {
271          for (unsigned i = 0; i < count; i++) {
272             if (us_indices[i] != restartIndex) {
273                if (us_indices[i] > max_us) max_us = us_indices[i];
274                if (us_indices[i] < min_us) min_us = us_indices[i];
275             }
276          }
277       }
278       else {
279          for (unsigned i = 0; i < count; i++) {
280             if (us_indices[i] > max_us) max_us = us_indices[i];
281             if (us_indices[i] < min_us) min_us = us_indices[i];
282          }
283       }
284       *min_index = min_us;
285       *max_index = max_us;
286       break;
287    }
288    case 1: {
289       const GLubyte *ub_indices = (const GLubyte *)indices;
290       GLuint max_ub = 0;
291       GLuint min_ub = ~0U;
292       if (restart) {
293          for (unsigned i = 0; i < count; i++) {
294             if (ub_indices[i] != restartIndex) {
295                if (ub_indices[i] > max_ub) max_ub = ub_indices[i];
296                if (ub_indices[i] < min_ub) min_ub = ub_indices[i];
297             }
298          }
299       }
300       else {
301          for (unsigned i = 0; i < count; i++) {
302             if (ub_indices[i] > max_ub) max_ub = ub_indices[i];
303             if (ub_indices[i] < min_ub) min_ub = ub_indices[i];
304          }
305       }
306       *min_index = min_ub;
307       *max_index = max_ub;
308       break;
309    }
310    default:
311       unreachable("not reached");
312    }
313 }
314 
315 
316 /**
317  * Compute min and max elements by scanning the index buffer for
318  * glDraw[Range]Elements() calls.
319  * If primitive restart is enabled, we need to ignore restart
320  * indexes when computing min/max.
321  */
322 static void
vbo_get_minmax_index(struct gl_context * ctx,struct gl_buffer_object * obj,const void * ptr,GLintptr offset,unsigned count,unsigned index_size,bool primitive_restart,unsigned restart_index,GLuint * min_index,GLuint * max_index)323 vbo_get_minmax_index(struct gl_context *ctx, struct gl_buffer_object *obj,
324                      const void *ptr, GLintptr offset, unsigned count,
325                      unsigned index_size, bool primitive_restart,
326                      unsigned restart_index, GLuint *min_index,
327                      GLuint *max_index)
328 {
329    const char *indices;
330 
331    if (!obj) {
332       indices = (const char *)ptr + offset;
333    } else {
334       GLsizeiptr size = MIN2((GLsizeiptr)count * index_size, obj->Size);
335 
336       if (vbo_get_minmax_cached(obj, index_size, offset, count, min_index,
337                                 max_index))
338          return;
339 
340       indices = _mesa_bufferobj_map_range(ctx, offset, size, GL_MAP_READ_BIT,
341                                           obj, MAP_INTERNAL);
342    }
343 
344    vbo_get_minmax_index_mapped(count, index_size, restart_index,
345                                primitive_restart, indices,
346                                min_index, max_index);
347 
348    if (obj) {
349       vbo_minmax_cache_store(ctx, obj, index_size, offset, count, *min_index,
350                              *max_index);
351       _mesa_bufferobj_unmap(ctx, obj, MAP_INTERNAL);
352    }
353 }
354 
355 /**
356  * Compute min and max elements for nr_prims
357  */
358 void
vbo_get_minmax_indices(struct gl_context * ctx,const struct _mesa_prim * prims,const struct _mesa_index_buffer * ib,GLuint * min_index,GLuint * max_index,GLuint nr_prims,bool primitive_restart,unsigned restart_index)359 vbo_get_minmax_indices(struct gl_context *ctx,
360                        const struct _mesa_prim *prims,
361                        const struct _mesa_index_buffer *ib,
362                        GLuint *min_index,
363                        GLuint *max_index,
364                        GLuint nr_prims,
365                        bool primitive_restart,
366                        unsigned restart_index)
367 {
368    GLuint tmp_min, tmp_max;
369    GLuint i;
370    GLuint count;
371 
372    *min_index = ~0;
373    *max_index = 0;
374 
375    for (i = 0; i < nr_prims; i++) {
376       const struct _mesa_prim *start_prim;
377 
378       start_prim = &prims[i];
379       count = start_prim->count;
380       /* Do combination if possible to reduce map/unmap count */
381       while ((i + 1 < nr_prims) &&
382              (prims[i].start + prims[i].count == prims[i+1].start)) {
383          count += prims[i+1].count;
384          i++;
385       }
386       vbo_get_minmax_index(ctx, ib->obj, ib->ptr,
387                            (ib->obj ? (GLintptr)ib->ptr : 0) +
388                            (start_prim->start << ib->index_size_shift),
389                            count, 1 << ib->index_size_shift,
390                            primitive_restart, restart_index,
391                            &tmp_min, &tmp_max);
392       *min_index = MIN2(*min_index, tmp_min);
393       *max_index = MAX2(*max_index, tmp_max);
394    }
395 }
396 
397 /**
398  * Same as vbo_get_minmax_index, but using gallium draw structures.
399  */
400 bool
vbo_get_minmax_indices_gallium(struct gl_context * ctx,struct pipe_draw_info * info,const struct pipe_draw_start_count_bias * draws,unsigned num_draws)401 vbo_get_minmax_indices_gallium(struct gl_context *ctx,
402                                struct pipe_draw_info *info,
403                                const struct pipe_draw_start_count_bias *draws,
404                                unsigned num_draws)
405 {
406    info->min_index = ~0;
407    info->max_index = 0;
408 
409    for (unsigned i = 0; i < num_draws; i++) {
410       struct pipe_draw_start_count_bias draw = draws[i];
411 
412       /* Do combination if possible to reduce map/unmap count */
413       while ((i + 1 < num_draws) &&
414              (draws[i].start + draws[i].count == draws[i+1].start)) {
415          draw.count += draws[i+1].count;
416          i++;
417       }
418 
419       if (!draw.count)
420          continue;
421 
422       unsigned tmp_min, tmp_max;
423       vbo_get_minmax_index(ctx, info->has_user_indices ?
424                               NULL : info->index.gl_bo,
425                            info->index.user,
426                            (GLintptr)draw.start * info->index_size,
427                            draw.count, info->index_size,
428                            info->primitive_restart, info->restart_index,
429                            &tmp_min, &tmp_max);
430       info->min_index = MIN2(info->min_index, tmp_min);
431       info->max_index = MAX2(info->max_index, tmp_max);
432    }
433 
434    return info->min_index <= info->max_index;
435 }
436