1 /*
2  * Mesa 3-D graphics library
3  *
4  * Copyright 2003 VMware, Inc.
5  * Copyright 2009 VMware, Inc.
6  * All Rights Reserved.
7  * Copyright (C) 2016 Advanced Micro Devices, Inc.
8  *
9  * Permission is hereby granted, free of charge, to any person obtaining a
10  * copy of this software and associated documentation files (the "Software"),
11  * to deal in the Software without restriction, including without limitation
12  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
13  * and/or sell copies of the Software, and to permit persons to whom the
14  * Software is furnished to do so, subject to the following conditions:
15  *
16  * The above copyright notice and this permission notice (including the next
17  * paragraph) shall be included in all copies or substantial portions of the
18  * Software.
19  *
20  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
23  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
24  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
25  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
26  * USE OR OTHER DEALINGS IN THE SOFTWARE.
27  */
28 
29 #include "main/glheader.h"
30 #include "main/context.h"
31 #include "main/varray.h"
32 #include "main/macros.h"
33 #include "main/sse_minmax.h"
34 #include "x86/common_x86_asm.h"
35 #include "util/hash_table.h"
36 #include "util/u_memory.h"
37 #include "pipe/p_state.h"
38 
39 
40 struct minmax_cache_key {
41    GLintptr offset;
42    GLuint count;
43    unsigned index_size;
44 };
45 
46 
47 struct minmax_cache_entry {
48    struct minmax_cache_key key;
49    GLuint min;
50    GLuint max;
51 };
52 
53 
54 static uint32_t
vbo_minmax_cache_hash(const struct minmax_cache_key * key)55 vbo_minmax_cache_hash(const struct minmax_cache_key *key)
56 {
57    return _mesa_hash_data(key, sizeof(*key));
58 }
59 
60 
61 static bool
vbo_minmax_cache_key_equal(const struct minmax_cache_key * a,const struct minmax_cache_key * b)62 vbo_minmax_cache_key_equal(const struct minmax_cache_key *a,
63                            const struct minmax_cache_key *b)
64 {
65    return (a->offset == b->offset) && (a->count == b->count) &&
66           (a->index_size == b->index_size);
67 }
68 
69 
70 static void
vbo_minmax_cache_delete_entry(struct hash_entry * entry)71 vbo_minmax_cache_delete_entry(struct hash_entry *entry)
72 {
73    free(entry->data);
74 }
75 
76 
77 static GLboolean
vbo_use_minmax_cache(struct gl_buffer_object * bufferObj)78 vbo_use_minmax_cache(struct gl_buffer_object *bufferObj)
79 {
80    if (bufferObj->UsageHistory & (USAGE_TEXTURE_BUFFER |
81                                   USAGE_ATOMIC_COUNTER_BUFFER |
82                                   USAGE_SHADER_STORAGE_BUFFER |
83                                   USAGE_TRANSFORM_FEEDBACK_BUFFER |
84                                   USAGE_PIXEL_PACK_BUFFER |
85                                   USAGE_DISABLE_MINMAX_CACHE))
86       return GL_FALSE;
87 
88    if ((bufferObj->Mappings[MAP_USER].AccessFlags &
89         (GL_MAP_PERSISTENT_BIT | GL_MAP_WRITE_BIT)) ==
90        (GL_MAP_PERSISTENT_BIT | GL_MAP_WRITE_BIT))
91       return GL_FALSE;
92 
93    return GL_TRUE;
94 }
95 
96 
97 void
vbo_delete_minmax_cache(struct gl_buffer_object * bufferObj)98 vbo_delete_minmax_cache(struct gl_buffer_object *bufferObj)
99 {
100    _mesa_hash_table_destroy(bufferObj->MinMaxCache, vbo_minmax_cache_delete_entry);
101    bufferObj->MinMaxCache = NULL;
102 }
103 
104 
105 static GLboolean
vbo_get_minmax_cached(struct gl_buffer_object * bufferObj,unsigned index_size,GLintptr offset,GLuint count,GLuint * min_index,GLuint * max_index)106 vbo_get_minmax_cached(struct gl_buffer_object *bufferObj,
107                       unsigned index_size, GLintptr offset, GLuint count,
108                       GLuint *min_index, GLuint *max_index)
109 {
110    GLboolean found = GL_FALSE;
111    struct minmax_cache_key key;
112    uint32_t hash;
113    struct hash_entry *result;
114 
115    if (!bufferObj->MinMaxCache)
116       return GL_FALSE;
117    if (!vbo_use_minmax_cache(bufferObj))
118       return GL_FALSE;
119 
120    simple_mtx_lock(&bufferObj->MinMaxCacheMutex);
121 
122    if (bufferObj->MinMaxCacheDirty) {
123       /* Disable the cache permanently for this BO if the number of hits
124        * is asymptotically less than the number of misses. This happens when
125        * applications use the BO for streaming.
126        *
127        * However, some initial optimism allows applications that interleave
128        * draw calls with glBufferSubData during warmup.
129        */
130       unsigned optimism = bufferObj->Size;
131       if (bufferObj->MinMaxCacheMissIndices > optimism &&
132           bufferObj->MinMaxCacheHitIndices < bufferObj->MinMaxCacheMissIndices - optimism) {
133          bufferObj->UsageHistory |= USAGE_DISABLE_MINMAX_CACHE;
134          vbo_delete_minmax_cache(bufferObj);
135          goto out_disable;
136       }
137 
138       _mesa_hash_table_clear(bufferObj->MinMaxCache, vbo_minmax_cache_delete_entry);
139       bufferObj->MinMaxCacheDirty = false;
140       goto out_invalidate;
141    }
142 
143    key.index_size = index_size;
144    key.offset = offset;
145    key.count = count;
146    hash = vbo_minmax_cache_hash(&key);
147    result = _mesa_hash_table_search_pre_hashed(bufferObj->MinMaxCache, hash, &key);
148    if (result) {
149       struct minmax_cache_entry *entry = result->data;
150       *min_index = entry->min;
151       *max_index = entry->max;
152       found = GL_TRUE;
153    }
154 
155 out_invalidate:
156    if (found) {
157       /* The hit counter saturates so that we don't accidently disable the
158        * cache in a long-running program.
159        */
160       unsigned new_hit_count = bufferObj->MinMaxCacheHitIndices + count;
161 
162       if (new_hit_count >= bufferObj->MinMaxCacheHitIndices)
163          bufferObj->MinMaxCacheHitIndices = new_hit_count;
164       else
165          bufferObj->MinMaxCacheHitIndices = ~(unsigned)0;
166    } else {
167       bufferObj->MinMaxCacheMissIndices += count;
168    }
169 
170 out_disable:
171    simple_mtx_unlock(&bufferObj->MinMaxCacheMutex);
172    return found;
173 }
174 
175 
176 static void
vbo_minmax_cache_store(struct gl_context * ctx,struct gl_buffer_object * bufferObj,unsigned index_size,GLintptr offset,GLuint count,GLuint min,GLuint max)177 vbo_minmax_cache_store(struct gl_context *ctx,
178                        struct gl_buffer_object *bufferObj,
179                        unsigned index_size, GLintptr offset, GLuint count,
180                        GLuint min, GLuint max)
181 {
182    struct minmax_cache_entry *entry;
183    struct hash_entry *table_entry;
184    uint32_t hash;
185 
186    if (!vbo_use_minmax_cache(bufferObj))
187       return;
188 
189    simple_mtx_lock(&bufferObj->MinMaxCacheMutex);
190 
191    if (!bufferObj->MinMaxCache) {
192       bufferObj->MinMaxCache =
193          _mesa_hash_table_create(NULL,
194                                  (uint32_t (*)(const void *))vbo_minmax_cache_hash,
195                                  (bool (*)(const void *, const void *))vbo_minmax_cache_key_equal);
196       if (!bufferObj->MinMaxCache)
197          goto out;
198    }
199 
200    entry = MALLOC_STRUCT(minmax_cache_entry);
201    if (!entry)
202       goto out;
203 
204    entry->key.offset = offset;
205    entry->key.count = count;
206    entry->key.index_size = index_size;
207    entry->min = min;
208    entry->max = max;
209    hash = vbo_minmax_cache_hash(&entry->key);
210 
211    table_entry = _mesa_hash_table_search_pre_hashed(bufferObj->MinMaxCache,
212                                                     hash, &entry->key);
213    if (table_entry) {
214       /* It seems like this could happen when two contexts are rendering using
215        * the same buffer object from multiple threads.
216        */
217       _mesa_debug(ctx, "duplicate entry in minmax cache\n");
218       free(entry);
219       goto out;
220    }
221 
222    table_entry = _mesa_hash_table_insert_pre_hashed(bufferObj->MinMaxCache,
223                                                     hash, &entry->key, entry);
224    if (!table_entry)
225       free(entry);
226 
227 out:
228    simple_mtx_unlock(&bufferObj->MinMaxCacheMutex);
229 }
230 
231 
232 void
vbo_get_minmax_index_mapped(unsigned count,unsigned index_size,unsigned restartIndex,bool restart,const void * indices,unsigned * min_index,unsigned * max_index)233 vbo_get_minmax_index_mapped(unsigned count, unsigned index_size,
234                             unsigned restartIndex, bool restart,
235                             const void *indices,
236                             unsigned *min_index, unsigned *max_index)
237 {
238    switch (index_size) {
239    case 4: {
240       const GLuint *ui_indices = (const GLuint *)indices;
241       GLuint max_ui = 0;
242       GLuint min_ui = ~0U;
243       if (restart) {
244          for (unsigned i = 0; i < count; i++) {
245             if (ui_indices[i] != restartIndex) {
246                if (ui_indices[i] > max_ui) max_ui = ui_indices[i];
247                if (ui_indices[i] < min_ui) min_ui = ui_indices[i];
248             }
249          }
250       }
251       else {
252 #if defined(USE_SSE41)
253          if (cpu_has_sse4_1) {
254             _mesa_uint_array_min_max(ui_indices, &min_ui, &max_ui, count);
255          }
256          else
257 #endif
258             for (unsigned i = 0; i < count; i++) {
259                if (ui_indices[i] > max_ui) max_ui = ui_indices[i];
260                if (ui_indices[i] < min_ui) min_ui = ui_indices[i];
261             }
262       }
263       *min_index = min_ui;
264       *max_index = max_ui;
265       break;
266    }
267    case 2: {
268       const GLushort *us_indices = (const GLushort *)indices;
269       GLuint max_us = 0;
270       GLuint min_us = ~0U;
271       if (restart) {
272          for (unsigned i = 0; i < count; i++) {
273             if (us_indices[i] != restartIndex) {
274                if (us_indices[i] > max_us) max_us = us_indices[i];
275                if (us_indices[i] < min_us) min_us = us_indices[i];
276             }
277          }
278       }
279       else {
280          for (unsigned i = 0; i < count; i++) {
281             if (us_indices[i] > max_us) max_us = us_indices[i];
282             if (us_indices[i] < min_us) min_us = us_indices[i];
283          }
284       }
285       *min_index = min_us;
286       *max_index = max_us;
287       break;
288    }
289    case 1: {
290       const GLubyte *ub_indices = (const GLubyte *)indices;
291       GLuint max_ub = 0;
292       GLuint min_ub = ~0U;
293       if (restart) {
294          for (unsigned i = 0; i < count; i++) {
295             if (ub_indices[i] != restartIndex) {
296                if (ub_indices[i] > max_ub) max_ub = ub_indices[i];
297                if (ub_indices[i] < min_ub) min_ub = ub_indices[i];
298             }
299          }
300       }
301       else {
302          for (unsigned i = 0; i < count; i++) {
303             if (ub_indices[i] > max_ub) max_ub = ub_indices[i];
304             if (ub_indices[i] < min_ub) min_ub = ub_indices[i];
305          }
306       }
307       *min_index = min_ub;
308       *max_index = max_ub;
309       break;
310    }
311    default:
312       unreachable("not reached");
313    }
314 }
315 
316 
317 /**
318  * Compute min and max elements by scanning the index buffer for
319  * glDraw[Range]Elements() calls.
320  * If primitive restart is enabled, we need to ignore restart
321  * indexes when computing min/max.
322  */
323 static void
vbo_get_minmax_index(struct gl_context * ctx,struct gl_buffer_object * obj,const void * ptr,GLintptr offset,unsigned count,unsigned index_size,bool primitive_restart,unsigned restart_index,GLuint * min_index,GLuint * max_index)324 vbo_get_minmax_index(struct gl_context *ctx, struct gl_buffer_object *obj,
325                      const void *ptr, GLintptr offset, unsigned count,
326                      unsigned index_size, bool primitive_restart,
327                      unsigned restart_index, GLuint *min_index,
328                      GLuint *max_index)
329 {
330    const char *indices;
331 
332    if (!obj) {
333       indices = (const char *)ptr + offset;
334    } else {
335       GLsizeiptr size = MIN2((GLsizeiptr)count * index_size, obj->Size);
336 
337       if (vbo_get_minmax_cached(obj, index_size, offset, count, min_index,
338                                 max_index))
339          return;
340 
341       indices = ctx->Driver.MapBufferRange(ctx, offset, size, GL_MAP_READ_BIT,
342                                            obj, MAP_INTERNAL);
343    }
344 
345    vbo_get_minmax_index_mapped(count, index_size, restart_index,
346                                primitive_restart, indices,
347                                min_index, max_index);
348 
349    if (obj) {
350       vbo_minmax_cache_store(ctx, obj, index_size, offset, count, *min_index,
351                              *max_index);
352       ctx->Driver.UnmapBuffer(ctx, obj, MAP_INTERNAL);
353    }
354 }
355 
356 /**
357  * Compute min and max elements for nr_prims
358  */
359 void
vbo_get_minmax_indices(struct gl_context * ctx,const struct _mesa_prim * prims,const struct _mesa_index_buffer * ib,GLuint * min_index,GLuint * max_index,GLuint nr_prims,bool primitive_restart,unsigned restart_index)360 vbo_get_minmax_indices(struct gl_context *ctx,
361                        const struct _mesa_prim *prims,
362                        const struct _mesa_index_buffer *ib,
363                        GLuint *min_index,
364                        GLuint *max_index,
365                        GLuint nr_prims,
366                        bool primitive_restart,
367                        unsigned restart_index)
368 {
369    GLuint tmp_min, tmp_max;
370    GLuint i;
371    GLuint count;
372 
373    *min_index = ~0;
374    *max_index = 0;
375 
376    for (i = 0; i < nr_prims; i++) {
377       const struct _mesa_prim *start_prim;
378 
379       start_prim = &prims[i];
380       count = start_prim->count;
381       /* Do combination if possible to reduce map/unmap count */
382       while ((i + 1 < nr_prims) &&
383              (prims[i].start + prims[i].count == prims[i+1].start)) {
384          count += prims[i+1].count;
385          i++;
386       }
387       vbo_get_minmax_index(ctx, ib->obj, ib->ptr,
388                            (ib->obj ? (GLintptr)ib->ptr : 0) +
389                            (start_prim->start << ib->index_size_shift),
390                            count, 1 << ib->index_size_shift,
391                            primitive_restart, restart_index,
392                            &tmp_min, &tmp_max);
393       *min_index = MIN2(*min_index, tmp_min);
394       *max_index = MAX2(*max_index, tmp_max);
395    }
396 }
397 
398 /**
399  * Same as vbo_get_minmax_index, but using gallium draw structures.
400  */
401 bool
vbo_get_minmax_indices_gallium(struct gl_context * ctx,struct pipe_draw_info * info,const struct pipe_draw_start_count_bias * draws,unsigned num_draws)402 vbo_get_minmax_indices_gallium(struct gl_context *ctx,
403                                struct pipe_draw_info *info,
404                                const struct pipe_draw_start_count_bias *draws,
405                                unsigned num_draws)
406 {
407    info->min_index = ~0;
408    info->max_index = 0;
409 
410    for (unsigned i = 0; i < num_draws; i++) {
411       struct pipe_draw_start_count_bias draw = draws[i];
412 
413       /* Do combination if possible to reduce map/unmap count */
414       while ((i + 1 < num_draws) &&
415              (draws[i].start + draws[i].count == draws[i+1].start)) {
416          draw.count += draws[i+1].count;
417          i++;
418       }
419 
420       if (!draw.count)
421          continue;
422 
423       unsigned tmp_min, tmp_max;
424       vbo_get_minmax_index(ctx, info->has_user_indices ?
425                               NULL : info->index.gl_bo,
426                            info->index.user,
427                            (GLintptr)draw.start * info->index_size,
428                            draw.count, info->index_size,
429                            info->primitive_restart, info->restart_index,
430                            &tmp_min, &tmp_max);
431       info->min_index = MIN2(info->min_index, tmp_min);
432       info->max_index = MAX2(info->max_index, tmp_max);
433    }
434 
435    return info->min_index <= info->max_index;
436 }
437