1 /*
2 * Mesa 3-D graphics library
3 *
4 * Copyright 2003 VMware, Inc.
5 * Copyright 2009 VMware, Inc.
6 * All Rights Reserved.
7 * Copyright (C) 2016 Advanced Micro Devices, Inc.
8 *
9 * Permission is hereby granted, free of charge, to any person obtaining a
10 * copy of this software and associated documentation files (the "Software"),
11 * to deal in the Software without restriction, including without limitation
12 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
13 * and/or sell copies of the Software, and to permit persons to whom the
14 * Software is furnished to do so, subject to the following conditions:
15 *
16 * The above copyright notice and this permission notice (including the next
17 * paragraph) shall be included in all copies or substantial portions of the
18 * Software.
19 *
20 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
23 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
24 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
25 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
26 * USE OR OTHER DEALINGS IN THE SOFTWARE.
27 */
28
29 #include "main/glheader.h"
30 #include "main/context.h"
31 #include "main/varray.h"
32 #include "main/macros.h"
33 #include "main/sse_minmax.h"
34 #include "x86/common_x86_asm.h"
35 #include "util/hash_table.h"
36 #include "util/u_memory.h"
37 #include "pipe/p_state.h"
38
39
40 struct minmax_cache_key {
41 GLintptr offset;
42 GLuint count;
43 unsigned index_size;
44 };
45
46
47 struct minmax_cache_entry {
48 struct minmax_cache_key key;
49 GLuint min;
50 GLuint max;
51 };
52
53
54 static uint32_t
vbo_minmax_cache_hash(const struct minmax_cache_key * key)55 vbo_minmax_cache_hash(const struct minmax_cache_key *key)
56 {
57 return _mesa_hash_data(key, sizeof(*key));
58 }
59
60
61 static bool
vbo_minmax_cache_key_equal(const struct minmax_cache_key * a,const struct minmax_cache_key * b)62 vbo_minmax_cache_key_equal(const struct minmax_cache_key *a,
63 const struct minmax_cache_key *b)
64 {
65 return (a->offset == b->offset) && (a->count == b->count) &&
66 (a->index_size == b->index_size);
67 }
68
69
70 static void
vbo_minmax_cache_delete_entry(struct hash_entry * entry)71 vbo_minmax_cache_delete_entry(struct hash_entry *entry)
72 {
73 free(entry->data);
74 }
75
76
77 static GLboolean
vbo_use_minmax_cache(struct gl_buffer_object * bufferObj)78 vbo_use_minmax_cache(struct gl_buffer_object *bufferObj)
79 {
80 if (bufferObj->UsageHistory & (USAGE_TEXTURE_BUFFER |
81 USAGE_ATOMIC_COUNTER_BUFFER |
82 USAGE_SHADER_STORAGE_BUFFER |
83 USAGE_TRANSFORM_FEEDBACK_BUFFER |
84 USAGE_PIXEL_PACK_BUFFER |
85 USAGE_DISABLE_MINMAX_CACHE))
86 return GL_FALSE;
87
88 if ((bufferObj->Mappings[MAP_USER].AccessFlags &
89 (GL_MAP_PERSISTENT_BIT | GL_MAP_WRITE_BIT)) ==
90 (GL_MAP_PERSISTENT_BIT | GL_MAP_WRITE_BIT))
91 return GL_FALSE;
92
93 return GL_TRUE;
94 }
95
96
97 void
vbo_delete_minmax_cache(struct gl_buffer_object * bufferObj)98 vbo_delete_minmax_cache(struct gl_buffer_object *bufferObj)
99 {
100 _mesa_hash_table_destroy(bufferObj->MinMaxCache, vbo_minmax_cache_delete_entry);
101 bufferObj->MinMaxCache = NULL;
102 }
103
104
105 static GLboolean
vbo_get_minmax_cached(struct gl_buffer_object * bufferObj,unsigned index_size,GLintptr offset,GLuint count,GLuint * min_index,GLuint * max_index)106 vbo_get_minmax_cached(struct gl_buffer_object *bufferObj,
107 unsigned index_size, GLintptr offset, GLuint count,
108 GLuint *min_index, GLuint *max_index)
109 {
110 GLboolean found = GL_FALSE;
111 struct minmax_cache_key key;
112 uint32_t hash;
113 struct hash_entry *result;
114
115 if (!bufferObj->MinMaxCache)
116 return GL_FALSE;
117 if (!vbo_use_minmax_cache(bufferObj))
118 return GL_FALSE;
119
120 simple_mtx_lock(&bufferObj->MinMaxCacheMutex);
121
122 if (bufferObj->MinMaxCacheDirty) {
123 /* Disable the cache permanently for this BO if the number of hits
124 * is asymptotically less than the number of misses. This happens when
125 * applications use the BO for streaming.
126 *
127 * However, some initial optimism allows applications that interleave
128 * draw calls with glBufferSubData during warmup.
129 */
130 unsigned optimism = bufferObj->Size;
131 if (bufferObj->MinMaxCacheMissIndices > optimism &&
132 bufferObj->MinMaxCacheHitIndices < bufferObj->MinMaxCacheMissIndices - optimism) {
133 bufferObj->UsageHistory |= USAGE_DISABLE_MINMAX_CACHE;
134 vbo_delete_minmax_cache(bufferObj);
135 goto out_disable;
136 }
137
138 _mesa_hash_table_clear(bufferObj->MinMaxCache, vbo_minmax_cache_delete_entry);
139 bufferObj->MinMaxCacheDirty = false;
140 goto out_invalidate;
141 }
142
143 key.index_size = index_size;
144 key.offset = offset;
145 key.count = count;
146 hash = vbo_minmax_cache_hash(&key);
147 result = _mesa_hash_table_search_pre_hashed(bufferObj->MinMaxCache, hash, &key);
148 if (result) {
149 struct minmax_cache_entry *entry = result->data;
150 *min_index = entry->min;
151 *max_index = entry->max;
152 found = GL_TRUE;
153 }
154
155 out_invalidate:
156 if (found) {
157 /* The hit counter saturates so that we don't accidently disable the
158 * cache in a long-running program.
159 */
160 unsigned new_hit_count = bufferObj->MinMaxCacheHitIndices + count;
161
162 if (new_hit_count >= bufferObj->MinMaxCacheHitIndices)
163 bufferObj->MinMaxCacheHitIndices = new_hit_count;
164 else
165 bufferObj->MinMaxCacheHitIndices = ~(unsigned)0;
166 } else {
167 bufferObj->MinMaxCacheMissIndices += count;
168 }
169
170 out_disable:
171 simple_mtx_unlock(&bufferObj->MinMaxCacheMutex);
172 return found;
173 }
174
175
176 static void
vbo_minmax_cache_store(struct gl_context * ctx,struct gl_buffer_object * bufferObj,unsigned index_size,GLintptr offset,GLuint count,GLuint min,GLuint max)177 vbo_minmax_cache_store(struct gl_context *ctx,
178 struct gl_buffer_object *bufferObj,
179 unsigned index_size, GLintptr offset, GLuint count,
180 GLuint min, GLuint max)
181 {
182 struct minmax_cache_entry *entry;
183 struct hash_entry *table_entry;
184 uint32_t hash;
185
186 if (!vbo_use_minmax_cache(bufferObj))
187 return;
188
189 simple_mtx_lock(&bufferObj->MinMaxCacheMutex);
190
191 if (!bufferObj->MinMaxCache) {
192 bufferObj->MinMaxCache =
193 _mesa_hash_table_create(NULL,
194 (uint32_t (*)(const void *))vbo_minmax_cache_hash,
195 (bool (*)(const void *, const void *))vbo_minmax_cache_key_equal);
196 if (!bufferObj->MinMaxCache)
197 goto out;
198 }
199
200 entry = MALLOC_STRUCT(minmax_cache_entry);
201 if (!entry)
202 goto out;
203
204 entry->key.offset = offset;
205 entry->key.count = count;
206 entry->key.index_size = index_size;
207 entry->min = min;
208 entry->max = max;
209 hash = vbo_minmax_cache_hash(&entry->key);
210
211 table_entry = _mesa_hash_table_search_pre_hashed(bufferObj->MinMaxCache,
212 hash, &entry->key);
213 if (table_entry) {
214 /* It seems like this could happen when two contexts are rendering using
215 * the same buffer object from multiple threads.
216 */
217 _mesa_debug(ctx, "duplicate entry in minmax cache\n");
218 free(entry);
219 goto out;
220 }
221
222 table_entry = _mesa_hash_table_insert_pre_hashed(bufferObj->MinMaxCache,
223 hash, &entry->key, entry);
224 if (!table_entry)
225 free(entry);
226
227 out:
228 simple_mtx_unlock(&bufferObj->MinMaxCacheMutex);
229 }
230
231
232 void
vbo_get_minmax_index_mapped(unsigned count,unsigned index_size,unsigned restartIndex,bool restart,const void * indices,unsigned * min_index,unsigned * max_index)233 vbo_get_minmax_index_mapped(unsigned count, unsigned index_size,
234 unsigned restartIndex, bool restart,
235 const void *indices,
236 unsigned *min_index, unsigned *max_index)
237 {
238 switch (index_size) {
239 case 4: {
240 const GLuint *ui_indices = (const GLuint *)indices;
241 GLuint max_ui = 0;
242 GLuint min_ui = ~0U;
243 if (restart) {
244 for (unsigned i = 0; i < count; i++) {
245 if (ui_indices[i] != restartIndex) {
246 if (ui_indices[i] > max_ui) max_ui = ui_indices[i];
247 if (ui_indices[i] < min_ui) min_ui = ui_indices[i];
248 }
249 }
250 }
251 else {
252 #if defined(USE_SSE41)
253 if (cpu_has_sse4_1) {
254 _mesa_uint_array_min_max(ui_indices, &min_ui, &max_ui, count);
255 }
256 else
257 #endif
258 for (unsigned i = 0; i < count; i++) {
259 if (ui_indices[i] > max_ui) max_ui = ui_indices[i];
260 if (ui_indices[i] < min_ui) min_ui = ui_indices[i];
261 }
262 }
263 *min_index = min_ui;
264 *max_index = max_ui;
265 break;
266 }
267 case 2: {
268 const GLushort *us_indices = (const GLushort *)indices;
269 GLuint max_us = 0;
270 GLuint min_us = ~0U;
271 if (restart) {
272 for (unsigned i = 0; i < count; i++) {
273 if (us_indices[i] != restartIndex) {
274 if (us_indices[i] > max_us) max_us = us_indices[i];
275 if (us_indices[i] < min_us) min_us = us_indices[i];
276 }
277 }
278 }
279 else {
280 for (unsigned i = 0; i < count; i++) {
281 if (us_indices[i] > max_us) max_us = us_indices[i];
282 if (us_indices[i] < min_us) min_us = us_indices[i];
283 }
284 }
285 *min_index = min_us;
286 *max_index = max_us;
287 break;
288 }
289 case 1: {
290 const GLubyte *ub_indices = (const GLubyte *)indices;
291 GLuint max_ub = 0;
292 GLuint min_ub = ~0U;
293 if (restart) {
294 for (unsigned i = 0; i < count; i++) {
295 if (ub_indices[i] != restartIndex) {
296 if (ub_indices[i] > max_ub) max_ub = ub_indices[i];
297 if (ub_indices[i] < min_ub) min_ub = ub_indices[i];
298 }
299 }
300 }
301 else {
302 for (unsigned i = 0; i < count; i++) {
303 if (ub_indices[i] > max_ub) max_ub = ub_indices[i];
304 if (ub_indices[i] < min_ub) min_ub = ub_indices[i];
305 }
306 }
307 *min_index = min_ub;
308 *max_index = max_ub;
309 break;
310 }
311 default:
312 unreachable("not reached");
313 }
314 }
315
316
317 /**
318 * Compute min and max elements by scanning the index buffer for
319 * glDraw[Range]Elements() calls.
320 * If primitive restart is enabled, we need to ignore restart
321 * indexes when computing min/max.
322 */
323 static void
vbo_get_minmax_index(struct gl_context * ctx,struct gl_buffer_object * obj,const void * ptr,GLintptr offset,unsigned count,unsigned index_size,bool primitive_restart,unsigned restart_index,GLuint * min_index,GLuint * max_index)324 vbo_get_minmax_index(struct gl_context *ctx, struct gl_buffer_object *obj,
325 const void *ptr, GLintptr offset, unsigned count,
326 unsigned index_size, bool primitive_restart,
327 unsigned restart_index, GLuint *min_index,
328 GLuint *max_index)
329 {
330 const char *indices;
331
332 if (!obj) {
333 indices = (const char *)ptr + offset;
334 } else {
335 GLsizeiptr size = MIN2((GLsizeiptr)count * index_size, obj->Size);
336
337 if (vbo_get_minmax_cached(obj, index_size, offset, count, min_index,
338 max_index))
339 return;
340
341 indices = ctx->Driver.MapBufferRange(ctx, offset, size, GL_MAP_READ_BIT,
342 obj, MAP_INTERNAL);
343 }
344
345 vbo_get_minmax_index_mapped(count, index_size, restart_index,
346 primitive_restart, indices,
347 min_index, max_index);
348
349 if (obj) {
350 vbo_minmax_cache_store(ctx, obj, index_size, offset, count, *min_index,
351 *max_index);
352 ctx->Driver.UnmapBuffer(ctx, obj, MAP_INTERNAL);
353 }
354 }
355
356 /**
357 * Compute min and max elements for nr_prims
358 */
359 void
vbo_get_minmax_indices(struct gl_context * ctx,const struct _mesa_prim * prims,const struct _mesa_index_buffer * ib,GLuint * min_index,GLuint * max_index,GLuint nr_prims,bool primitive_restart,unsigned restart_index)360 vbo_get_minmax_indices(struct gl_context *ctx,
361 const struct _mesa_prim *prims,
362 const struct _mesa_index_buffer *ib,
363 GLuint *min_index,
364 GLuint *max_index,
365 GLuint nr_prims,
366 bool primitive_restart,
367 unsigned restart_index)
368 {
369 GLuint tmp_min, tmp_max;
370 GLuint i;
371 GLuint count;
372
373 *min_index = ~0;
374 *max_index = 0;
375
376 for (i = 0; i < nr_prims; i++) {
377 const struct _mesa_prim *start_prim;
378
379 start_prim = &prims[i];
380 count = start_prim->count;
381 /* Do combination if possible to reduce map/unmap count */
382 while ((i + 1 < nr_prims) &&
383 (prims[i].start + prims[i].count == prims[i+1].start)) {
384 count += prims[i+1].count;
385 i++;
386 }
387 vbo_get_minmax_index(ctx, ib->obj, ib->ptr,
388 (ib->obj ? (GLintptr)ib->ptr : 0) +
389 (start_prim->start << ib->index_size_shift),
390 count, 1 << ib->index_size_shift,
391 primitive_restart, restart_index,
392 &tmp_min, &tmp_max);
393 *min_index = MIN2(*min_index, tmp_min);
394 *max_index = MAX2(*max_index, tmp_max);
395 }
396 }
397
398 /**
399 * Same as vbo_get_minmax_index, but using gallium draw structures.
400 */
401 bool
vbo_get_minmax_indices_gallium(struct gl_context * ctx,struct pipe_draw_info * info,const struct pipe_draw_start_count_bias * draws,unsigned num_draws)402 vbo_get_minmax_indices_gallium(struct gl_context *ctx,
403 struct pipe_draw_info *info,
404 const struct pipe_draw_start_count_bias *draws,
405 unsigned num_draws)
406 {
407 info->min_index = ~0;
408 info->max_index = 0;
409
410 for (unsigned i = 0; i < num_draws; i++) {
411 struct pipe_draw_start_count_bias draw = draws[i];
412
413 /* Do combination if possible to reduce map/unmap count */
414 while ((i + 1 < num_draws) &&
415 (draws[i].start + draws[i].count == draws[i+1].start)) {
416 draw.count += draws[i+1].count;
417 i++;
418 }
419
420 if (!draw.count)
421 continue;
422
423 unsigned tmp_min, tmp_max;
424 vbo_get_minmax_index(ctx, info->has_user_indices ?
425 NULL : info->index.gl_bo,
426 info->index.user,
427 (GLintptr)draw.start * info->index_size,
428 draw.count, info->index_size,
429 info->primitive_restart, info->restart_index,
430 &tmp_min, &tmp_max);
431 info->min_index = MIN2(info->min_index, tmp_min);
432 info->max_index = MAX2(info->max_index, tmp_max);
433 }
434
435 return info->min_index <= info->max_index;
436 }
437