/*
 * Copyright (C) 2016 Rob Clark <robclark@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark <robclark@freedesktop.org>
 */

#include "util/hash_table.h"
#include "util/set.h"
#include "util/list.h"
#include "util/u_string.h"
#define XXH_INLINE_ALL
#include "util/xxhash.h"

#include "freedreno_batch.h"
#include "freedreno_batch_cache.h"
#include "freedreno_context.h"
#include "freedreno_resource.h"

/* Overview:
 *
 *   The batch cache provides a lookup for mapping pipe_framebuffer_state
 *   to a batch.
 *
 *   It does this via a hashtable, with a key that roughly matches the
 *   pipe_framebuffer_state, as described below.
 *
 * Batch Cache hashtable key:
 *
 *   To serialize the key, and to avoid dealing with holding a reference to
 *   pipe_surface's (which hold a reference to pipe_resource and complicate
 *   the whole refcnting thing), the key is variable length and inlines the
 *   pertinent details of the pipe_surface.
 *
 * Batch:
 *
 *   Each batch needs to hold a reference to each resource it depends on (ie.
 *   anything that needs a mem2gmem).  And a weak reference to resources it
 *   renders to.  (If both src[n] and dst[n] are not NULL then they are the
 *   same.)
 *
 *   When a resource is destroyed, we need to remove entries in the batch
 *   cache that reference the resource, to avoid dangling pointer issues.
 *   So each resource holds a hashset of batches which reference it in
 *   their hashtable key.
 *
 *   When a batch no longer holds a weak reference to any resource (ie. all
 *   the surfaces it rendered to are destroyed), the batch can be destroyed.
 *   This could happen in an app that renders and never uses the result.
 *   The more common scenario, I think, will be that some, but not all, of
 *   the surfaces are destroyed before the batch is submitted.
 *
 *   If (for example) a batch writes to zsbuf but that surface is destroyed
 *   before the batch is submitted, we can skip gmem2mem (but still need to
 *   alloc gmem space as before).  If the batch depended on previous contents
 *   of that surface, it would be holding a reference so the surface would
 *   not have been destroyed.
 */

struct key {
	uint32_t width, height, layers;
	uint16_t samples, num_surfs;
	struct fd_context *ctx;
	struct {
		struct pipe_resource *texture;
		union pipe_surface_desc u;
		uint8_t pos, samples;
		uint16_t format;
	} surf[0];
};
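
/* A worked example (illustrative, not taken from the driver): a
 * framebuffer with two color buffers plus a zsbuf yields num_surfs = 3,
 * so the key is a single variable-length allocation:
 *
 *    struct key *key = key_alloc(3);
 *    // sizeof(struct key) + 3 * sizeof(key->surf[0]) bytes, zero-filled
 *
 * See fd_batch_from_fb() below for how the surf[] entries get filled in.
 */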

static struct key *
key_alloc(unsigned num_surfs)
{
	struct key *key =
		CALLOC_VARIANT_LENGTH_STRUCT(key, sizeof(key->surf[0]) * num_surfs);
	return key;
}

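/* The key is hashed and compared in two parts: the fixed-size header up
 * to offsetof(struct key, surf[0]), then the variable-length surf[] array.
 * Raw XXH32()/memcmp() over the structs is safe here because key_alloc()
 * zero-fills the allocation, so any padding bytes are deterministic.
 */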
static uint32_t
key_hash(const void *_key)
{
	const struct key *key = _key;
	uint32_t hash = 0;
	hash = XXH32(key, offsetof(struct key, surf[0]), hash);
	hash = XXH32(key->surf, sizeof(key->surf[0]) * key->num_surfs, hash);
	return hash;
}

static bool
key_equals(const void *_a, const void *_b)
{
	const struct key *a = _a;
	const struct key *b = _b;
	return (memcmp(a, b, offsetof(struct key, surf[0])) == 0) &&
		(memcmp(a->surf, b->surf, sizeof(a->surf[0]) * a->num_surfs) == 0);
}

void
fd_bc_init(struct fd_batch_cache *cache)
{
	cache->ht = _mesa_hash_table_create(NULL, key_hash, key_equals);
}

void
fd_bc_fini(struct fd_batch_cache *cache)
{
	_mesa_hash_table_destroy(cache->ht, NULL);
}

static void
bc_flush(struct fd_batch_cache *cache, struct fd_context *ctx, bool deferred)
{
	/* fd_batch_flush() (and fd_batch_add_dep() which calls it indirectly)
	 * can cause batches to be unref'd and freed under our feet, so grab
	 * a reference to all the batches we need up-front.
	 */
	struct fd_batch *batches[ARRAY_SIZE(cache->batches)] = {0};
	struct fd_batch *batch;
	unsigned n = 0;

	fd_context_lock(ctx);

	foreach_batch(batch, cache, cache->batch_mask) {
		if (batch->ctx == ctx) {
			fd_batch_reference_locked(&batches[n++], batch);
		}
	}

	if (deferred) {
		struct fd_batch *current_batch = fd_context_batch(ctx);

		for (unsigned i = 0; i < n; i++) {
			if (batches[i] && (batches[i]->ctx == ctx) &&
					(batches[i] != current_batch)) {
				fd_batch_add_dep(current_batch, batches[i]);
			}
		}

		fd_context_unlock(ctx);
	} else {
		fd_context_unlock(ctx);

		for (unsigned i = 0; i < n; i++) {
			fd_batch_flush(batches[i]);
		}
	}

	for (unsigned i = 0; i < n; i++) {
		fd_batch_reference(&batches[i], NULL);
	}
}

void
fd_bc_flush(struct fd_batch_cache *cache, struct fd_context *ctx)
{
	bc_flush(cache, ctx, false);
}

/* A deferred flush doesn't actually flush, but it marks every other
 * batch associated with the context as dependent on the current
 * batch.  So when the current batch gets flushed, all other batches
 * that came before also get flushed.
 */
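/* A hypothetical sequence (batch names are illustrative): with batches
 * A, B and C belonging to the context, and C current:
 *
 *    fd_bc_flush_deferred(cache, ctx);   // A and B become deps of C
 *    fd_batch_flush(C);                  // flushes A and B, then C
 */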
void
fd_bc_flush_deferred(struct fd_batch_cache *cache, struct fd_context *ctx)
{
	bc_flush(cache, ctx, true);
}

static bool
batch_in_cache(struct fd_batch_cache *cache, struct fd_batch *batch)
{
	struct fd_batch *b;

	foreach_batch (b, cache, cache->batch_mask)
		if (b == batch)
			return true;

	return false;
}

void
fd_bc_dump(struct fd_screen *screen, const char *fmt, ...)
{
	struct fd_batch_cache *cache = &screen->batch_cache;

	if (!BATCH_DEBUG)
		return;

	fd_screen_lock(screen);

	va_list ap;
	va_start(ap, fmt);
	vprintf(fmt, ap);
	va_end(ap);

	set_foreach (screen->live_batches, entry) {
		struct fd_batch *batch = (struct fd_batch *)entry->key;
		printf("  %p<%u>%s%s\n", batch, batch->seqno,
				batch->needs_flush ? ", NEEDS FLUSH" : "",
				batch_in_cache(cache, batch) ? "" : ", ORPHAN");
	}

	printf("----\n");

	fd_screen_unlock(screen);
}

void
fd_bc_invalidate_context(struct fd_context *ctx)
{
	struct fd_batch_cache *cache = &ctx->screen->batch_cache;
	struct fd_batch *batch;

	fd_screen_lock(ctx->screen);

	foreach_batch(batch, cache, cache->batch_mask) {
		if (batch->ctx == ctx)
			fd_bc_invalidate_batch(batch, true);
	}

	fd_screen_unlock(ctx->screen);
}

/**
 * Note that when a batch is flushed, it needs to remain in the cache so
 * that fd_bc_invalidate_resource() can work; otherwise we can have
 * the case where a rsc is destroyed while a batch still has a dangling
 * reference to it.
 *
 * Note that the cmdstream (or, after the SUBMIT ioctl, the kernel)
 * would have a reference to the underlying bo, so it is ok for the
 * rsc to be destroyed before the batch.
 */
void
fd_bc_invalidate_batch(struct fd_batch *batch, bool remove)
{
	if (!batch)
		return;

	struct fd_batch_cache *cache = &batch->ctx->screen->batch_cache;
	struct key *key = (struct key *)batch->key;

	fd_context_assert_locked(batch->ctx);

	if (remove) {
		cache->batches[batch->idx] = NULL;
		cache->batch_mask &= ~(1 << batch->idx);
	}

	if (!key)
		return;

	DBG("%p: key=%p", batch, batch->key);
	for (unsigned idx = 0; idx < key->num_surfs; idx++) {
		struct fd_resource *rsc = fd_resource(key->surf[idx].texture);
		rsc->bc_batch_mask &= ~(1 << batch->idx);
	}

	struct hash_entry *entry =
		_mesa_hash_table_search_pre_hashed(cache->ht, batch->hash, key);
	_mesa_hash_table_remove(cache->ht, entry);

	batch->key = NULL;
	free(key);
}

void
fd_bc_invalidate_resource(struct fd_resource *rsc, bool destroy)
{
	struct fd_screen *screen = fd_screen(rsc->base.screen);
	struct fd_batch *batch;

	fd_screen_lock(screen);

	if (destroy) {
		foreach_batch(batch, &screen->batch_cache, rsc->batch_mask) {
			struct set_entry *entry = _mesa_set_search(batch->resources, rsc);
			_mesa_set_remove(batch->resources, entry);
		}
		rsc->batch_mask = 0;

		fd_batch_reference_locked(&rsc->write_batch, NULL);
	}

	foreach_batch(batch, &screen->batch_cache, rsc->bc_batch_mask)
		fd_bc_invalidate_batch(batch, false);

	rsc->bc_batch_mask = 0;

	fd_screen_unlock(screen);
}

struct fd_batch *
fd_bc_alloc_batch(struct fd_batch_cache *cache, struct fd_context *ctx, bool nondraw)
{
	struct fd_batch *batch;
	uint32_t idx;

	fd_screen_lock(ctx->screen);

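	/* ffs() on the complement of batch_mask yields the first free slot
	 * (1-based), or 0 if every slot is taken.  When the table is full,
	 * evict by flushing the oldest batch (lowest seqno) until a slot
	 * frees up.
	 */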
	while ((idx = ffs(~cache->batch_mask)) == 0) {
#if 0
		for (unsigned i = 0; i < ARRAY_SIZE(cache->batches); i++) {
			batch = cache->batches[i];
			debug_printf("%d: needs_flush=%d, depends:", batch->idx, batch->needs_flush);
			set_foreach(batch->dependencies, entry) {
				struct fd_batch *dep = (struct fd_batch *)entry->key;
				debug_printf(" %d", dep->idx);
			}
			debug_printf("\n");
		}
#endif
		/* TODO: is LRU the better policy?  Or perhaps the batch that
		 * depends on the fewest other batches?
		 */
		struct fd_batch *flush_batch = NULL;
		for (unsigned i = 0; i < ARRAY_SIZE(cache->batches); i++) {
			if (!flush_batch || (cache->batches[i]->seqno < flush_batch->seqno))
				fd_batch_reference_locked(&flush_batch, cache->batches[i]);
		}

		/* we can drop the lock temporarily here; since we hold a ref,
		 * flush_batch won't disappear under us.
		 */
		fd_screen_unlock(ctx->screen);
		DBG("%p: too many batches!  flush forced!", flush_batch);
		fd_batch_flush(flush_batch);
		fd_screen_lock(ctx->screen);

		/* While the resources get cleaned up automatically, the flush_batch
		 * doesn't get removed from the dependencies of other batches, so
		 * it won't be unref'd and will remain in the table.
		 *
		 * TODO maybe keep a bitmask of batches that depend on me, to make
		 * this easier:
		 */
		for (unsigned i = 0; i < ARRAY_SIZE(cache->batches); i++) {
			struct fd_batch *other = cache->batches[i];
			if (!other)
				continue;
			if (other->dependents_mask & (1 << flush_batch->idx)) {
				other->dependents_mask &= ~(1 << flush_batch->idx);
				struct fd_batch *ref = flush_batch;
				fd_batch_reference_locked(&ref, NULL);
			}
		}

		fd_batch_reference_locked(&flush_batch, NULL);
	}

	idx--;              /* bit zero returns 1 for ffs() */

	batch = fd_batch_create(ctx, nondraw);
	if (!batch)
		goto out;

	batch->seqno = cache->cnt++;
	batch->idx = idx;
	cache->batch_mask |= (1 << idx);

	debug_assert(cache->batches[idx] == NULL);
	cache->batches[idx] = batch;

out:
	fd_screen_unlock(ctx->screen);

	return batch;
}

static struct fd_batch *
batch_from_key(struct fd_batch_cache *cache, struct key *key,
		struct fd_context *ctx)
{
	struct fd_batch *batch = NULL;
	uint32_t hash = key_hash(key);
	struct hash_entry *entry =
		_mesa_hash_table_search_pre_hashed(cache->ht, hash, key);

	if (entry) {
		free(key);
		fd_batch_reference(&batch, (struct fd_batch *)entry->data);
		return batch;
	}

	batch = fd_bc_alloc_batch(cache, ctx, false);
#ifdef DEBUG
	DBG("%p: hash=0x%08x, %ux%u, %u layers, %u samples", batch, hash,
			key->width, key->height, key->layers, key->samples);
	for (unsigned idx = 0; idx < key->num_surfs; idx++) {
		DBG("%p:  surf[%u]: %p (%s) (%u,%u / %u,%u,%u)", batch, key->surf[idx].pos,
			key->surf[idx].texture, util_format_name(key->surf[idx].format),
			key->surf[idx].u.buf.first_element, key->surf[idx].u.buf.last_element,
			key->surf[idx].u.tex.first_layer, key->surf[idx].u.tex.last_layer,
			key->surf[idx].u.tex.level);
	}
#endif
	if (!batch)
		return NULL;

	/* reset max_scissor, which will be adjusted on draws
	 * according to the actual scissor.
	 */
	batch->max_scissor.minx = ~0;
	batch->max_scissor.miny = ~0;
	batch->max_scissor.maxx = 0;
	batch->max_scissor.maxy = 0;

	fd_screen_lock(ctx->screen);

	_mesa_hash_table_insert_pre_hashed(cache->ht, hash, key, batch);
	batch->key = key;
	batch->hash = hash;

	for (unsigned idx = 0; idx < key->num_surfs; idx++) {
		struct fd_resource *rsc = fd_resource(key->surf[idx].texture);
		rsc->bc_batch_mask = (1 << batch->idx);
	}

	fd_screen_unlock(ctx->screen);

	return batch;
}

static void
key_surf(struct key *key, unsigned idx, unsigned pos, struct pipe_surface *psurf)
{
	key->surf[idx].texture = psurf->texture;
	key->surf[idx].u = psurf->u;
	key->surf[idx].pos = pos;
	key->surf[idx].samples = MAX2(1, psurf->nr_samples);
	key->surf[idx].format = psurf->format;
}

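/* Build the cache key from the framebuffer state: the zsbuf (if any) goes
 * first with pos 0, followed by each non-NULL cbuf with pos i + 1, so
 * sparse cbuf bindings still produce a densely packed surf[] array.
 */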
struct fd_batch *
fd_batch_from_fb(struct fd_batch_cache *cache, struct fd_context *ctx,
		const struct pipe_framebuffer_state *pfb)
{
	unsigned idx = 0, n = pfb->nr_cbufs + (pfb->zsbuf ? 1 : 0);
	struct key *key = key_alloc(n);

	key->width = pfb->width;
	key->height = pfb->height;
	key->layers = pfb->layers;
	key->samples = util_framebuffer_get_num_samples(pfb);
	key->ctx = ctx;

	if (pfb->zsbuf)
		key_surf(key, idx++, 0, pfb->zsbuf);

	for (unsigned i = 0; i < pfb->nr_cbufs; i++)
		if (pfb->cbufs[i])
			key_surf(key, idx++, i + 1, pfb->cbufs[i]);

	key->num_surfs = idx;

	return batch_from_key(cache, key, ctx);
}