1 /*
2  * Copyright (c) 2011 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  *
23  * Authors:
24  *    Chris Wilson <chris@chris-wilson.co.uk>
25  *
26  */
27 
28 #ifdef HAVE_CONFIG_H
29 #include "config.h"
30 #endif
31 
32 #include "sna.h"
33 #include "sna_reg.h"
34 
35 #include <unistd.h>
36 #include <sys/ioctl.h>
37 #include <sys/mman.h>
38 #include <sys/stat.h>
39 #include <time.h>
40 #include <sched.h>
41 #include <errno.h>
42 #include <fcntl.h>
43 
44 #include <xf86drm.h>
45 
46 #ifdef HAVE_VALGRIND
47 #include <valgrind.h>
48 #include <memcheck.h>
49 #endif
50 
51 #ifdef HAVE_STRUCT_SYSINFO_TOTALRAM
52 #include <sys/sysinfo.h>
53 #endif
54 
55 #include "sna_cpuid.h"
56 
57 static struct kgem_bo *
58 search_linear_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags);
59 
60 static struct kgem_bo *
61 search_snoop_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags);
62 
63 #define DBG_NO_HW 0
64 #define DBG_NO_EXEC 0
65 #define DBG_NO_TILING 0
66 #define DBG_NO_CACHE 0
67 #define DBG_NO_SNOOP_CACHE 0
68 #define DBG_NO_CACHE_LEVEL 0
69 #define DBG_NO_CPU 0
70 #define DBG_NO_CREATE2 0
71 #define DBG_NO_USERPTR 0
72 #define DBG_NO_UNSYNCHRONIZED_USERPTR 0
73 #define DBG_NO_COHERENT_MMAP_GTT 0
74 #define DBG_NO_LLC 0
75 #define DBG_NO_SEMAPHORES 0
76 #define DBG_NO_MADV 0
77 #define DBG_NO_UPLOAD_CACHE 0
78 #define DBG_NO_UPLOAD_ACTIVE 0
79 #define DBG_NO_MAP_UPLOAD 0
80 #define DBG_NO_RELAXED_FENCING 0
81 #define DBG_NO_SECURE_BATCHES 0
82 #define DBG_NO_PINNED_BATCHES 0
83 #define DBG_NO_SHRINK_BATCHES 0
84 #define DBG_NO_FAST_RELOC 0
85 #define DBG_NO_HANDLE_LUT 0
86 #define DBG_NO_WT 0
87 #define DBG_NO_WC_MMAP 0
88 #define DBG_NO_BLT_Y 0
89 #define DBG_NO_SCANOUT_Y 0
90 #define DBG_NO_DIRTYFB 0
91 #define DBG_NO_DETILING 0
92 #define DBG_DUMP 0
93 #define DBG_NO_MALLOC_CACHE 0
94 
95 #define FORCE_MMAP_SYNC 0 /* ((1 << DOMAIN_CPU) | (1 << DOMAIN_GTT)) */
96 
97 #ifndef DEBUG_SYNC
98 #define DEBUG_SYNC 0
99 #endif
100 
101 #define SHOW_BATCH_BEFORE 0
102 #define SHOW_BATCH_AFTER 0
103 
104 #if 0
105 #define ASSERT_IDLE(kgem__, handle__) assert(!__kgem_busy(kgem__, handle__))
106 #define ASSERT_MAYBE_IDLE(kgem__, handle__, expect__) assert(!(expect__) || !__kgem_busy(kgem__, handle__))
107 #else
108 #define ASSERT_IDLE(kgem__, handle__)
109 #define ASSERT_MAYBE_IDLE(kgem__, handle__, expect__)
110 #endif
111 
112 /* Worst case seems to be 965gm where we cannot write within a cacheline that
113  * is simultaneously being read by the GPU, or within the sampler
114  * prefetch. In general, the chipsets seem to have a requirement that sampler
115  * offsets be aligned to a cacheline (64 bytes).
116  *
117  * Actually, it turns out the BLT color pattern (BR15) has the most severe
118  * alignment restrictions, 64 bytes for 8-bpp, 128 bytes for 16-bpp and 256
119  * bytes for 32-bpp.
120  */
121 #define UPLOAD_ALIGNMENT 256
122 
123 #define PAGE_ALIGN(x) ALIGN(x, PAGE_SIZE)
124 #define NUM_PAGES(x) (((x) + PAGE_SIZE-1) / PAGE_SIZE)
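/* Illustrative sanity check of the helpers above (assuming the usual
 * 4096-byte PAGE_SIZE): PAGE_ALIGN(5000) == 8192, NUM_PAGES(5000) == 2,
 * and NUM_PAGES(4096) == 1.
 */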
125 
126 #define MAX_GTT_VMA_CACHE 512
127 #define MAX_CPU_VMA_CACHE INT16_MAX
128 #define MAP_PRESERVE_TIME 10
129 
130 #define MAKE_USER_MAP(ptr) ((void*)((uintptr_t)(ptr) | 1))
131 #define IS_USER_MAP(ptr) ((uintptr_t)(ptr) & 1)
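/* The low bit of the (page-aligned, hence even) mapping address doubles as a
 * tag: e.g. a hypothetical map at 0x7f0000001000 is stored as 0x7f0000001001
 * so that IS_USER_MAP() can identify userptr-backed mappings without any
 * extra storage.
 */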
132 
133 #define LOCAL_I915_PARAM_HAS_BLT		11
134 #define LOCAL_I915_PARAM_HAS_RELAXED_FENCING	12
135 #define LOCAL_I915_PARAM_HAS_RELAXED_DELTA	15
136 #define LOCAL_I915_PARAM_HAS_LLC		17
137 #define LOCAL_I915_PARAM_HAS_SEMAPHORES		20
138 #define LOCAL_I915_PARAM_HAS_SECURE_BATCHES	23
139 #define LOCAL_I915_PARAM_HAS_PINNED_BATCHES	24
140 #define LOCAL_I915_PARAM_HAS_NO_RELOC		25
141 #define LOCAL_I915_PARAM_HAS_HANDLE_LUT		26
142 #define LOCAL_I915_PARAM_HAS_WT			27
143 #define LOCAL_I915_PARAM_MMAP_VERSION		30
144 #define LOCAL_I915_PARAM_MMAP_GTT_COHERENT	52
145 
146 #define LOCAL_I915_EXEC_IS_PINNED		(1<<10)
147 #define LOCAL_I915_EXEC_NO_RELOC		(1<<11)
148 #define LOCAL_I915_EXEC_HANDLE_LUT		(1<<12)
149 
150 #define LOCAL_I915_GEM_CREATE2       0x34
151 #define LOCAL_IOCTL_I915_GEM_CREATE2 DRM_IOWR (DRM_COMMAND_BASE + LOCAL_I915_GEM_CREATE2, struct local_i915_gem_create2)
152 struct local_i915_gem_create2 {
153 	uint64_t size;
154 	uint32_t placement;
155 #define LOCAL_I915_CREATE_PLACEMENT_SYSTEM 0
156 #define LOCAL_I915_CREATE_PLACEMENT_STOLEN 1 /* Cannot use CPU mmaps or pread/pwrite */
157 	uint32_t domain;
158 	uint32_t caching;
159 	uint32_t tiling_mode;
160 	uint32_t stride;
161 	uint32_t flags;
162 	uint32_t pad;
163 	uint32_t handle;
164 };
165 
166 #define LOCAL_I915_GEM_USERPTR       0x33
167 #define LOCAL_IOCTL_I915_GEM_USERPTR DRM_IOWR (DRM_COMMAND_BASE + LOCAL_I915_GEM_USERPTR, struct local_i915_gem_userptr)
168 struct local_i915_gem_userptr {
169 	uint64_t user_ptr;
170 	uint64_t user_size;
171 	uint32_t flags;
172 #define I915_USERPTR_READ_ONLY		0x1
173 #define I915_USERPTR_UNSYNCHRONIZED	0x80000000
174 	uint32_t handle;
175 };
176 
177 #define UNCACHED	0
178 #define SNOOPED		1
179 #define DISPLAY		2
180 
181 struct local_i915_gem_caching {
182 	uint32_t handle;
183 	uint32_t caching;
184 };
185 
186 #define LOCAL_I915_GEM_SET_CACHING	0x2f
187 #define LOCAL_I915_GEM_GET_CACHING	0x30
188 #define LOCAL_IOCTL_I915_GEM_SET_CACHING DRM_IOW(DRM_COMMAND_BASE + LOCAL_I915_GEM_SET_CACHING, struct local_i915_gem_caching)
189 #define LOCAL_IOCTL_I915_GEM_GET_CACHING DRM_IOWR(DRM_COMMAND_BASE + LOCAL_I915_GEM_GET_CACHING, struct local_i915_gem_caching)
190 
191 struct local_i915_gem_mmap {
192 	uint32_t handle;
193 	uint32_t pad;
194 	uint64_t offset;
195 	uint64_t size;
196 	uint64_t addr_ptr;
197 };
198 #define LOCAL_IOCTL_I915_GEM_MMAP DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_MMAP, struct local_i915_gem_mmap)
199 
200 struct local_i915_gem_mmap2 {
201 	uint32_t handle;
202 	uint32_t pad;
203 	uint64_t offset;
204 	uint64_t size;
205 	uint64_t addr_ptr;
206 	uint64_t flags;
207 #define I915_MMAP_WC 0x1
208 };
209 #define LOCAL_IOCTL_I915_GEM_MMAP_v2 DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_MMAP, struct local_i915_gem_mmap2)
210 
211 struct kgem_buffer {
212 	struct kgem_bo base;
213 	void *mem;
214 	uint32_t used;
215 	uint32_t need_io : 1;
216 	uint32_t write : 2;
217 	uint32_t mmapped : 2;
218 };
219 enum {
220 	MMAPPED_NONE,
221 	MMAPPED_GTT,
222 	MMAPPED_CPU
223 };
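/* The two-bit kgem_buffer.mmapped field above holds one of these values,
 * recording whether the buffer contents are reached through a GTT or CPU
 * mapping; with MMAPPED_NONE, mem typically points at a malloc'ed staging
 * buffer that is flushed with pwrite (need_io).
 */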
224 
225 static struct kgem_bo *__kgem_freed_bo;
226 static struct kgem_request *__kgem_freed_request;
227 static struct drm_i915_gem_exec_object2 _kgem_dummy_exec;
228 
229 static inline struct sna *__to_sna(struct kgem *kgem)
230 {
231 	/* minor layering violations */
232 	return container_of(kgem, struct sna, kgem);
233 }
234 
235 static inline int bytes(struct kgem_bo *bo)
236 {
237 	return __kgem_bo_size(bo);
238 }
239 
240 #define bucket(B) (B)->size.pages.bucket
241 #define num_pages(B) (B)->size.pages.count
242 
243 static int __do_ioctl(int fd, unsigned long req, void *arg)
244 {
245 	do {
246 		int err;
247 
248 		switch ((err = errno)) {
249 		case EAGAIN:
250 			sched_yield();
251 		case EINTR:
252 			break;
253 		default:
254 			return -err;
255 		}
256 
257 		if (likely(ioctl(fd, req, arg) == 0))
258 			return 0;
259 	} while (1);
260 }
261 
262 inline static int do_ioctl(int fd, unsigned long req, void *arg)
263 {
264 	if (likely(ioctl(fd, req, arg) == 0))
265 		return 0;
266 
267 	return __do_ioctl(fd, req, arg);
268 }
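/* A minimal usage sketch (disabled), mirroring the call sites below: any
 * failure comes back as a negative errno rather than ioctl()'s raw -1/errno
 * pair, and EINTR/EAGAIN are retried transparently by __do_ioctl().
 */
#if 0
static int example_query_busy(struct kgem *kgem, uint32_t handle)
{
	struct drm_i915_gem_busy busy;
	int err;

	VG_CLEAR(busy);
	busy.handle = handle;
	if ((err = do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_BUSY, &busy)))
		return err; /* -errno */
	return busy.busy;
}
#endif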
269 
270 #ifdef DEBUG_MEMORY
271 static void debug_alloc(struct kgem *kgem, size_t size)
272 {
273 	kgem->debug_memory.bo_allocs++;
274 	kgem->debug_memory.bo_bytes += size;
275 }
276 static void debug_alloc__bo(struct kgem *kgem, struct kgem_bo *bo)
277 {
278 	debug_alloc(kgem, bytes(bo));
279 }
280 #else
281 #define debug_alloc__bo(k, b)
282 #endif
283 
284 #ifndef NDEBUG
285 static void assert_tiling(struct kgem *kgem, struct kgem_bo *bo)
286 {
287 	struct drm_i915_gem_get_tiling tiling;
288 
289 	assert(bo);
290 
291 	if (!kgem->can_fence && kgem->gen >= 040 && bo->tiling)
292 		return; /* lies */
293 
294 	VG_CLEAR(tiling);
295 	tiling.handle = bo->handle;
296 	tiling.tiling_mode = bo->tiling;
297 	(void)do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_GET_TILING, &tiling);
298 	assert(tiling.tiling_mode == bo->tiling);
299 }
300 
301 static void assert_caching(struct kgem *kgem, struct kgem_bo *bo)
302 {
303 	struct local_i915_gem_caching arg;
304 	int expect = kgem->has_llc ? SNOOPED : UNCACHED;
305 
306 	VG_CLEAR(arg);
307 	arg.handle = bo->handle;
308 	arg.caching = expect;
309 
310 	(void)do_ioctl(kgem->fd, LOCAL_IOCTL_I915_GEM_GET_CACHING, &arg);
311 
312 	assert(arg.caching == expect);
313 }
314 
315 static void assert_bo_retired(struct kgem_bo *bo)
316 {
317 	DBG(("%s: handle=%d, domain: %d exec? %d, rq? %d\n", __FUNCTION__,
318 	     bo->handle, bo->domain, bo->exec != NULL, bo->rq != NULL));
319 	assert(bo->refcnt);
320 	assert(bo->rq == NULL);
321 	assert(bo->exec == NULL);
322 	assert(!bo->needs_flush);
323 	assert(list_is_empty(&bo->request));
324 }
325 #else
326 #define assert_tiling(kgem, bo)
327 #define assert_caching(kgem, bo)
328 #define assert_bo_retired(bo)
329 #endif
330 
331 static int __find_debugfs(struct kgem *kgem)
332 {
333 	int i;
334 
335 	for (i = 0; i < DRM_MAX_MINOR; i++) {
336 		char path[80];
337 
338 		sprintf(path, "/sys/kernel/debug/dri/%d/i915_wedged", i);
339 		if (access(path, R_OK) == 0)
340 			return i;
341 
342 		sprintf(path, "/debug/dri/%d/i915_wedged", i);
343 		if (access(path, R_OK) == 0)
344 			return i;
345 	}
346 
347 	return -1;
348 }
349 
350 static int kgem_get_minor(struct kgem *kgem)
351 {
352 	struct stat st;
353 
354 	if (fstat(kgem->fd, &st))
355 		return __find_debugfs(kgem);
356 
357 	if (!S_ISCHR(st.st_mode))
358 		return __find_debugfs(kgem);
359 
360 	return st.st_rdev & 0x63;
361 }
362 
363 static bool find_hang_state(struct kgem *kgem, char *path, int maxlen)
364 {
365 	int minor = kgem_get_minor(kgem);
366 
367 	/* Search for our hang state in a few canonical locations.
368 	 * In the unlikely event of having multiple devices, we
369 	 * will need to check which minor actually corresponds to ours.
370 	 */
371 
372 	snprintf(path, maxlen, "/sys/class/drm/card%d/error", minor);
373 	if (access(path, R_OK) == 0)
374 		return true;
375 
376 	snprintf(path, maxlen, "/sys/kernel/debug/dri/%d/i915_error_state", minor);
377 	if (access(path, R_OK) == 0)
378 		return true;
379 
380 	snprintf(path, maxlen, "/debug/dri/%d/i915_error_state", minor);
381 	if (access(path, R_OK) == 0)
382 		return true;
383 
384 	path[0] = '\0';
385 	return false;
386 }
387 
388 static bool has_error_state(struct kgem *kgem, char *path)
389 {
390    bool ret = false;
391    char no;
392    int fd;
393 
394    fd = open(path, O_RDONLY);
395    if (fd >= 0) {
396       ret = read(fd, &no, 1) == 1 && no != 'N';
397       close(fd);
398    }
399 
400    return ret;
401 }
402 
403 static int kgem_get_screen_index(struct kgem *kgem)
404 {
405 	return __to_sna(kgem)->scrn->scrnIndex;
406 }
407 
408 static void
409 __kgem_set_wedged(struct kgem *kgem)
410 {
411 	static int once;
412 	char path[256];
413 
414 	if (kgem->wedged)
415 		return;
416 
417 	if (!once &&
418 	    find_hang_state(kgem, path, sizeof(path)) &&
419 	    has_error_state(kgem, path)) {
420 		xf86DrvMsg(kgem_get_screen_index(kgem), X_ERROR,
421 			   "When reporting this, please include %s and the full dmesg.\n",
422 			   path);
423 		once = 1;
424 	}
425 
426 	kgem->wedged = true;
427 	sna_render_mark_wedged(__to_sna(kgem));
428 }
429 
430 static void kgem_sna_reset(struct kgem *kgem)
431 {
432 	struct sna *sna = __to_sna(kgem);
433 
434 	sna->render.reset(sna);
435 	sna->blt_state.fill_bo = 0;
436 }
437 
438 static void kgem_sna_flush(struct kgem *kgem)
439 {
440 	struct sna *sna = __to_sna(kgem);
441 
442 	sna->render.flush(sna);
443 
444 	if (sna->render.solid_cache.dirty)
445 		sna_render_flush_solid(sna);
446 }
447 
448 static bool kgem_bo_rmfb(struct kgem *kgem, struct kgem_bo *bo)
449 {
450 	if (bo->scanout && bo->delta) {
451 		DBG(("%s: releasing fb=%d for handle=%d\n",
452 		     __FUNCTION__, bo->delta, bo->handle));
453 		/* XXX will leak if we are not DRM_MASTER. *shrug* */
454 		do_ioctl(kgem->fd, DRM_IOCTL_MODE_RMFB, &bo->delta);
455 		bo->delta = 0;
456 		return true;
457 	} else
458 		return false;
459 }
460 
461 static bool kgem_set_tiling(struct kgem *kgem, struct kgem_bo *bo,
462 			    int tiling, int stride)
463 {
464 	struct drm_i915_gem_set_tiling set_tiling;
465 	int err;
466 
467 	if (tiling == bo->tiling) {
468 		if (tiling == I915_TILING_NONE) {
469 			bo->pitch = stride;
470 			return true;
471 		}
472 		if (stride == bo->pitch)
473 			return true;
474 	}
475 
476 	if (DBG_NO_TILING)
477 		return false;
478 
479 	VG_CLEAR(set_tiling);
480 restart:
481 	set_tiling.handle = bo->handle;
482 	set_tiling.tiling_mode = tiling;
483 	set_tiling.stride = tiling ? stride : 0;
484 
485 	if (ioctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_TILING, &set_tiling) == 0) {
486 		bo->tiling = set_tiling.tiling_mode;
487 		bo->pitch = set_tiling.tiling_mode ? set_tiling.stride : stride;
488 		DBG(("%s: handle=%d, tiling=%d [%d], pitch=%d [%d]: %d\n",
489 		     __FUNCTION__, bo->handle,
490 		     bo->tiling, tiling,
491 		     bo->pitch, stride,
492 		     set_tiling.tiling_mode == tiling));
493 		return set_tiling.tiling_mode == tiling && bo->pitch >= stride;
494 	}
495 
496 	err = errno;
497 	if (err == EINTR)
498 		goto restart;
499 
500 	if (err == EAGAIN) {
501 		sched_yield();
502 		goto restart;
503 	}
504 
505 	if (err == EBUSY && kgem_bo_rmfb(kgem, bo))
506 		goto restart;
507 
508 	ERR(("%s: failed to set-tiling(tiling=%d, pitch=%d) for handle=%d: %d\n",
509 	     __FUNCTION__, tiling, stride, bo->handle, err));
510 	return false;
511 }
512 
513 static bool gem_set_caching(int fd, uint32_t handle, int caching)
514 {
515 	struct local_i915_gem_caching arg;
516 
517 	VG_CLEAR(arg);
518 	arg.handle = handle;
519 	arg.caching = caching;
520 	return do_ioctl(fd, LOCAL_IOCTL_I915_GEM_SET_CACHING, &arg) == 0;
521 }
522 
523 static uint32_t gem_userptr(int fd, void *ptr, size_t size, int read_only)
524 {
525 	struct local_i915_gem_userptr arg;
526 
527 	VG_CLEAR(arg);
528 	arg.user_ptr = (uintptr_t)ptr;
529 	arg.user_size = size;
530 	arg.flags = I915_USERPTR_UNSYNCHRONIZED;
531 	if (read_only)
532 		arg.flags |= I915_USERPTR_READ_ONLY;
533 
534 	if (DBG_NO_UNSYNCHRONIZED_USERPTR ||
535 	    do_ioctl(fd, LOCAL_IOCTL_I915_GEM_USERPTR, &arg)) {
536 		arg.flags &= ~I915_USERPTR_UNSYNCHRONIZED;
537 		if (do_ioctl(fd, LOCAL_IOCTL_I915_GEM_USERPTR, &arg)) {
538 			DBG(("%s: failed to map %p + %d bytes: %d\n",
539 			     __FUNCTION__, ptr, size, errno));
540 			return 0;
541 		}
542 	}
543 
544 	return arg.handle;
545 }
546 
547 static bool __kgem_throttle(struct kgem *kgem, bool harder)
548 {
549 	/* Let this be woken up by sigtimer so that we don't block here
550 	 * too much and completely starve X. We will sleep again shortly,
551 	 * and so catch up or detect the hang.
552 	 */
553 	do {
554 		if (ioctl(kgem->fd, DRM_IOCTL_I915_GEM_THROTTLE) == 0) {
555 			kgem->need_throttle = 0;
556 			return false;
557 		}
558 
559 		if (errno == EIO)
560 			return true;
561 	} while (harder);
562 
563 	return false;
564 }
565 
566 static bool __kgem_throttle_retire(struct kgem *kgem, unsigned flags)
567 {
568 	if (flags & CREATE_NO_RETIRE || !kgem->need_retire) {
569 		DBG(("%s: not retiring\n", __FUNCTION__));
570 		return false;
571 	}
572 
573 	if (kgem_retire(kgem))
574 		return true;
575 
576 	if (flags & CREATE_NO_THROTTLE || !kgem->need_throttle) {
577 		DBG(("%s: not throttling\n", __FUNCTION__));
578 		return false;
579 	}
580 
581 	__kgem_throttle(kgem, false);
582 	return kgem_retire(kgem);
583 }
584 
585 static void *__kgem_bo_map__gtt(struct kgem *kgem, struct kgem_bo *bo)
586 {
587 	struct drm_i915_gem_mmap_gtt gtt;
588 	void *ptr;
589 	int err;
590 
591 	DBG(("%s(handle=%d, size=%d)\n", __FUNCTION__,
592 	     bo->handle, bytes(bo)));
593 
594 	if (bo->tiling && !kgem->can_fence)
595 		return NULL;
596 
597 	VG_CLEAR(gtt);
598 retry_gtt:
599 	gtt.handle = bo->handle;
600 	if ((err = do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &gtt))) {
601 		DBG(("%s: failed %d, throttling/cleaning caches\n",
602 		     __FUNCTION__, err));
603 		assert(err != EINVAL);
604 
605 		(void)__kgem_throttle_retire(kgem, 0);
606 		if (kgem_expire_cache(kgem))
607 			goto retry_gtt;
608 
609 		if (kgem_cleanup_cache(kgem))
610 			goto retry_gtt;
611 
612 		ERR(("%s: failed to retrieve GTT offset for handle=%d: %d\n",
613 		     __FUNCTION__, bo->handle, -err));
614 		return NULL;
615 	}
616 
617 retry_mmap:
618 	ptr = mmap(0, bytes(bo), PROT_READ | PROT_WRITE, MAP_SHARED,
619 		   kgem->fd, gtt.offset);
620 	if (ptr == MAP_FAILED) {
621 		err = errno;
622 		DBG(("%s: failed %d, throttling/cleaning caches\n",
623 		     __FUNCTION__, err));
624 		assert(err != EINVAL);
625 
626 		if (__kgem_throttle_retire(kgem, 0))
627 			goto retry_mmap;
628 
629 		if (kgem_cleanup_cache(kgem))
630 			goto retry_mmap;
631 
632 		ERR(("%s: failed to mmap handle=%d, %d bytes, into GTT domain: %d\n",
633 		     __FUNCTION__, bo->handle, bytes(bo), err));
634 		ptr = NULL;
635 	}
636 
637 	/* Cache this mapping to avoid the overhead of an
638 	 * excruciatingly slow GTT pagefault. This is more an
639 	 * issue with compositing managers which need to
640 	 * frequently flush CPU damage to their GPU bo.
641 	 */
642 	return bo->map__gtt = ptr;
643 }
644 
645 static void *__kgem_bo_map__wc(struct kgem *kgem, struct kgem_bo *bo)
646 {
647 	struct local_i915_gem_mmap2 wc;
648 	int err;
649 
650 	DBG(("%s(handle=%d, size=%d)\n", __FUNCTION__,
651 	     bo->handle, bytes(bo)));
652 	assert(kgem->has_wc_mmap);
653 
654 	VG_CLEAR(wc);
655 
656 retry_wc:
657 	wc.handle = bo->handle;
658 	wc.offset = 0;
659 	wc.size = bytes(bo);
660 	wc.flags = I915_MMAP_WC;
661 	if ((err = do_ioctl(kgem->fd, LOCAL_IOCTL_I915_GEM_MMAP_v2, &wc))) {
662 		DBG(("%s: failed %d, throttling/cleaning caches\n",
663 		     __FUNCTION__, err));
664 		assert(err != EINVAL);
665 
666 		if (__kgem_throttle_retire(kgem, 0))
667 			goto retry_wc;
668 
669 		if (kgem_cleanup_cache(kgem))
670 			goto retry_wc;
671 
672 		ERR(("%s: failed to mmap handle=%d, %d bytes, into CPU(wc) domain: %d\n",
673 		     __FUNCTION__, bo->handle, bytes(bo), -err));
674 		return NULL;
675 	}
676 
677 	VG(VALGRIND_MAKE_MEM_DEFINED(wc.addr_ptr, bytes(bo)));
678 
679 	DBG(("%s: caching CPU(wc) vma for %d\n", __FUNCTION__, bo->handle));
680 	return bo->map__wc = (void *)(uintptr_t)wc.addr_ptr;
681 }
682 
683 static void *__kgem_bo_map__cpu(struct kgem *kgem, struct kgem_bo *bo)
684 {
685 	struct local_i915_gem_mmap arg;
686 	int err;
687 
688 	VG_CLEAR(arg);
689 	arg.offset = 0;
690 
691 retry:
692 	arg.handle = bo->handle;
693 	arg.size = bytes(bo);
694 	if ((err = do_ioctl(kgem->fd, LOCAL_IOCTL_I915_GEM_MMAP, &arg))) {
695 		DBG(("%s: failed %d, throttling/cleaning caches\n",
696 		     __FUNCTION__, err));
697 		assert(err != -EINVAL || bo->prime);
698 
699 		if (__kgem_throttle_retire(kgem, 0))
700 			goto retry;
701 
702 		if (kgem_cleanup_cache(kgem))
703 			goto retry;
704 
705 		ERR(("%s: failed to mmap handle=%d (prime? %d), %d bytes, into CPU domain: %d\n",
706 		     __FUNCTION__, bo->handle, bo->prime, bytes(bo), -err));
707 		bo->purged = 1;
708 		return NULL;
709 	}
710 
711 	VG(VALGRIND_MAKE_MEM_DEFINED(arg.addr_ptr, bytes(bo)));
712 
713 	DBG(("%s: caching CPU vma for %d\n", __FUNCTION__, bo->handle));
714 	return bo->map__cpu = (void *)(uintptr_t)arg.addr_ptr;
715 }
716 
717 static int gem_write(int fd, uint32_t handle,
718 		     int offset, int length,
719 		     const void *src)
720 {
721 	struct drm_i915_gem_pwrite pwrite;
722 
723 	DBG(("%s(handle=%d, offset=%d, len=%d)\n", __FUNCTION__,
724 	     handle, offset, length));
725 
726 	VG_CLEAR(pwrite);
727 	pwrite.handle = handle;
728 	pwrite.offset = offset;
729 	pwrite.size = length;
730 	pwrite.data_ptr = (uintptr_t)src;
731 	return do_ioctl(fd, DRM_IOCTL_I915_GEM_PWRITE, &pwrite);
732 }
733 
734 static int gem_write__cachealigned(int fd, uint32_t handle,
735 				   int offset, int length,
736 				   const void *src)
737 {
738 	struct drm_i915_gem_pwrite pwrite;
739 
740 	DBG(("%s(handle=%d, offset=%d, len=%d)\n", __FUNCTION__,
741 	     handle, offset, length));
742 
743 	VG_CLEAR(pwrite);
744 	pwrite.handle = handle;
745 	/* align the transfer to cachelines; fortuitously this is safe! */
746 	if ((offset | length) & 63) {
747 		pwrite.offset = offset & ~63;
748 		pwrite.size = ALIGN(offset+length, 64) - pwrite.offset;
749 		pwrite.data_ptr = (uintptr_t)src + pwrite.offset - offset;
750 	} else {
751 		pwrite.offset = offset;
752 		pwrite.size = length;
753 		pwrite.data_ptr = (uintptr_t)src;
754 	}
755 	return do_ioctl(fd, DRM_IOCTL_I915_GEM_PWRITE, &pwrite);
756 }
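/* Worked example of the rounding above: for offset=100, length=10 the
 * transfer becomes pwrite.offset = 64, pwrite.size = ALIGN(110, 64) - 64 = 64,
 * and data_ptr is rewound by 100 - 64 = 36 bytes so the same source bytes
 * still land at the same buffer offsets.
 */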
757 
758 static int gem_read(int fd, uint32_t handle, const void *dst,
759 		    int offset, int length)
760 {
761 	struct drm_i915_gem_pread pread;
762 	int ret;
763 
764 	DBG(("%s(handle=%d, len=%d)\n", __FUNCTION__,
765 	     handle, length));
766 
767 	VG_CLEAR(pread);
768 	pread.handle = handle;
769 	pread.offset = offset;
770 	pread.size = length;
771 	pread.data_ptr = (uintptr_t)dst;
772 	ret = do_ioctl(fd, DRM_IOCTL_I915_GEM_PREAD, &pread);
773 	if (ret) {
774 		DBG(("%s: failed, errno=%d\n", __FUNCTION__, -ret));
775 		return ret;
776 	}
777 
778 	VG(VALGRIND_MAKE_MEM_DEFINED(dst, length));
779 	return 0;
780 }
781 
782 bool __kgem_busy(struct kgem *kgem, int handle)
783 {
784 	struct drm_i915_gem_busy busy;
785 
786 	VG_CLEAR(busy);
787 	busy.handle = handle;
788 	busy.busy = !kgem->wedged;
789 	(void)do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_BUSY, &busy);
790 	DBG(("%s: handle=%d, busy=%d, wedged=%d\n",
791 	     __FUNCTION__, handle, busy.busy, kgem->wedged));
792 
793 	return busy.busy;
794 }
795 
796 static void kgem_bo_retire(struct kgem *kgem, struct kgem_bo *bo)
797 {
798 	DBG(("%s: retiring bo handle=%d (needed flush? %d), rq? %d [busy?=%d]\n",
799 	     __FUNCTION__, bo->handle, bo->needs_flush, bo->rq != NULL,
800 	     __kgem_busy(kgem, bo->handle)));
801 	assert(bo->exec == NULL);
802 	assert(list_is_empty(&bo->vma));
803 
804 	if (bo->rq)
805 		__kgem_retire_requests_upto(kgem, bo);
806 	ASSERT_IDLE(kgem, bo->handle);
807 	assert_bo_retired(bo);
808 }
809 
810 static void kgem_bo_maybe_retire(struct kgem *kgem, struct kgem_bo *bo)
811 {
812 	DBG(("%s: retiring bo handle=%d (needed flush? %d), rq? %d [busy?=%d]\n",
813 	     __FUNCTION__, bo->handle, bo->needs_flush, bo->rq != NULL,
814 	     __kgem_busy(kgem, bo->handle)));
815 	assert(bo->exec == NULL);
816 	assert(list_is_empty(&bo->vma));
817 
818 	if (bo->rq) {
819 		if (!__kgem_busy(kgem, bo->handle))
820 			__kgem_retire_requests_upto(kgem, bo);
821 	} else {
822 		assert(!bo->needs_flush);
823 		ASSERT_IDLE(kgem, bo->handle);
824 	}
825 }
826 
827 bool kgem_bo_write(struct kgem *kgem, struct kgem_bo *bo,
828 		   const void *data, int length)
829 {
830 	void *ptr;
831 	int err;
832 
833 	assert(bo->refcnt);
834 	assert(bo->proxy == NULL);
835 	ASSERT_IDLE(kgem, bo->handle);
836 
837 	assert(length <= bytes(bo));
838 retry:
839 	ptr = NULL;
840 	if (bo->domain == DOMAIN_CPU || (kgem->has_llc && !bo->scanout)) {
841 		ptr = bo->map__cpu;
842 		if (ptr == NULL)
843 			ptr = __kgem_bo_map__cpu(kgem, bo);
844 	} else if (kgem->has_wc_mmap) {
845 		ptr = bo->map__wc;
846 		if (ptr == NULL)
847 			ptr = __kgem_bo_map__wc(kgem, bo);
848 	}
849 	if (ptr) {
850 		/* XXX unsynchronized? */
851 		memcpy(ptr, data, length);
852 		return true;
853 	}
854 
855 	if ((err = gem_write(kgem->fd, bo->handle, 0, length, data))) {
856 		DBG(("%s: failed %d, throttling/cleaning caches\n",
857 		     __FUNCTION__, err));
858 		assert(err != EINVAL);
859 
860 		(void)__kgem_throttle_retire(kgem, 0);
861 		if (kgem_expire_cache(kgem))
862 			goto retry;
863 
864 		if (kgem_cleanup_cache(kgem))
865 			goto retry;
866 
867 		ERR(("%s: failed to write %d bytes into BO handle=%d: %d\n",
868 		     __FUNCTION__, length, bo->handle, -err));
869 		return false;
870 	}
871 
872 	DBG(("%s: flush=%d, domain=%d\n", __FUNCTION__, bo->flush, bo->domain));
873 	if (bo->exec == NULL)
874 		kgem_bo_maybe_retire(kgem, bo);
875 	bo->domain = DOMAIN_NONE;
876 	bo->gtt_dirty = true;
877 	return true;
878 }
879 
880 static uint32_t gem_create(int fd, int num_pages)
881 {
882 	struct drm_i915_gem_create create;
883 
884 	VG_CLEAR(create);
885 	create.handle = 0;
886 	create.size = PAGE_SIZE * num_pages;
887 	(void)do_ioctl(fd, DRM_IOCTL_I915_GEM_CREATE, &create);
888 
889 	return create.handle;
890 }
891 
892 static void
893 kgem_bo_set_purgeable(struct kgem *kgem, struct kgem_bo *bo)
894 {
895 #if !DBG_NO_MADV
896 	struct drm_i915_gem_madvise madv;
897 
898 	assert(bo->exec == NULL);
899 
900 	VG_CLEAR(madv);
901 	madv.handle = bo->handle;
902 	madv.madv = I915_MADV_DONTNEED;
903 	if (do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_MADVISE, &madv) == 0) {
904 		bo->purged = true;
905 		kgem->need_purge |= !madv.retained && bo->domain != DOMAIN_CPU;
906 	}
907 #endif
908 }
909 
910 static bool
911 kgem_bo_is_retained(struct kgem *kgem, struct kgem_bo *bo)
912 {
913 #if DBG_NO_MADV
914 	return true;
915 #else
916 	struct drm_i915_gem_madvise madv;
917 
918 	if (!bo->purged)
919 		return true;
920 
921 	VG_CLEAR(madv);
922 	madv.handle = bo->handle;
923 	madv.madv = I915_MADV_DONTNEED;
924 	if (do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_MADVISE, &madv) == 0)
925 		return madv.retained;
926 
927 	return false;
928 #endif
929 }
930 
931 static bool
932 kgem_bo_clear_purgeable(struct kgem *kgem, struct kgem_bo *bo)
933 {
934 #if DBG_NO_MADV
935 	return true;
936 #else
937 	struct drm_i915_gem_madvise madv;
938 
939 	assert(bo->purged);
940 
941 	VG_CLEAR(madv);
942 	madv.handle = bo->handle;
943 	madv.madv = I915_MADV_WILLNEED;
944 	if (do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_MADVISE, &madv) == 0) {
945 		bo->purged = !madv.retained;
946 		kgem->need_purge |= !madv.retained && bo->domain != DOMAIN_CPU;
947 		return madv.retained;
948 	}
949 
950 	return false;
951 #endif
952 }
953 
954 static void gem_close(int fd, uint32_t handle)
955 {
956 	struct drm_gem_close close;
957 
958 	VG_CLEAR(close);
959 	close.handle = handle;
960 	(void)do_ioctl(fd, DRM_IOCTL_GEM_CLOSE, &close);
961 }
962 
963 constant inline static unsigned long __fls(unsigned long word)
964 {
965 #if defined(__GNUC__) && (defined(__i386__) || defined(__x86__) || defined(__x86_64__))
966 	asm("bsr %1,%0"
967 	    : "=r" (word)
968 	    : "rm" (word));
969 	return word;
970 #else
971 	unsigned int v = 0;
972 
973 	while (word >>= 1)
974 		v++;
975 
976 	return v;
977 #endif
978 }
979 
980 constant inline static int cache_bucket(int num_pages)
981 {
982 	return __fls(num_pages);
983 }
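/* Bucketing example: __fls() is floor(log2), so a 5-page bo lands in
 * cache_bucket(5) == 2 alongside everything from 4 to 7 pages, while a
 * single-page bo occupies bucket 0.
 */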
984 
985 static struct kgem_bo *__kgem_bo_init(struct kgem_bo *bo,
986 				      int handle, int num_pages)
987 {
988 	DBG(("%s(handle=%d, num_pages=%d)\n", __FUNCTION__, handle, num_pages));
989 
990 	assert(num_pages);
991 	memset(bo, 0, sizeof(*bo));
992 
993 	bo->refcnt = 1;
994 	bo->handle = handle;
995 	bo->target_handle = -1;
996 	num_pages(bo) = num_pages;
997 	bucket(bo) = cache_bucket(num_pages);
998 	bo->reusable = true;
999 	bo->domain = DOMAIN_CPU;
1000 	list_init(&bo->request);
1001 	list_init(&bo->list);
1002 	list_init(&bo->vma);
1003 
1004 	return bo;
1005 }
1006 
1007 static struct kgem_bo *__kgem_bo_alloc(int handle, int num_pages)
1008 {
1009 	struct kgem_bo *bo;
1010 
1011 	if (__kgem_freed_bo) {
1012 		bo = __kgem_freed_bo;
1013 		__kgem_freed_bo = *(struct kgem_bo **)bo;
1014 	} else {
1015 		bo = malloc(sizeof(*bo));
1016 		if (bo == NULL)
1017 			return NULL;
1018 	}
1019 
1020 	return __kgem_bo_init(bo, handle, num_pages);
1021 }
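/* Note on the freelists used above and below: the dead object's own storage
 * is reused as the list link -- the first pointer-sized word of a freed
 * kgem_bo (or kgem_request) holds the next free entry, so no separate list
 * node is needed.
 */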
1022 
1023 static struct kgem_request *__kgem_request_alloc(struct kgem *kgem)
1024 {
1025 	struct kgem_request *rq;
1026 
1027 	if (unlikely(kgem->wedged)) {
1028 		rq = &kgem->static_request;
1029 	} else {
1030 		rq = __kgem_freed_request;
1031 		if (rq) {
1032 			__kgem_freed_request = *(struct kgem_request **)rq;
1033 		} else {
1034 			rq = malloc(sizeof(*rq));
1035 			if (rq == NULL)
1036 				rq = &kgem->static_request;
1037 		}
1038 	}
1039 
1040 	list_init(&rq->buffers);
1041 	rq->bo = NULL;
1042 	rq->ring = 0;
1043 
1044 	return rq;
1045 }
1046 
1047 static void __kgem_request_free(struct kgem_request *rq)
1048 {
1049 	_list_del(&rq->list);
1050 	if (DBG_NO_MALLOC_CACHE) {
1051 		free(rq);
1052 	} else {
1053 		*(struct kgem_request **)rq = __kgem_freed_request;
1054 		__kgem_freed_request = rq;
1055 	}
1056 }
1057 
1058 static struct list *inactive(struct kgem *kgem, int num_pages)
1059 {
1060 	assert(num_pages < MAX_CACHE_SIZE / PAGE_SIZE);
1061 	assert(cache_bucket(num_pages) < NUM_CACHE_BUCKETS);
1062 	return &kgem->inactive[cache_bucket(num_pages)];
1063 }
1064 
1065 static struct list *active(struct kgem *kgem, int num_pages, int tiling)
1066 {
1067 	assert(num_pages < MAX_CACHE_SIZE / PAGE_SIZE);
1068 	assert(cache_bucket(num_pages) < NUM_CACHE_BUCKETS);
1069 	return &kgem->active[cache_bucket(num_pages)][tiling];
1070 }
1071 
1072 static size_t
1073 agp_aperture_size(struct pci_device *dev, unsigned gen)
1074 {
1075 	/* XXX assume that only future chipsets are unknown and follow
1076 	 * the post gen2 PCI layout.
1077 	 */
1078 	return dev->regions[gen < 030 ? 0 : 2].size;
1079 }
1080 
1081 static size_t
1082 total_ram_size(void)
1083 {
1084 #ifdef HAVE_STRUCT_SYSINFO_TOTALRAM
1085 	struct sysinfo info;
1086 	if (sysinfo(&info) == 0)
1087 		return (size_t)info.totalram * info.mem_unit;
1088 #endif
1089 
1090 #ifdef _SC_PHYS_PAGES
1091 	 return (size_t)sysconf(_SC_PHYS_PAGES) * sysconf(_SC_PAGE_SIZE);
1092 #endif
1093 
1094 	return 0;
1095 }
1096 
1097 static unsigned
1098 cpu_cache_size__cpuid4(void)
1099 {
1100 	/* Deterministic Cache Parameters (Function 04h)":
1101 	 *    When EAX is initialized to a value of 4, the CPUID instruction
1102 	 *    returns deterministic cache information in the EAX, EBX, ECX
1103 	 *    and EDX registers.  This function requires ECX be initialized
1104 	 *    with an index which indicates which cache to return information
1105 	 *    about. The OS is expected to call this function (CPUID.4) with
1106 	 *    ECX = 0, 1, 2, until EAX[4:0] == 0, indicating no more caches.
1107 	 *    The order in which the caches are returned is not specified
1108 	 *    and may change at Intel's discretion.
1109 	 *
1110 	 * Calculating the Cache Size in bytes:
1111 	 *          = (Ways +1) * (Partitions +1) * (Line Size +1) * (Sets +1)
1112 	 */
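	/* Worked example with made-up numbers (not a real part): an 8-way
	 * cache with 1 partition, 64-byte lines and 8192 sets reports field
	 * values of 7, 0, 63 and 8191, giving
	 * (7+1) * (0+1) * (63+1) * (8191+1) = 4 MiB.
	 */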
1113 
1114 	 unsigned int eax, ebx, ecx, edx;
1115 	 unsigned int llc_size = 0;
1116 	 int cnt;
1117 
1118 	 if (__get_cpuid_max(BASIC_CPUID, NULL) < 4)
1119 		 return 0;
1120 
1121 	 cnt = 0;
1122 	 do {
1123 		 unsigned associativity, line_partitions, line_size, sets;
1124 
1125 		 __cpuid_count(4, cnt++, eax, ebx, ecx, edx);
1126 
1127 		 if ((eax & 0x1f) == 0)
1128 			 break;
1129 
1130 		 associativity = ((ebx >> 22) & 0x3ff) + 1;
1131 		 line_partitions = ((ebx >> 12) & 0x3ff) + 1;
1132 		 line_size = (ebx & 0xfff) + 1;
1133 		 sets = ecx + 1;
1134 
1135 		 llc_size = associativity * line_partitions * line_size * sets;
1136 	 } while (1);
1137 
1138 	 return llc_size;
1139 }
1140 
1141 static unsigned
1142 cpu_cache_size(void)
1143 {
1144 	unsigned size;
1145 	FILE *file;
1146 
1147 	size = cpu_cache_size__cpuid4();
1148 	if (size)
1149 		return size;
1150 
1151 	file = fopen("/proc/cpuinfo", "r");
1152 	if (file) {
1153 		size_t len = 0;
1154 		char *line = NULL;
1155 		while (getline(&line, &len, file) != -1) {
1156 			int kb;
1157 			if (sscanf(line, "cache size : %d KB", &kb) == 1) {
1158 				/* Paranoid check against gargantuan caches */
1159 				if (kb <= 1<<20)
1160 					size = kb * 1024;
1161 				break;
1162 			}
1163 		}
1164 		free(line);
1165 		fclose(file);
1166 	}
1167 
1168 	if (size == 0)
1169 		size = 64 * 1024;
1170 
1171 	return size;
1172 }
1173 
1174 static int gem_param(struct kgem *kgem, int name)
1175 {
1176 	drm_i915_getparam_t gp;
1177 	int v = -1; /* No param uses the sign bit, reserve it for errors */
1178 
1179 	VG_CLEAR(gp);
1180 	gp.param = name;
1181 	gp.value = &v;
1182 	if (do_ioctl(kgem->fd, DRM_IOCTL_I915_GETPARAM, &gp))
1183 		return -1;
1184 
1185 	VG(VALGRIND_MAKE_MEM_DEFINED(&v, sizeof(v)));
1186 	return v;
1187 }
1188 
1189 static bool test_has_execbuffer2(struct kgem *kgem)
1190 {
1191 	struct drm_i915_gem_execbuffer2 execbuf;
1192 	int ret;
1193 
1194 	memset(&execbuf, 0, sizeof(execbuf));
1195 	execbuf.buffer_count = 1;
1196 
1197 	ret = do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf);
1198 #ifdef __FreeBSD__
1199 	/* XXX FreeBSD returns ENOENT instead of EFAULT. */
1200 	if (ret == -ENOENT)
1201 		return true;
1202 #endif
1203 	return ret == -EFAULT;
1204 }
1205 
1206 static bool test_has_no_reloc(struct kgem *kgem)
1207 {
1208 	if (DBG_NO_FAST_RELOC)
1209 		return false;
1210 
1211 	return gem_param(kgem, LOCAL_I915_PARAM_HAS_NO_RELOC) > 0;
1212 }
1213 
1214 static bool test_has_handle_lut(struct kgem *kgem)
1215 {
1216 	if (DBG_NO_HANDLE_LUT)
1217 		return false;
1218 
1219 	return gem_param(kgem, LOCAL_I915_PARAM_HAS_HANDLE_LUT) > 0;
1220 }
1221 
1222 static bool test_has_wt(struct kgem *kgem)
1223 {
1224 	if (DBG_NO_WT)
1225 		return false;
1226 
1227 	return gem_param(kgem, LOCAL_I915_PARAM_HAS_WT) > 0;
1228 }
1229 
1230 static bool test_has_semaphores_enabled(struct kgem *kgem)
1231 {
1232 	FILE *file;
1233 	bool detected = false;
1234 	int ret;
1235 
1236 	if (DBG_NO_SEMAPHORES)
1237 		return false;
1238 
1239 	ret = gem_param(kgem, LOCAL_I915_PARAM_HAS_SEMAPHORES);
1240 	if (ret != -1)
1241 		return ret > 0;
1242 
1243 	file = fopen("/sys/module/i915/parameters/semaphores", "r");
1244 	if (file) {
1245 		int value;
1246 		if (fscanf(file, "%d", &value) == 1)
1247 			detected = value != 0;
1248 		fclose(file);
1249 	}
1250 
1251 	return detected;
1252 }
1253 
1254 static bool is_hw_supported(struct kgem *kgem,
1255 			    struct pci_device *dev)
1256 {
1257 	if (DBG_NO_HW)
1258 		return false;
1259 
1260 	if (!test_has_execbuffer2(kgem))
1261 		return false;
1262 
1263 	if (kgem->gen == (unsigned)-1) /* unknown chipset, assume future gen */
1264 		return kgem->has_blt;
1265 
1266 	/* Although pre-855gm the GMCH is fubar, it works mostly. So
1267 	 * let the user decide through "NoAccel" whether or not to risk
1268 	 * hw acceleration.
1269 	 */
1270 
1271 	if (kgem->gen == 060 && dev && dev->revision < 8) {
1272 		/* pre-production SNB with dysfunctional BLT */
1273 		return false;
1274 	}
1275 
1276 	if (kgem->gen >= 060) /* Only if the kernel supports the BLT ring */
1277 		return kgem->has_blt;
1278 
1279 	return true;
1280 }
1281 
1282 static bool test_has_relaxed_fencing(struct kgem *kgem)
1283 {
1284 	if (kgem->gen < 040) {
1285 		if (DBG_NO_RELAXED_FENCING)
1286 			return false;
1287 
1288 		return gem_param(kgem, LOCAL_I915_PARAM_HAS_RELAXED_FENCING) > 0;
1289 	} else
1290 		return true;
1291 }
1292 
1293 static bool test_has_coherent_mmap_gtt(struct kgem *kgem)
1294 {
1295 	if (DBG_NO_COHERENT_MMAP_GTT)
1296 		return false;
1297 
1298 	return gem_param(kgem, LOCAL_I915_PARAM_MMAP_GTT_COHERENT) > 0;
1299 }
1300 
1301 static bool test_has_llc(struct kgem *kgem)
1302 {
1303 	int has_llc = -1;
1304 
1305 	if (DBG_NO_LLC)
1306 		return false;
1307 
1308 	has_llc = gem_param(kgem, LOCAL_I915_PARAM_HAS_LLC);
1309 	if (has_llc == -1) {
1310 		DBG(("%s: no kernel/drm support for HAS_LLC, assuming support for LLC based on GPU generation\n", __FUNCTION__));
1311 		has_llc = kgem->gen >= 060;
1312 	}
1313 
1314 	return has_llc;
1315 }
1316 
1317 static bool test_has_wc_mmap(struct kgem *kgem)
1318 {
1319 	struct local_i915_gem_mmap2 wc;
1320 	bool ret;
1321 
1322 	if (DBG_NO_WC_MMAP)
1323 		return false;
1324 
1325 	/* XXX See https://bugs.freedesktop.org/show_bug.cgi?id=90841 */
1326 	if (kgem->gen < 033)
1327 		return false;
1328 
1329 	if (gem_param(kgem, LOCAL_I915_PARAM_MMAP_VERSION) < 1)
1330 		return false;
1331 
1332 	VG_CLEAR(wc);
1333 	wc.handle = gem_create(kgem->fd, 1);
1334 	wc.offset = 0;
1335 	wc.size = 4096;
1336 	wc.flags = I915_MMAP_WC;
1337 	ret = do_ioctl(kgem->fd, LOCAL_IOCTL_I915_GEM_MMAP_v2, &wc) == 0;
1338 	gem_close(kgem->fd, wc.handle);
1339 
1340 	return ret;
1341 }
1342 
1343 static bool test_has_caching(struct kgem *kgem)
1344 {
1345 	uint32_t handle;
1346 	bool ret;
1347 
1348 	if (DBG_NO_CACHE_LEVEL)
1349 		return false;
1350 
1351 	/* Incoherent blt and sampler hangs the GPU */
1352 	if (kgem->gen == 040)
1353 		return false;
1354 
1355 	handle = gem_create(kgem->fd, 1);
1356 	if (handle == 0)
1357 		return false;
1358 
1359 	ret = gem_set_caching(kgem->fd, handle, UNCACHED);
1360 	gem_close(kgem->fd, handle);
1361 	return ret;
1362 }
1363 
1364 static bool test_has_userptr(struct kgem *kgem)
1365 {
1366 	struct local_i915_gem_userptr arg;
1367 	void *ptr;
1368 
1369 	if (DBG_NO_USERPTR)
1370 		return false;
1371 
1372 	/* Incoherent blt and sampler hangs the GPU */
1373 	if (kgem->gen == 040)
1374 		return false;
1375 
1376 	if (posix_memalign(&ptr, PAGE_SIZE, PAGE_SIZE))
1377 		return false;
1378 
1379 	VG_CLEAR(arg);
1380 	arg.user_ptr = (uintptr_t)ptr;
1381 	arg.user_size = PAGE_SIZE;
1382 	arg.flags = I915_USERPTR_UNSYNCHRONIZED;
1383 
1384 	if (DBG_NO_UNSYNCHRONIZED_USERPTR ||
1385 	    do_ioctl(kgem->fd, LOCAL_IOCTL_I915_GEM_USERPTR, &arg)) {
1386 		arg.flags &= ~I915_USERPTR_UNSYNCHRONIZED;
1387 		if (do_ioctl(kgem->fd, LOCAL_IOCTL_I915_GEM_USERPTR, &arg))
1388 			arg.handle = 0;
1389 		/* Leak the userptr bo to keep the mmu_notifier alive */
1390 	} else {
1391 		gem_close(kgem->fd, arg.handle);
1392 		free(ptr);
1393 	}
1394 
1395 	return arg.handle != 0;
1396 }
1397 
1398 static bool test_has_create2(struct kgem *kgem)
1399 {
1400 #if defined(USE_CREATE2)
1401 	struct local_i915_gem_create2 args;
1402 
1403 	if (DBG_NO_CREATE2)
1404 		return false;
1405 
1406 	memset(&args, 0, sizeof(args));
1407 	args.size = PAGE_SIZE;
1408 	args.caching = DISPLAY;
1409 	if (do_ioctl(kgem->fd, LOCAL_IOCTL_I915_GEM_CREATE2, &args) == 0)
1410 		gem_close(kgem->fd, args.handle);
1411 
1412 	return args.handle != 0;
1413 #else
1414 	return false;
1415 #endif
1416 }
1417 
1418 static bool test_can_blt_y(struct kgem *kgem)
1419 {
1420 	struct drm_i915_gem_exec_object2 object;
1421 	uint32_t batch[] = {
1422 #define MI_LOAD_REGISTER_IMM (0x22<<23 | (3-2))
1423 #define BCS_SWCTRL 0x22200
1424 #define BCS_SRC_Y (1 << 0)
1425 #define BCS_DST_Y (1 << 1)
1426 		MI_LOAD_REGISTER_IMM,
1427 		BCS_SWCTRL,
1428 		(BCS_SRC_Y | BCS_DST_Y) << 16 | (BCS_SRC_Y | BCS_DST_Y),
1429 
1430 		MI_LOAD_REGISTER_IMM,
1431 		BCS_SWCTRL,
1432 		(BCS_SRC_Y | BCS_DST_Y) << 16,
1433 
1434 		MI_BATCH_BUFFER_END,
1435 		0,
1436 	};
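	/* Decoding aid for the batch above: MI_LOAD_REGISTER_IMM is opcode
	 * 0x22 in bits 28:23 with a DWord-length field of (3-2), i.e. each
	 * write is three dwords -- header, register offset (BCS_SWCTRL) and
	 * value -- where the upper 16 bits of the value act as a write-enable
	 * mask for the lower 16 (hence the "<< 16" pattern).
	 */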
1437 	int ret;
1438 
1439 	if (DBG_NO_BLT_Y)
1440 		return false;
1441 
1442 	if (kgem->gen < 060)
1443 		return false;
1444 
1445 	memset(&object, 0, sizeof(object));
1446 	object.handle = gem_create(kgem->fd, 1);
1447 
1448 	ret = gem_write(kgem->fd, object.handle, 0, sizeof(batch), batch);
1449 	if (ret == 0) {
1450 		struct drm_i915_gem_execbuffer2 execbuf;
1451 
1452 		memset(&execbuf, 0, sizeof(execbuf));
1453 		execbuf.buffers_ptr = (uintptr_t)&object;
1454 		execbuf.buffer_count = 1;
1455 		execbuf.flags = KGEM_BLT;
1456 
1457 		ret = do_ioctl(kgem->fd,
1458 			       DRM_IOCTL_I915_GEM_EXECBUFFER2,
1459 			       &execbuf);
1460 	}
1461 	gem_close(kgem->fd, object.handle);
1462 
1463 	return ret == 0;
1464 }
1465 
1466 static bool gem_set_tiling(int fd, uint32_t handle, int tiling, int stride)
1467 {
1468 	struct drm_i915_gem_set_tiling set_tiling;
1469 
1470 	if (DBG_NO_TILING)
1471 		return false;
1472 
1473 	VG_CLEAR(set_tiling);
1474 	set_tiling.handle = handle;
1475 	set_tiling.tiling_mode = tiling;
1476 	set_tiling.stride = stride;
1477 
1478 	if (ioctl(fd, DRM_IOCTL_I915_GEM_SET_TILING, &set_tiling) == 0)
1479 		return set_tiling.tiling_mode == tiling;
1480 
1481 	return false;
1482 }
1483 
1484 static bool test_can_scanout_y(struct kgem *kgem)
1485 {
1486 	struct drm_mode_fb_cmd arg;
1487 	bool ret = false;
1488 
1489 	if (DBG_NO_SCANOUT_Y)
1490 		return false;
1491 
1492 	VG_CLEAR(arg);
1493 	arg.width = 32;
1494 	arg.height = 32;
1495 	arg.pitch = 4*32;
1496 	arg.bpp = 32;
1497 	arg.depth = 24;
1498 	arg.handle = gem_create(kgem->fd, 1);
1499 
1500 	if (gem_set_tiling(kgem->fd, arg.handle, I915_TILING_Y, arg.pitch))
1501 		ret = do_ioctl(kgem->fd, DRM_IOCTL_MODE_ADDFB, &arg) == 0;
1502 	if (!ret) {
1503 		struct local_mode_fb_cmd2 {
1504 			uint32_t fb_id;
1505 			uint32_t width, height;
1506 			uint32_t pixel_format;
1507 			uint32_t flags;
1508 
1509 			uint32_t handles[4];
1510 			uint32_t pitches[4];
1511 			uint32_t offsets[4];
1512 			uint64_t modifiers[4];
1513 		} f;
1514 #define LOCAL_IOCTL_MODE_ADDFB2 DRM_IOWR(0xb8, struct local_mode_fb_cmd2)
1515 		memset(&f, 0, sizeof(f));
1516 		f.width = arg.width;
1517 		f.height = arg.height;
1518 		f.handles[0] = arg.handle;
1519 		f.pitches[0] = arg.pitch;
1520 		f.modifiers[0] = (uint64_t)1 << 56 | 2; /* MOD_Y_TILED */
1521 		f.pixel_format = 'X' | 'R' << 8 | '2' << 16 | '4' << 24; /* XRGB8888 */
1522 		f.flags = 1 << 1; /* + modifier */
1523 		if (drmIoctl(kgem->fd, LOCAL_IOCTL_MODE_ADDFB2, &f) == 0) {
1524 			ret = true;
1525 			arg.fb_id = f.fb_id;
1526 		}
1527 	}
1528 	do_ioctl(kgem->fd, DRM_IOCTL_MODE_RMFB, &arg.fb_id);
1529 	gem_close(kgem->fd, arg.handle);
1530 
1531 	return ret;
1532 }
1533 
1534 static bool test_has_dirtyfb(struct kgem *kgem)
1535 {
1536 	struct drm_mode_fb_cmd create;
1537 	bool ret = false;
1538 
1539 	if (DBG_NO_DIRTYFB)
1540 		return false;
1541 
1542 	VG_CLEAR(create);
1543 	create.width = 32;
1544 	create.height = 32;
1545 	create.pitch = 4*32;
1546 	create.bpp = 32;
1547 	create.depth = 24; /* {bpp:32, depth:24} -> x8r8g8b8 */
1548 	create.handle = gem_create(kgem->fd, 1);
1549 	if (create.handle == 0)
1550 		return false;
1551 
1552 	if (drmIoctl(kgem->fd, DRM_IOCTL_MODE_ADDFB, &create) == 0) {
1553 		struct drm_mode_fb_dirty_cmd dirty;
1554 
1555 		memset(&dirty, 0, sizeof(dirty));
1556 		dirty.fb_id = create.fb_id;
1557 		ret = drmIoctl(kgem->fd,
1558 			       DRM_IOCTL_MODE_DIRTYFB,
1559 			       &dirty) == 0;
1560 
1561 		/* XXX There may be multiple levels of DIRTYFB, depending on
1562 		 * whether the kernel thinks tracking dirty regions is
1563 		 * beneficial vs flagging the whole fb as dirty.
1564 		 */
1565 
1566 		drmIoctl(kgem->fd,
1567 			 DRM_IOCTL_MODE_RMFB,
1568 			 &create.fb_id);
1569 	}
1570 	gem_close(kgem->fd, create.handle);
1571 
1572 	return ret;
1573 }
1574 
1575 static bool test_has_secure_batches(struct kgem *kgem)
1576 {
1577 	if (DBG_NO_SECURE_BATCHES)
1578 		return false;
1579 
1580 	return gem_param(kgem, LOCAL_I915_PARAM_HAS_SECURE_BATCHES) > 0;
1581 }
1582 
1583 static bool test_has_pinned_batches(struct kgem *kgem)
1584 {
1585 	if (DBG_NO_PINNED_BATCHES)
1586 		return false;
1587 
1588 	return gem_param(kgem, LOCAL_I915_PARAM_HAS_PINNED_BATCHES) > 0;
1589 }
1590 
1591 static bool kgem_init_pinned_batches(struct kgem *kgem)
1592 {
1593 	int count[2] = { 16, 4 };
1594 	int size[2] = { 1, 4 };
1595 	int ret = 0;
1596 	int n, i;
1597 
1598 	if (unlikely(kgem->wedged))
1599 		return true;
1600 
1601 	for (n = 0; n < ARRAY_SIZE(count); n++) {
1602 		for (i = 0; i < count[n]; i++) {
1603 			struct drm_i915_gem_pin pin;
1604 			struct kgem_bo *bo;
1605 
1606 			VG_CLEAR(pin);
1607 
1608 			pin.handle = gem_create(kgem->fd, size[n]);
1609 			if (pin.handle == 0)
1610 				goto err;
1611 
1612 			DBG(("%s: new handle=%d, num_pages=%d\n",
1613 			     __FUNCTION__, pin.handle, size[n]));
1614 
1615 			bo = __kgem_bo_alloc(pin.handle, size[n]);
1616 			if (bo == NULL) {
1617 				gem_close(kgem->fd, pin.handle);
1618 				goto err;
1619 			}
1620 
1621 			pin.alignment = 0;
1622 			ret = do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_PIN, &pin);
1623 			if (ret) {
1624 				gem_close(kgem->fd, pin.handle);
1625 				free(bo);
1626 				goto err;
1627 			}
1628 			bo->presumed_offset = pin.offset;
1629 			debug_alloc__bo(kgem, bo);
1630 			list_add(&bo->list, &kgem->pinned_batches[n]);
1631 		}
1632 	}
1633 
1634 	return true;
1635 
1636 err:
1637 	for (n = 0; n < ARRAY_SIZE(kgem->pinned_batches); n++) {
1638 		while (!list_is_empty(&kgem->pinned_batches[n])) {
1639 			kgem_bo_destroy(kgem,
1640 					list_first_entry(&kgem->pinned_batches[n],
1641 							 struct kgem_bo, list));
1642 		}
1643 	}
1644 
1645 	/* If we fail to pin some memory for 830gm/845g, we need to disable
1646 	 * acceleration as otherwise the machine will eventually fail. However,
1647 	 * the kernel started arbitrarily rejecting PIN, so hope for the best
1648 	 * if the ioctl no longer works.
1649 	 */
1650 	if (ret != -ENODEV && kgem->gen == 020)
1651 		return false;
1652 
1653 	kgem->has_pinned_batches = false;
1654 
1655 	/* For simplicity populate the lists with a single unpinned bo */
1656 	for (n = 0; n < ARRAY_SIZE(count); n++) {
1657 		struct kgem_bo *bo;
1658 		uint32_t handle;
1659 
1660 		handle = gem_create(kgem->fd, size[n]);
1661 		if (handle == 0)
1662 			return false;
1663 
1664 		bo = __kgem_bo_alloc(handle, size[n]);
1665 		if (bo == NULL) {
1666 			gem_close(kgem->fd, handle);
1667 			return false;
1668 		}
1669 
1670 		debug_alloc__bo(kgem, bo);
1671 		list_add(&bo->list, &kgem->pinned_batches[n]);
1672 	}
1673 	return true;
1674 }
1675 
1676 static void kgem_init_swizzling(struct kgem *kgem)
1677 {
1678 	struct local_i915_gem_get_tiling_v2 {
1679 		uint32_t handle;
1680 		uint32_t tiling_mode;
1681 		uint32_t swizzle_mode;
1682 		uint32_t phys_swizzle_mode;
1683 	} tiling;
1684 #define LOCAL_IOCTL_I915_GEM_GET_TILING DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_GET_TILING, struct local_i915_gem_get_tiling_v2)
1685 
1686 	memset(&tiling, 0, sizeof(tiling));
1687 	tiling.handle = gem_create(kgem->fd, 1);
1688 	if (!tiling.handle)
1689 		return;
1690 
1691 	if (!gem_set_tiling(kgem->fd, tiling.handle, I915_TILING_X, 512))
1692 		goto out;
1693 
1694 	if (do_ioctl(kgem->fd, LOCAL_IOCTL_I915_GEM_GET_TILING, &tiling))
1695 		goto out;
1696 
1697 	DBG(("%s: swizzle_mode=%d, phys_swizzle_mode=%d\n",
1698 	     __FUNCTION__, tiling.swizzle_mode, tiling.phys_swizzle_mode));
1699 
1700 	kgem->can_fence =
1701 		!DBG_NO_TILING &&
1702 		tiling.swizzle_mode != I915_BIT_6_SWIZZLE_UNKNOWN;
1703 
1704 	if (kgem->gen < 050 && tiling.phys_swizzle_mode != tiling.swizzle_mode)
1705 		goto out;
1706 
1707 	if (!DBG_NO_DETILING)
1708 		choose_memcpy_tiled_x(kgem,
1709 				      tiling.swizzle_mode,
1710 				      __to_sna(kgem)->cpu_features);
1711 out:
1712 	gem_close(kgem->fd, tiling.handle);
1713 	DBG(("%s: can fence?=%d\n", __FUNCTION__, kgem->can_fence));
1714 }
1715 
1716 static void kgem_fixup_relocs(struct kgem *kgem, struct kgem_bo *bo, int shrink)
1717 {
1718 	int n;
1719 
1720 	bo->target_handle = kgem->has_handle_lut ? kgem->nexec : bo->handle;
1721 
1722 	assert(kgem->nreloc__self <= 256);
1723 	if (kgem->nreloc__self == 0)
1724 		return;
1725 
1726 	DBG(("%s: fixing up %d%s self-relocations to handle=%p, presumed-offset=%llx\n",
1727 	     __FUNCTION__, kgem->nreloc__self,
1728 	     kgem->nreloc__self == 256 ? "+" : "",
1729 	     bo->handle, (long long)bo->presumed_offset));
1730 	for (n = 0; n < kgem->nreloc__self; n++) {
1731 		int i = kgem->reloc__self[n];
1732 		uint64_t addr;
1733 
1734 		assert(kgem->reloc[i].target_handle == ~0U);
1735 		kgem->reloc[i].target_handle = bo->target_handle;
1736 		kgem->reloc[i].presumed_offset = bo->presumed_offset;
1737 
1738 		if (kgem->reloc[i].read_domains == I915_GEM_DOMAIN_INSTRUCTION) {
1739 			DBG(("%s: moving base of self-reloc[%d:%d] %d -> %d\n",
1740 			     __FUNCTION__, n, i,
1741 			     kgem->reloc[i].delta,
1742 			     kgem->reloc[i].delta - shrink));
1743 
1744 			kgem->reloc[i].delta -= shrink;
1745 		}
1746 		addr = (int)kgem->reloc[i].delta + bo->presumed_offset;
1747 		kgem->batch[kgem->reloc[i].offset/sizeof(uint32_t)] = addr;
1748 		if (kgem->gen >= 0100)
1749 			kgem->batch[kgem->reloc[i].offset/sizeof(uint32_t) + 1] = addr >> 32;
1750 	}
1751 
1752 	if (n == 256) {
1753 		for (n = kgem->reloc__self[255]; n < kgem->nreloc; n++) {
1754 			if (kgem->reloc[n].target_handle == ~0U) {
1755 				uint64_t addr;
1756 
1757 				kgem->reloc[n].target_handle = bo->target_handle;
1758 				kgem->reloc[n].presumed_offset = bo->presumed_offset;
1759 
1760 				if (kgem->reloc[n].read_domains == I915_GEM_DOMAIN_INSTRUCTION) {
1761 					DBG(("%s: moving base of reloc[%d] %d -> %d\n",
1762 					     __FUNCTION__, n,
1763 					     kgem->reloc[n].delta,
1764 					     kgem->reloc[n].delta - shrink));
1765 					kgem->reloc[n].delta -= shrink;
1766 				}
1767 
1768 				addr = (int)kgem->reloc[n].delta + bo->presumed_offset;
1769 				kgem->batch[kgem->reloc[n].offset/sizeof(uint32_t)] = addr;
1770 				if (kgem->gen >= 0100)
1771 					kgem->batch[kgem->reloc[n].offset/sizeof(uint32_t) + 1] = addr >> 32;
1772 			}
1773 		}
1774 	}
1775 
1776 	if (shrink) {
1777 		DBG(("%s: shrinking by %d\n", __FUNCTION__, shrink));
1778 		for (n = 0; n < kgem->nreloc; n++) {
1779 			if (kgem->reloc[n].offset >= sizeof(uint32_t)*kgem->nbatch)
1780 				kgem->reloc[n].offset -= shrink;
1781 		}
1782 	}
1783 }
1784 
1785 static int kgem_bo_wait(struct kgem *kgem, struct kgem_bo *bo)
1786 {
1787 	struct local_i915_gem_wait {
1788 		uint32_t handle;
1789 		uint32_t flags;
1790 		int64_t timeout;
1791 	} wait;
1792 #define LOCAL_I915_GEM_WAIT       0x2c
1793 #define LOCAL_IOCTL_I915_GEM_WAIT         DRM_IOWR(DRM_COMMAND_BASE + LOCAL_I915_GEM_WAIT, struct local_i915_gem_wait)
1794 	int ret;
1795 
1796 	DBG(("%s: waiting for handle=%d\n", __FUNCTION__, bo->handle));
1797 	if (bo->rq == NULL)
1798 		return 0;
1799 
1800 	VG_CLEAR(wait);
1801 	wait.handle = bo->handle;
1802 	wait.flags = 0;
1803 	wait.timeout = -1;
1804 	ret = do_ioctl(kgem->fd, LOCAL_IOCTL_I915_GEM_WAIT, &wait);
1805 	if (ret) {
1806 		struct drm_i915_gem_set_domain set_domain;
1807 
1808 		VG_CLEAR(set_domain);
1809 		set_domain.handle = bo->handle;
1810 		set_domain.read_domains = I915_GEM_DOMAIN_GTT;
1811 		set_domain.write_domain = I915_GEM_DOMAIN_GTT;
1812 		ret = do_ioctl(kgem->fd,
1813 			       DRM_IOCTL_I915_GEM_SET_DOMAIN,
1814 			       &set_domain);
1815 	}
1816 
1817 	if (ret == 0)
1818 		__kgem_retire_requests_upto(kgem, bo);
1819 
1820 	return ret;
1821 }
1822 
1823 static struct kgem_bo *kgem_new_batch(struct kgem *kgem)
1824 {
1825 	struct kgem_bo *last;
1826 	unsigned flags;
1827 
1828 	last = kgem->batch_bo;
1829 	if (last) {
1830 		kgem_fixup_relocs(kgem, last, 0);
1831 		kgem->batch = NULL;
1832 	}
1833 
1834 	if (kgem->batch) {
1835 		assert(last == NULL);
1836 		return NULL;
1837 	}
1838 
1839 	flags = CREATE_CPU_MAP | CREATE_NO_THROTTLE;
1840 	if (!kgem->has_llc)
1841 		flags |= CREATE_UNCACHED;
1842 
1843 restart:
1844 	kgem->batch_bo = kgem_create_linear(kgem,
1845 					    sizeof(uint32_t)*kgem->batch_size,
1846 					    flags);
1847 	if (kgem->batch_bo)
1848 		kgem->batch = kgem_bo_map__cpu(kgem, kgem->batch_bo);
1849 	if (kgem->batch == NULL) {
1850 		int ring = kgem->ring == KGEM_BLT;
1851 		assert(ring < ARRAY_SIZE(kgem->requests));
1852 
1853 		if (kgem->batch_bo) {
1854 			kgem_bo_destroy(kgem, kgem->batch_bo);
1855 			kgem->batch_bo = NULL;
1856 		}
1857 
1858 		if (!list_is_empty(&kgem->requests[ring])) {
1859 			struct kgem_request *rq;
1860 
1861 			rq = list_first_entry(&kgem->requests[ring],
1862 					      struct kgem_request, list);
1863 			assert(rq->ring == ring);
1864 			assert(rq->bo);
1865 			assert(RQ(rq->bo->rq) == rq);
1866 			if (kgem_bo_wait(kgem, rq->bo) == 0)
1867 				goto restart;
1868 		}
1869 
1870 		if (flags & CREATE_NO_THROTTLE) {
1871 			flags &= ~CREATE_NO_THROTTLE;
1872 			if (kgem_cleanup_cache(kgem))
1873 				goto restart;
1874 		}
1875 
1876 		DBG(("%s: unable to map batch bo, mallocing(size=%d)\n",
1877 		     __FUNCTION__, sizeof(uint32_t)*kgem->batch_size));
1878 		if (posix_memalign((void **)&kgem->batch, PAGE_SIZE,
1879 				   ALIGN(sizeof(uint32_t) * kgem->batch_size, PAGE_SIZE))) {
1880 			ERR(("%s: batch allocation failed, disabling acceleration\n", __FUNCTION__));
1881 			__kgem_set_wedged(kgem);
1882 		}
1883 	} else {
1884 		DBG(("%s: allocated and mapped batch handle=%d [size=%d]\n",
1885 		     __FUNCTION__, kgem->batch_bo->handle,
1886 		     sizeof(uint32_t)*kgem->batch_size));
1887 		kgem_bo_sync__cpu(kgem, kgem->batch_bo);
1888 	}
1889 
1890 	DBG(("%s: using last batch handle=%d\n",
1891 	     __FUNCTION__, last ? last->handle : 0));
1892 	return last;
1893 }
1894 
1895 static void
1896 no_retire(struct kgem *kgem)
1897 {
1898 	(void)kgem;
1899 }
1900 
1901 static void
1902 no_expire(struct kgem *kgem)
1903 {
1904 	(void)kgem;
1905 }
1906 
1907 static void
1908 no_context_switch(struct kgem *kgem, int new_mode)
1909 {
1910 	(void)kgem;
1911 	(void)new_mode;
1912 }
1913 
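/* Determine the total GTT size: prefer the per-context GTT_SIZE
 * parameter, fall back to the legacy GET_APERTURE ioctl, and assume
 * 64MiB if neither works. The result is clamped to 3GiB to keep the
 * later arithmetic within 32 bits.
 */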
1914 static uint64_t get_gtt_size(int fd)
1915 {
1916 	struct drm_i915_gem_get_aperture aperture;
1917 	struct local_i915_gem_context_param {
1918 		uint32_t context;
1919 		uint32_t size;
1920 		uint64_t param;
1921 #define LOCAL_CONTEXT_PARAM_BAN_PERIOD	0x1
1922 #define LOCAL_CONTEXT_PARAM_NO_ZEROMAP	0x2
1923 #define LOCAL_CONTEXT_PARAM_GTT_SIZE	0x3
1924 		uint64_t value;
1925 	} p;
1926 #define LOCAL_I915_GEM_CONTEXT_GETPARAM       0x34
1927 #define LOCAL_IOCTL_I915_GEM_CONTEXT_GETPARAM DRM_IOWR (DRM_COMMAND_BASE + LOCAL_I915_GEM_CONTEXT_GETPARAM, struct local_i915_gem_context_param)
1928 
1929 	memset(&aperture, 0, sizeof(aperture));
1930 
1931 	memset(&p, 0, sizeof(p));
1932 	p.param = LOCAL_CONTEXT_PARAM_GTT_SIZE;
1933 	if (drmIoctl(fd, LOCAL_IOCTL_I915_GEM_CONTEXT_GETPARAM, &p) == 0)
1934 		aperture.aper_size = p.value;
1935 	if (aperture.aper_size == 0)
1936 		(void)drmIoctl(fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture);
1937 	if (aperture.aper_size == 0)
1938 		aperture.aper_size = 64*1024*1024;
1939 
1940 	DBG(("%s: aperture size %lld, available now %lld\n",
1941 	     __FUNCTION__,
1942 	     (long long)aperture.aper_size,
1943 	     (long long)aperture.aper_available_size));
1944 
1945 	/* clamp aperture to uint32_t for simplicity */
1946 	if (aperture.aper_size > 0xc0000000)
1947 		aperture.aper_size = 0xc0000000;
1948 
1949 	return aperture.aper_size;
1950 }
1951 
1952 static int get_gtt_type(int fd)
1953 {
1954         struct drm_i915_getparam p;
1955         int val = 0;
1956 
1957         memset(&p, 0, sizeof(p));
1958         p.param = I915_PARAM_HAS_ALIASING_PPGTT;
1959         p.value = &val;
1960 
1961 	drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &p);
1962 	return val;
1963 }
1964 
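/* One-time initialisation of the kernel GEM interface: probe the
 * features offered by this kernel/GPU combination (BLT ring, LLC,
 * caching levels, userptr, relaxed fencing/delta, no-reloc,
 * handle-LUT, secure and pinned batches, ...), choose a batch size
 * suitable for the generation, allocate the first batch and request,
 * and derive the aperture watermarks and maximum object sizes used by
 * the buffer allocator from the GTT size and available RAM.
 */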
1965 void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, unsigned gen)
1966 {
1967 	size_t totalram;
1968 	unsigned half_gpu_max;
1969 	unsigned int i, j;
1970 	uint64_t gtt_size;
1971 
1972 	DBG(("%s: fd=%d, gen=%d\n", __FUNCTION__, fd, gen));
1973 
1974 	kgem->fd = fd;
1975 	kgem->gen = gen;
1976 
1977 	kgem->retire = no_retire;
1978 	kgem->expire = no_expire;
1979 	kgem->context_switch = no_context_switch;
1980 
1981 	list_init(&kgem->requests[0]);
1982 	list_init(&kgem->requests[1]);
1983 	list_init(&kgem->batch_buffers);
1984 	list_init(&kgem->active_buffers);
1985 	list_init(&kgem->flushing);
1986 	list_init(&kgem->large);
1987 	list_init(&kgem->large_inactive);
1988 	list_init(&kgem->snoop);
1989 	list_init(&kgem->scanout);
1990 	for (i = 0; i < ARRAY_SIZE(kgem->pinned_batches); i++)
1991 		list_init(&kgem->pinned_batches[i]);
1992 	for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++)
1993 		list_init(&kgem->inactive[i]);
1994 	for (i = 0; i < ARRAY_SIZE(kgem->active); i++) {
1995 		for (j = 0; j < ARRAY_SIZE(kgem->active[i]); j++)
1996 			list_init(&kgem->active[i][j]);
1997 	}
1998 	for (i = 0; i < ARRAY_SIZE(kgem->vma); i++) {
1999 		for (j = 0; j < ARRAY_SIZE(kgem->vma[i].inactive); j++)
2000 			list_init(&kgem->vma[i].inactive[j]);
2001 	}
2002 	kgem->vma[MAP_GTT].count = -MAX_GTT_VMA_CACHE;
2003 	kgem->vma[MAP_CPU].count = -MAX_CPU_VMA_CACHE;
2004 
2005 	kgem->has_blt = gem_param(kgem, LOCAL_I915_PARAM_HAS_BLT) > 0;
2006 	DBG(("%s: has BLT ring? %d\n", __FUNCTION__,
2007 	     kgem->has_blt));
2008 
2009 	kgem->has_relaxed_delta =
2010 		gem_param(kgem, LOCAL_I915_PARAM_HAS_RELAXED_DELTA) > 0;
2011 	DBG(("%s: has relaxed delta? %d\n", __FUNCTION__,
2012 	     kgem->has_relaxed_delta));
2013 
2014 	kgem->has_relaxed_fencing = test_has_relaxed_fencing(kgem);
2015 	DBG(("%s: has relaxed fencing? %d\n", __FUNCTION__,
2016 	     kgem->has_relaxed_fencing));
2017 
2018 	kgem->has_coherent_mmap_gtt = test_has_coherent_mmap_gtt(kgem);
2019 	DBG(("%s: has coherent writes into GTT maps? %d\n", __FUNCTION__,
2020 	     kgem->has_coherent_mmap_gtt));
2021 
2022 	kgem->has_llc = test_has_llc(kgem);
2023 	DBG(("%s: has shared last-level-cache? %d\n", __FUNCTION__,
2024 	     kgem->has_llc));
2025 
2026 	kgem->has_wt = test_has_wt(kgem);
2027 	DBG(("%s: has write-through caching for scanouts? %d\n", __FUNCTION__,
2028 	     kgem->has_wt));
2029 
2030 	kgem->has_wc_mmap = test_has_wc_mmap(kgem);
2031 	DBG(("%s: has wc-mmapping? %d\n", __FUNCTION__,
2032 	     kgem->has_wc_mmap));
2033 
2034 	kgem->has_caching = test_has_caching(kgem);
2035 	DBG(("%s: has set-cache-level? %d\n", __FUNCTION__,
2036 	     kgem->has_caching));
2037 
2038 	kgem->has_userptr = test_has_userptr(kgem);
2039 	DBG(("%s: has userptr? %d\n", __FUNCTION__,
2040 	     kgem->has_userptr));
2041 
2042 	kgem->has_create2 = test_has_create2(kgem);
2043 	DBG(("%s: has create2? %d\n", __FUNCTION__,
2044 	     kgem->has_create2));
2045 
2046 	kgem->has_no_reloc = test_has_no_reloc(kgem);
2047 	DBG(("%s: has no-reloc? %d\n", __FUNCTION__,
2048 	     kgem->has_no_reloc));
2049 
2050 	kgem->has_handle_lut = test_has_handle_lut(kgem);
2051 	DBG(("%s: has handle-lut? %d\n", __FUNCTION__,
2052 	     kgem->has_handle_lut));
2053 
2054 	kgem->has_semaphores = false;
2055 	if (kgem->has_blt && test_has_semaphores_enabled(kgem))
2056 		kgem->has_semaphores = true;
2057 	DBG(("%s: semaphores enabled? %d\n", __FUNCTION__,
2058 	     kgem->has_semaphores));
2059 
2060 	kgem->can_blt_cpu = gen >= 030;
2061 	DBG(("%s: can blt to cpu? %d\n", __FUNCTION__,
2062 	     kgem->can_blt_cpu));
2063 
2064 	kgem->can_blt_y = test_can_blt_y(kgem);
2065 	DBG(("%s: can blit to Y-tiled surfaces? %d\n", __FUNCTION__,
2066 	     kgem->can_blt_y));
2067 
2068 	kgem->can_render_y = gen != 021 && (gen >> 3) != 4;
2069 	DBG(("%s: can render to Y-tiled surfaces? %d\n", __FUNCTION__,
2070 	     kgem->can_render_y));
2071 
2072 	kgem->can_scanout_y = test_can_scanout_y(kgem);
2073 	DBG(("%s: can scanout Y-tiled surfaces? %d\n", __FUNCTION__,
2074 	     kgem->can_scanout_y));
2075 
2076 	kgem->has_dirtyfb = test_has_dirtyfb(kgem);
2077 	DBG(("%s: has dirty fb? %d\n", __FUNCTION__, kgem->has_dirtyfb));
2078 
2079 	kgem->has_secure_batches = test_has_secure_batches(kgem);
2080 	DBG(("%s: can use privileged batchbuffers? %d\n", __FUNCTION__,
2081 	     kgem->has_secure_batches));
2082 
2083 	kgem->has_pinned_batches = test_has_pinned_batches(kgem);
2084 	DBG(("%s: can use pinned batchbuffers (to avoid CS w/a)? %d\n", __FUNCTION__,
2085 	     kgem->has_pinned_batches));
2086 
2087 	if (!is_hw_supported(kgem, dev)) {
2088 		xf86DrvMsg(kgem_get_screen_index(kgem), X_WARNING,
2089 			   "Detected unsupported/dysfunctional hardware, disabling acceleration.\n");
2090 		__kgem_set_wedged(kgem);
2091 	} else if (__kgem_throttle(kgem, false)) {
2092 		xf86DrvMsg(kgem_get_screen_index(kgem), X_WARNING,
2093 			   "Detected a hung GPU, disabling acceleration.\n");
2094 		__kgem_set_wedged(kgem);
2095 	}
2096 
2097 	kgem->batch_size = UINT16_MAX & ~7;
2098 	if (gen == 020 && !kgem->has_pinned_batches)
2099 		/* Limited to what we can pin */
2100 		kgem->batch_size = 4*1024;
2101 	if (gen == 022)
2102 		/* 865g cannot handle a batch spanning multiple pages */
2103 		kgem->batch_size = PAGE_SIZE / sizeof(uint32_t);
2104 	if (gen >= 070)
2105 		kgem->batch_size = 16*1024;
2106 	if (!kgem->has_relaxed_delta && kgem->batch_size > 4*1024)
2107 		kgem->batch_size = 4*1024;
2108 
2109 	if (!kgem_init_pinned_batches(kgem)) {
2110 		xf86DrvMsg(kgem_get_screen_index(kgem), X_WARNING,
2111 			   "Unable to reserve memory for GPU, disabling acceleration.\n");
2112 		__kgem_set_wedged(kgem);
2113 	}
2114 
2115 	DBG(("%s: maximum batch size? %d\n", __FUNCTION__,
2116 	     kgem->batch_size));
2117 	kgem_new_batch(kgem);
2118 
2119 	kgem->half_cpu_cache_pages = cpu_cache_size() >> 13;
2120 	DBG(("%s: last-level cache size: %d bytes, threshold in pages: %d\n",
2121 	     __FUNCTION__, cpu_cache_size(), kgem->half_cpu_cache_pages));
2122 
2123 	kgem->next_request = __kgem_request_alloc(kgem);
2124 
2125 	DBG(("%s: cpu bo enabled %d: llc? %d, set-cache-level? %d, userptr? %d\n", __FUNCTION__,
2126 	     !DBG_NO_CPU && (kgem->has_llc | kgem->has_userptr | kgem->has_caching),
2127 	     kgem->has_llc, kgem->has_caching, kgem->has_userptr));
2128 
2129 	kgem->has_full_ppgtt = get_gtt_type(fd) > 1;
2130 
2131 	gtt_size = get_gtt_size(fd);
2132 	kgem->aperture_total = gtt_size;
2133 	kgem->aperture_high = gtt_size * 3/4;
2134 	kgem->aperture_low = gtt_size * 1/3;
2135 	if (gen < 033) {
2136 		/* Severe alignment penalties */
2137 		kgem->aperture_high /= 2;
2138 		kgem->aperture_low /= 2;
2139 	}
2140 	DBG(("%s: aperture low=%u [%u], high=%u [%u]\n", __FUNCTION__,
2141 	     kgem->aperture_low, kgem->aperture_low / (1024*1024),
2142 	     kgem->aperture_high, kgem->aperture_high / (1024*1024)));
2143 
2144 	kgem->aperture_mappable = 256 * 1024 * 1024;
2145 	if (dev != NULL)
2146 		kgem->aperture_mappable = agp_aperture_size(dev, gen);
2147 	if (kgem->aperture_mappable == 0 || kgem->aperture_mappable > gtt_size)
2148 		kgem->aperture_mappable = gtt_size;
2149 	DBG(("%s: aperture mappable=%d [%d MiB]\n", __FUNCTION__,
2150 	     kgem->aperture_mappable, kgem->aperture_mappable / (1024*1024)));
2151 
2152 	kgem->aperture_fenceable = MIN(256*1024*1024, kgem->aperture_mappable);
2153 	DBG(("%s: aperture fenceable=%d [%d MiB]\n", __FUNCTION__,
2154 	     kgem->aperture_fenceable, kgem->aperture_fenceable / (1024*1024)));
2155 
2156 	kgem->buffer_size = 64 * 1024;
2157 	while (kgem->buffer_size < kgem->aperture_mappable >> 10)
2158 		kgem->buffer_size *= 2;
2159 	if (kgem->buffer_size >> 12 > kgem->half_cpu_cache_pages)
2160 		kgem->buffer_size = kgem->half_cpu_cache_pages << 12;
2161 	kgem->buffer_size = 1 << __fls(kgem->buffer_size);
2162 	DBG(("%s: buffer size=%d [%d KiB]\n", __FUNCTION__,
2163 	     kgem->buffer_size, kgem->buffer_size / 1024));
2164 	assert(kgem->buffer_size);
2165 
2166 	kgem->max_object_size = 3 * (kgem->aperture_high >> 12) << 10;
2167 	kgem->max_gpu_size = kgem->max_object_size;
2168 	if (!kgem->has_llc && kgem->max_gpu_size > MAX_CACHE_SIZE)
2169 		kgem->max_gpu_size = MAX_CACHE_SIZE;
2170 
2171 	totalram = total_ram_size();
2172 	if (totalram == 0) {
2173 		DBG(("%s: total ram size unknown, assuming maximum of total aperture\n",
2174 		     __FUNCTION__));
2175 		totalram = kgem->aperture_total;
2176 	}
2177 	DBG(("%s: total ram=%lld\n", __FUNCTION__, (long long)totalram));
2178 	if (kgem->max_object_size > totalram / 2)
2179 		kgem->max_object_size = totalram / 2;
2180 	if (kgem->max_gpu_size > totalram / 4)
2181 		kgem->max_gpu_size = totalram / 4;
2182 
2183 	if (kgem->aperture_high > totalram / 2) {
2184 		kgem->aperture_high = totalram / 2;
2185 		kgem->aperture_low = kgem->aperture_high / 4;
2186 		DBG(("%s: reduced aperture watermarks to fit into ram; low=%d [%d], high=%d [%d]\n", __FUNCTION__,
2187 		     kgem->aperture_low, kgem->aperture_low / (1024*1024),
2188 		     kgem->aperture_high, kgem->aperture_high / (1024*1024)));
2189 	}
2190 
2191 	kgem->max_cpu_size = kgem->max_object_size;
2192 
2193 	half_gpu_max = kgem->max_gpu_size / 2;
2194 	kgem->max_copy_tile_size = (MAX_CACHE_SIZE + 1)/2;
2195 	if (kgem->max_copy_tile_size > half_gpu_max)
2196 		kgem->max_copy_tile_size = half_gpu_max;
2197 
2198 	if (kgem->has_llc)
2199 		kgem->max_upload_tile_size = kgem->max_copy_tile_size;
2200 	else
2201 		kgem->max_upload_tile_size = kgem->aperture_fenceable / 4;
2202 	if (kgem->max_upload_tile_size > half_gpu_max)
2203 		kgem->max_upload_tile_size = half_gpu_max;
2204 	if (kgem->max_upload_tile_size > kgem->aperture_high/2)
2205 		kgem->max_upload_tile_size = kgem->aperture_high/2;
2206 	if (kgem->max_upload_tile_size > kgem->aperture_low)
2207 		kgem->max_upload_tile_size = kgem->aperture_low;
2208 	if (kgem->max_upload_tile_size < 16*PAGE_SIZE)
2209 		kgem->max_upload_tile_size = 16*PAGE_SIZE;
2210 
2211 	kgem->large_object_size = MAX_CACHE_SIZE;
2212 	if (kgem->large_object_size > half_gpu_max)
2213 		kgem->large_object_size = half_gpu_max;
2214 	if (kgem->max_copy_tile_size > kgem->aperture_high/2)
2215 		kgem->max_copy_tile_size = kgem->aperture_high/2;
2216 	if (kgem->max_copy_tile_size > kgem->aperture_low)
2217 		kgem->max_copy_tile_size = kgem->aperture_low;
2218 	if (kgem->max_copy_tile_size < 16*PAGE_SIZE)
2219 		kgem->max_copy_tile_size = 16*PAGE_SIZE;
2220 
2221 	if (kgem->has_llc | kgem->has_caching | kgem->has_userptr) {
2222 		if (kgem->large_object_size > kgem->max_cpu_size)
2223 			kgem->large_object_size = kgem->max_cpu_size;
2224 	} else
2225 		kgem->max_cpu_size = 0;
2226 	if (DBG_NO_CPU)
2227 		kgem->max_cpu_size = 0;
2228 
2229 	DBG(("%s: maximum object size=%u\n",
2230 	     __FUNCTION__, kgem->max_object_size));
2231 	DBG(("%s: large object threshold=%u\n",
2232 	     __FUNCTION__, kgem->large_object_size));
2233 	DBG(("%s: max object sizes (gpu=%u, cpu=%u, tile upload=%u, copy=%u)\n",
2234 	     __FUNCTION__,
2235 	     kgem->max_gpu_size, kgem->max_cpu_size,
2236 	     kgem->max_upload_tile_size, kgem->max_copy_tile_size));
2237 
2238 	/* Convert the aperture thresholds to pages */
2239 	kgem->aperture_mappable /= PAGE_SIZE;
2240 	kgem->aperture_fenceable /= PAGE_SIZE;
2241 	kgem->aperture_low /= PAGE_SIZE;
2242 	kgem->aperture_high /= PAGE_SIZE;
2243 	kgem->aperture_total /= PAGE_SIZE;
2244 
2245 	kgem->fence_max = gem_param(kgem, I915_PARAM_NUM_FENCES_AVAIL) - 2;
2246 	if ((int)kgem->fence_max < 0)
2247 		kgem->fence_max = 5; /* minimum safe value for all hw */
2248 	DBG(("%s: max fences=%d\n", __FUNCTION__, kgem->fence_max));
2249 
2250 	kgem->batch_flags_base = 0;
2251 	if (kgem->has_no_reloc)
2252 		kgem->batch_flags_base |= LOCAL_I915_EXEC_NO_RELOC;
2253 	if (kgem->has_handle_lut)
2254 		kgem->batch_flags_base |= LOCAL_I915_EXEC_HANDLE_LUT;
2255 	if (kgem->has_pinned_batches)
2256 		kgem->batch_flags_base |= LOCAL_I915_EXEC_IS_PINNED;
2257 
2258 	kgem_init_swizzling(kgem);
2259 }
2260 
2261 /* XXX hopefully a good approximation */
2262 static uint32_t kgem_get_unique_id(struct kgem *kgem)
2263 {
2264 	uint32_t id;
2265 	id = ++kgem->unique_id;
2266 	if (id == 0)
2267 		id = ++kgem->unique_id;
2268 	return id;
2269 }
2270 
2271 inline static uint32_t kgem_pitch_alignment(struct kgem *kgem, unsigned flags)
2272 {
2273 	if (flags & CREATE_PRIME)
2274 		return 256;
2275 	if (flags & CREATE_SCANOUT)
2276 		return 64;
2277 	if (kgem->gen >= 0100)
2278 		return 32;
2279 	return 8;
2280 }
2281 
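/* Report the tile width (bytes), tile height (rows) and fence tile
 * size for the given tiling mode on this generation: 128x16/2KiB on
 * gen2, 512x8/4KiB for gen3 and X-tiling, 128x32/4KiB for Y-tiling.
 * On generations before 033 the offset must also be aligned to a full
 * tile row, so the effective tile width is widened to the pitch.
 */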
2282 void kgem_get_tile_size(struct kgem *kgem, int tiling, int pitch,
2283 			int *tile_width, int *tile_height, int *tile_size)
2284 {
2285 	if (kgem->gen <= 030) {
2286 		if (tiling) {
2287 			if (kgem->gen < 030) {
2288 				*tile_width = 128;
2289 				*tile_height = 16;
2290 				*tile_size = 2048;
2291 			} else {
2292 				*tile_width = 512;
2293 				*tile_height = 8;
2294 				*tile_size = 4096;
2295 			}
2296 		} else {
2297 			*tile_width = 1;
2298 			*tile_height = 1;
2299 			*tile_size = 1;
2300 		}
2301 	} else switch (tiling) {
2302 	default:
2303 	case I915_TILING_NONE:
2304 		*tile_width = 1;
2305 		*tile_height = 1;
2306 		*tile_size = 1;
2307 		break;
2308 	case I915_TILING_X:
2309 		*tile_width = 512;
2310 		*tile_height = 8;
2311 		*tile_size = 4096;
2312 		break;
2313 	case I915_TILING_Y:
2314 		*tile_width = 128;
2315 		*tile_height = 32;
2316 		*tile_size = 4096;
2317 		break;
2318 	}
2319 
2320 	/* Force offset alignment to tile-row */
2321 	if (tiling && kgem->gen < 033)
2322 		*tile_width = pitch;
2323 }
2324 
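/* Compute the aligned pitch and allocation size for a surface of the
 * given dimensions, bpp and tiling. Pre-gen4 surfaces are also checked
 * against the blitter pitch limits (returning 0 if unusable) and, when
 * relaxed fencing is unavailable, tiled allocations are rounded up to
 * a power-of-two fence region.
 */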
2325 static uint32_t kgem_surface_size(struct kgem *kgem,
2326 				  bool relaxed_fencing,
2327 				  unsigned flags,
2328 				  uint32_t width,
2329 				  uint32_t height,
2330 				  uint32_t bpp,
2331 				  uint32_t tiling,
2332 				  uint32_t *pitch)
2333 {
2334 	uint32_t tile_width, tile_height;
2335 	uint32_t size;
2336 
2337 	assert(width <= MAXSHORT);
2338 	assert(height <= MAXSHORT);
2339 	assert(bpp >= 8);
2340 
2341 	if (kgem->gen <= 030) {
2342 		if (tiling) {
2343 			if (kgem->gen < 030) {
2344 				tile_width = 128;
2345 				tile_height = 16;
2346 			} else {
2347 				tile_width = 512;
2348 				tile_height = 8;
2349 			}
2350 		} else {
2351 			tile_width = 2 * bpp >> 3;
2352 			tile_width = ALIGN(tile_width,
2353 					   kgem_pitch_alignment(kgem, flags));
2354 			tile_height = 1;
2355 		}
2356 	} else switch (tiling) {
2357 	default:
2358 	case I915_TILING_NONE:
2359 		tile_width = 2 * bpp >> 3;
2360 		tile_width = ALIGN(tile_width,
2361 				   kgem_pitch_alignment(kgem, flags));
2362 		tile_height = 1;
2363 		break;
2364 
2365 	case I915_TILING_X:
2366 		tile_width = 512;
2367 		tile_height = 8;
2368 		break;
2369 	case I915_TILING_Y:
2370 		tile_width = 128;
2371 		tile_height = 32;
2372 		break;
2373 	}
2374 	/* XXX align to an even tile row */
2375 	if (!kgem->has_relaxed_fencing)
2376 		tile_height *= 2;
2377 
2378 	*pitch = ALIGN(width * bpp / 8, tile_width);
2379 	height = ALIGN(height, tile_height);
2380 	DBG(("%s: tile_width=%d, tile_height=%d => aligned pitch=%d, height=%d\n",
2381 	     __FUNCTION__, tile_width, tile_height, *pitch, height));
2382 
2383 	if (kgem->gen >= 040)
2384 		return PAGE_ALIGN(*pitch * height);
2385 
2386 	/* If it is too wide for the blitter, don't even bother.  */
2387 	if (tiling != I915_TILING_NONE) {
2388 		if (*pitch > 8192) {
2389 			DBG(("%s: too wide for tiled surface (pitch=%d, limit=%d)\n",
2390 			     __FUNCTION__, *pitch, 8192));
2391 			return 0;
2392 		}
2393 
2394 		for (size = tile_width; size < *pitch; size <<= 1)
2395 			;
2396 		*pitch = size;
2397 	} else {
2398 		if (*pitch >= 32768) {
2399 			DBG(("%s: too wide for linear surface (pitch=%d, limit=%d)\n",
2400 			     __FUNCTION__, *pitch, 32767));
2401 			return 0;
2402 		}
2403 	}
2404 
2405 	size = *pitch * height;
2406 	if (relaxed_fencing || tiling == I915_TILING_NONE)
2407 		return PAGE_ALIGN(size);
2408 
2409 	/* We need to allocate a pot fence region for a tiled buffer. */
2410 	if (kgem->gen < 030)
2411 		tile_width = 512 * 1024;
2412 	else
2413 		tile_width = 1024 * 1024;
2414 	while (tile_width < size)
2415 		tile_width *= 2;
2416 	return tile_width;
2417 }
2418 
2419 bool kgem_check_surface_size(struct kgem *kgem,
2420 			     uint32_t width,
2421 			     uint32_t height,
2422 			     uint32_t bpp,
2423 			     uint32_t tiling,
2424 			     uint32_t pitch,
2425 			     uint32_t size)
2426 {
2427 	uint32_t min_size, min_pitch;
2428 	int tile_width, tile_height, tile_size;
2429 
2430 	DBG(("%s(width=%d, height=%d, bpp=%d, tiling=%d, pitch=%d, size=%d)\n",
2431 	     __FUNCTION__, width, height, bpp, tiling, pitch, size));
2432 
2433 	if (pitch & 3)
2434 		return false;
2435 
2436 	min_size = kgem_surface_size(kgem, kgem->has_relaxed_fencing, 0,
2437 				     width, height, bpp, tiling,
2438 				     &min_pitch);
2439 
2440 	DBG(("%s: min_pitch=%d, min_size=%d\n", __FUNCTION__, min_pitch, min_size));
2441 
2442 	if (size < min_size)
2443 		return false;
2444 
2445 	if (pitch < min_pitch)
2446 		return false;
2447 
2448 	kgem_get_tile_size(kgem, tiling, min_pitch,
2449 			   &tile_width, &tile_height, &tile_size);
2450 
2451 	DBG(("%s: tile_width=%d, tile_size=%d\n", __FUNCTION__, tile_width, tile_size));
2452 	if (pitch & (tile_width - 1))
2453 		return false;
2454 	if (size & (tile_size - 1))
2455 		return false;
2456 
2457 	return true;
2458 }
2459 
2460 static uint32_t kgem_aligned_height(struct kgem *kgem,
2461 				    uint32_t height, uint32_t tiling)
2462 {
2463 	uint32_t tile_height;
2464 
2465 	if (kgem->gen <= 030) {
2466 		tile_height = tiling ? kgem->gen < 030 ? 16 : 8 : 1;
2467 	} else switch (tiling) {
2468 		/* XXX align to an even tile row */
2469 	default:
2470 	case I915_TILING_NONE:
2471 		tile_height = 1;
2472 		break;
2473 	case I915_TILING_X:
2474 		tile_height = 8;
2475 		break;
2476 	case I915_TILING_Y:
2477 		tile_height = 32;
2478 		break;
2479 	}
2480 
2481 	/* XXX align to an even tile row */
2482 	if (!kgem->has_relaxed_fencing)
2483 		tile_height *= 2;
2484 
2485 	return ALIGN(height, tile_height);
2486 }
2487 
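/* Append @bo to the execbuf object array for the current batch,
 * recording its last-known GPU offset and charging its pages against
 * the aperture estimate.
 */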
2488 static struct drm_i915_gem_exec_object2 *
2489 kgem_add_handle(struct kgem *kgem, struct kgem_bo *bo)
2490 {
2491 	struct drm_i915_gem_exec_object2 *exec;
2492 
2493 	DBG(("%s: handle=%d, index=%d\n",
2494 	     __FUNCTION__, bo->handle, kgem->nexec));
2495 
2496 	assert(kgem->nexec < ARRAY_SIZE(kgem->exec));
2497 	bo->target_handle = kgem->has_handle_lut ? kgem->nexec : bo->handle;
2498 	exec = memset(&kgem->exec[kgem->nexec++], 0, sizeof(*exec));
2499 	exec->handle = bo->handle;
2500 	exec->offset = bo->presumed_offset;
2501 
2502 	kgem->aperture += num_pages(bo);
2503 
2504 	return exec;
2505 }
2506 
2507 static void kgem_add_bo(struct kgem *kgem, struct kgem_bo *bo)
2508 {
2509 	assert(bo->refcnt);
2510 	assert(bo->proxy == NULL);
2511 
2512 	bo->exec = kgem_add_handle(kgem, bo);
2513 	bo->rq = MAKE_REQUEST(kgem->next_request, kgem->ring);
2514 
2515 	list_move_tail(&bo->request, &kgem->next_request->buffers);
2516 	if (bo->io && !list_is_empty(&bo->list))
2517 		list_move(&bo->list, &kgem->batch_buffers);
2518 
2519 	/* XXX is it worth working around gcc here? */
2520 	kgem->flush |= bo->flush;
2521 }
2522 
2523 static void kgem_clear_swctrl(struct kgem *kgem)
2524 {
2525 	uint32_t *b;
2526 
2527 	if (kgem->bcs_state == 0)
2528 		return;
2529 
2530 	DBG(("%s: clearing SWCTRL LRI from %x\n",
2531 	     __FUNCTION__, kgem->bcs_state));
2532 
2533 	b = kgem->batch + kgem->nbatch;
2534 	kgem->nbatch += 7;
2535 
2536 	*b++ = MI_FLUSH_DW;
2537 	*b++ = 0;
2538 	*b++ = 0;
2539 	*b++ = 0;
2540 
2541 	*b++ = MI_LOAD_REGISTER_IMM;
2542 	*b++ = BCS_SWCTRL;
2543 	*b++ = (BCS_SRC_Y | BCS_DST_Y) << 16;
2544 
2545 	kgem->bcs_state = 0;
2546 }
2547 
2548 static uint32_t kgem_end_batch(struct kgem *kgem)
2549 {
2550 	kgem_clear_swctrl(kgem);
2551 	kgem->batch[kgem->nbatch++] = MI_BATCH_BUFFER_END;
2552 	if (kgem->nbatch & 1)
2553 		kgem->batch[kgem->nbatch++] = MI_NOOP;
2554 
2555 	return kgem->nbatch;
2556 }
2557 
2558 static void kgem_bo_binding_free(struct kgem *kgem, struct kgem_bo *bo)
2559 {
2560 	struct kgem_bo_binding *b;
2561 
2562 	b = bo->binding.next;
2563 	while (b) {
2564 		struct kgem_bo_binding *next = b->next;
2565 		free(b);
2566 		b = next;
2567 	}
2568 }
2569 
2570 static void kgem_bo_free(struct kgem *kgem, struct kgem_bo *bo)
2571 {
2572 	DBG(("%s: handle=%d, size=%d\n", __FUNCTION__, bo->handle, bytes(bo)));
2573 	assert(bo->refcnt == 0);
2574 	assert(bo->proxy == NULL);
2575 	assert(bo->exec == NULL);
2576 	assert(!bo->snoop || bo->rq == NULL);
2577 
2578 #ifdef DEBUG_MEMORY
2579 	kgem->debug_memory.bo_allocs--;
2580 	kgem->debug_memory.bo_bytes -= bytes(bo);
2581 #endif
2582 
2583 	kgem_bo_binding_free(kgem, bo);
2584 	kgem_bo_rmfb(kgem, bo);
2585 
2586 	if (IS_USER_MAP(bo->map__cpu)) {
2587 		assert(bo->rq == NULL);
2588 		assert(!__kgem_busy(kgem, bo->handle));
2589 		assert(MAP(bo->map__cpu) != bo || bo->io || bo->flush);
2590 		if (!(bo->io || bo->flush)) {
2591 			DBG(("%s: freeing snooped base\n", __FUNCTION__));
2592 			assert(bo != MAP(bo->map__cpu));
2593 			free(MAP(bo->map__cpu));
2594 		}
2595 		bo->map__cpu = NULL;
2596 	}
2597 
2598 	DBG(("%s: releasing %p:%p vma for handle=%d, count=%d\n",
2599 	     __FUNCTION__, bo->map__gtt, bo->map__cpu,
2600 	     bo->handle, list_is_empty(&bo->vma) ? 0 : kgem->vma[bo->map__gtt == NULL && bo->map__wc == NULL].count));
2601 
2602 	if (!list_is_empty(&bo->vma)) {
2603 		_list_del(&bo->vma);
2604 		kgem->vma[bo->map__gtt == NULL && bo->map__wc == NULL].count--;
2605 	}
2606 
2607 	if (bo->map__gtt)
2608 		munmap(bo->map__gtt, bytes(bo));
2609 	if (bo->map__wc) {
2610 		VG(VALGRIND_MAKE_MEM_NOACCESS(bo->map__wc, bytes(bo)));
2611 		munmap(bo->map__wc, bytes(bo));
2612 	}
2613 	if (bo->map__cpu) {
2614 		VG(VALGRIND_MAKE_MEM_NOACCESS(MAP(bo->map__cpu), bytes(bo)));
2615 		munmap(MAP(bo->map__cpu), bytes(bo));
2616 	}
2617 
2618 	_list_del(&bo->list);
2619 	_list_del(&bo->request);
2620 	gem_close(kgem->fd, bo->handle);
2621 
2622 	if (!bo->io && !DBG_NO_MALLOC_CACHE) {
2623 		*(struct kgem_bo **)bo = __kgem_freed_bo;
2624 		__kgem_freed_bo = bo;
2625 	} else
2626 		free(bo);
2627 }
2628 
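/* Place an idle, unreferenced and reusable bo into the inactive cache
 * for its size bucket (or onto the large_inactive list), dropping GTT
 * mappings we no longer want and adding any remaining mappings to the
 * VMA cache so they can be reaped under pressure.
 */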
2629 inline static void kgem_bo_move_to_inactive(struct kgem *kgem,
2630 					    struct kgem_bo *bo)
2631 {
2632 	DBG(("%s: moving handle=%d to inactive\n", __FUNCTION__, bo->handle));
2633 
2634 	assert(bo->refcnt == 0);
2635 	assert(bo->reusable);
2636 	assert(bo->rq == NULL);
2637 	assert(bo->exec == NULL);
2638 	assert(bo->domain != DOMAIN_GPU);
2639 	assert(!bo->proxy);
2640 	assert(!bo->io);
2641 	assert(!bo->scanout);
2642 	assert(!bo->snoop);
2643 	assert(!bo->flush);
2644 	assert(!bo->needs_flush);
2645 	assert(!bo->delta);
2646 	assert(list_is_empty(&bo->vma));
2647 	assert_tiling(kgem, bo);
2648 	assert_caching(kgem, bo);
2649 	ASSERT_IDLE(kgem, bo->handle);
2650 
2651 	if (bucket(bo) >= NUM_CACHE_BUCKETS) {
2652 		if (bo->map__gtt) {
2653 			DBG(("%s: relinquishing large GTT mapping for handle=%d\n",
2654 			     __FUNCTION__, bo->handle));
2655 			munmap(bo->map__gtt, bytes(bo));
2656 			bo->map__gtt = NULL;
2657 		}
2658 
2659 		list_move(&bo->list, &kgem->large_inactive);
2660 	} else {
2661 		assert(bo->flush == false);
2662 		assert(list_is_empty(&bo->vma));
2663 		list_move(&bo->list, &kgem->inactive[bucket(bo)]);
2664 		if (bo->map__gtt && !kgem_bo_can_map(kgem, bo)) {
2665 			DBG(("%s: relinquishing old GTT mapping for handle=%d\n",
2666 			     __FUNCTION__, bo->handle));
2667 			munmap(bo->map__gtt, bytes(bo));
2668 			bo->map__gtt = NULL;
2669 		}
2670 		if (bo->map__gtt || (bo->map__wc && !bo->tiling)) {
2671 			list_add(&bo->vma, &kgem->vma[0].inactive[bucket(bo)]);
2672 			kgem->vma[0].count++;
2673 		}
2674 		if (bo->map__cpu && list_is_empty(&bo->vma)) {
2675 			list_add(&bo->vma, &kgem->vma[1].inactive[bucket(bo)]);
2676 			kgem->vma[1].count++;
2677 		}
2678 	}
2679 
2680 	kgem->need_expire = true;
2681 }
2682 
2683 static struct kgem_bo *kgem_bo_replace_io(struct kgem_bo *bo)
2684 {
2685 	struct kgem_bo *base;
2686 
2687 	if (!bo->io)
2688 		return bo;
2689 
2690 	assert(!bo->snoop);
2691 	assert(!bo->purged);
2692 	assert(!bo->scanout);
2693 	assert(!bo->delta);
2694 
2695 	if (__kgem_freed_bo) {
2696 		base = __kgem_freed_bo;
2697 		__kgem_freed_bo = *(struct kgem_bo **)base;
2698 	} else
2699 		base = malloc(sizeof(*base));
2700 	if (base) {
2701 		DBG(("%s: transferring io handle=%d to bo\n",
2702 		     __FUNCTION__, bo->handle));
2703 		/* transfer the handle to a minimum bo */
2704 		memcpy(base, bo, sizeof(*base));
2705 		base->io = false;
2706 		list_init(&base->list);
2707 		list_replace(&bo->request, &base->request);
2708 		list_replace(&bo->vma, &base->vma);
2709 		free(bo);
2710 		bo = base;
2711 	} else
2712 		bo->reusable = false;
2713 
2714 	return bo;
2715 }
2716 
2717 inline static void kgem_bo_remove_from_inactive(struct kgem *kgem,
2718 						struct kgem_bo *bo)
2719 {
2720 	DBG(("%s: removing handle=%d from inactive\n", __FUNCTION__, bo->handle));
2721 
2722 	list_del(&bo->list);
2723 	assert(bo->rq == NULL);
2724 	assert(bo->exec == NULL);
2725 	assert(!bo->purged);
2726 	if (!list_is_empty(&bo->vma)) {
2727 		assert(bo->map__gtt || bo->map__wc || bo->map__cpu);
2728 		list_del(&bo->vma);
2729 		kgem->vma[bo->map__gtt == NULL && bo->map__wc == NULL].count--;
2730 	}
2731 }
2732 
2733 inline static void kgem_bo_remove_from_active(struct kgem *kgem,
2734 					      struct kgem_bo *bo)
2735 {
2736 	DBG(("%s: removing handle=%d from active\n", __FUNCTION__, bo->handle));
2737 
2738 	list_del(&bo->list);
2739 	assert(bo->rq != NULL);
2740 	if (RQ(bo->rq) == (void *)kgem) {
2741 		assert(bo->exec == NULL);
2742 		list_del(&bo->request);
2743 	}
2744 	assert(list_is_empty(&bo->vma));
2745 }
2746 
2747 static void _kgem_bo_delete_buffer(struct kgem *kgem, struct kgem_bo *bo)
2748 {
2749 	struct kgem_buffer *io = (struct kgem_buffer *)bo->proxy;
2750 
2751 	DBG(("%s: size=%d, offset=%d, parent used=%d\n",
2752 	     __FUNCTION__, bo->size.bytes, bo->delta, io->used));
2753 
2754 	if (ALIGN(bo->delta + bo->size.bytes, UPLOAD_ALIGNMENT) == io->used)
2755 		io->used = bo->delta;
2756 }
2757 
2758 static bool check_scanout_size(struct kgem *kgem,
2759 			       struct kgem_bo *bo,
2760 			       int width, int height)
2761 {
2762 	struct drm_mode_fb_cmd info;
2763 
2764 	assert(bo->scanout);
2765 
2766 	VG_CLEAR(info);
2767 	info.fb_id = bo->delta;
2768 
2769 	if (do_ioctl(kgem->fd, DRM_IOCTL_MODE_GETFB, &info))
2770 		return false;
2771 
2772 	gem_close(kgem->fd, info.handle);
2773 
2774 	if (width > info.width || height > info.height) {
2775 		DBG(("%s: not using scanout %d (%dx%d), want (%dx%d)\n",
2776 		     __FUNCTION__,
2777 		     info.fb_id, info.width, info.height,
2778 		     width, height));
2779 		return false;
2780 	}
2781 
2782 	return true;
2783 }
2784 
2785 static void kgem_bo_move_to_scanout(struct kgem *kgem, struct kgem_bo *bo)
2786 {
2787 	assert(bo->refcnt == 0);
2788 	assert(bo->scanout);
2789 	assert(!bo->flush);
2790 	assert(!bo->snoop);
2791 	assert(!bo->io);
2792 
2793 	if (bo->purged) { /* for stolen fb */
2794 		if (!bo->exec) {
2795 			DBG(("%s: discarding purged scanout - stolen?\n",
2796 			     __FUNCTION__));
2797 			kgem_bo_free(kgem, bo);
2798 		}
2799 		return;
2800 	}
2801 
2802 	DBG(("%s: moving %d [fb %d] to scanout cache, active? %d\n",
2803 	     __FUNCTION__, bo->handle, bo->delta, bo->rq != NULL));
2804 	if (bo->rq)
2805 		list_move_tail(&bo->list, &kgem->scanout);
2806 	else
2807 		list_move(&bo->list, &kgem->scanout);
2808 
2809 	kgem->need_expire = true;
2810 }
2811 
2812 static void kgem_bo_move_to_snoop(struct kgem *kgem, struct kgem_bo *bo)
2813 {
2814 	assert(bo->reusable);
2815 	assert(!bo->scanout);
2816 	assert(!bo->flush);
2817 	assert(!bo->needs_flush);
2818 	assert(bo->refcnt == 0);
2819 	assert(bo->exec == NULL);
2820 	assert(!bo->purged);
2821 	assert(!bo->delta);
2822 
2823 	if (DBG_NO_SNOOP_CACHE) {
2824 		kgem_bo_free(kgem, bo);
2825 		return;
2826 	}
2827 
2828 	if (num_pages(bo) > kgem->max_cpu_size >> 13) {
2829 		DBG(("%s handle=%d discarding large CPU buffer (%d >%d pages)\n",
2830 		     __FUNCTION__, bo->handle, num_pages(bo), kgem->max_cpu_size >> 13));
2831 		kgem_bo_free(kgem, bo);
2832 		return;
2833 	}
2834 
2835 	assert(bo->tiling == I915_TILING_NONE);
2836 	assert(bo->rq == NULL);
2837 
2838 	DBG(("%s: moving %d to snoop cache\n", __FUNCTION__, bo->handle));
2839 	list_add(&bo->list, &kgem->snoop);
2840 	kgem->need_expire = true;
2841 }
2842 
2843 static bool kgem_bo_move_to_cache(struct kgem *kgem, struct kgem_bo *bo)
2844 {
2845 	bool retired = false;
2846 
2847 	DBG(("%s: release handle=%d\n", __FUNCTION__, bo->handle));
2848 
2849 	if (bo->prime) {
2850 		DBG(("%s: discarding imported prime handle=%d\n",
2851 		     __FUNCTION__, bo->handle));
2852 		kgem_bo_free(kgem, bo);
2853 	} else if (bo->snoop) {
2854 		kgem_bo_move_to_snoop(kgem, bo);
2855 	} else if (bo->scanout) {
2856 		kgem_bo_move_to_scanout(kgem, bo);
2857 	} else if ((bo = kgem_bo_replace_io(bo))->reusable) {
2858 		kgem_bo_move_to_inactive(kgem, bo);
2859 		retired = true;
2860 	} else
2861 		kgem_bo_free(kgem, bo);
2862 
2863 	return retired;
2864 }
2865 
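/* Search the snoop cache for a CPU-coherent bo of at least @num_pages,
 * preferring one that is no more than twice the requested size and
 * otherwise falling back to the first oversized candidate. Returns
 * NULL if snooped buffers are not supported or nothing suitable is
 * cached.
 */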
2866 static struct kgem_bo *
2867 search_snoop_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags)
2868 {
2869 	struct kgem_bo *bo, *first = NULL;
2870 
2871 	DBG(("%s: num_pages=%d, flags=%x\n", __FUNCTION__, num_pages, flags));
2872 
2873 	if ((kgem->has_caching | kgem->has_userptr) == 0)
2874 		return NULL;
2875 
2876 	if (list_is_empty(&kgem->snoop)) {
2877 		DBG(("%s: inactive and cache empty\n", __FUNCTION__));
2878 		if (!__kgem_throttle_retire(kgem, flags)) {
2879 			DBG(("%s: nothing retired\n", __FUNCTION__));
2880 			return NULL;
2881 		}
2882 	}
2883 
2884 	list_for_each_entry(bo, &kgem->snoop, list) {
2885 		assert(bo->refcnt == 0);
2886 		assert(bo->snoop);
2887 		assert(!bo->scanout);
2888 		assert(!bo->purged);
2889 		assert(bo->proxy == NULL);
2890 		assert(bo->tiling == I915_TILING_NONE);
2891 		assert(bo->rq == NULL);
2892 		assert(bo->exec == NULL);
2893 
2894 		if (num_pages > num_pages(bo))
2895 			continue;
2896 
2897 		if (num_pages(bo) > 2*num_pages) {
2898 			if (first == NULL)
2899 				first = bo;
2900 			continue;
2901 		}
2902 
2903 		list_del(&bo->list);
2904 		bo->pitch = 0;
2905 		bo->delta = 0;
2906 
2907 		DBG(("  %s: found handle=%d (num_pages=%d) in snoop cache\n",
2908 		     __FUNCTION__, bo->handle, num_pages(bo)));
2909 		return bo;
2910 	}
2911 
2912 	if (first) {
2913 		list_del(&first->list);
2914 		first->pitch = 0;
2915 		first->delta = 0;
2916 
2917 		DBG(("  %s: found handle=%d (num_pages=%d) in snoop cache\n",
2918 		     __FUNCTION__, first->handle, num_pages(first)));
2919 		return first;
2920 	}
2921 
2922 	return NULL;
2923 }
2924 
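/* If @bo is the only object referenced by the current batch, the
 * pending commands can affect nothing else, so simply reset the batch
 * and discard them.
 */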
2925 void kgem_bo_undo(struct kgem *kgem, struct kgem_bo *bo)
2926 {
2927 	if (kgem->nexec != 1 || bo->exec == NULL)
2928 		return;
2929 
2930 	assert(bo);
2931 	DBG(("%s: only handle in batch, discarding last operations for handle=%d\n",
2932 	     __FUNCTION__, bo->handle));
2933 
2934 	assert(bo->exec == &_kgem_dummy_exec || bo->exec == &kgem->exec[0]);
2935 	assert(kgem->exec[0].handle == bo->handle);
2936 	assert(RQ(bo->rq) == kgem->next_request);
2937 
2938 	bo->refcnt++;
2939 	kgem_reset(kgem);
2940 	bo->refcnt--;
2941 
2942 	assert(kgem->nreloc == 0);
2943 	assert(kgem->nexec == 0);
2944 	assert(bo->exec == NULL);
2945 }
2946 
2947 void kgem_bo_pair_undo(struct kgem *kgem, struct kgem_bo *a, struct kgem_bo *b)
2948 {
2949 	if (kgem->nexec > 2)
2950 		return;
2951 
2952 	if (kgem->nexec == 1) {
2953 		if (a)
2954 			kgem_bo_undo(kgem, a);
2955 		if (b)
2956 			kgem_bo_undo(kgem, b);
2957 		return;
2958 	}
2959 
2960 	if (a == NULL || b == NULL)
2961 		return;
2962 	assert(a != b);
2963 	if (a->exec == NULL || b->exec == NULL)
2964 		return;
2965 
2966 	DBG(("%s: only handles in batch, discarding last operations for handle=%d (index=%d) and handle=%d (index=%d)\n",
2967 	     __FUNCTION__,
2968 	     a->handle, a->proxy ? -1 : a->exec - kgem->exec,
2969 	     b->handle, b->proxy ? -1 : b->exec - kgem->exec));
2970 
2971 	assert(a->exec == &_kgem_dummy_exec ||
2972 	       a->exec == &kgem->exec[0] ||
2973 	       a->exec == &kgem->exec[1]);
2974 	assert(a->handle == kgem->exec[0].handle || a->handle == kgem->exec[1].handle);
2975 	assert(RQ(a->rq) == kgem->next_request);
2976 	assert(b->exec == &_kgem_dummy_exec ||
2977 	       b->exec == &kgem->exec[0] ||
2978 	       b->exec == &kgem->exec[1]);
2979 	assert(b->handle == kgem->exec[0].handle || b->handle == kgem->exec[1].handle);
2980 	assert(RQ(b->rq) == kgem->next_request);
2981 
2982 	a->refcnt++;
2983 	b->refcnt++;
2984 	kgem_reset(kgem);
2985 	b->refcnt--;
2986 	a->refcnt--;
2987 
2988 	assert(kgem->nreloc == 0);
2989 	assert(kgem->nexec == 0);
2990 	assert(a->exec == NULL);
2991 	assert(b->exec == NULL);
2992 }
2993 
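/* Final disposal once the last reference is dropped: prime imports are
 * closed immediately, snooped and scanout buffers return to their
 * dedicated caches, buffers still busy on the GPU stay on the active
 * lists, idle reusable buffers move to the inactive cache, and
 * anything else is freed outright.
 */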
2994 static void __kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo)
2995 {
2996 	DBG(("%s: handle=%d, size=%d\n", __FUNCTION__, bo->handle, bytes(bo)));
2997 
2998 	assert(list_is_empty(&bo->list));
2999 	assert(list_is_empty(&bo->vma));
3000 	assert(bo->refcnt == 0);
3001 	assert(bo->proxy == NULL);
3002 	assert(bo->active_scanout == 0);
3003 	assert_tiling(kgem, bo);
3004 
3005 	bo->binding.offset = 0;
3006 
3007 	if (DBG_NO_CACHE)
3008 		goto destroy;
3009 
3010 	if (bo->prime)
3011 		goto destroy;
3012 
3013 	if (bo->snoop && !bo->flush) {
3014 		DBG(("%s: handle=%d is snooped\n", __FUNCTION__, bo->handle));
3015 		assert(bo->reusable);
3016 		assert(list_is_empty(&bo->list));
3017 		if (bo->exec == NULL && bo->rq && !__kgem_busy(kgem, bo->handle))
3018 			__kgem_bo_clear_busy(bo);
3019 		if (bo->rq == NULL)
3020 			kgem_bo_move_to_snoop(kgem, bo);
3021 		return;
3022 	}
3023 	if (!IS_USER_MAP(bo->map__cpu))
3024 		bo->flush = false;
3025 
3026 	if (bo->scanout) {
3027 		kgem_bo_move_to_scanout(kgem, bo);
3028 		return;
3029 	}
3030 
3031 	if (bo->io)
3032 		bo = kgem_bo_replace_io(bo);
3033 	if (!bo->reusable) {
3034 		DBG(("%s: handle=%d, not reusable\n",
3035 		     __FUNCTION__, bo->handle));
3036 		goto destroy;
3037 	}
3038 
3039 	assert(list_is_empty(&bo->vma));
3040 	assert(list_is_empty(&bo->list));
3041 	assert(bo->flush == false);
3042 	assert(bo->snoop == false);
3043 	assert(bo->io == false);
3044 	assert(bo->scanout == false);
3045 	assert_caching(kgem, bo);
3046 
3047 	kgem_bo_undo(kgem, bo);
3048 	assert(bo->refcnt == 0);
3049 
3050 	if (bo->rq && bo->exec == NULL && !__kgem_busy(kgem, bo->handle))
3051 		__kgem_bo_clear_busy(bo);
3052 
3053 	if (bo->rq) {
3054 		struct list *cache;
3055 
3056 		DBG(("%s: handle=%d -> active\n", __FUNCTION__, bo->handle));
3057 		if (bucket(bo) < NUM_CACHE_BUCKETS)
3058 			cache = &kgem->active[bucket(bo)][bo->tiling];
3059 		else
3060 			cache = &kgem->large;
3061 		list_add(&bo->list, cache);
3062 		return;
3063 	}
3064 
3065 	assert(bo->exec == NULL);
3066 	assert(list_is_empty(&bo->request));
3067 
3068 	if (bo->map__cpu == NULL || bucket(bo) >= NUM_CACHE_BUCKETS) {
3069 		if (!kgem->has_llc && bo->domain == DOMAIN_CPU)
3070 			goto destroy;
3071 
3072 		DBG(("%s: handle=%d, purged\n",
3073 		     __FUNCTION__, bo->handle));
3074 	}
3075 
3076 	kgem_bo_move_to_inactive(kgem, bo);
3077 	return;
3078 
3079 destroy:
3080 	if (!bo->exec)
3081 		kgem_bo_free(kgem, bo);
3082 }
3083 
3084 static void kgem_bo_unref(struct kgem *kgem, struct kgem_bo *bo)
3085 {
3086 	assert(bo->refcnt);
3087 	if (--bo->refcnt == 0)
3088 		__kgem_bo_destroy(kgem, bo);
3089 }
3090 
3091 static void kgem_buffer_release(struct kgem *kgem, struct kgem_buffer *bo)
3092 {
3093 	assert(bo->base.io);
3094 	while (!list_is_empty(&bo->base.vma)) {
3095 		struct kgem_bo *cached;
3096 
3097 		cached = list_first_entry(&bo->base.vma, struct kgem_bo, vma);
3098 		assert(cached->proxy == &bo->base);
3099 		assert(cached != &bo->base);
3100 		list_del(&cached->vma);
3101 
3102 		assert(*(struct kgem_bo **)cached->map__gtt == cached);
3103 		*(struct kgem_bo **)cached->map__gtt = NULL;
3104 		cached->map__gtt = NULL;
3105 
3106 		kgem_bo_destroy(kgem, cached);
3107 	}
3108 }
3109 
3110 void kgem_retire__buffers(struct kgem *kgem)
3111 {
3112 	while (!list_is_empty(&kgem->active_buffers)) {
3113 		struct kgem_buffer *bo =
3114 			list_last_entry(&kgem->active_buffers,
3115 					struct kgem_buffer,
3116 					base.list);
3117 
3118 		DBG(("%s: handle=%d, busy? %d [%d]\n",
3119 		     __FUNCTION__, bo->base.handle, bo->base.rq != NULL, bo->base.exec != NULL));
3120 
3121 		assert(bo->base.exec == NULL || RQ(bo->base.rq) == kgem->next_request);
3122 		if (bo->base.rq)
3123 			break;
3124 
3125 		DBG(("%s: releasing upload cache for handle=%d? %d\n",
3126 		     __FUNCTION__, bo->base.handle, !list_is_empty(&bo->base.vma)));
3127 		list_del(&bo->base.list);
3128 		kgem_buffer_release(kgem, bo);
3129 		kgem_bo_unref(kgem, &bo->base);
3130 	}
3131 }
3132 
3133 static bool kgem_retire__flushing(struct kgem *kgem)
3134 {
3135 	struct kgem_bo *bo, *next;
3136 	bool retired = false;
3137 
3138 	list_for_each_entry_safe(bo, next, &kgem->flushing, request) {
3139 		assert(RQ(bo->rq) == (void *)kgem);
3140 		assert(bo->exec == NULL);
3141 
3142 		if (__kgem_busy(kgem, bo->handle))
3143 			break;
3144 
3145 		__kgem_bo_clear_busy(bo);
3146 
3147 		if (bo->refcnt)
3148 			continue;
3149 
3150 		retired |= kgem_bo_move_to_cache(kgem, bo);
3151 	}
3152 #if HAS_DEBUG_FULL
3153 	{
3154 		int count = 0;
3155 		list_for_each_entry(bo, &kgem->flushing, request)
3156 			count++;
3157 		DBG(("%s: %d bo on flushing list, retired? %d\n", __FUNCTION__, count, retired));
3158 	}
3159 #endif
3160 
3161 	kgem->need_retire |= !list_is_empty(&kgem->flushing);
3162 
3163 	return retired;
3164 }
3165 
3166 static bool __kgem_bo_flush(struct kgem *kgem, struct kgem_bo *bo)
3167 {
3168 	struct drm_i915_gem_busy busy;
3169 
3170 	if (!bo->needs_flush)
3171 		return false;
3172 
3173 	bo->needs_flush = false;
3174 
3175 	VG_CLEAR(busy);
3176 	busy.handle = bo->handle;
3177 	busy.busy = !kgem->wedged;
3178 	(void)do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_BUSY, &busy);
3179 	DBG(("%s: handle=%d, busy=%d, wedged=%d\n",
3180 	     __FUNCTION__, bo->handle, busy.busy, kgem->wedged));
3181 
3182 	if (busy.busy == 0)
3183 		return false;
3184 
3185 	DBG(("%s: moving %d to flushing\n",
3186 	     __FUNCTION__, bo->handle));
3187 	list_add(&bo->request, &kgem->flushing);
3188 	bo->rq = MAKE_REQUEST(kgem, !!(busy.busy & ~0x1ffff));
3189 	bo->needs_flush = busy.busy & 0xffff;
3190 	kgem->need_retire = true;
3191 	return true;
3192 }
3193 
3194 static bool __kgem_retire_rq(struct kgem *kgem, struct kgem_request *rq)
3195 {
3196 	bool retired = false;
3197 
3198 	DBG(("%s: request %d complete\n",
3199 	     __FUNCTION__, rq->bo->handle));
3200 	assert(RQ(rq->bo->rq) == rq);
3201 	assert(rq != (struct kgem_request *)kgem);
3202 	assert(rq != &kgem->static_request);
3203 
3204 	if (rq == kgem->fence[rq->ring])
3205 		kgem->fence[rq->ring] = NULL;
3206 
3207 	while (!list_is_empty(&rq->buffers)) {
3208 		struct kgem_bo *bo;
3209 
3210 		bo = list_first_entry(&rq->buffers,
3211 				      struct kgem_bo,
3212 				      request);
3213 
3214 		assert(RQ(bo->rq) == rq);
3215 		assert(bo->exec == NULL);
3216 		assert(bo->domain == DOMAIN_GPU || bo->domain == DOMAIN_NONE);
3217 
3218 		list_del(&bo->request);
3219 
3220 		if (unlikely(__kgem_bo_flush(kgem, bo))) {
3221 			assert(bo != rq->bo);
3222 			DBG(("%s: moved %d to flushing\n",
3223 			     __FUNCTION__, bo->handle));
3224 			continue;
3225 		}
3226 
3227 		bo->domain = DOMAIN_NONE;
3228 		bo->rq = NULL;
3229 		if (bo->refcnt)
3230 			continue;
3231 
3232 		retired |= kgem_bo_move_to_cache(kgem, bo);
3233 	}
3234 
3235 	assert(rq->bo->rq == NULL);
3236 	assert(rq->bo->exec == NULL);
3237 	assert(list_is_empty(&rq->bo->request));
3238 	assert(rq->bo->refcnt > 0);
3239 
3240 	if (--rq->bo->refcnt == 0) {
3241 		kgem_bo_move_to_inactive(kgem, rq->bo);
3242 		retired = true;
3243 	}
3244 
3245 	__kgem_request_free(rq);
3246 	return retired;
3247 }
3248 
3249 static bool kgem_retire__requests_ring(struct kgem *kgem, int ring)
3250 {
3251 	bool retired = false;
3252 
3253 	assert(ring < ARRAY_SIZE(kgem->requests));
3254 	while (!list_is_empty(&kgem->requests[ring])) {
3255 		struct kgem_request *rq;
3256 
3257 		DBG(("%s: retiring ring %d\n", __FUNCTION__, ring));
3258 
3259 		rq = list_first_entry(&kgem->requests[ring],
3260 				      struct kgem_request,
3261 				      list);
3262 		assert(rq->ring == ring);
3263 		assert(rq->bo);
3264 		assert(RQ(rq->bo->rq) == rq);
3265 		if (__kgem_busy(kgem, rq->bo->handle))
3266 			break;
3267 
3268 		retired |= __kgem_retire_rq(kgem, rq);
3269 	}
3270 
3271 #if HAS_DEBUG_FULL
3272 	{
3273 		struct kgem_bo *bo;
3274 		int count = 0;
3275 
3276 		list_for_each_entry(bo, &kgem->requests[ring], request)
3277 			count++;
3278 
3279 		bo = NULL;
3280 		if (!list_is_empty(&kgem->requests[ring]))
3281 			bo = list_first_entry(&kgem->requests[ring],
3282 					      struct kgem_request,
3283 					      list)->bo;
3284 
3285 		DBG(("%s: ring=%d, %d outstanding requests, oldest=%d, retired? %d\n",
3286 		     __FUNCTION__, ring, count, bo ? bo->handle : 0, retired));
3287 	}
3288 #endif
3289 
3290 	return retired;
3291 }
3292 
3293 static bool kgem_retire__requests(struct kgem *kgem)
3294 {
3295 	bool retired = false;
3296 	int n;
3297 
3298 	for (n = 0; n < ARRAY_SIZE(kgem->requests); n++) {
3299 		retired |= kgem_retire__requests_ring(kgem, n);
3300 		kgem->need_retire |= !list_is_empty(&kgem->requests[n]);
3301 	}
3302 
3303 	return retired;
3304 }
3305 
3306 bool kgem_retire(struct kgem *kgem)
3307 {
3308 	bool retired = false;
3309 
3310 	DBG(("%s, need_retire?=%d\n", __FUNCTION__, kgem->need_retire));
3311 
3312 	kgem->need_retire = false;
3313 
3314 	retired |= kgem_retire__flushing(kgem);
3315 	retired |= kgem_retire__requests(kgem);
3316 
3317 	DBG(("%s -- retired=%d, need_retire=%d\n",
3318 	     __FUNCTION__, retired, kgem->need_retire));
3319 
3320 	kgem->retire(kgem);
3321 
3322 	return retired;
3323 }
3324 
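/* Check whether every request on @ring has completed. The cached fence
 * request provides a cheap first test; otherwise the most recent
 * request is probed and, if idle, all outstanding requests on the ring
 * are retired.
 */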
3325 bool __kgem_ring_is_idle(struct kgem *kgem, int ring)
3326 {
3327 	struct kgem_request *rq;
3328 
3329 	assert(ring < ARRAY_SIZE(kgem->requests));
3330 	assert(!list_is_empty(&kgem->requests[ring]));
3331 
3332 	rq = kgem->fence[ring];
3333 	if (rq) {
3334 		struct kgem_request *tmp;
3335 
3336 		if (__kgem_busy(kgem, rq->bo->handle)) {
3337 			DBG(("%s: last fence handle=%d still busy\n",
3338 			     __FUNCTION__, rq->bo->handle));
3339 			return false;
3340 		}
3341 
3342 		do {
3343 			tmp = list_first_entry(&kgem->requests[ring],
3344 					       struct kgem_request,
3345 					       list);
3346 			assert(tmp->ring == ring);
3347 			__kgem_retire_rq(kgem, tmp);
3348 		} while (tmp != rq);
3349 
3350 		assert(kgem->fence[ring] == NULL);
3351 		if (list_is_empty(&kgem->requests[ring]))
3352 			return true;
3353 	}
3354 
3355 	rq = list_last_entry(&kgem->requests[ring],
3356 			     struct kgem_request, list);
3357 	assert(rq->ring == ring);
3358 	assert(rq->bo);
3359 	assert(RQ(rq->bo->rq) == rq);
3360 	if (__kgem_busy(kgem, rq->bo->handle)) {
3361 		DBG(("%s: last requests handle=%d still busy\n",
3362 		     __FUNCTION__, rq->bo->handle));
3363 		kgem->fence[ring] = rq;
3364 		return false;
3365 	}
3366 
3367 	DBG(("%s: ring=%d idle (handle=%d)\n",
3368 	     __FUNCTION__, ring, rq->bo->handle));
3369 
3370 	while (!list_is_empty(&kgem->requests[ring])) {
3371 		rq = list_first_entry(&kgem->requests[ring],
3372 				      struct kgem_request,
3373 				      list);
3374 		assert(rq->ring == ring);
3375 		__kgem_retire_rq(kgem, rq);
3376 	}
3377 
3378 	return true;
3379 }
3380 
3381 bool __kgem_retire_requests_upto(struct kgem *kgem, struct kgem_bo *bo)
3382 {
3383 	struct kgem_request * const rq = RQ(bo->rq), *tmp;
3384 	struct list *requests = &kgem->requests[rq->ring];
3385 
3386 	DBG(("%s(handle=%d, ring=%d)\n", __FUNCTION__, bo->handle, rq->ring));
3387 
3388 	assert(rq != &kgem->static_request);
3389 	if (rq == (struct kgem_request *)kgem) {
3390 		__kgem_bo_clear_busy(bo);
3391 		return false;
3392 	}
3393 
3394 	assert(rq->ring < ARRAY_SIZE(kgem->requests));
3395 	do {
3396 		tmp = list_first_entry(requests, struct kgem_request, list);
3397 		assert(tmp->ring == rq->ring);
3398 		__kgem_retire_rq(kgem, tmp);
3399 	} while (tmp != rq);
3400 
3401 	assert(bo->needs_flush || bo->rq == NULL);
3402 	assert(bo->needs_flush || list_is_empty(&bo->request));
3403 	assert(bo->needs_flush || bo->domain == DOMAIN_NONE);
3404 	return bo->rq;
3405 }
3406 
3407 #if 0
3408 static void kgem_commit__check_reloc(struct kgem *kgem)
3409 {
3410 	struct kgem_request *rq = kgem->next_request;
3411 	struct kgem_bo *bo;
3412 	bool has_64bit = kgem->gen >= 0100;
3413 	int i;
3414 
3415 	for (i = 0; i < kgem->nreloc; i++) {
3416 		list_for_each_entry(bo, &rq->buffers, request) {
3417 			if (bo->target_handle == kgem->reloc[i].target_handle) {
3418 				uint64_t value = 0;
3419 				gem_read(kgem->fd, rq->bo->handle, &value, kgem->reloc[i].offset, has_64bit ? 8 : 4);
3420 				assert(bo->exec->offset == -1 || value == bo->exec->offset + (int)kgem->reloc[i].delta);
3421 				break;
3422 			}
3423 		}
3424 	}
3425 }
3426 #else
3427 #define kgem_commit__check_reloc(kgem)
3428 #endif
3429 
3430 #ifndef NDEBUG
3431 static void kgem_commit__check_buffers(struct kgem *kgem)
3432 {
3433 	struct kgem_buffer *bo;
3434 
3435 	list_for_each_entry(bo, &kgem->active_buffers, base.list)
3436 		assert(bo->base.exec == NULL);
3437 }
3438 #else
3439 #define kgem_commit__check_buffers(kgem)
3440 #endif
3441 
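/* After submitting the batch, record the new presumed offset of every
 * bo in the request and clear its exec state. Normal requests are
 * queued for later retirement; the static request (used when memory
 * for a new request could not be allocated) is synchronised and
 * cleaned up immediately.
 */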
3442 static void kgem_commit(struct kgem *kgem)
3443 {
3444 	struct kgem_request *rq = kgem->next_request;
3445 	struct kgem_bo *bo, *next;
3446 
3447 	kgem_commit__check_reloc(kgem);
3448 
3449 	list_for_each_entry_safe(bo, next, &rq->buffers, request) {
3450 		assert(next->request.prev == &bo->request);
3451 
3452 		DBG(("%s: release handle=%d (proxy? %d), dirty? %d flush? %d, snoop? %d -> offset=%x\n",
3453 		     __FUNCTION__, bo->handle, bo->proxy != NULL,
3454 		     bo->gpu_dirty, bo->needs_flush, bo->snoop,
3455 		     (unsigned)bo->exec->offset));
3456 
3457 		assert(bo->exec);
3458 		assert(bo->proxy == NULL || bo->exec == &_kgem_dummy_exec);
3459 		assert(RQ(bo->rq) == rq || (RQ(bo->proxy->rq) == rq));
3460 
3461 		bo->presumed_offset = bo->exec->offset;
3462 		bo->exec = NULL;
3463 		bo->target_handle = -1;
3464 
3465 		if (!bo->refcnt && !bo->reusable) {
3466 			assert(!bo->snoop);
3467 			assert(!bo->proxy);
3468 			kgem_bo_free(kgem, bo);
3469 			continue;
3470 		}
3471 
3472 		bo->binding.offset = 0;
3473 		bo->domain = DOMAIN_GPU;
3474 		bo->gpu_dirty = false;
3475 		bo->gtt_dirty = false;
3476 
3477 		if (bo->proxy) {
3478 			/* proxies are not used for domain tracking */
3479 			__kgem_bo_clear_busy(bo);
3480 		}
3481 
3482 		kgem->scanout_busy |= bo->scanout && bo->needs_flush;
3483 	}
3484 
3485 	if (rq == &kgem->static_request) {
3486 		struct drm_i915_gem_set_domain set_domain;
3487 
3488 		DBG(("%s: syncing due to allocation failure\n", __FUNCTION__));
3489 
3490 		VG_CLEAR(set_domain);
3491 		set_domain.handle = rq->bo->handle;
3492 		set_domain.read_domains = I915_GEM_DOMAIN_GTT;
3493 		set_domain.write_domain = I915_GEM_DOMAIN_GTT;
3494 		if (do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain)) {
3495 			DBG(("%s: sync: GPU hang detected\n", __FUNCTION__));
3496 			kgem_throttle(kgem);
3497 		}
3498 
3499 		while (!list_is_empty(&rq->buffers)) {
3500 			bo = list_first_entry(&rq->buffers,
3501 					      struct kgem_bo,
3502 					      request);
3503 
3504 			assert(RQ(bo->rq) == rq);
3505 			assert(bo->exec == NULL);
3506 			assert(bo->domain == DOMAIN_GPU);
3507 
3508 			list_del(&bo->request);
3509 			bo->domain = DOMAIN_NONE;
3510 			bo->rq = NULL;
3511 
3512 			if (bo->refcnt == 0)
3513 				_kgem_bo_destroy(kgem, bo);
3514 		}
3515 
3516 		kgem_retire(kgem);
3517 		assert(list_is_empty(&rq->buffers));
3518 
3519 		assert(rq->bo->map__gtt == NULL);
3520 		assert(rq->bo->map__wc == NULL);
3521 		assert(rq->bo->map__cpu == NULL);
3522 		gem_close(kgem->fd, rq->bo->handle);
3523 		kgem_cleanup_cache(kgem);
3524 	} else {
3525 		assert(rq != (struct kgem_request *)kgem);
3526 		assert(rq->ring < ARRAY_SIZE(kgem->requests));
3527 		assert(rq->bo);
3528 		list_add_tail(&rq->list, &kgem->requests[rq->ring]);
3529 		kgem->need_throttle = kgem->need_retire = 1;
3530 
3531 		if (kgem->fence[rq->ring] == NULL &&
3532 		    __kgem_busy(kgem, rq->bo->handle))
3533 			kgem->fence[rq->ring] = rq;
3534 	}
3535 
3536 	kgem->next_request = NULL;
3537 
3538 	kgem_commit__check_buffers(kgem);
3539 }
3540 
3541 static void kgem_close_list(struct kgem *kgem, struct list *head)
3542 {
3543 	while (!list_is_empty(head))
3544 		kgem_bo_free(kgem, list_first_entry(head, struct kgem_bo, list));
3545 }
3546 
3547 static void kgem_close_inactive(struct kgem *kgem)
3548 {
3549 	unsigned int i;
3550 
3551 	for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++) {
3552 		kgem_close_list(kgem, &kgem->inactive[i]);
3553 		assert(list_is_empty(&kgem->inactive[i]));
3554 	}
3555 }
3556 
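/* Flush the upload buffers attached to this batch: partially filled
 * mmapped buffers are kept on the active list for further uploads,
 * while for write buffers backed by a staging copy we try to shrink
 * less-than-half-used allocations into a smaller snooped or linear bo,
 * rewriting the batch relocations to point at the replacement before
 * submission.
 */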
3557 static void kgem_finish_buffers(struct kgem *kgem)
3558 {
3559 	struct kgem_buffer *bo, *next;
3560 
3561 	list_for_each_entry_safe(bo, next, &kgem->batch_buffers, base.list) {
3562 		DBG(("%s: buffer handle=%d, used=%d, exec?=%d, write=%d, mmapped=%s, refcnt=%d\n",
3563 		     __FUNCTION__, bo->base.handle, bo->used, bo->base.exec!=NULL,
3564 		     bo->write, bo->mmapped == MMAPPED_CPU ? "cpu" : bo->mmapped == MMAPPED_GTT ? "gtt" : "no",
3565 		     bo->base.refcnt));
3566 
3567 		assert(next->base.list.prev == &bo->base.list);
3568 		assert(bo->base.io);
3569 		assert(bo->base.refcnt >= 1);
3570 
3571 		if (bo->base.refcnt > 1 && !bo->base.exec) {
3572 			DBG(("%s: skipping unattached handle=%d, used=%d, refcnt=%d\n",
3573 			     __FUNCTION__, bo->base.handle, bo->used, bo->base.refcnt));
3574 			continue;
3575 		}
3576 
3577 		if (!bo->write) {
3578 			assert(bo->base.exec || bo->base.refcnt > 1);
3579 			goto decouple;
3580 		}
3581 
3582 		if (bo->mmapped) {
3583 			uint32_t used;
3584 
3585 			assert(!bo->need_io);
3586 
3587 			used = ALIGN(bo->used, PAGE_SIZE);
3588 			if (!DBG_NO_UPLOAD_ACTIVE &&
3589 			    used + PAGE_SIZE <= bytes(&bo->base) &&
3590 			    (kgem->has_llc || bo->mmapped == MMAPPED_GTT || bo->base.snoop)) {
3591 				DBG(("%s: retaining upload buffer (%d/%d): used=%d, refcnt=%d\n",
3592 				     __FUNCTION__, bo->used, bytes(&bo->base), used, bo->base.refcnt));
3593 				bo->used = used;
3594 				list_move(&bo->base.list,
3595 					  &kgem->active_buffers);
3596 				kgem->need_retire = true;
3597 				continue;
3598 			}
3599 			DBG(("%s: discarding mmapped buffer, used=%d, map type=%d\n",
3600 			     __FUNCTION__, bo->used, bo->mmapped));
3601 			goto decouple;
3602 		}
3603 
3604 		if (!bo->used || !bo->base.exec) {
3605 			/* Unless we replace the handle in the execbuffer,
3606 			 * then this bo will become active. So decouple it
3607 			 * from the buffer list and track it in the normal
3608 			 * manner.
3609 			 */
3610 			goto decouple;
3611 		}
3612 
3613 		assert(bo->need_io);
3614 		assert(bo->base.rq == MAKE_REQUEST(kgem->next_request, kgem->ring));
3615 		assert(bo->base.domain != DOMAIN_GPU);
3616 
3617 		if (bo->base.refcnt == 1 &&
3618 		    bo->base.size.pages.count > 1 &&
3619 		    bo->used < bytes(&bo->base) / 2) {
3620 			struct kgem_bo *shrink;
3621 			unsigned alloc = NUM_PAGES(bo->used);
3622 
3623 			shrink = search_snoop_cache(kgem, alloc,
3624 						    CREATE_INACTIVE | CREATE_NO_RETIRE);
3625 			if (shrink) {
3626 				void *map;
3627 				int n;
3628 
3629 				DBG(("%s: used=%d, shrinking %d to %d, handle %d to %d\n",
3630 				     __FUNCTION__,
3631 				     bo->used, bytes(&bo->base), bytes(shrink),
3632 				     bo->base.handle, shrink->handle));
3633 
3634 				assert(bo->used <= bytes(shrink));
3635 				map = kgem_bo_map__cpu(kgem, shrink);
3636 				if (map) {
3637 					memcpy(map, bo->mem, bo->used);
3638 
3639 					shrink->target_handle =
3640 						kgem->has_handle_lut ? bo->base.target_handle : shrink->handle;
3641 					for (n = 0; n < kgem->nreloc; n++) {
3642 						if (kgem->reloc[n].target_handle == bo->base.target_handle) {
3643 							uint64_t addr = (int)kgem->reloc[n].delta + shrink->presumed_offset;
3644 							kgem->batch[kgem->reloc[n].offset/sizeof(kgem->batch[0])] = addr;
3645 							if (kgem->gen >= 0100)
3646 								kgem->batch[kgem->reloc[n].offset/sizeof(kgem->batch[0]) + 1] = addr >> 32;
3647 
3648 							kgem->reloc[n].target_handle = shrink->target_handle;
3649 							kgem->reloc[n].presumed_offset = shrink->presumed_offset;
3650 						}
3651 					}
3652 
3653 					bo->base.exec->handle = shrink->handle;
3654 					bo->base.exec->offset = shrink->presumed_offset;
3655 					shrink->exec = bo->base.exec;
3656 					shrink->rq = bo->base.rq;
3657 					list_replace(&bo->base.request,
3658 						     &shrink->request);
3659 					list_init(&bo->base.request);
3660 					shrink->needs_flush = bo->base.gpu_dirty;
3661 
3662 					bo->base.exec = NULL;
3663 					bo->base.rq = NULL;
3664 					bo->base.gpu_dirty = false;
3665 					bo->base.needs_flush = false;
3666 					bo->used = 0;
3667 
3668 					goto decouple;
3669 				}
3670 
3671 				__kgem_bo_destroy(kgem, shrink);
3672 			}
3673 
3674 			shrink = search_linear_cache(kgem, alloc,
3675 						     CREATE_INACTIVE | CREATE_NO_RETIRE);
3676 			if (shrink) {
3677 				int n;
3678 
3679 				DBG(("%s: used=%d, shrinking %d to %d, handle %d to %d\n",
3680 				     __FUNCTION__,
3681 				     bo->used, bytes(&bo->base), bytes(shrink),
3682 				     bo->base.handle, shrink->handle));
3683 
3684 				assert(bo->used <= bytes(shrink));
3685 				if (gem_write__cachealigned(kgem->fd, shrink->handle,
3686 							    0, bo->used, bo->mem) == 0) {
3687 					shrink->target_handle =
3688 						kgem->has_handle_lut ? bo->base.target_handle : shrink->handle;
3689 					for (n = 0; n < kgem->nreloc; n++) {
3690 						if (kgem->reloc[n].target_handle == bo->base.target_handle) {
3691 							uint64_t addr = (int)kgem->reloc[n].delta + shrink->presumed_offset;
3692 							kgem->batch[kgem->reloc[n].offset/sizeof(kgem->batch[0])] = addr;
3693 							if (kgem->gen >= 0100)
3694 								kgem->batch[kgem->reloc[n].offset/sizeof(kgem->batch[0]) + 1] = addr >> 32;
3695 
3696 							kgem->reloc[n].target_handle = shrink->target_handle;
3697 							kgem->reloc[n].presumed_offset = shrink->presumed_offset;
3698 						}
3699 					}
3700 
3701 					bo->base.exec->handle = shrink->handle;
3702 					bo->base.exec->offset = shrink->presumed_offset;
3703 					shrink->exec = bo->base.exec;
3704 					shrink->rq = bo->base.rq;
3705 					list_replace(&bo->base.request,
3706 						     &shrink->request);
3707 					list_init(&bo->base.request);
3708 					shrink->needs_flush = bo->base.gpu_dirty;
3709 
3710 					bo->base.exec = NULL;
3711 					bo->base.rq = NULL;
3712 					bo->base.gpu_dirty = false;
3713 					bo->base.needs_flush = false;
3714 					bo->used = 0;
3715 
3716 					goto decouple;
3717 				}
3718 
3719 				__kgem_bo_destroy(kgem, shrink);
3720 			}
3721 		}
3722 
3723 		DBG(("%s: handle=%d, uploading %d/%d\n",
3724 		     __FUNCTION__, bo->base.handle, bo->used, bytes(&bo->base)));
3725 		ASSERT_IDLE(kgem, bo->base.handle);
3726 		assert(bo->used <= bytes(&bo->base));
3727 		gem_write__cachealigned(kgem->fd, bo->base.handle,
3728 					0, bo->used, bo->mem);
3729 		bo->need_io = 0;
3730 
3731 decouple:
3732 		DBG(("%s: releasing handle=%d\n",
3733 		     __FUNCTION__, bo->base.handle));
3734 		list_del(&bo->base.list);
3735 		kgem_bo_unref(kgem, &bo->base);
3736 	}
3737 }
3738 
3739 static void kgem_cleanup(struct kgem *kgem)
3740 {
3741 	int n;
3742 
3743 	for (n = 0; n < ARRAY_SIZE(kgem->requests); n++) {
3744 		while (!list_is_empty(&kgem->requests[n])) {
3745 			struct kgem_request *rq;
3746 
3747 			rq = list_first_entry(&kgem->requests[n],
3748 					      struct kgem_request,
3749 					      list);
3750 			assert(rq->ring == n);
3751 			while (!list_is_empty(&rq->buffers)) {
3752 				struct kgem_bo *bo;
3753 
3754 				bo = list_first_entry(&rq->buffers,
3755 						      struct kgem_bo,
3756 						      request);
3757 
3758 				bo->exec = NULL;
3759 				bo->gpu_dirty = false;
3760 				__kgem_bo_clear_busy(bo);
3761 				if (bo->refcnt == 0)
3762 					kgem_bo_free(kgem, bo);
3763 			}
3764 
3765 			if (--rq->bo->refcnt == 0)
3766 				kgem_bo_free(kgem, rq->bo);
3767 
3768 			__kgem_request_free(rq);
3769 		}
3770 	}
3771 
3772 	kgem_close_inactive(kgem);
3773 }
3774 
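/* Copy the accumulated batch into its bo. The command stream occupies the
 * start of the buffer while any surface/state data grows down from the end
 * (kgem->surface), so the two halves are written with a single memcpy into a
 * mapping, a single pwrite, or two separate pwrites, depending on whether
 * the pages overlap. On failure the caches are purged and the write retried.
 */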
3775 static int
3776 kgem_batch_write(struct kgem *kgem,
3777 		 struct kgem_bo *bo,
3778 		 uint32_t size)
3779 {
3780 	char *ptr;
3781 	int ret;
3782 
3783 	assert(bo->exec == NULL);
3784 	assert(bo->rq == NULL);
3785 	assert(!__kgem_busy(kgem, bo->handle));
3786 
3787 #if DBG_NO_EXEC
3788 	{
3789 		uint32_t batch[] = { MI_BATCH_BUFFER_END, 0};
3790 		return gem_write(kgem->fd, bo->handle, 0, sizeof(batch), batch);
3791 	}
3792 #endif
3793 
3794 	assert(!bo->scanout);
3795 retry:
3796 	ptr = NULL;
3797 	if (bo->domain == DOMAIN_CPU || kgem->has_llc) {
3798 		ptr = bo->map__cpu;
3799 		if (ptr == NULL)
3800 			ptr = __kgem_bo_map__cpu(kgem, bo);
3801 	} else if (kgem->has_wc_mmap) {
3802 		ptr = bo->map__wc;
3803 		if (ptr == NULL)
3804 			ptr = __kgem_bo_map__wc(kgem, bo);
3805 	}
3806 	if (ptr) {
3807 		memcpy(ptr, kgem->batch, sizeof(uint32_t)*kgem->nbatch);
3808 		if (kgem->surface != kgem->batch_size) {
3809 			ret = PAGE_ALIGN(sizeof(uint32_t) * kgem->batch_size);
3810 			ret -= sizeof(uint32_t) * kgem->surface;
3811 			ptr += size - ret;
3812 			memcpy(ptr, kgem->batch + kgem->surface,
3813 			       (kgem->batch_size - kgem->surface)*sizeof(uint32_t));
3814 		}
3815 		return 0;
3816 	}
3817 
3818 	/* If there is no surface data, just upload the batch */
3819 	if (kgem->surface == kgem->batch_size) {
3820 		if ((ret = gem_write__cachealigned(kgem->fd, bo->handle,
3821 						   0, sizeof(uint32_t)*kgem->nbatch,
3822 						   kgem->batch)) == 0)
3823 			return 0;
3824 
3825 		goto expire;
3826 	}
3827 
3828 	/* Are the batch pages conjoint with the surface pages? */
3829 	if (kgem->surface < kgem->nbatch + PAGE_SIZE/sizeof(uint32_t)) {
3830 		assert(size == PAGE_ALIGN(kgem->batch_size*sizeof(uint32_t)));
3831 		if ((ret = gem_write__cachealigned(kgem->fd, bo->handle,
3832 						   0, kgem->batch_size*sizeof(uint32_t),
3833 						   kgem->batch)) == 0)
3834 			return 0;
3835 
3836 		goto expire;
3837 	}
3838 
3839 	/* Disjoint surface/batch, upload separately */
3840 	if ((ret = gem_write__cachealigned(kgem->fd, bo->handle,
3841 					   0, sizeof(uint32_t)*kgem->nbatch,
3842 					   kgem->batch)))
3843 		goto expire;
3844 
3845 	ret = PAGE_ALIGN(sizeof(uint32_t) * kgem->batch_size);
3846 	ret -= sizeof(uint32_t) * kgem->surface;
3847 	assert(size-ret >= kgem->nbatch*sizeof(uint32_t));
3848 	if (gem_write(kgem->fd, bo->handle,
3849 		      size - ret, (kgem->batch_size - kgem->surface)*sizeof(uint32_t),
3850 		      kgem->batch + kgem->surface))
3851 		goto expire;
3852 
3853 	return 0;
3854 
3855 expire:
3856 	assert(ret != EINVAL);
3857 
3858 	(void)__kgem_throttle_retire(kgem, 0);
3859 	if (kgem_expire_cache(kgem))
3860 		goto retry;
3861 
3862 	if (kgem_cleanup_cache(kgem))
3863 		goto retry;
3864 
3865 	ERR(("%s: failed to write batch (handle=%d): %d\n",
3866 	     __FUNCTION__, bo->handle, -ret));
3867 	return ret;
3868 }
3869 
3870 void kgem_reset(struct kgem *kgem)
3871 {
3872 	if (kgem->next_request) {
3873 		struct kgem_request *rq = kgem->next_request;
3874 
3875 		while (!list_is_empty(&rq->buffers)) {
3876 			struct kgem_bo *bo =
3877 				list_first_entry(&rq->buffers,
3878 						 struct kgem_bo,
3879 						 request);
3880 			list_del(&bo->request);
3881 
3882 			assert(RQ(bo->rq) == rq);
3883 
3884 			bo->binding.offset = 0;
3885 			bo->exec = NULL;
3886 			bo->target_handle = -1;
3887 			bo->gpu_dirty = false;
3888 
3889 			if (bo->needs_flush && __kgem_busy(kgem, bo->handle)) {
3890 				assert(bo->domain == DOMAIN_GPU || bo->domain == DOMAIN_NONE);
3891 				list_add(&bo->request, &kgem->flushing);
3892 				bo->rq = (void *)kgem;
3893 				kgem->need_retire = true;
3894 			} else
3895 				__kgem_bo_clear_busy(bo);
3896 
3897 			if (bo->refcnt || bo->rq)
3898 				continue;
3899 
3900 			kgem_bo_move_to_cache(kgem, bo);
3901 		}
3902 
3903 		if (rq != &kgem->static_request) {
3904 			list_init(&rq->list);
3905 			__kgem_request_free(rq);
3906 		}
3907 	}
3908 
3909 	kgem->nfence = 0;
3910 	kgem->nexec = 0;
3911 	kgem->nreloc = 0;
3912 	kgem->nreloc__self = 0;
3913 	kgem->aperture = 0;
3914 	kgem->aperture_fenced = 0;
3915 	kgem->aperture_max_fence = 0;
3916 	kgem->nbatch = 0;
3917 	kgem->surface = kgem->batch_size;
3918 	kgem->mode = KGEM_NONE;
3919 	kgem->needs_semaphore = false;
3920 	kgem->needs_reservation = false;
3921 	kgem->flush = 0;
3922 	kgem->batch_flags = kgem->batch_flags_base;
3923 	assert(kgem->batch);
3924 
3925 	kgem->next_request = __kgem_request_alloc(kgem);
3926 
3927 	kgem_sna_reset(kgem);
3928 }
3929 
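/* Pack a batch that carries trailing surface state into the smallest
 * multiple of 1024 dwords holding both halves; *shrink receives the number
 * of bytes the surface data must be moved down so the relocations can be
 * fixed up to match (see kgem_fixup_relocs). Purely illustrative numbers:
 * with batch_size=16384 dwords, nbatch=200 and surface=15000, n=16384,
 * size=ALIGN(1584, 1024)=2048 dwords (8192 bytes) and
 * *shrink=(16384-2048)*4=57344 bytes.
 */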
3930 static int compact_batch_surface(struct kgem *kgem, int *shrink)
3931 {
3932 	int size, n;
3933 
3934 	if (!kgem->has_relaxed_delta)
3935 		return kgem->batch_size * sizeof(uint32_t);
3936 
3937 	/* See if we can pack the contents into one or two pages */
3938 	n = ALIGN(kgem->batch_size, 1024);
3939 	size = n - kgem->surface + kgem->nbatch;
3940 	size = ALIGN(size, 1024);
3941 
3942 	*shrink = (n - size) * sizeof(uint32_t);
3943 	return size * sizeof(uint32_t);
3944 }
3945 
3946 static struct kgem_bo *first_available(struct kgem *kgem, struct list *list)
3947 {
3948 	struct kgem_bo *bo;
3949 
3950 	list_for_each_entry(bo, list, list) {
3951 		assert(bo->refcnt > 0);
3952 
3953 		if (bo->rq) {
3954 			assert(RQ(bo->rq)->bo == bo);
3955 			if (__kgem_busy(kgem, bo->handle))
3956 				break;
3957 
3958 			__kgem_retire_rq(kgem, RQ(bo->rq));
3959 			assert(bo->rq == NULL);
3960 		}
3961 
3962 		if (bo->refcnt > 1)
3963 			continue;
3964 
3965 		list_move_tail(&bo->list, list);
3966 		return kgem_bo_reference(bo);
3967 	}
3968 
3969 	return NULL;
3970 }
3971 
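/* Pick a bo to hold the batch: small batches try to reuse one of the
 * pre-allocated pinned batch buffers; gen2 (kgem->gen == 020) adds a CACHED
 * fallback and, as a last resort, waits for a pinned batch to idle; anything
 * else falls back to a fresh linear allocation. The chosen bo is then filled
 * via kgem_fixup_relocs() and kgem_batch_write().
 */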
3972 static struct kgem_bo *
3973 kgem_create_batch(struct kgem *kgem)
3974 {
3975 	struct kgem_bo *bo;
3976 	int size, shrink = 0;
3977 
3978 #if !DBG_NO_SHRINK_BATCHES
3979 	if (kgem->surface != kgem->batch_size)
3980 		size = compact_batch_surface(kgem, &shrink);
3981 	else
3982 		size = kgem->nbatch * sizeof(uint32_t);
3983 
3984 	if (size <= 4096) {
3985 		bo = first_available(kgem, &kgem->pinned_batches[0]);
3986 		if (bo)
3987 			goto write;
3988 	}
3989 
3990 	if (size <= 16384) {
3991 		bo = first_available(kgem, &kgem->pinned_batches[1]);
3992 		if (bo)
3993 			goto write;
3994 	}
3995 
3996 	if (kgem->gen == 020) {
3997 		bo = kgem_create_linear(kgem, size, CREATE_CACHED | CREATE_TEMPORARY);
3998 		if (bo)
3999 			goto write;
4000 
4001 		/* Nothing available for reuse, rely on the kernel workaround */
4002 		if (kgem->has_pinned_batches) {
4003 			bo = kgem_create_linear(kgem, size, CREATE_CACHED | CREATE_TEMPORARY);
4004 			if (bo) {
4005 				kgem->batch_flags &= ~LOCAL_I915_EXEC_IS_PINNED;
4006 				goto write;
4007 			}
4008 		}
4009 
4010 		if (size < 16384) {
4011 			bo = list_first_entry(&kgem->pinned_batches[size > 4096],
4012 					      struct kgem_bo,
4013 					      list);
4014 			list_move_tail(&bo->list, &kgem->pinned_batches[size > 4096]);
4015 
4016 			DBG(("%s: syncing due to busy batches\n", __FUNCTION__));
4017 			if (kgem_bo_wait(kgem, bo))
4018 				return NULL;
4019 
4020 			kgem_retire(kgem);
4021 			assert(bo->rq == NULL);
4022 			bo = kgem_bo_reference(bo);
4023 			goto write;
4024 		}
4025 	}
4026 #else
4027 	if (kgem->surface != kgem->batch_size)
4028 		size = kgem->batch_size * sizeof(uint32_t);
4029 	else
4030 		size = kgem->nbatch * sizeof(uint32_t);
4031 #endif
4032 
4033 	if (!kgem->batch_bo || !kgem->has_llc) {
4034 		bo = kgem_create_linear(kgem, size, CREATE_NO_THROTTLE);
4035 		if (bo) {
4036 write:
4037 			kgem_fixup_relocs(kgem, bo, shrink);
4038 			if (kgem_batch_write(kgem, bo, size)) {
4039 				kgem_bo_destroy(kgem, bo);
4040 				return NULL;
4041 			}
4042 			return bo;
4043 		}
4044 	}
4045 
4046 	return kgem_new_batch(kgem);
4047 }
4048 
4049 #if !NDEBUG
4050 static bool dump_file(const char *path)
4051 {
4052 	FILE *file;
4053 	size_t len = 0;
4054 	char *line = NULL;
4055 
4056 	file = fopen(path, "r");
4057 	if (file == NULL)
4058 		return false;
4059 
4060 	while (getline(&line, &len, file) != -1)
4061 		ErrorF("%s", line);
4062 
4063 	free(line);
4064 	fclose(file);
4065 	return true;
4066 }
4067 
4068 static void dump_debugfs(struct kgem *kgem, const char *name)
4069 {
4070 	char path[80];
4071 	int minor = kgem_get_minor(kgem);
4072 
4073 	if (minor < 0)
4074 		return;
4075 
4076 	sprintf(path, "/sys/kernel/debug/dri/%d/%s", minor, name);
4077 	if (dump_file(path))
4078 		return;
4079 
4080 	sprintf(path, "/debug/dri/%d/%s", minor, name);
4081 	if (dump_file(path))
4082 		return;
4083 }
4084 
4085 static void dump_gtt_info(struct kgem *kgem)
4086 {
4087 	dump_debugfs(kgem, "i915_gem_gtt");
4088 }
4089 
4090 static void dump_fence_regs(struct kgem *kgem)
4091 {
4092 	dump_debugfs(kgem, "i915_gem_fence_regs");
4093 }
4094 #endif
4095 
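/* Submit the execbuffer, escalating on failure: first retire and expire the
 * caches and retry, then retry after a full cache cleanup, and as a final
 * resort temporarily disable all outputs to release pinned framebuffers,
 * sprites and cursors before one last attempt.
 */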
4096 static int do_execbuf(struct kgem *kgem, struct drm_i915_gem_execbuffer2 *execbuf)
4097 {
4098 	int ret;
4099 
4100 retry:
4101 	ret = do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, execbuf);
4102 	if (ret == 0)
4103 		return 0;
4104 
4105 	DBG(("%s: failed ret=%d, throttling and discarding cache\n", __FUNCTION__, ret));
4106 	(void)__kgem_throttle_retire(kgem, 0);
4107 	if (kgem_expire_cache(kgem))
4108 		goto retry;
4109 
4110 	if (kgem_cleanup_cache(kgem))
4111 		goto retry;
4112 
4113 	/* last gasp */
4114 	ret = do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, execbuf);
4115 	if (ret != -ENOSPC)
4116 		return ret;
4117 
4118 	/* One final trick up our sleeve for when we run out of space.
4119 	 * We turn everything off to free up our pinned framebuffers,
4120 	 * sprites and cursors, and try just one more time.
4121 	 */
4122 
4123 	xf86DrvMsg(kgem_get_screen_index(kgem), X_WARNING,
4124 		   "Failed to submit rendering commands, trying again with outputs disabled.\n");
4125 
4126 	if (sna_mode_disable(__to_sna(kgem))) {
4127 		kgem_cleanup_cache(kgem);
4128 		ret = do_ioctl(kgem->fd,
4129 			       DRM_IOCTL_I915_GEM_EXECBUFFER2,
4130 			       execbuf);
4131 		DBG(("%s: last_gasp ret=%d\n", __FUNCTION__, ret));
4132 		sna_mode_enable(__to_sna(kgem));
4133 	}
4134 
4135 	return ret;
4136 }
4137 
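/* Flush the current batch to the GPU: finish the attached upload buffers,
 * wrap the commands in a batch bo, append that bo as the final exec object
 * and hand the request to the kernel. On failure the GPU is throttled and,
 * if it remains wedged, acceleration is disabled and the request cleaned up;
 * either way the kgem state is reset ready for the next batch.
 */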
4138 void _kgem_submit(struct kgem *kgem)
4139 {
4140 	struct kgem_request *rq;
4141 	uint32_t batch_end;
4142 	int i, ret;
4143 
4144 	assert(!DBG_NO_HW);
4145 	assert(!kgem->wedged);
4146 
4147 	assert(kgem->nbatch);
4148 	assert(kgem->nbatch <= KGEM_BATCH_SIZE(kgem));
4149 	assert(kgem->nbatch <= kgem->surface);
4150 
4151 	batch_end = kgem_end_batch(kgem);
4152 	kgem_sna_flush(kgem);
4153 
4154 	DBG(("batch[%d/%d, flags=%x]: %d %d %d %d, nreloc=%d, nexec=%d, nfence=%d, aperture=%d [fenced=%d]\n",
4155 	     kgem->mode, kgem->ring, kgem->batch_flags,
4156 	     batch_end, kgem->nbatch, kgem->surface, kgem->batch_size,
4157 	     kgem->nreloc, kgem->nexec, kgem->nfence, kgem->aperture, kgem->aperture_fenced));
4158 
4159 	assert(kgem->nbatch <= kgem->batch_size);
4160 	assert(kgem->nbatch <= kgem->surface);
4161 	assert(kgem->nreloc <= ARRAY_SIZE(kgem->reloc));
4162 	assert(kgem->nexec < ARRAY_SIZE(kgem->exec));
4163 	assert(kgem->nfence <= kgem->fence_max);
4164 
4165 	kgem_finish_buffers(kgem);
4166 
4167 #if SHOW_BATCH_BEFORE
4168 	__kgem_batch_debug(kgem, batch_end);
4169 #endif
4170 
4171 	rq = kgem->next_request;
4172 	assert(rq->bo == NULL);
4173 
4174 	rq->bo = kgem_create_batch(kgem);
4175 	if (rq->bo) {
4176 		struct drm_i915_gem_execbuffer2 execbuf;
4177 
4178 		assert(!rq->bo->needs_flush);
4179 
4180 		i = kgem->nexec++;
4181 		kgem->exec[i].handle = rq->bo->handle;
4182 		kgem->exec[i].relocation_count = kgem->nreloc;
4183 		kgem->exec[i].relocs_ptr = (uintptr_t)kgem->reloc;
4184 		kgem->exec[i].alignment = 0;
4185 		kgem->exec[i].offset = rq->bo->presumed_offset;
4186 		/* Make sure the kernel releases any fence, ignored if gen4+ */
4187 		kgem->exec[i].flags = EXEC_OBJECT_NEEDS_FENCE;
4188 		kgem->exec[i].rsvd1 = 0;
4189 		kgem->exec[i].rsvd2 = 0;
4190 
4191 		rq->bo->exec = &kgem->exec[i];
4192 		rq->bo->rq = MAKE_REQUEST(rq, kgem->ring); /* useful sanity check */
4193 		list_add(&rq->bo->request, &rq->buffers);
4194 		rq->ring = kgem->ring == KGEM_BLT;
4195 
4196 		memset(&execbuf, 0, sizeof(execbuf));
4197 		execbuf.buffers_ptr = (uintptr_t)kgem->exec;
4198 		execbuf.buffer_count = kgem->nexec;
4199 		if (kgem->gen < 030)
4200 			execbuf.batch_len = batch_end*sizeof(uint32_t);
4201 		execbuf.flags = kgem->ring | kgem->batch_flags;
4202 
4203 		if (DBG_DUMP) {
4204 			int fd = open("/tmp/i915-batchbuffers.dump",
4205 				      O_WRONLY | O_CREAT | O_APPEND,
4206 				      0666);
4207 			if (fd != -1) {
4208 				ret = write(fd, kgem->batch, batch_end*sizeof(uint32_t));
4209 				fd = close(fd);
4210 			}
4211 		}
4212 
4213 		ret = do_execbuf(kgem, &execbuf);
4214 	} else
4215 		ret = -ENOMEM;
4216 
4217 	if (ret < 0) {
4218 		kgem_throttle(kgem);
4219 		if (!kgem->wedged) {
4220 			xf86DrvMsg(kgem_get_screen_index(kgem), X_ERROR,
4221 				   "Failed to submit rendering commands (%s), disabling acceleration.\n",
4222 				   strerror(-ret));
4223 			__kgem_set_wedged(kgem);
4224 		}
4225 
4226 #if !NDEBUG
4227 		ErrorF("batch[%d/%d]: %d %d %d, nreloc=%d, nexec=%d, nfence=%d, aperture=%d, fenced=%d, high=%d,%d: errno=%d\n",
4228 		       kgem->mode, kgem->ring, batch_end, kgem->nbatch, kgem->surface,
4229 		       kgem->nreloc, kgem->nexec, kgem->nfence, kgem->aperture, kgem->aperture_fenced, kgem->aperture_high, kgem->aperture_total, -ret);
4230 
4231 		for (i = 0; i < kgem->nexec; i++) {
4232 			struct kgem_bo *bo, *found = NULL;
4233 
4234 			list_for_each_entry(bo, &kgem->next_request->buffers, request) {
4235 				if (bo->handle == kgem->exec[i].handle) {
4236 					found = bo;
4237 					break;
4238 				}
4239 			}
4240 			ErrorF("exec[%d] = handle:%d, presumed offset: %x, size: %d, tiling %d, fenced %d, snooped %d, deleted %d\n",
4241 			       i,
4242 			       kgem->exec[i].handle,
4243 			       (int)kgem->exec[i].offset,
4244 			       found ? kgem_bo_size(found) : -1,
4245 			       found ? found->tiling : -1,
4246 			       (int)(kgem->exec[i].flags & EXEC_OBJECT_NEEDS_FENCE),
4247 			       found ? found->snoop : -1,
4248 			       found ? found->purged : -1);
4249 		}
4250 		for (i = 0; i < kgem->nreloc; i++) {
4251 			ErrorF("reloc[%d] = pos:%d, target:%d, delta:%d, read:%x, write:%x, offset:%x\n",
4252 			       i,
4253 			       (int)kgem->reloc[i].offset,
4254 			       kgem->reloc[i].target_handle,
4255 			       kgem->reloc[i].delta,
4256 			       kgem->reloc[i].read_domains,
4257 			       kgem->reloc[i].write_domain,
4258 			       (int)kgem->reloc[i].presumed_offset);
4259 		}
4260 
4261 		{
4262 			struct drm_i915_gem_get_aperture aperture;
4263 			if (do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture) == 0)
4264 				ErrorF("Aperture size %lld, available %lld\n",
4265 				       (long long)aperture.aper_size,
4266 				       (long long)aperture.aper_available_size);
4267 		}
4268 
4269 		if (ret == -ENOSPC)
4270 			dump_gtt_info(kgem);
4271 		if (ret == -EDEADLK)
4272 			dump_fence_regs(kgem);
4273 
4274 		if (DEBUG_SYNC) {
4275 			int fd = open("/tmp/batchbuffer", O_WRONLY | O_CREAT | O_APPEND, 0666);
4276 			if (fd != -1) {
4277 				int ignored = write(fd, kgem->batch, batch_end*sizeof(uint32_t));
4278 				assert(ignored == batch_end*sizeof(uint32_t));
4279 				close(fd);
4280 			}
4281 
4282 			FatalError("SNA: failed to submit batchbuffer, errno=%d\n", -ret);
4283 		}
4284 #endif
4285 	} else {
4286 		if (DEBUG_SYNC) {
4287 			struct drm_i915_gem_set_domain set_domain;
4288 
4289 			VG_CLEAR(set_domain);
4290 			set_domain.handle = rq->bo->handle;
4291 			set_domain.read_domains = I915_GEM_DOMAIN_GTT;
4292 			set_domain.write_domain = I915_GEM_DOMAIN_GTT;
4293 
4294 			ret = do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain);
4295 		}
4296 
4297 #if SHOW_BATCH_AFTER
4298 		if (gem_read(kgem->fd, rq->bo->handle, kgem->batch, 0, batch_end*sizeof(uint32_t)) == 0)
4299 			__kgem_batch_debug(kgem, batch_end);
4300 #endif
4301 
4302 		kgem_commit(kgem);
4303 	}
4304 
4305 	if (unlikely(kgem->wedged))
4306 		kgem_cleanup(kgem);
4307 
4308 	kgem_reset(kgem);
4309 
4310 	assert(kgem->next_request != NULL);
4311 }
4312 
4313 void kgem_throttle(struct kgem *kgem)
4314 {
4315 	if (unlikely(kgem->wedged))
4316 		return;
4317 
4318 	if (__kgem_throttle(kgem, true)) {
4319 		xf86DrvMsg(kgem_get_screen_index(kgem), X_ERROR,
4320 			   "Detected a hung GPU, disabling acceleration.\n");
4321 		__kgem_set_wedged(kgem);
4322 		kgem->need_throttle = false;
4323 	}
4324 }
4325 
4326 int kgem_is_wedged(struct kgem *kgem)
4327 {
4328 	return __kgem_throttle(kgem, true);
4329 }
4330 
4331 static void kgem_purge_cache(struct kgem *kgem)
4332 {
4333 	struct kgem_bo *bo, *next;
4334 	int i;
4335 
4336 	for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++) {
4337 		list_for_each_entry_safe(bo, next, &kgem->inactive[i], list) {
4338 			if (!kgem_bo_is_retained(kgem, bo)) {
4339 				DBG(("%s: purging %d\n",
4340 				     __FUNCTION__, bo->handle));
4341 				kgem_bo_free(kgem, bo);
4342 			}
4343 		}
4344 	}
4345 
4346 	kgem->need_purge = false;
4347 }
4348 
4349 void kgem_clean_scanout_cache(struct kgem *kgem)
4350 {
4351 	while (!list_is_empty(&kgem->scanout)) {
4352 		struct kgem_bo *bo;
4353 
4354 		bo = list_first_entry(&kgem->scanout, struct kgem_bo, list);
4355 
4356 		assert(bo->scanout);
4357 		assert(!bo->refcnt);
4358 		assert(!bo->prime);
4359 		assert(bo->proxy == NULL);
4360 
4361 		if (bo->exec || __kgem_busy(kgem, bo->handle))
4362 			break;
4363 
4364 		DBG(("%s: handle=%d, fb=%d (reusable=%d)\n",
4365 		     __FUNCTION__, bo->handle, bo->delta, bo->reusable));
4366 		list_del(&bo->list);
4367 
4368 		kgem_bo_rmfb(kgem, bo);
4369 		bo->scanout = false;
4370 
4371 		if (!bo->purged) {
4372 			bo->reusable = true;
4373 			if (kgem->has_llc &&
4374 			    !gem_set_caching(kgem->fd, bo->handle, SNOOPED))
4375 				bo->reusable = false;
4376 
4377 		}
4378 
4379 		__kgem_bo_destroy(kgem, bo);
4380 	}
4381 }
4382 
4383 void kgem_clean_large_cache(struct kgem *kgem)
4384 {
4385 	while (!list_is_empty(&kgem->large_inactive)) {
4386 		kgem_bo_free(kgem,
4387 			     list_first_entry(&kgem->large_inactive,
4388 					      struct kgem_bo, list));
4389 
4390 	}
4391 }
4392 
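/* Age out idle buffers. Unstamped entries receive the current time on the
 * first pass; a later call then frees anything in the snoop cache (after
 * half the interval) or the inactive caches that has sat untouched beyond
 * MAX_INACTIVE_TIME, keeping recently CPU-mapped bo a little longer
 * (MAP_PRESERVE_TIME). Returns whether anything was released, so callers
 * can retry a failed allocation.
 */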
4393 bool kgem_expire_cache(struct kgem *kgem)
4394 {
4395 	time_t now, expire;
4396 	struct kgem_bo *bo;
4397 	unsigned int size = 0, count = 0;
4398 	bool idle;
4399 	unsigned int i;
4400 
4401 	if (!time(&now))
4402 		return false;
4403 
4404 	while (__kgem_freed_bo) {
4405 		bo = __kgem_freed_bo;
4406 		__kgem_freed_bo = *(struct kgem_bo **)bo;
4407 		free(bo);
4408 	}
4409 
4410 	while (__kgem_freed_request) {
4411 		struct kgem_request *rq = __kgem_freed_request;
4412 		__kgem_freed_request = *(struct kgem_request **)rq;
4413 		free(rq);
4414 	}
4415 
4416 	kgem_clean_large_cache(kgem);
4417 	if (__to_sna(kgem)->scrn->vtSema)
4418 		kgem_clean_scanout_cache(kgem);
4419 
4420 	expire = 0;
4421 	list_for_each_entry(bo, &kgem->snoop, list) {
4422 		if (bo->delta) {
4423 			expire = now - MAX_INACTIVE_TIME/2;
4424 			break;
4425 		}
4426 
4427 		assert(now);
4428 		bo->delta = now;
4429 	}
4430 	if (expire) {
4431 		while (!list_is_empty(&kgem->snoop)) {
4432 			bo = list_last_entry(&kgem->snoop, struct kgem_bo, list);
4433 
4434 			if (bo->delta > expire)
4435 				break;
4436 
4437 			kgem_bo_free(kgem, bo);
4438 		}
4439 	}
4440 #ifdef DEBUG_MEMORY
4441 	{
4442 		long snoop_size = 0;
4443 		int snoop_count = 0;
4444 		list_for_each_entry(bo, &kgem->snoop, list)
4445 			snoop_count++, snoop_size += bytes(bo);
4446 		DBG(("%s: still allocated %d bo, %ld bytes, in snoop cache\n",
4447 		     __FUNCTION__, snoop_count, snoop_size));
4448 	}
4449 #endif
4450 
4451 	kgem_retire(kgem);
4452 	if (unlikely(kgem->wedged))
4453 		kgem_cleanup(kgem);
4454 
4455 	kgem->expire(kgem);
4456 
4457 	if (kgem->need_purge)
4458 		kgem_purge_cache(kgem);
4459 
4460 	if (kgem->need_retire)
4461 		kgem_retire(kgem);
4462 
4463 	expire = 0;
4464 	idle = true;
4465 	for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++) {
4466 		idle &= list_is_empty(&kgem->inactive[i]);
4467 		list_for_each_entry(bo, &kgem->inactive[i], list) {
4468 			if (bo->delta) {
4469 				expire = now - MAX_INACTIVE_TIME;
4470 				break;
4471 			}
4472 
4473 			assert(now);
4474 			kgem_bo_set_purgeable(kgem, bo);
4475 			bo->delta = now;
4476 		}
4477 	}
4478 	if (expire == 0) {
4479 		DBG(("%s: idle? %d\n", __FUNCTION__, idle));
4480 		kgem->need_expire = !idle;
4481 		return false;
4482 	}
4483 
4484 	idle = true;
4485 	for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++) {
4486 		struct list preserve;
4487 
4488 		list_init(&preserve);
4489 		while (!list_is_empty(&kgem->inactive[i])) {
4490 			bo = list_last_entry(&kgem->inactive[i],
4491 					     struct kgem_bo, list);
4492 
4493 			if (bo->delta > expire) {
4494 				idle = false;
4495 				break;
4496 			}
4497 
4498 			if (bo->map__cpu && bo->delta + MAP_PRESERVE_TIME > expire) {
4499 				idle = false;
4500 				list_move_tail(&bo->list, &preserve);
4501 			} else {
4502 				count++;
4503 				size += bytes(bo);
4504 				kgem_bo_free(kgem, bo);
4505 				DBG(("%s: expiring handle=%d\n",
4506 				     __FUNCTION__, bo->handle));
4507 			}
4508 		}
4509 		list_splice_tail(&preserve, &kgem->inactive[i]);
4510 	}
4511 
4512 #ifdef DEBUG_MEMORY
4513 	{
4514 		long inactive_size = 0;
4515 		int inactive_count = 0;
4516 		for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++)
4517 			list_for_each_entry(bo, &kgem->inactive[i], list)
4518 				inactive_count++, inactive_size += bytes(bo);
4519 		DBG(("%s: still allocated %d bo, %ld bytes, in inactive cache\n",
4520 		     __FUNCTION__, inactive_count, inactive_size));
4521 	}
4522 #endif
4523 
4524 	DBG(("%s: expired %d objects, %d bytes, idle? %d\n",
4525 	     __FUNCTION__, count, size, idle));
4526 
4527 	kgem->need_expire = !idle;
4528 	return count;
4529 	(void)count;
4530 	(void)size;
4531 }
4532 
4533 bool kgem_cleanup_cache(struct kgem *kgem)
4534 {
4535 	unsigned int i;
4536 	int n;
4537 
4538 	DBG(("%s\n", __FUNCTION__));
4539 
4540 	/* sync to the most recent request */
4541 	for (n = 0; n < ARRAY_SIZE(kgem->requests); n++) {
4542 		if (!list_is_empty(&kgem->requests[n])) {
4543 			struct kgem_request *rq;
4544 
4545 			rq = list_last_entry(&kgem->requests[n],
4546 					     struct kgem_request,
4547 					     list);
4548 
4549 			DBG(("%s: sync on cleanup\n", __FUNCTION__));
4550 			assert(rq->ring == n);
4551 			assert(rq->bo);
4552 			assert(RQ(rq->bo->rq) == rq);
4553 			kgem_bo_wait(kgem, rq->bo);
4554 		}
4555 		assert(list_is_empty(&kgem->requests[n]));
4556 	}
4557 
4558 	kgem_retire(kgem);
4559 	kgem_cleanup(kgem);
4560 
4561 	DBG(("%s: need_expire?=%d\n", __FUNCTION__, kgem->need_expire));
4562 	if (!kgem->need_expire)
4563 		return false;
4564 
4565 	for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++) {
4566 		while (!list_is_empty(&kgem->inactive[i]))
4567 			kgem_bo_free(kgem,
4568 				     list_last_entry(&kgem->inactive[i],
4569 						     struct kgem_bo, list));
4570 	}
4571 
4572 	kgem_clean_large_cache(kgem);
4573 	kgem_clean_scanout_cache(kgem);
4574 
4575 	while (!list_is_empty(&kgem->snoop))
4576 		kgem_bo_free(kgem,
4577 			     list_last_entry(&kgem->snoop,
4578 					     struct kgem_bo, list));
4579 
4580 	while (__kgem_freed_bo) {
4581 		struct kgem_bo *bo = __kgem_freed_bo;
4582 		__kgem_freed_bo = *(struct kgem_bo **)bo;
4583 		free(bo);
4584 	}
4585 
4586 	kgem->need_purge = false;
4587 	kgem->need_expire = false;
4588 
4589 	DBG(("%s: complete\n", __FUNCTION__));
4590 	return true;
4591 }
4592 
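/* Search the reuse caches for an untiled bo of at least num_pages pages.
 * Very large requests go through the dedicated large lists; otherwise the
 * active or inactive bucket is scanned (honouring CREATE_CPU_MAP /
 * CREATE_GTT_MAP preferences), with a near-miss candidate kept aside in
 * case nothing matches the mapping request exactly.
 */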
4593 static struct kgem_bo *
4594 search_linear_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags)
4595 {
4596 	struct kgem_bo *bo, *first = NULL;
4597 	bool use_active = (flags & CREATE_INACTIVE) == 0;
4598 	struct list *cache;
4599 
4600 	DBG(("%s: num_pages=%d, flags=%x, use_active? %d, use_large=%d [max=%d]\n",
4601 	     __FUNCTION__, num_pages, flags, use_active,
4602 	     num_pages >= MAX_CACHE_SIZE / PAGE_SIZE,
4603 	     MAX_CACHE_SIZE / PAGE_SIZE));
4604 
4605 	assert(num_pages);
4606 
4607 	if (num_pages >= MAX_CACHE_SIZE / PAGE_SIZE) {
4608 		DBG(("%s: searching large buffers\n", __FUNCTION__));
4609 retry_large:
4610 		cache = use_active ? &kgem->large : &kgem->large_inactive;
4611 		list_for_each_entry_safe(bo, first, cache, list) {
4612 			assert(bo->refcnt == 0);
4613 			assert(bo->reusable);
4614 			assert(!bo->scanout);
4615 
4616 			if (num_pages > num_pages(bo))
4617 				goto discard;
4618 
4619 			if (bo->tiling != I915_TILING_NONE) {
4620 				if (use_active && kgem->gen < 040)
4621 					goto discard;
4622 
4623 				if (!kgem_set_tiling(kgem, bo,
4624 						    I915_TILING_NONE, 0))
4625 					goto discard;
4626 			}
4627 			assert(bo->tiling == I915_TILING_NONE);
4628 			bo->pitch = 0;
4629 
4630 			if (bo->purged && !kgem_bo_clear_purgeable(kgem, bo))
4631 				goto discard;
4632 
4633 			list_del(&bo->list);
4634 			if (RQ(bo->rq) == (void *)kgem) {
4635 				assert(bo->exec == NULL);
4636 				list_del(&bo->request);
4637 			}
4638 
4639 			bo->delta = 0;
4640 			assert_tiling(kgem, bo);
4641 			return bo;
4642 
4643 discard:
4644 			if (!use_active)
4645 				kgem_bo_free(kgem, bo);
4646 		}
4647 
4648 		if (use_active) {
4649 			use_active = false;
4650 			goto retry_large;
4651 		}
4652 
4653 		if (__kgem_throttle_retire(kgem, flags))
4654 			goto retry_large;
4655 
4656 		return NULL;
4657 	}
4658 
4659 	if (!use_active && list_is_empty(inactive(kgem, num_pages))) {
4660 		DBG(("%s: inactive and cache bucket empty\n",
4661 		     __FUNCTION__));
4662 
4663 		if (flags & CREATE_NO_RETIRE) {
4664 			DBG(("%s: can not retire\n", __FUNCTION__));
4665 			return NULL;
4666 		}
4667 
4668 		if (list_is_empty(active(kgem, num_pages, I915_TILING_NONE))) {
4669 			DBG(("%s: active cache bucket empty\n", __FUNCTION__));
4670 			return NULL;
4671 		}
4672 
4673 		if (!__kgem_throttle_retire(kgem, flags)) {
4674 			DBG(("%s: nothing retired\n", __FUNCTION__));
4675 			return NULL;
4676 		}
4677 
4678 		if (list_is_empty(inactive(kgem, num_pages))) {
4679 			DBG(("%s: active cache bucket still empty after retire\n",
4680 			     __FUNCTION__));
4681 			return NULL;
4682 		}
4683 	}
4684 
4685 	if (!use_active && flags & (CREATE_CPU_MAP | CREATE_GTT_MAP)) {
4686 		int for_cpu = !!(flags & CREATE_CPU_MAP);
4687 		DBG(("%s: searching for inactive %s map\n",
4688 		     __FUNCTION__, for_cpu ? "cpu" : "gtt"));
4689 		cache = &kgem->vma[for_cpu].inactive[cache_bucket(num_pages)];
4690 		list_for_each_entry(bo, cache, vma) {
4691 			assert(for_cpu ? !!bo->map__cpu : (bo->map__gtt || bo->map__wc));
4692 			assert(bucket(bo) == cache_bucket(num_pages));
4693 			assert(bo->proxy == NULL);
4694 			assert(bo->rq == NULL);
4695 			assert(bo->exec == NULL);
4696 			assert(!bo->scanout);
4697 
4698 			if (num_pages > num_pages(bo)) {
4699 				DBG(("inactive too small: %d < %d\n",
4700 				     num_pages(bo), num_pages));
4701 				continue;
4702 			}
4703 
4704 			if (bo->purged && !kgem_bo_clear_purgeable(kgem, bo)) {
4705 				kgem_bo_free(kgem, bo);
4706 				break;
4707 			}
4708 
4709 			if (!kgem_set_tiling(kgem, bo, I915_TILING_NONE, 0)) {
4710 				kgem_bo_free(kgem, bo);
4711 				break;
4712 			}
4713 
4714 			kgem_bo_remove_from_inactive(kgem, bo);
4715 			assert(list_is_empty(&bo->vma));
4716 			assert(list_is_empty(&bo->list));
4717 
4718 			assert(bo->tiling == I915_TILING_NONE);
4719 			assert(bo->pitch == 0);
4720 			bo->delta = 0;
4721 			DBG(("  %s: found handle=%d (num_pages=%d) in linear vma cache\n",
4722 			     __FUNCTION__, bo->handle, num_pages(bo)));
4723 			assert(use_active || bo->domain != DOMAIN_GPU);
4724 			assert(!bo->needs_flush);
4725 			assert_tiling(kgem, bo);
4726 			ASSERT_MAYBE_IDLE(kgem, bo->handle, !use_active);
4727 			return bo;
4728 		}
4729 
4730 		if (flags & CREATE_EXACT)
4731 			return NULL;
4732 
4733 		if (flags & CREATE_CPU_MAP && !kgem->has_llc)
4734 			return NULL;
4735 	}
4736 
4737 	cache = use_active ? active(kgem, num_pages, I915_TILING_NONE) : inactive(kgem, num_pages);
4738 	list_for_each_entry(bo, cache, list) {
4739 		assert(bo->refcnt == 0);
4740 		assert(bo->reusable);
4741 		assert(!!bo->rq == !!use_active);
4742 		assert(bo->proxy == NULL);
4743 		assert(!bo->scanout);
4744 
4745 		if (num_pages > num_pages(bo))
4746 			continue;
4747 
4748 		if (use_active &&
4749 		    kgem->gen <= 040 &&
4750 		    bo->tiling != I915_TILING_NONE)
4751 			continue;
4752 
4753 		if (bo->purged && !kgem_bo_clear_purgeable(kgem, bo)) {
4754 			kgem_bo_free(kgem, bo);
4755 			break;
4756 		}
4757 
4758 		if (I915_TILING_NONE != bo->tiling) {
4759 			if (flags & (CREATE_CPU_MAP | CREATE_GTT_MAP))
4760 				continue;
4761 
4762 			if (first)
4763 				continue;
4764 
4765 			if (!kgem_set_tiling(kgem, bo, I915_TILING_NONE, 0))
4766 				continue;
4767 		}
4768 		assert(bo->tiling == I915_TILING_NONE);
4769 		bo->pitch = 0;
4770 
4771 		if (bo->map__gtt || bo->map__wc || bo->map__cpu) {
4772 			if (flags & (CREATE_CPU_MAP | CREATE_GTT_MAP)) {
4773 				int for_cpu = !!(flags & CREATE_CPU_MAP);
4774 				if (for_cpu ? !!bo->map__cpu : (bo->map__gtt || bo->map__wc)){
4775 					if (first != NULL)
4776 						break;
4777 
4778 					first = bo;
4779 					continue;
4780 				}
4781 			} else {
4782 				if (first != NULL)
4783 					break;
4784 
4785 				first = bo;
4786 				continue;
4787 			}
4788 		} else {
4789 			if (flags & CREATE_GTT_MAP && !kgem_bo_can_map(kgem, bo))
4790 				continue;
4791 
4792 			if (flags & (CREATE_CPU_MAP | CREATE_GTT_MAP)) {
4793 				if (first != NULL)
4794 					break;
4795 
4796 				first = bo;
4797 				continue;
4798 			}
4799 		}
4800 
4801 		if (use_active)
4802 			kgem_bo_remove_from_active(kgem, bo);
4803 		else
4804 			kgem_bo_remove_from_inactive(kgem, bo);
4805 
4806 		assert(bo->tiling == I915_TILING_NONE);
4807 		assert(bo->pitch == 0);
4808 		bo->delta = 0;
4809 		DBG(("  %s: found handle=%d (num_pages=%d) in linear %s cache\n",
4810 		     __FUNCTION__, bo->handle, num_pages(bo),
4811 		     use_active ? "active" : "inactive"));
4812 		assert(list_is_empty(&bo->list));
4813 		assert(list_is_empty(&bo->vma));
4814 		assert(use_active || bo->domain != DOMAIN_GPU);
4815 		assert(!bo->needs_flush || use_active);
4816 		assert_tiling(kgem, bo);
4817 		ASSERT_MAYBE_IDLE(kgem, bo->handle, !use_active);
4818 		return bo;
4819 	}
4820 
4821 	if (first) {
4822 		assert(first->tiling == I915_TILING_NONE);
4823 
4824 		if (use_active)
4825 			kgem_bo_remove_from_active(kgem, first);
4826 		else
4827 			kgem_bo_remove_from_inactive(kgem, first);
4828 
4829 		first->pitch = 0;
4830 		first->delta = 0;
4831 		DBG(("  %s: found handle=%d (near-miss) (num_pages=%d) in linear %s cache\n",
4832 		     __FUNCTION__, first->handle, num_pages(first),
4833 		     use_active ? "active" : "inactive"));
4834 		assert(list_is_empty(&first->list));
4835 		assert(list_is_empty(&first->vma));
4836 		assert(use_active || first->domain != DOMAIN_GPU);
4837 		assert(!first->needs_flush || use_active);
4838 		ASSERT_MAYBE_IDLE(kgem, first->handle, !use_active);
4839 		return first;
4840 	}
4841 
4842 	return NULL;
4843 }
4844 
4845 struct kgem_bo *kgem_create_for_name(struct kgem *kgem, uint32_t name)
4846 {
4847 	struct drm_gem_open open_arg;
4848 	struct drm_i915_gem_get_tiling tiling;
4849 	struct kgem_bo *bo;
4850 
4851 	DBG(("%s(name=%d)\n", __FUNCTION__, name));
4852 
4853 	VG_CLEAR(open_arg);
4854 	open_arg.name = name;
4855 	if (do_ioctl(kgem->fd, DRM_IOCTL_GEM_OPEN, &open_arg))
4856 		return NULL;
4857 
4858 	DBG(("%s: new handle=%d\n", __FUNCTION__, open_arg.handle));
4859 
4860 	VG_CLEAR(tiling);
4861 	tiling.handle = open_arg.handle;
4862 	if (do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_GET_TILING, &tiling)) {
4863 		DBG(("%s(name=%d) get-tiling failed, ret=%d\n", __FUNCTION__, name, errno));
4864 		gem_close(kgem->fd, open_arg.handle);
4865 		return NULL;
4866 	}
4867 
4868 	DBG(("%s: handle=%d, tiling=%d\n", __FUNCTION__, tiling.handle, tiling.tiling_mode));
4869 
4870 	bo = __kgem_bo_alloc(open_arg.handle, open_arg.size / PAGE_SIZE);
4871 	if (bo == NULL) {
4872 		gem_close(kgem->fd, open_arg.handle);
4873 		return NULL;
4874 	}
4875 
4876 	bo->unique_id = kgem_get_unique_id(kgem);
4877 	bo->tiling = tiling.tiling_mode;
4878 	bo->prime = true;
4879 	bo->reusable = false;
4880 	kgem_bo_unclean(kgem, bo);
4881 
4882 	debug_alloc__bo(kgem, bo);
4883 	return bo;
4884 }
4885 
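/* Import a dma-buf fd as a kgem_bo. The true object size is taken from
 * lseek(SEEK_END) when available, and the reported caching level is used to
 * guess the buffer's nature: uncached on LLC parts is interpreted as a
 * foreign scanout, snooped on non-LLC parts as a CPU-coherent buffer (which
 * must be untiled), and caching level 2 is always treated as a scanout.
 */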
4886 struct kgem_bo *kgem_create_for_prime(struct kgem *kgem, int name, uint32_t size)
4887 {
4888 #ifdef DRM_IOCTL_PRIME_FD_TO_HANDLE
4889 	struct drm_prime_handle args;
4890 	struct drm_i915_gem_get_tiling tiling;
4891 	struct local_i915_gem_caching caching;
4892 	struct kgem_bo *bo;
4893 	off_t seek;
4894 
4895 	DBG(("%s(name=%d)\n", __FUNCTION__, name));
4896 
4897 	VG_CLEAR(args);
4898 	args.fd = name;
4899 	args.flags = 0;
4900 	if (do_ioctl(kgem->fd, DRM_IOCTL_PRIME_FD_TO_HANDLE, &args)) {
4901 		DBG(("%s(name=%d) fd-to-handle failed, ret=%d\n", __FUNCTION__, name, errno));
4902 		return NULL;
4903 	}
4904 
4905 	VG_CLEAR(tiling);
4906 	tiling.handle = args.handle;
4907 	if (do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_GET_TILING, &tiling)) {
4908 		DBG(("%s(name=%d) get-tiling failed, ret=%d\n", __FUNCTION__, name, errno));
4909 		gem_close(kgem->fd, args.handle);
4910 		return NULL;
4911 	}
4912 
4913 	/* Query actual size, overriding specified if available */
4914 	seek = lseek(args.fd, 0, SEEK_END);
4915 	DBG(("%s: estimated size=%ld, actual=%lld\n",
4916 	     __FUNCTION__, (long)size, (long long)seek));
4917 	if (seek != -1) {
4918 		if (size > seek) {
4919 			DBG(("%s(name=%d) estimated required size [%d] is larger than actual [%ld]\n", __FUNCTION__, name, size, (long)seek));
4920 			gem_close(kgem->fd, args.handle);
4921 			return NULL;
4922 		}
4923 		size = seek;
4924 	}
4925 
4926 	DBG(("%s: new handle=%d, tiling=%d\n", __FUNCTION__,
4927 	     args.handle, tiling.tiling_mode));
4928 	bo = __kgem_bo_alloc(args.handle, NUM_PAGES(size));
4929 	if (bo == NULL) {
4930 		gem_close(kgem->fd, args.handle);
4931 		return NULL;
4932 	}
4933 
4934 	bo->unique_id = kgem_get_unique_id(kgem);
4935 	bo->tiling = tiling.tiling_mode;
4936 	bo->reusable = false;
4937 	bo->prime = true;
4938 	bo->domain = DOMAIN_NONE;
4939 
4940 	/* is this a special bo (e.g. scanout or CPU coherent)? */
4941 
4942 	VG_CLEAR(caching);
4943 	caching.handle = args.handle;
4944 	caching.caching = kgem->has_llc;
4945 	(void)drmIoctl(kgem->fd, LOCAL_IOCTL_I915_GEM_GET_CACHING, &caching);
4946 	DBG(("%s: imported handle=%d has caching %d\n", __FUNCTION__, args.handle, caching.caching));
4947 	switch (caching.caching) {
4948 	case 0:
4949 		if (kgem->has_llc) {
4950 			DBG(("%s: interpreting handle=%d as a foreign scanout\n",
4951 			     __FUNCTION__, args.handle));
4952 			bo->scanout = true;
4953 		}
4954 		break;
4955 	case 1:
4956 		if (!kgem->has_llc) {
4957 			DBG(("%s: interpreting handle=%d as a foreign snooped buffer\n",
4958 			     __FUNCTION__, args.handle));
4959 			bo->snoop = true;
4960 			if (bo->tiling) {
4961 				DBG(("%s: illegal snooped tiled buffer\n", __FUNCTION__));
4962 				kgem_bo_free(kgem, bo);
4963 				return NULL;
4964 			}
4965 		}
4966 		break;
4967 	case 2:
4968 		DBG(("%s: interpreting handle=%d as a foreign scanout\n",
4969 		     __FUNCTION__, args.handle));
4970 		bo->scanout = true;
4971 		break;
4972 	}
4973 
4974 	debug_alloc__bo(kgem, bo);
4975 	return bo;
4976 #else
4977 	return NULL;
4978 #endif
4979 }
4980 
4981 int kgem_bo_export_to_prime(struct kgem *kgem, struct kgem_bo *bo)
4982 {
4983 #if defined(DRM_IOCTL_PRIME_HANDLE_TO_FD) && defined(O_CLOEXEC)
4984 	struct drm_prime_handle args;
4985 
4986 	assert(kgem_bo_is_fenced(kgem, bo));
4987 
4988 	VG_CLEAR(args);
4989 	args.handle = bo->handle;
4990 	args.flags = O_CLOEXEC;
4991 
4992 	if (do_ioctl(kgem->fd, DRM_IOCTL_PRIME_HANDLE_TO_FD, &args))
4993 		return -1;
4994 
4995 	bo->reusable = false;
4996 	return args.fd;
4997 #else
4998 	return -1;
4999 #endif
5000 }
5001 
5002 struct kgem_bo *kgem_create_linear(struct kgem *kgem, int size, unsigned flags)
5003 {
5004 	struct kgem_bo *bo;
5005 	uint32_t handle;
5006 
5007 	DBG(("%s(%d)\n", __FUNCTION__, size));
5008 	assert(size);
5009 
5010 	if (flags & CREATE_GTT_MAP && kgem->has_llc) {
5011 		flags &= ~CREATE_GTT_MAP;
5012 		flags |= CREATE_CPU_MAP;
5013 	}
5014 
5015 	size = NUM_PAGES(size);
5016 	if ((flags & CREATE_UNCACHED) == 0) {
5017 		bo = search_linear_cache(kgem, size, CREATE_INACTIVE | flags);
5018 		if (bo) {
5019 			assert(!bo->purged);
5020 			assert(!bo->delta);
5021 			assert(bo->domain != DOMAIN_GPU);
5022 			ASSERT_IDLE(kgem, bo->handle);
5023 			bo->refcnt = 1;
5024 			return bo;
5025 		}
5026 
5027 		if (flags & CREATE_CACHED)
5028 			return NULL;
5029 	}
5030 
5031 	handle = gem_create(kgem->fd, size);
5032 	if (handle == 0)
5033 		return NULL;
5034 
5035 	DBG(("%s: new handle=%d, num_pages=%d\n", __FUNCTION__, handle, size));
5036 	bo = __kgem_bo_alloc(handle, size);
5037 	if (bo == NULL) {
5038 		gem_close(kgem->fd, handle);
5039 		return NULL;
5040 	}
5041 
5042 	debug_alloc__bo(kgem, bo);
5043 	return bo;
5044 }
5045 
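/* Choose a tiling mode for a width x height x bpp surface. A negative
 * return means the (absolute) tiling mode is required rather than merely
 * preferred (callers such as kgem_create_2d() treat it as an exact
 * request), while wide-but-short or tiny surfaces are demoted to
 * TILING_NONE and Y tiling falls back to X where the hardware cannot
 * render to it.
 */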
5046 int kgem_choose_tiling(struct kgem *kgem, int tiling, int width, int height, int bpp)
5047 {
5048 	if (DBG_NO_TILING)
5049 		return tiling < 0 ? tiling : I915_TILING_NONE;
5050 
5051 	if (kgem->gen < 040) {
5052 		if (tiling && width * bpp > 8192 * 8) {
5053 			DBG(("%s: pitch too large for tiling [%d]\n",
5054 			     __FUNCTION__, width*bpp/8));
5055 			tiling = I915_TILING_NONE;
5056 			goto done;
5057 		}
5058 	} else {
5059 		if (width*bpp > (MAXSHORT-512) * 8) {
5060 			if (tiling > 0)
5061 				tiling = -tiling;
5062 			else if (tiling == 0)
5063 				tiling = -I915_TILING_X;
5064 			DBG(("%s: large pitch [%d], forcing TILING [%d]\n",
5065 			     __FUNCTION__, width*bpp/8, tiling));
5066 		} else if (tiling && (width|height) > 8192) {
5067 			DBG(("%s: large tiled buffer [%dx%d], forcing TILING_X\n",
5068 			     __FUNCTION__, width, height));
5069 			tiling = -I915_TILING_X;
5070 		}
5071 
5072 		/* fences limited to 128k (256k on ivb) */
5073 		assert(width * bpp <= 128 * 1024 * 8);
5074 	}
5075 
5076 	if (tiling < 0)
5077 		return tiling;
5078 
5079 	if (tiling == I915_TILING_Y && !kgem->can_render_y)
5080 		tiling = I915_TILING_X;
5081 
5082 	if (tiling && (height == 1 || width == 1)) {
5083 		DBG(("%s: disabling tiling [%dx%d] for single row/col\n",
5084 		     __FUNCTION__,width, height));
5085 		tiling = I915_TILING_NONE;
5086 		goto done;
5087 	}
5088 	if (tiling == I915_TILING_Y && height <= 16) {
5089 		DBG(("%s: too short [%d] for TILING_Y\n",
5090 		     __FUNCTION__,height));
5091 		tiling = I915_TILING_X;
5092 	}
5093 	if (tiling && width * bpp > 8 * (4096 - 64)) {
5094 		DBG(("%s: TLB miss between lines %dx%d (pitch=%d), forcing tiling %d\n",
5095 		     __FUNCTION__,
5096 		     width, height, width*bpp/8,
5097 		     tiling));
5098 		return -tiling;
5099 	}
5100 	if (tiling == I915_TILING_X && height < 4) {
5101 		DBG(("%s: too short [%d] for TILING_X\n",
5102 		     __FUNCTION__, height));
5103 		tiling = I915_TILING_NONE;
5104 		goto done;
5105 	}
5106 
5107 	if (tiling == I915_TILING_X && width * bpp <= 8*512) {
5108 		DBG(("%s: too thin [width %d, %d bpp] for TILING_X\n",
5109 		     __FUNCTION__, width, bpp));
5110 		tiling = I915_TILING_NONE;
5111 		goto done;
5112 	}
5113 	if (tiling == I915_TILING_Y && width * bpp < 8*128) {
5114 		DBG(("%s: too thin [%d] for TILING_Y\n",
5115 		     __FUNCTION__, width));
5116 		tiling = I915_TILING_NONE;
5117 		goto done;
5118 	}
5119 
5120 	if (tiling && ALIGN(height, 2) * ALIGN(width*bpp, 8*64) <= 4096 * 8) {
5121 		DBG(("%s: too small [%d bytes] for TILING_%c\n", __FUNCTION__,
5122 		     ALIGN(height, 2) * ALIGN(width*bpp, 8*64) / 8,
5123 		     tiling == I915_TILING_X ? 'X' : 'Y'));
5124 		tiling = I915_TILING_NONE;
5125 		goto done;
5126 	}
5127 
5128 	if (tiling && width * bpp >= 8 * 4096 / 2) {
5129 		DBG(("%s: TLB near-miss between lines %dx%d (pitch=%d), forcing tiling %d\n",
5130 		     __FUNCTION__,
5131 		     width, height, width*bpp/8,
5132 		     tiling));
5133 		return -tiling;
5134 	}
5135 
5136 done:
5137 	DBG(("%s: %dx%d -> %d\n", __FUNCTION__, width, height, tiling));
5138 	return tiling;
5139 }
5140 
5141 static int bits_per_pixel(int depth)
5142 {
5143 	switch (depth) {
5144 	case 8: return 8;
5145 	case 15:
5146 	case 16: return 16;
5147 	case 24:
5148 	case 30:
5149 	case 32: return 32;
5150 	default: return 0;
5151 	}
5152 }
5153 
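/* Report, as a bitmask of KGEM_CAN_CREATE_* flags, how a width x height x
 * depth pixmap could be backed: CPU and/or GPU bo, whether it would fit in
 * the mappable GTT aperture, and whether it counts as "large". Both the
 * untiled and the preferred tiled layouts are checked, and pre-gen4 the
 * power-of-two fence size is taken into account as well.
 */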
5154 unsigned kgem_can_create_2d(struct kgem *kgem,
5155 			    int width, int height, int depth)
5156 {
5157 	uint32_t pitch, size;
5158 	unsigned flags = 0;
5159 	int tiling;
5160 	int bpp;
5161 
5162 	DBG(("%s: %dx%d @ %d\n", __FUNCTION__, width, height, depth));
5163 
5164 	bpp = bits_per_pixel(depth);
5165 	if (bpp == 0) {
5166 		DBG(("%s: unhandled depth %d\n", __FUNCTION__, depth));
5167 		return 0;
5168 	}
5169 
5170 	if (width > MAXSHORT || height > MAXSHORT) {
5171 		DBG(("%s: unhandled size %dx%d\n",
5172 		     __FUNCTION__, width, height));
5173 		return 0;
5174 	}
5175 
5176 	size = kgem_surface_size(kgem, false, 0,
5177 				 width, height, bpp,
5178 				 I915_TILING_NONE, &pitch);
5179 	DBG(("%s: untiled size=%d\n", __FUNCTION__, size));
5180 	if (size > 0) {
5181 		if (size <= kgem->max_cpu_size)
5182 			flags |= KGEM_CAN_CREATE_CPU;
5183 		if (size > 4096 && size <= kgem->max_gpu_size)
5184 			flags |= KGEM_CAN_CREATE_GPU;
5185 		if (size <= PAGE_SIZE*kgem->aperture_mappable/4 || kgem->has_wc_mmap)
5186 			flags |= KGEM_CAN_CREATE_GTT;
5187 		if (size > kgem->large_object_size)
5188 			flags |= KGEM_CAN_CREATE_LARGE;
5189 		if (size > kgem->max_object_size) {
5190 			DBG(("%s: too large (untiled) %d > %d\n",
5191 			     __FUNCTION__, size, kgem->max_object_size));
5192 			return 0;
5193 		}
5194 	}
5195 
5196 	tiling = kgem_choose_tiling(kgem, I915_TILING_X,
5197 				    width, height, bpp);
5198 	if (tiling != I915_TILING_NONE) {
5199 		size = kgem_surface_size(kgem, false, 0,
5200 					 width, height, bpp, tiling,
5201 					 &pitch);
5202 		DBG(("%s: tiled[%d] size=%d\n", __FUNCTION__, tiling, size));
5203 		if (size > 0 && size <= kgem->max_gpu_size)
5204 			flags |= KGEM_CAN_CREATE_GPU | KGEM_CAN_CREATE_TILED;
5205 		if (size > 0 && size <= PAGE_SIZE*kgem->aperture_mappable/4)
5206 			flags |= KGEM_CAN_CREATE_GTT;
5207 		if (size > PAGE_SIZE*kgem->aperture_mappable/4)
5208 			flags &= ~KGEM_CAN_CREATE_GTT;
5209 		if (size > kgem->large_object_size)
5210 			flags |= KGEM_CAN_CREATE_LARGE;
5211 		if (size > kgem->max_object_size) {
5212 			DBG(("%s: too large (tiled) %d > %d\n",
5213 			     __FUNCTION__, size, kgem->max_object_size));
5214 			return 0;
5215 		}
5216 		if (kgem->gen < 040) {
5217 			int fence_size = 1024 * 1024;
5218 			while (fence_size < size)
5219 				fence_size <<= 1;
5220 			if (fence_size > kgem->max_gpu_size)
5221 				flags &= ~(KGEM_CAN_CREATE_GPU | KGEM_CAN_CREATE_TILED);
5222 			if (fence_size > PAGE_SIZE*kgem->aperture_fenceable/4)
5223 				flags &= ~KGEM_CAN_CREATE_GTT;
5224 		}
5225 	}
5226 
5227 	return flags;
5228 }
5229 
5230 inline int kgem_bo_fenced_size(struct kgem *kgem, struct kgem_bo *bo)
5231 {
5232 	unsigned int size;
5233 
5234 	assert(bo->tiling);
5235 	assert_tiling(kgem, bo);
5236 	assert(kgem->gen < 040);
5237 
5238 	if (kgem->gen < 030)
5239 		size = 512 * 1024 / PAGE_SIZE;
5240 	else
5241 		size = 1024 * 1024 / PAGE_SIZE;
5242 	while (size < num_pages(bo))
5243 		size <<= 1;
5244 
5245 	return size;
5246 }
5247 
5248 static struct kgem_bo *
5249 __kgem_bo_create_as_display(struct kgem *kgem, int size, int tiling, int pitch)
5250 {
5251 	struct local_i915_gem_create2 args;
5252 	struct kgem_bo *bo;
5253 
5254 	if (!kgem->has_create2)
5255 		return NULL;
5256 
5257 	memset(&args, 0, sizeof(args));
5258 	args.size = size * PAGE_SIZE;
5259 	args.placement = LOCAL_I915_CREATE_PLACEMENT_STOLEN;
5260 	args.caching = DISPLAY;
5261 	args.tiling_mode = tiling;
5262 	args.stride = pitch;
5263 
5264 	if (do_ioctl(kgem->fd, LOCAL_IOCTL_I915_GEM_CREATE2, &args)) {
5265 		args.placement = LOCAL_I915_CREATE_PLACEMENT_SYSTEM;
5266 		if (do_ioctl(kgem->fd, LOCAL_IOCTL_I915_GEM_CREATE2, &args))
5267 			return NULL;
5268 	}
5269 
5270 	bo = __kgem_bo_alloc(args.handle, size);
5271 	if (bo == NULL) {
5272 		gem_close(kgem->fd, args.handle);
5273 		return NULL;
5274 	}
5275 
5276 	bo->unique_id = kgem_get_unique_id(kgem);
5277 	bo->tiling = tiling;
5278 	bo->pitch = pitch;
5279 	if (args.placement == LOCAL_I915_CREATE_PLACEMENT_STOLEN) {
5280 		bo->purged = true; /* for asserts against CPU access */
5281 	}
5282 	bo->reusable = false; /* so that unclaimed scanouts are freed */
5283 	bo->domain = DOMAIN_NONE;
5284 
5285 	if (__kgem_busy(kgem, bo->handle)) {
5286 		assert(bo->exec == NULL);
5287 		list_add(&bo->request, &kgem->flushing);
5288 		bo->rq = (void *)kgem;
5289 		kgem->need_retire = true;
5290 	}
5291 
5292 	assert_tiling(kgem, bo);
5293 	debug_alloc__bo(kgem, bo);
5294 
5295 	return bo;
5296 }
5297 
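/* Turn a freshly created bo into a scanout: move it out of cacheable memory
 * on LLC parts (DISPLAY, falling back to UNCACHED), fault it into the
 * mappable GTT aperture so later flips do not stall on rebinding, and
 * attach a DRM framebuffer, recording the fb id in bo->delta.
 */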
5298 static void __kgem_bo_make_scanout(struct kgem *kgem,
5299 				   struct kgem_bo *bo,
5300 				   int width, int height)
5301 {
5302 	ScrnInfoPtr scrn = __to_sna(kgem)->scrn;
5303 	struct drm_mode_fb_cmd arg;
5304 
5305 	assert(bo->proxy == NULL);
5306 
5307 	if (!scrn->vtSema)
5308 		return;
5309 
5310 	DBG(("%s: create fb %dx%d@%d/%d\n",
5311 	     __FUNCTION__, width, height, scrn->depth, scrn->bitsPerPixel));
5312 
5313 	VG_CLEAR(arg);
5314 	arg.width = width;
5315 	arg.height = height;
5316 	arg.pitch = bo->pitch;
5317 	arg.bpp = scrn->bitsPerPixel;
5318 	arg.depth = scrn->depth;
5319 	arg.handle = bo->handle;
5320 
5321 	/* First move the scanout out of cached memory */
5322 	if (kgem->has_llc) {
5323 		if (!gem_set_caching(kgem->fd, bo->handle, DISPLAY) &&
5324 		    !gem_set_caching(kgem->fd, bo->handle, UNCACHED))
5325 			return;
5326 	}
5327 
5328 	bo->scanout = true;
5329 
5330 	/* Then pre-emptively move the object into the mappable
5331 	 * portion to avoid rebinding later when busy.
5332 	 */
5333 	if (bo->map__gtt == NULL)
5334 		bo->map__gtt = __kgem_bo_map__gtt(kgem, bo);
5335 	if (bo->map__gtt) {
5336 		if (sigtrap_get() == 0) {
5337 			*(uint32_t *)bo->map__gtt = 0;
5338 			sigtrap_put();
5339 		}
5340 		bo->domain = DOMAIN_GTT;
5341 	}
5342 
5343 	if (do_ioctl(kgem->fd, DRM_IOCTL_MODE_ADDFB, &arg) == 0) {
5344 		DBG(("%s: attached fb=%d to handle=%d\n",
5345 		     __FUNCTION__, arg.fb_id, arg.handle));
5346 		bo->delta = arg.fb_id;
5347 	}
5348 }
5349 
5350 static bool tiling_changed(struct kgem_bo *bo, int tiling, int pitch)
5351 {
5352 	if (tiling != bo->tiling)
5353 		return true;
5354 
5355 	return tiling != I915_TILING_NONE && pitch != bo->pitch;
5356 }
5357 
5358 static void set_gpu_tiling(struct kgem *kgem,
5359 			   struct kgem_bo *bo,
5360 			   int tiling, int pitch)
5361 {
5362 	DBG(("%s: handle=%d, tiling=%d, pitch=%d\n",
5363 	     __FUNCTION__, bo->handle, tiling, pitch));
5364 
5365 	if (tiling_changed(bo, tiling, pitch) && bo->map__gtt) {
5366 		if (!list_is_empty(&bo->vma)) {
5367 			list_del(&bo->vma);
5368 			kgem->vma[0].count--;
5369 		}
5370 		munmap(bo->map__gtt, bytes(bo));
5371 		bo->map__gtt = NULL;
5372 	}
5373 
5374 	bo->tiling = tiling;
5375 	bo->pitch = pitch;
5376 }
5377 
5378 bool kgem_bo_is_fenced(struct kgem *kgem, struct kgem_bo *bo)
5379 {
5380 	struct drm_i915_gem_get_tiling tiling;
5381 
5382 	assert(kgem);
5383 	assert(bo);
5384 
5385 	VG_CLEAR(tiling);
5386 	tiling.handle = bo->handle;
5387 	tiling.tiling_mode = bo->tiling;
5388 	(void)do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_GET_TILING, &tiling);
5389 	return tiling.tiling_mode == bo->tiling; /* assume pitch is fine! */
5390 }
5391 
5392 struct kgem_bo *kgem_create_2d(struct kgem *kgem,
5393 			       int width,
5394 			       int height,
5395 			       int bpp,
5396 			       int tiling,
5397 			       uint32_t flags)
5398 {
5399 	struct list *cache;
5400 	struct kgem_bo *bo;
5401 	uint32_t pitch, tiled_height, size;
5402 	uint32_t handle;
5403 	int i, bucket, retry;
5404 	bool exact = flags & (CREATE_EXACT | CREATE_SCANOUT);
5405 
5406 	if (tiling < 0)
5407 		exact = true, tiling = -tiling;
5408 
5409 	DBG(("%s(%dx%d, bpp=%d, tiling=%d, exact=%d, inactive=%d, cpu-mapping=%d, gtt-mapping=%d, scanout?=%d, prime?=%d, temp?=%d)\n", __FUNCTION__,
5410 	     width, height, bpp, tiling, exact,
5411 	     !!(flags & CREATE_INACTIVE),
5412 	     !!(flags & CREATE_CPU_MAP),
5413 	     !!(flags & CREATE_GTT_MAP),
5414 	     !!(flags & CREATE_SCANOUT),
5415 	     !!(flags & CREATE_PRIME),
5416 	     !!(flags & CREATE_TEMPORARY)));
5417 
5418 	size = kgem_surface_size(kgem, kgem->has_relaxed_fencing, flags,
5419 				 width, height, bpp, tiling, &pitch);
5420 	if (size == 0) {
5421 		DBG(("%s: invalid surface size (too large?)\n", __FUNCTION__));
5422 		return NULL;
5423 	}
5424 
5425 	size /= PAGE_SIZE;
5426 	bucket = cache_bucket(size);
5427 
5428 	if (flags & CREATE_SCANOUT) {
5429 		struct kgem_bo *last = NULL;
5430 
5431 		list_for_each_entry_reverse(bo, &kgem->scanout, list) {
5432 			assert(bo->scanout);
5433 			assert(!bo->flush);
5434 			assert(!bo->refcnt);
5435 			assert_tiling(kgem, bo);
5436 
5437 			if (size > num_pages(bo) || num_pages(bo) > 2*size)
5438 				continue;
5439 
5440 			if (bo->tiling != tiling || bo->pitch != pitch)
5441 				/* No tiling/pitch without recreating fb */
5442 				continue;
5443 
5444 			if (bo->delta && !check_scanout_size(kgem, bo, width, height))
5445 				kgem_bo_rmfb(kgem, bo);
5446 
5447 			if (flags & CREATE_INACTIVE && bo->rq) {
5448 				last = bo;
5449 				continue;
5450 			}
5451 
5452 			list_del(&bo->list);
5453 
5454 			bo->unique_id = kgem_get_unique_id(kgem);
5455 			DBG(("  1:from scanout: pitch=%d, tiling=%d, handle=%d, id=%d\n",
5456 			     bo->pitch, bo->tiling, bo->handle, bo->unique_id));
5457 			assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo));
5458 			assert_tiling(kgem, bo);
5459 			bo->refcnt = 1;
5460 			return bo;
5461 		}
5462 
5463 		if (last) {
5464 			list_del(&last->list);
5465 
5466 			last->unique_id = kgem_get_unique_id(kgem);
5467 			DBG(("  1:from scanout: pitch=%d, tiling=%d, handle=%d, id=%d\n",
5468 			     last->pitch, last->tiling, last->handle, last->unique_id));
5469 			assert(last->pitch*kgem_aligned_height(kgem, height, last->tiling) <= kgem_bo_size(last));
5470 			assert_tiling(kgem, last);
5471 			last->refcnt = 1;
5472 			return last;
5473 		}
5474 
5475 		if (__to_sna(kgem)->scrn->vtSema) {
5476 			ScrnInfoPtr scrn = __to_sna(kgem)->scrn;
5477 
5478 			list_for_each_entry_reverse(bo, &kgem->scanout, list) {
5479 				struct drm_mode_fb_cmd arg;
5480 
5481 				assert(bo->scanout);
5482 				assert(!bo->refcnt);
5483 
5484 				if (size > num_pages(bo) || num_pages(bo) > 2*size)
5485 					continue;
5486 
5487 				if (flags & CREATE_INACTIVE && bo->rq)
5488 					continue;
5489 
5490 				list_del(&bo->list);
5491 
5492 				if (bo->tiling != tiling || bo->pitch != pitch) {
5493 					if (bo->delta) {
5494 						kgem_bo_rmfb(kgem, bo);
5495 						bo->delta = 0;
5496 					}
5497 
5498 					if (!kgem_set_tiling(kgem, bo,
5499 							     tiling, pitch)) {
5500 						bo->scanout = false;
5501 						__kgem_bo_destroy(kgem, bo);
5502 						break;
5503 					}
5504 				}
5505 
5506 				VG_CLEAR(arg);
5507 				arg.width = width;
5508 				arg.height = height;
5509 				arg.pitch = bo->pitch;
5510 				arg.bpp = scrn->bitsPerPixel;
5511 				arg.depth = scrn->depth;
5512 				arg.handle = bo->handle;
5513 
5514 				if (do_ioctl(kgem->fd, DRM_IOCTL_MODE_ADDFB, &arg)) {
5515 					bo->scanout = false;
5516 					__kgem_bo_destroy(kgem, bo);
5517 					break;
5518 				}
5519 
5520 				bo->delta = arg.fb_id;
5521 				bo->unique_id = kgem_get_unique_id(kgem);
5522 
5523 				DBG(("  2:from scanout: pitch=%d, tiling=%d, handle=%d, id=%d\n",
5524 				     bo->pitch, bo->tiling, bo->handle, bo->unique_id));
5525 				assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo));
5526 				assert_tiling(kgem, bo);
5527 				bo->refcnt = 1;
5528 				return bo;
5529 			}
5530 		}
5531 
5532 		if (flags & CREATE_CACHED)
5533 			return NULL;
5534 
5535 		bo = __kgem_bo_create_as_display(kgem, size, tiling, pitch);
5536 		if (bo)
5537 			return bo;
5538 
5539 		flags |= CREATE_INACTIVE;
5540 	}
5541 
5542 	if (bucket >= NUM_CACHE_BUCKETS) {
5543 		DBG(("%s: large bo num pages=%d, bucket=%d\n",
5544 		     __FUNCTION__, size, bucket));
5545 
5546 		if (flags & CREATE_INACTIVE)
5547 			goto large_inactive;
5548 
5549 		tiled_height = kgem_aligned_height(kgem, height, tiling);
5550 
5551 		list_for_each_entry(bo, &kgem->large, list) {
5552 			assert(!bo->purged);
5553 			assert(!bo->scanout);
5554 			assert(bo->refcnt == 0);
5555 			assert(bo->reusable);
5556 			assert_tiling(kgem, bo);
5557 
5558 			if (kgem->gen < 040) {
5559 				if (bo->pitch < pitch) {
5560 					DBG(("tiled and pitch too small: tiling=%d, (want %d), pitch=%d, need %d\n",
5561 					     bo->tiling, tiling,
5562 					     bo->pitch, pitch));
5563 					continue;
5564 				}
5565 
5566 				if (bo->pitch * tiled_height > bytes(bo))
5567 					continue;
5568 			} else {
5569 				if (num_pages(bo) < size)
5570 					continue;
5571 
5572 				if (!kgem_set_tiling(kgem, bo, tiling, pitch)) {
5573 					if (exact) {
5574 						DBG(("tiled and pitch not exact: tiling=%d, (want %d), pitch=%d, need %d\n",
5575 						     bo->tiling, tiling,
5576 						     bo->pitch, pitch));
5577 						continue;
5578 					}
5579 
5580 					set_gpu_tiling(kgem, bo, tiling, pitch);
5581 				}
5582 			}
5583 
5584 			kgem_bo_remove_from_active(kgem, bo);
5585 
5586 			bo->unique_id = kgem_get_unique_id(kgem);
5587 			bo->delta = 0;
5588 			DBG(("  1:from active: pitch=%d, tiling=%d, handle=%d, id=%d\n",
5589 			     bo->pitch, bo->tiling, bo->handle, bo->unique_id));
5590 			assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo));
5591 			assert_tiling(kgem, bo);
5592 			bo->refcnt = 1;
5593 			return bo;
5594 		}
5595 
5596 large_inactive:
5597 		__kgem_throttle_retire(kgem, flags);
5598 		list_for_each_entry(bo, &kgem->large_inactive, list) {
5599 			assert(bo->refcnt == 0);
5600 			assert(bo->reusable);
5601 			assert(!bo->scanout);
5602 			assert_tiling(kgem, bo);
5603 
5604 			if (size > num_pages(bo))
5605 				continue;
5606 
5607 			if (!kgem_set_tiling(kgem, bo, tiling, pitch)) {
5608 				if (kgem->gen >= 040 && !exact)
5609 					set_gpu_tiling(kgem, bo, tiling, pitch);
5610 				else
5611 					continue;
5612 			}
5613 
5614 			if (bo->purged && !kgem_bo_clear_purgeable(kgem, bo)) {
5615 				kgem_bo_free(kgem, bo);
5616 				break;
5617 			}
5618 
5619 			list_del(&bo->list);
5620 
5621 			assert(bo->domain != DOMAIN_GPU);
5622 			bo->unique_id = kgem_get_unique_id(kgem);
5623 			bo->delta = 0;
5624 			DBG(("  1:from large inactive: pitch=%d, tiling=%d, handle=%d, id=%d\n",
5625 			     bo->pitch, bo->tiling, bo->handle, bo->unique_id));
5626 			assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo));
5627 			assert_tiling(kgem, bo);
5628 			bo->refcnt = 1;
5629 
5630 			if (flags & CREATE_SCANOUT)
5631 				__kgem_bo_make_scanout(kgem, bo, width, height);
5632 
5633 			return bo;
5634 		}
5635 
5636 		goto create;
5637 	}
5638 
5639 	if (flags & (CREATE_CPU_MAP | CREATE_GTT_MAP)) {
5640 		int for_cpu = !!(flags & CREATE_CPU_MAP);
5641 		if (kgem->has_llc && tiling == I915_TILING_NONE)
5642 			for_cpu = 1;
5643 		/* We presume that we will need to upload to this bo,
5644 		 * and so would prefer to have an active VMA.
5645 		 */
5646 		cache = &kgem->vma[for_cpu].inactive[bucket];
5647 		do {
5648 			list_for_each_entry(bo, cache, vma) {
5649 				assert(bucket(bo) == bucket);
5650 				assert(bo->refcnt == 0);
5651 				assert(!bo->scanout);
5652 				assert(for_cpu ? !!bo->map__cpu : (bo->map__gtt || bo->map__wc));
5653 				assert(bo->rq == NULL);
5654 				assert(bo->exec == NULL);
5655 				assert(list_is_empty(&bo->request));
5656 				assert(bo->flush == false);
5657 				assert_tiling(kgem, bo);
5658 
5659 				if (size > num_pages(bo)) {
5660 					DBG(("inactive too small: %d < %d\n",
5661 					     num_pages(bo), size));
5662 					continue;
5663 				}
5664 
5665 				if (flags & UNCACHED && !kgem->has_llc && bo->domain != DOMAIN_CPU)
5666 					continue;
5667 
5668 				if (bo->tiling != tiling ||
5669 				    (tiling != I915_TILING_NONE && bo->pitch != pitch)) {
5670 					if (bo->map__gtt ||
5671 					    !kgem_set_tiling(kgem, bo,
5672 							     tiling, pitch)) {
5673 						DBG(("inactive GTT vma with wrong tiling: %d < %d\n",
5674 						     bo->tiling, tiling));
5675 						kgem_bo_free(kgem, bo);
5676 						break;
5677 					}
5678 				}
5679 
5680 				if (bo->purged && !kgem_bo_clear_purgeable(kgem, bo)) {
5681 					kgem_bo_free(kgem, bo);
5682 					break;
5683 				}
5684 
5685 				if (tiling == I915_TILING_NONE)
5686 					bo->pitch = pitch;
5687 
5688 				assert(bo->tiling == tiling);
5689 				assert(bo->pitch >= pitch);
5690 				bo->delta = 0;
5691 				bo->unique_id = kgem_get_unique_id(kgem);
5692 
5693 				kgem_bo_remove_from_inactive(kgem, bo);
5694 				assert(list_is_empty(&bo->list));
5695 				assert(list_is_empty(&bo->vma));
5696 
5697 				DBG(("  from inactive vma: pitch=%d, tiling=%d: handle=%d, id=%d\n",
5698 				     bo->pitch, bo->tiling, bo->handle, bo->unique_id));
5699 				assert(bo->reusable);
5700 				assert(bo->domain != DOMAIN_GPU);
5701 				ASSERT_IDLE(kgem, bo->handle);
5702 				assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo));
5703 				assert_tiling(kgem, bo);
5704 				bo->refcnt = 1;
5705 				return bo;
5706 			}
5707 		} while (!list_is_empty(cache) &&
5708 			 __kgem_throttle_retire(kgem, flags));
5709 
5710 		if (flags & CREATE_CPU_MAP && !kgem->has_llc) {
5711 			if (list_is_empty(&kgem->active[bucket][tiling]) &&
5712 			    list_is_empty(&kgem->inactive[bucket]))
5713 				flags &= ~CREATE_CACHED;
5714 
5715 			goto create;
5716 		}
5717 	}
5718 
5719 	if (flags & CREATE_INACTIVE)
5720 		goto skip_active_search;
5721 
5722 	/* Best active match */
5723 	retry = NUM_CACHE_BUCKETS - bucket;
5724 	if (retry > 3 && (flags & CREATE_TEMPORARY) == 0)
5725 		retry = 3;
5726 search_active:
5727 	assert(bucket < NUM_CACHE_BUCKETS);
5728 	cache = &kgem->active[bucket][tiling];
5729 	if (tiling) {
5730 		tiled_height = kgem_aligned_height(kgem, height, tiling);
5731 		list_for_each_entry(bo, cache, list) {
5732 			assert(!bo->purged);
5733 			assert(bo->refcnt == 0);
5734 			assert(bucket(bo) == bucket);
5735 			assert(bo->reusable);
5736 			assert(bo->tiling == tiling);
5737 			assert(bo->flush == false);
5738 			assert(!bo->scanout);
5739 			assert_tiling(kgem, bo);
5740 
5741 			if (kgem->gen < 040) {
5742 				if (bo->pitch < pitch) {
5743 					DBG(("tiled and pitch too small: tiling=%d, (want %d), pitch=%d, need %d\n",
5744 					     bo->tiling, tiling,
5745 					     bo->pitch, pitch));
5746 					continue;
5747 				}
5748 
5749 				if (bo->pitch * tiled_height > bytes(bo))
5750 					continue;
5751 			} else {
5752 				if (num_pages(bo) < size)
5753 					continue;
5754 
5755 				if (!kgem_set_tiling(kgem, bo, tiling, pitch)) {
5756 					if (exact) {
5757 						DBG(("tiled and pitch not exact: tiling=%d, (want %d), pitch=%d, need %d\n",
5758 						     bo->tiling, tiling,
5759 						     bo->pitch, pitch));
5760 						continue;
5761 					}
5762 
5763 					set_gpu_tiling(kgem, bo, tiling, pitch);
5764 				}
5765 			}
5766 			assert(bo->tiling == tiling);
5767 			assert(bo->pitch >= pitch);
5768 
5769 			kgem_bo_remove_from_active(kgem, bo);
5770 
5771 			bo->unique_id = kgem_get_unique_id(kgem);
5772 			bo->delta = 0;
5773 			DBG(("  1:from active: pitch=%d, tiling=%d, handle=%d, id=%d\n",
5774 			     bo->pitch, bo->tiling, bo->handle, bo->unique_id));
5775 			assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo));
5776 			assert_tiling(kgem, bo);
5777 			bo->refcnt = 1;
5778 			return bo;
5779 		}
5780 	} else {
5781 		list_for_each_entry(bo, cache, list) {
5782 			assert(bucket(bo) == bucket);
5783 			assert(!bo->purged);
5784 			assert(bo->refcnt == 0);
5785 			assert(bo->reusable);
5786 			assert(!bo->scanout);
5787 			assert(bo->tiling == tiling);
5788 			assert(bo->flush == false);
5789 			assert_tiling(kgem, bo);
5790 
5791 			if (num_pages(bo) < size)
5792 				continue;
5793 
5794 			kgem_bo_remove_from_active(kgem, bo);
5795 
5796 			bo->pitch = pitch;
5797 			bo->unique_id = kgem_get_unique_id(kgem);
5798 			bo->delta = 0;
5799 			DBG(("  1:from active: pitch=%d, tiling=%d, handle=%d, id=%d\n",
5800 			     bo->pitch, bo->tiling, bo->handle, bo->unique_id));
5801 			assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo));
5802 			assert_tiling(kgem, bo);
5803 			bo->refcnt = 1;
5804 			return bo;
5805 		}
5806 	}
5807 
5808 	if (kgem->gen >= 040) {
5809 		for (i = I915_TILING_Y; i >= I915_TILING_NONE; i--) {
5810 			cache = &kgem->active[bucket][i];
5811 			list_for_each_entry(bo, cache, list) {
5812 				assert(!bo->purged);
5813 				assert(bo->refcnt == 0);
5814 				assert(bo->reusable);
5815 				assert(!bo->scanout);
5816 				assert(bo->flush == false);
5817 				assert_tiling(kgem, bo);
5818 
5819 				if (num_pages(bo) < size)
5820 					continue;
5821 
5822 				if (!kgem_set_tiling(kgem, bo, tiling, pitch)) {
5823 					if (exact || kgem->gen < 040)
5824 						continue;
5825 
5826 					set_gpu_tiling(kgem, bo, tiling, pitch);
5827 				}
5828 				assert(bo->tiling == tiling);
5829 				assert(bo->pitch >= pitch);
5830 
5831 				kgem_bo_remove_from_active(kgem, bo);
5832 
5833 				bo->unique_id = kgem_get_unique_id(kgem);
5834 				bo->delta = 0;
5835 				DBG(("  1:from active: pitch=%d, tiling=%d, handle=%d, id=%d\n",
5836 				     bo->pitch, bo->tiling, bo->handle, bo->unique_id));
5837 				assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo));
5838 				assert_tiling(kgem, bo);
5839 				bo->refcnt = 1;
5840 				return bo;
5841 			}
5842 		}
5843 	} else if (!exact) { /* allow an active near-miss? */
5844 		for (i = tiling; i >= I915_TILING_NONE; i--) {
5845 			tiled_height = kgem_surface_size(kgem, kgem->has_relaxed_fencing, flags,
5846 							 width, height, bpp, tiling, &pitch);
5847 			cache = active(kgem, tiled_height / PAGE_SIZE, i);
5848 			tiled_height = kgem_aligned_height(kgem, height, i);
5849 			list_for_each_entry(bo, cache, list) {
5850 				assert(!bo->purged);
5851 				assert(bo->refcnt == 0);
5852 				assert(bo->reusable);
5853 				assert(!bo->scanout);
5854 				assert(bo->flush == false);
5855 				assert_tiling(kgem, bo);
5856 
5857 				if (bo->tiling) {
5858 					if (bo->pitch < pitch) {
5859 						DBG(("tiled and pitch too small: tiling=%d, (want %d), pitch=%d, need %d\n",
5860 						     bo->tiling, tiling,
5861 						     bo->pitch, pitch));
5862 						continue;
5863 					}
5864 				} else
5865 					bo->pitch = pitch;
5866 
5867 				if (bo->pitch * tiled_height > bytes(bo))
5868 					continue;
5869 
5870 				kgem_bo_remove_from_active(kgem, bo);
5871 
5872 				bo->unique_id = kgem_get_unique_id(kgem);
5873 				bo->delta = 0;
5874 				DBG(("  1:from active: pitch=%d, tiling=%d, handle=%d, id=%d\n",
5875 				     bo->pitch, bo->tiling, bo->handle, bo->unique_id));
5876 				assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo));
5877 				assert_tiling(kgem, bo);
5878 				bo->refcnt = 1;
5879 				return bo;
5880 			}
5881 		}
5882 	}
5883 
5884 	if (--retry) {
5885 		bucket++;
5886 		goto search_active;
5887 	}
5888 
5889 skip_active_search:
5890 	bucket = cache_bucket(size);
5891 	retry = NUM_CACHE_BUCKETS - bucket;
5892 	if (retry > 3)
5893 		retry = 3;
5894 search_inactive:
5895 	/* Now just look for a close match and prefer any currently active */
5896 	assert(bucket < NUM_CACHE_BUCKETS);
5897 	cache = &kgem->inactive[bucket];
5898 	list_for_each_entry(bo, cache, list) {
5899 		assert(bucket(bo) == bucket);
5900 		assert(bo->reusable);
5901 		assert(!bo->scanout);
5902 		assert(bo->flush == false);
5903 		assert_tiling(kgem, bo);
5904 
5905 		if (size > num_pages(bo)) {
5906 			DBG(("inactive too small: %d < %d\n",
5907 			     num_pages(bo), size));
5908 			continue;
5909 		}
5910 
5911 		if (!kgem_set_tiling(kgem, bo, tiling, pitch)) {
5912 			if (exact || kgem->gen < 040) {
5913 				kgem_bo_free(kgem, bo);
5914 				break;
5915 			}
5916 
5917 			set_gpu_tiling(kgem, bo, tiling, pitch);
5918 		}
5919 
5920 		if (bo->purged && !kgem_bo_clear_purgeable(kgem, bo)) {
5921 			kgem_bo_free(kgem, bo);
5922 			break;
5923 		}
5924 
5925 		kgem_bo_remove_from_inactive(kgem, bo);
5926 		assert(list_is_empty(&bo->list));
5927 		assert(list_is_empty(&bo->vma));
5928 		assert(bo->tiling == tiling);
5929 		assert(bo->pitch >= pitch);
5930 
5931 		bo->delta = 0;
5932 		bo->unique_id = kgem_get_unique_id(kgem);
5933 		assert(bo->pitch);
5934 		DBG(("  from inactive: pitch=%d, tiling=%d: handle=%d, id=%d\n",
5935 		     bo->pitch, bo->tiling, bo->handle, bo->unique_id));
5936 		assert(bo->refcnt == 0);
5937 		assert(bo->reusable);
5938 		assert((flags & CREATE_INACTIVE) == 0 || bo->domain != DOMAIN_GPU);
5939 		ASSERT_MAYBE_IDLE(kgem, bo->handle, flags & CREATE_INACTIVE);
5940 		assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo));
5941 		assert_tiling(kgem, bo);
5942 		bo->refcnt = 1;
5943 
5944 		if (flags & CREATE_SCANOUT)
5945 			__kgem_bo_make_scanout(kgem, bo, width, height);
5946 
5947 		return bo;
5948 	}
5949 
5950 	if ((flags & CREATE_NO_RETIRE) == 0) {
5951 		list_for_each_entry_reverse(bo, &kgem->active[bucket][tiling], list) {
5952 			if (bo->exec)
5953 				break;
5954 
5955 			if (size > num_pages(bo))
5956 				continue;
5957 
5958 			if (__kgem_busy(kgem, bo->handle)) {
5959 				if (flags & CREATE_NO_THROTTLE)
5960 					goto no_retire;
5961 
5962 				do {
5963 					if (!kgem->need_throttle) {
5964 						DBG(("%s: not throttling for active handle=%d\n", __FUNCTION__, bo->handle));
5965 						goto no_retire;
5966 					}
5967 
5968 					__kgem_throttle(kgem, false);
5969 				} while (__kgem_busy(kgem, bo->handle));
5970 			}
5971 
5972 			DBG(("%s: flushed active handle=%d\n", __FUNCTION__, bo->handle));
5973 
5974 			kgem_bo_remove_from_active(kgem, bo);
5975 			__kgem_bo_clear_busy(bo);
5976 
5977 			if (!kgem_set_tiling(kgem, bo, tiling, pitch)) {
5978 				if (exact || kgem->gen < 040)
5979 					goto no_retire;
5980 
5981 				set_gpu_tiling(kgem, bo, tiling, pitch);
5982 			}
5983 			assert(bo->tiling == tiling);
5984 			assert(bo->pitch >= pitch);
5985 
5986 			bo->unique_id = kgem_get_unique_id(kgem);
5987 			bo->delta = 0;
5988 			DBG(("  2:from active: pitch=%d, tiling=%d, handle=%d, id=%d\n",
5989 			     bo->pitch, bo->tiling, bo->handle, bo->unique_id));
5990 			assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo));
5991 			assert_tiling(kgem, bo);
5992 			bo->refcnt = 1;
5993 
5994 			if (flags & CREATE_SCANOUT)
5995 				__kgem_bo_make_scanout(kgem, bo, width, height);
5996 
5997 			return bo;
5998 		}
5999 no_retire:
6000 		flags |= CREATE_NO_RETIRE;
6001 	}
6002 
6003 	if (--retry) {
6004 		bucket++;
6005 		goto search_inactive;
6006 	}
6007 
6008 create:
6009 	if (flags & CREATE_CACHED) {
6010 		DBG(("%s: no cached bo found, requested not to create a new bo\n", __FUNCTION__));
6011 		return NULL;
6012 	}
6013 
6014 	if (bucket >= NUM_CACHE_BUCKETS)
6015 		size = ALIGN(size, 1024);
6016 	handle = gem_create(kgem->fd, size);
6017 	if (handle == 0) {
6018 		DBG(("%s: kernel allocation (gem_create) failure\n", __FUNCTION__));
6019 		return NULL;
6020 	}
6021 
6022 	bo = __kgem_bo_alloc(handle, size);
6023 	if (!bo) {
6024 		DBG(("%s: malloc failed\n", __FUNCTION__));
6025 		gem_close(kgem->fd, handle);
6026 		return NULL;
6027 	}
6028 
6029 	bo->unique_id = kgem_get_unique_id(kgem);
6030 	if (kgem_set_tiling(kgem, bo, tiling, pitch)) {
6031 		if (flags & CREATE_SCANOUT)
6032 			__kgem_bo_make_scanout(kgem, bo, width, height);
6033 	} else {
6034 		if (kgem->gen >= 040) {
6035 			assert(!kgem->can_fence);
6036 			bo->tiling = tiling;
6037 			bo->pitch = pitch;
6038 		} else {
6039 			if (flags & CREATE_EXACT) {
6040 				DBG(("%s: failed to set exact tiling (gem_set_tiling)\n", __FUNCTION__));
6041 				gem_close(kgem->fd, handle);
6042 				free(bo);
6043 				return NULL;
6044 			}
6045 		}
6046 	}
6047 
6048 	assert(bytes(bo) >= bo->pitch * kgem_aligned_height(kgem, height, bo->tiling));
6049 	assert_tiling(kgem, bo);
6050 
6051 	debug_alloc__bo(kgem, bo);
6052 
6053 	DBG(("  new pitch=%d, tiling=%d, handle=%d, id=%d, num_pages=%d [%d], bucket=%d\n",
6054 	     bo->pitch, bo->tiling, bo->handle, bo->unique_id,
6055 	     size, num_pages(bo), bucket(bo)));
6056 	return bo;
6057 }
6058 
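/* Allocate a 2D bo intended for CPU access.  With LLC we simply CPU-map a
 * regular bo; otherwise we try the snoop cache, then a linear bo switched
 * to snooped caching, and finally wrap page-aligned malloc'ed memory with
 * userptr.
 */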
6059 struct kgem_bo *kgem_create_cpu_2d(struct kgem *kgem,
6060 				   int width,
6061 				   int height,
6062 				   int bpp,
6063 				   uint32_t flags)
6064 {
6065 	struct kgem_bo *bo;
6066 	int stride, size;
6067 
6068 	if (DBG_NO_CPU)
6069 		return NULL;
6070 
6071 	DBG(("%s(%dx%d, bpp=%d)\n", __FUNCTION__, width, height, bpp));
6072 
6073 	if (kgem->has_llc) {
6074 		bo = kgem_create_2d(kgem, width, height, bpp,
6075 				    I915_TILING_NONE, flags);
6076 		if (bo == NULL)
6077 			return bo;
6078 
6079 		assert(bo->tiling == I915_TILING_NONE);
6080 		assert_tiling(kgem, bo);
6081 
6082 		if (kgem_bo_map__cpu(kgem, bo) == NULL) {
6083 			kgem_bo_destroy(kgem, bo);
6084 			return NULL;
6085 		}
6086 
6087 		return bo;
6088 	}
6089 
6090 	assert(width > 0 && height > 0);
6091 	stride = ALIGN(width, 2) * bpp >> 3;
6092 	stride = ALIGN(stride, 4);
6093 	size = stride * ALIGN(height, 2);
6094 	assert(size >= PAGE_SIZE);
6095 
6096 	DBG(("%s: %dx%d, %d bpp, stride=%d\n",
6097 	     __FUNCTION__, width, height, bpp, stride));
6098 
6099 	bo = search_snoop_cache(kgem, NUM_PAGES(size), 0);
6100 	if (bo) {
6101 		assert(bo->tiling == I915_TILING_NONE);
6102 		assert_tiling(kgem, bo);
6103 		assert(bo->snoop);
6104 		bo->refcnt = 1;
6105 		bo->pitch = stride;
6106 		bo->unique_id = kgem_get_unique_id(kgem);
6107 		return bo;
6108 	}
6109 
6110 	if (kgem->has_caching) {
6111 		bo = kgem_create_linear(kgem, size, flags);
6112 		if (bo == NULL)
6113 			return NULL;
6114 
6115 		assert(bo->tiling == I915_TILING_NONE);
6116 		assert_tiling(kgem, bo);
6117 
6118 		assert(!__kgem_busy(kgem, bo->handle));
6119 		if (!gem_set_caching(kgem->fd, bo->handle, SNOOPED)) {
6120 			kgem_bo_destroy(kgem, bo);
6121 			return NULL;
6122 		}
6123 		bo->snoop = true;
6124 
6125 		if (kgem_bo_map__cpu(kgem, bo) == NULL) {
6126 			kgem_bo_destroy(kgem, bo);
6127 			return NULL;
6128 		}
6129 
6130 		bo->pitch = stride;
6131 		bo->unique_id = kgem_get_unique_id(kgem);
6132 		return bo;
6133 	}
6134 
6135 	if (kgem->has_userptr) {
6136 		void *ptr;
6137 
6138 		/* XXX */
6139 		//if (posix_memalign(&ptr, 64, ALIGN(size, 64)))
6140 		if (posix_memalign(&ptr, PAGE_SIZE, ALIGN(size, PAGE_SIZE)))
6141 			return NULL;
6142 
6143 		bo = kgem_create_map(kgem, ptr, size, false);
6144 		if (bo == NULL) {
6145 			free(ptr);
6146 			return NULL;
6147 		}
6148 
6149 		bo->pitch = stride;
6150 		bo->unique_id = kgem_get_unique_id(kgem);
6151 		return bo;
6152 	}
6153 
6154 	return NULL;
6155 }
6156 
6157 void _kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo)
6158 {
6159 	DBG(("%s: handle=%d, proxy? %d\n",
6160 	     __FUNCTION__, bo->handle, bo->proxy != NULL));
6161 
6162 	if (bo->proxy) {
6163 		assert(!bo->reusable);
6164 		kgem_bo_binding_free(kgem, bo);
6165 
6166 		assert(list_is_empty(&bo->list));
6167 		_list_del(&bo->vma);
6168 		_list_del(&bo->request);
6169 
6170 		if (bo->io && bo->domain == DOMAIN_CPU)
6171 			_kgem_bo_delete_buffer(kgem, bo);
6172 
6173 		kgem_bo_unref(kgem, bo->proxy);
6174 
6175 		if (DBG_NO_MALLOC_CACHE) {
6176 			free(bo);
6177 		} else {
6178 			*(struct kgem_bo **)bo = __kgem_freed_bo;
6179 			__kgem_freed_bo = bo;
6180 		}
6181 	} else
6182 		__kgem_bo_destroy(kgem, bo);
6183 }
6184 
6185 static void __kgem_flush(struct kgem *kgem, struct kgem_bo *bo)
6186 {
6187 	assert(bo->rq);
6188 	assert(bo->exec == NULL);
6189 	assert(bo->needs_flush);
6190 
6191 	/* The kernel will emit a flush *and* update its own flushing lists. */
6192 	if (!__kgem_busy(kgem, bo->handle))
6193 		__kgem_bo_clear_busy(bo);
6194 
6195 	DBG(("%s: handle=%d, busy?=%d\n",
6196 	     __FUNCTION__, bo->handle, bo->rq != NULL));
6197 }
6198 
6199 void kgem_scanout_flush(struct kgem *kgem, struct kgem_bo *bo)
6200 {
6201 	if (!bo->needs_flush && !bo->gtt_dirty)
6202 		return;
6203 
6204 	kgem_bo_submit(kgem, bo);
6205 
6206 	/* If the kernel fails to emit the flush, then it will be forced when
6207 	 * we assume direct access. And as the usual failure is EIO, we do
6208 	 * not actually care.
6209 	 */
6210 	assert(bo->exec == NULL);
6211 	if (bo->rq)
6212 		__kgem_flush(kgem, bo);
6213 
6214 	if (bo->scanout && kgem->needs_dirtyfb) {
6215 		struct drm_mode_fb_dirty_cmd cmd;
6216 		memset(&cmd, 0, sizeof(cmd));
6217 		cmd.fb_id = bo->delta;
6218 		(void)drmIoctl(kgem->fd, DRM_IOCTL_MODE_DIRTYFB, &cmd);
6219 	}
6220 
6221 	/* Whatever actually happens, we can regard the GTT write domain
6222 	 * as being flushed.
6223 	 */
6224 	__kgem_bo_clear_dirty(bo);
6225 }
6226 
6227 inline static bool nearly_idle(struct kgem *kgem)
6228 {
6229 	int ring = kgem->ring == KGEM_BLT;
6230 
6231 	assert(ring < ARRAY_SIZE(kgem->requests));
6232 	if (list_is_singular(&kgem->requests[ring]))
6233 		return true;
6234 
6235 	return __kgem_ring_is_idle(kgem, ring);
6236 }
6237 
6238 inline static bool needs_semaphore(struct kgem *kgem, struct kgem_bo *bo)
6239 {
6240 	if (kgem->needs_semaphore)
6241 		return false;
6242 
6243 	if (bo->rq == NULL || RQ_RING(bo->rq) == kgem->ring)
6244 		return false;
6245 
6246 	kgem->needs_semaphore = true;
6247 	return true;
6248 }
6249 
6250 inline static bool needs_reservation(struct kgem *kgem, struct kgem_bo *bo)
6251 {
6252 	if (kgem->needs_reservation)
6253 		return false;
6254 
6255 	if (bo->presumed_offset)
6256 		return false;
6257 
6258 	kgem->needs_reservation = true;
6259 	return nearly_idle(kgem);
6260 }
6261 
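/* Should the current batch be submitted before referencing this bo?
 * Either the bo is still busy on the other ring (implying a semaphore
 * wait), or it has no presumed offset yet and the GPU is nearly idle, so
 * flushing now lets the kernel assign one.  An empty relocation list never
 * forces a flush.
 */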
6262 inline static bool needs_batch_flush(struct kgem *kgem, struct kgem_bo *bo)
6263 {
6264 	bool flush = false;
6265 
6266 	if (needs_semaphore(kgem, bo)) {
6267 		DBG(("%s: flushing before handle=%d for required semaphore\n", __FUNCTION__, bo->handle));
6268 		flush = true;
6269 	}
6270 
6271 	if (needs_reservation(kgem, bo)) {
6272 		DBG(("%s: flushing before handle=%d for new reservation\n", __FUNCTION__, bo->handle));
6273 		flush = true;
6274 	}
6275 
6276 	return kgem->nreloc ? flush : false;
6277 }
6278 
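/* Double-check a potential aperture overflow against the kernel: query the
 * available aperture size and test whether num_pages still fits once a
 * reserve (for alignment, fences on older gens, and per-exec-object slack
 * without LLC) is added.  If this batch has already committed aperture
 * space, give up immediately so the caller flushes.
 */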
6279 static bool aperture_check(struct kgem *kgem, unsigned num_pages)
6280 {
6281 	struct drm_i915_gem_get_aperture aperture;
6282 	int reserve;
6283 
6284 	if (kgem->aperture)
6285 		return false;
6286 
6287 	/* Leave some space in case of alignment issues */
6288 	reserve = kgem->aperture_mappable / 2;
6289 	if (kgem->gen < 033 && reserve < kgem->aperture_max_fence)
6290 		reserve = kgem->aperture_max_fence;
6291 	if (!kgem->has_llc)
6292 		reserve += kgem->nexec * PAGE_SIZE * 2;
6293 
6294 	DBG(("%s: num_pages=%d, holding %d pages in reserve, total aperture %d\n",
6295 	     __FUNCTION__, num_pages, reserve, kgem->aperture_total));
6296 	num_pages += reserve;
6297 
6298 	VG_CLEAR(aperture);
6299 	aperture.aper_available_size = kgem->aperture_total;
6300 	aperture.aper_available_size *= PAGE_SIZE;
6301 	(void)do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture);
6302 
6303 	DBG(("%s: aperture required %ld bytes, available %ld bytes\n",
6304 	     __FUNCTION__,
6305 	     (long)num_pages * PAGE_SIZE,
6306 	     (long)aperture.aper_available_size));
6307 
6308 	return num_pages <= aperture.aper_available_size / PAGE_SIZE;
6309 }
6310 
6311 static inline bool kgem_flush(struct kgem *kgem, bool flush)
6312 {
6313 	if (unlikely(kgem->wedged))
6314 		return false;
6315 
6316 	if (kgem->nreloc == 0)
6317 		return true;
6318 
6319 	if (__to_sna(kgem)->flags & SNA_POWERSAVE)
6320 		return true;
6321 
6322 	if (kgem->flush == flush && kgem->aperture < kgem->aperture_low)
6323 		return true;
6324 
6325 	DBG(("%s: opportunistic flushing? flush=%d,%d, aperture=%d/%d, idle?=%d\n",
6326 	     __FUNCTION__, kgem->flush, flush, kgem->aperture, kgem->aperture_low, kgem_ring_is_idle(kgem, kgem->ring)));
6327 	return !kgem_ring_is_idle(kgem, kgem->ring);
6328 }
6329 
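/* Check whether a NULL-terminated list of bos can be added to the current
 * batch without overflowing the exec list or the aperture budget and
 * without requiring a ring switch; false tells the caller to submit first.
 * A typical (hypothetical) caller looks like:
 *
 *	if (!kgem_check_bo(kgem, src, dst, NULL))
 *		_kgem_submit(kgem);
 */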
6330 bool kgem_check_bo(struct kgem *kgem, ...)
6331 {
6332 	va_list ap;
6333 	struct kgem_bo *bo;
6334 	int num_exec = 0;
6335 	int num_pages = 0;
6336 	bool flush = false;
6337 	bool busy = true;
6338 
6339 	va_start(ap, kgem);
6340 	while ((bo = va_arg(ap, struct kgem_bo *))) {
6341 		while (bo->proxy)
6342 			bo = bo->proxy;
6343 		if (bo->exec)
6344 			continue;
6345 
6346 		if (needs_batch_flush(kgem, bo)) {
6347 			va_end(ap);
6348 			return false;
6349 		}
6350 
6351 		num_pages += num_pages(bo);
6352 		num_exec++;
6353 
6354 		flush |= bo->flush;
6355 		busy &= bo->rq != NULL;
6356 	}
6357 	va_end(ap);
6358 
6359 	DBG(("%s: num_pages=+%d, num_exec=+%d\n",
6360 	     __FUNCTION__, num_pages, num_exec));
6361 
6362 	if (!num_pages)
6363 		return true;
6364 
6365 	if (kgem->nexec + num_exec >= KGEM_EXEC_SIZE(kgem)) {
6366 		DBG(("%s: out of exec slots (%d + %d / %d)\n", __FUNCTION__,
6367 		     kgem->nexec, num_exec, KGEM_EXEC_SIZE(kgem)));
6368 		return false;
6369 	}
6370 
6371 	if (num_pages + kgem->aperture > kgem->aperture_high) {
6372 		DBG(("%s: final aperture usage (%d + %d) is greater than high water mark (%d)\n",
6373 		     __FUNCTION__, kgem->aperture, num_pages, kgem->aperture_high));
6374 		return aperture_check(kgem, num_pages);
6375 	}
6376 
6377 	if (busy)
6378 		return true;
6379 
6380 	return kgem_flush(kgem, flush);
6381 }
6382 
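/* As kgem_check_bo(), but for a single bo that may also need a fence
 * register on pre-gen4 hardware (X-tiled): additionally verify that a
 * fence slot is free and that the estimated fenceable aperture usage stays
 * within bounds.
 */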
6383 bool kgem_check_bo_fenced(struct kgem *kgem, struct kgem_bo *bo)
6384 {
6385 	assert(bo->refcnt);
6386 	while (bo->proxy)
6387 		bo = bo->proxy;
6388 	assert(bo->refcnt);
6389 
6390 	if (bo->exec) {
6391 		if (kgem->gen < 040 &&
6392 		    bo->tiling != I915_TILING_NONE &&
6393 		    (bo->exec->flags & EXEC_OBJECT_NEEDS_FENCE) == 0) {
6394 			uint32_t size;
6395 
6396 			assert(bo->tiling == I915_TILING_X);
6397 
6398 			if (kgem->nfence >= kgem->fence_max)
6399 				return false;
6400 
6401 			if (kgem->aperture_fenced) {
6402 				size = 3*kgem->aperture_fenced;
6403 				if (kgem->aperture_total == kgem->aperture_mappable)
6404 					size += kgem->aperture;
6405 				if (size > kgem->aperture_fenceable &&
6406 				    kgem_ring_is_idle(kgem, kgem->ring)) {
6407 					DBG(("%s: opportunistic fence flush\n", __FUNCTION__));
6408 					return false;
6409 				}
6410 			}
6411 
6412 			size = kgem_bo_fenced_size(kgem, bo);
6413 			if (size > kgem->aperture_max_fence)
6414 				kgem->aperture_max_fence = size;
6415 			size += kgem->aperture_fenced;
6416 			if (kgem->gen < 033 && size < 2 * kgem->aperture_max_fence)
6417 				size = 2 * kgem->aperture_max_fence;
6418 			if (kgem->aperture_total == kgem->aperture_mappable)
6419 				size += kgem->aperture;
6420 			if (size > kgem->aperture_fenceable) {
6421 				DBG(("%s: estimated fence space required %d (fenced=%d, max_fence=%d, aperture=%d) exceeds fenceable aperture %d\n",
6422 				     __FUNCTION__, size, kgem->aperture_fenced, kgem->aperture_max_fence, kgem->aperture, kgem->aperture_fenceable));
6423 				return false;
6424 			}
6425 		}
6426 
6427 		return true;
6428 	}
6429 
6430 	if (kgem->nexec >= KGEM_EXEC_SIZE(kgem) - 1)
6431 		return false;
6432 
6433 	if (needs_batch_flush(kgem, bo))
6434 		return false;
6435 
6436 	assert_tiling(kgem, bo);
6437 	if (kgem->gen < 040 && bo->tiling != I915_TILING_NONE) {
6438 		uint32_t size;
6439 
6440 		assert(bo->tiling == I915_TILING_X);
6441 
6442 		if (kgem->nfence >= kgem->fence_max)
6443 			return false;
6444 
6445 		if (kgem->aperture_fenced) {
6446 			size = 3*kgem->aperture_fenced;
6447 			if (kgem->aperture_total == kgem->aperture_mappable)
6448 				size += kgem->aperture;
6449 			if (size > kgem->aperture_fenceable &&
6450 			    kgem_ring_is_idle(kgem, kgem->ring)) {
6451 				DBG(("%s: opportunistic fence flush\n", __FUNCTION__));
6452 				return false;
6453 			}
6454 		}
6455 
6456 		size = kgem_bo_fenced_size(kgem, bo);
6457 		if (size > kgem->aperture_max_fence)
6458 			kgem->aperture_max_fence = size;
6459 		size += kgem->aperture_fenced;
6460 		if (kgem->gen < 033 && size < 2 * kgem->aperture_max_fence)
6461 			size = 2 * kgem->aperture_max_fence;
6462 		if (kgem->aperture_total == kgem->aperture_mappable)
6463 			size += kgem->aperture;
6464 		if (size > kgem->aperture_fenceable) {
6465 			DBG(("%s: estimated fence space required %d (fenced=%d, max_fence=%d, aperture=%d) exceeds fenceable aperture %d\n",
6466 			     __FUNCTION__, size, kgem->aperture_fenced, kgem->aperture_max_fence, kgem->aperture, kgem->aperture_fenceable));
6467 			return false;
6468 		}
6469 	}
6470 
6471 	if (kgem->aperture + kgem->aperture_fenced + num_pages(bo) > kgem->aperture_high) {
6472 		DBG(("%s: final aperture usage (%d + %d) is greater than high water mark (%d)\n",
6473 		     __FUNCTION__, kgem->aperture, num_pages(bo), kgem->aperture_high));
6474 		return aperture_check(kgem, num_pages(bo));
6475 	}
6476 
6477 	if (bo->rq)
6478 		return true;
6479 
6480 	return kgem_flush(kgem, bo->flush);
6481 }
6482 
6483 bool kgem_check_many_bo_fenced(struct kgem *kgem, ...)
6484 {
6485 	va_list ap;
6486 	struct kgem_bo *bo;
6487 	int num_fence = 0;
6488 	int num_exec = 0;
6489 	int num_pages = 0;
6490 	int fenced_size = 0;
6491 	bool flush = false;
6492 	bool busy = true;
6493 
6494 	va_start(ap, kgem);
6495 	while ((bo = va_arg(ap, struct kgem_bo *))) {
6496 		assert(bo->refcnt);
6497 		while (bo->proxy)
6498 			bo = bo->proxy;
6499 		assert(bo->refcnt);
6500 		if (bo->exec) {
6501 			if (kgem->gen >= 040 || bo->tiling == I915_TILING_NONE)
6502 				continue;
6503 
6504 			if ((bo->exec->flags & EXEC_OBJECT_NEEDS_FENCE) == 0) {
6505 				fenced_size += kgem_bo_fenced_size(kgem, bo);
6506 				num_fence++;
6507 			}
6508 
6509 			continue;
6510 		}
6511 
6512 		if (needs_batch_flush(kgem, bo)) {
6513 			va_end(ap);
6514 			return false;
6515 		}
6516 
6517 		assert_tiling(kgem, bo);
6518 		num_pages += num_pages(bo);
6519 		num_exec++;
6520 		if (kgem->gen < 040 && bo->tiling) {
6521 			uint32_t size = kgem_bo_fenced_size(kgem, bo);
6522 			if (size > kgem->aperture_max_fence)
6523 				kgem->aperture_max_fence = size;
6524 			fenced_size += size;
6525 			num_fence++;
6526 		}
6527 
6528 		flush |= bo->flush;
6529 		busy &= bo->rq != NULL;
6530 	}
6531 	va_end(ap);
6532 
6533 	if (num_fence) {
6534 		uint32_t size;
6535 
6536 		if (kgem->nfence + num_fence > kgem->fence_max)
6537 			return false;
6538 
6539 		if (kgem->aperture_fenced) {
6540 			size = 3*kgem->aperture_fenced;
6541 			if (kgem->aperture_total == kgem->aperture_mappable)
6542 				size += kgem->aperture;
6543 			if (size > kgem->aperture_fenceable &&
6544 			    kgem_ring_is_idle(kgem, kgem->ring)) {
6545 				DBG(("%s: opportunistic fence flush\n", __FUNCTION__));
6546 				return false;
6547 			}
6548 		}
6549 
6550 		size = kgem->aperture_fenced;
6551 		size += fenced_size;
6552 		if (kgem->gen < 033 && size < 2 * kgem->aperture_max_fence)
6553 			size = 2 * kgem->aperture_max_fence;
6554 		if (kgem->aperture_total == kgem->aperture_mappable)
6555 			size += kgem->aperture;
6556 		if (size > kgem->aperture_fenceable) {
6557 			DBG(("%s: estimated fence space required %d (fenced=%d, max_fence=%d, aperture=%d) exceeds fenceable aperture %d\n",
6558 			     __FUNCTION__, size, kgem->aperture_fenced, kgem->aperture_max_fence, kgem->aperture, kgem->aperture_fenceable));
6559 			return false;
6560 		}
6561 	}
6562 
6563 	if (num_pages == 0)
6564 		return true;
6565 
6566 	if (kgem->nexec + num_exec >= KGEM_EXEC_SIZE(kgem))
6567 		return false;
6568 
6569 	if (num_pages + kgem->aperture > kgem->aperture_high - kgem->aperture_fenced) {
6570 		DBG(("%s: final aperture usage (%d + %d + %d) is greater than high water mark (%d)\n",
6571 		     __FUNCTION__, kgem->aperture, kgem->aperture_fenced, num_pages, kgem->aperture_high));
6572 		return aperture_check(kgem, num_pages);
6573 	}
6574 
6575 	if (busy)
6576 		return true;
6577 
6578 	return kgem_flush(kgem, flush);
6579 }
6580 
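/* Program BCS_SWCTRL so the blitter treats the source/destination as
 * Y-tiled when required.  The register is only rewritten when the needed
 * state changes, and is preceded by an MI_FLUSH_DW if commands have
 * already been emitted into this batch.
 */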
6581 void __kgem_bcs_set_tiling(struct kgem *kgem,
6582 			   struct kgem_bo *src,
6583 			   struct kgem_bo *dst)
6584 {
6585 	uint32_t state, *b;
6586 
6587 	DBG(("%s: src handle=%d:tiling=%d, dst handle=%d:tiling=%d\n",
6588 	     __FUNCTION__,
6589 	     src ? src->handle : 0, src ? src->tiling : 0,
6590 	     dst ? dst->handle : 0, dst ? dst->tiling : 0));
6591 	assert(kgem->mode == KGEM_BLT);
6592 	assert(dst == NULL || kgem_bo_can_blt(kgem, dst));
6593 	assert(src == NULL || kgem_bo_can_blt(kgem, src));
6594 
6595 	state = 0;
6596 	if (dst && dst->tiling == I915_TILING_Y)
6597 		state |= BCS_DST_Y;
6598 	if (src && src->tiling == I915_TILING_Y)
6599 		state |= BCS_SRC_Y;
6600 
6601 	if (kgem->bcs_state == state)
6602 		return;
6603 
6604 	DBG(("%s: updating SWCTRL %x -> %x\n", __FUNCTION__,
6605 	     kgem->bcs_state, state));
6606 
6607 	/* Over-estimate space in case we need to re-emit the cmd packet */
6608 	if (!kgem_check_batch(kgem, 24)) {
6609 		_kgem_submit(kgem);
6610 		_kgem_set_mode(kgem, KGEM_BLT);
6611 		if (state == 0)
6612 			return;
6613 	}
6614 
6615 	b = kgem->batch + kgem->nbatch;
6616 	if (kgem->nbatch) {
6617 		*b++ = MI_FLUSH_DW;
6618 		*b++ = 0;
6619 		*b++ = 0;
6620 		*b++ = 0;
6621 	}
6622 	*b++ = MI_LOAD_REGISTER_IMM;
6623 	*b++ = BCS_SWCTRL;
6624 	*b++ = (BCS_SRC_Y | BCS_DST_Y) << 16 | state;
6625 	kgem->nbatch = b - kgem->batch;
6626 
6627 	kgem->bcs_state = state;
6628 }
6629 
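/* Record a relocation for batch dword 'pos': proxies are resolved (and
 * marked active/dirty) first, any fence required on pre-gen4 is accounted,
 * and the value to write into the batch (presumed offset plus delta) is
 * returned.  kgem_add_reloc64() below is the gen8+ (gen >= 0100) variant
 * returning a 64-bit offset.
 */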
6630 uint32_t kgem_add_reloc(struct kgem *kgem,
6631 			uint32_t pos,
6632 			struct kgem_bo *bo,
6633 			uint32_t read_write_domain,
6634 			uint32_t delta)
6635 {
6636 	int index;
6637 
6638 	DBG(("%s: handle=%d, pos=%d, delta=%d, domains=%08x\n",
6639 	     __FUNCTION__, bo ? bo->handle : 0, pos, delta, read_write_domain));
6640 
6641 	assert(kgem->gen < 0100);
6642 	assert((read_write_domain & 0x7fff) == 0 || bo != NULL);
6643 
6644 	index = kgem->nreloc++;
6645 	assert(index < ARRAY_SIZE(kgem->reloc));
6646 	kgem->reloc[index].offset = pos * sizeof(kgem->batch[0]);
6647 	if (bo) {
6648 		assert(kgem->mode != KGEM_NONE);
6649 		assert(bo->refcnt);
6650 		while (bo->proxy) {
6651 			DBG(("%s: adding proxy [delta=%d] for handle=%d\n",
6652 			     __FUNCTION__, bo->delta, bo->handle));
6653 			delta += bo->delta;
6654 			assert(bo->handle == bo->proxy->handle);
6655 			/* need to release the cache upon batch submit */
6656 			if (bo->exec == NULL) {
6657 				list_move_tail(&bo->request,
6658 					       &kgem->next_request->buffers);
6659 				bo->rq = MAKE_REQUEST(kgem->next_request,
6660 						      kgem->ring);
6661 				bo->exec = &_kgem_dummy_exec;
6662 				bo->domain = DOMAIN_GPU;
6663 			}
6664 
6665 			if (read_write_domain & 0x7fff && !bo->gpu_dirty)
6666 				__kgem_bo_mark_dirty(bo);
6667 
6668 			bo = bo->proxy;
6669 			assert(bo->refcnt);
6670 		}
6671 		assert(bo->refcnt);
6672 
6673 		if (bo->exec == NULL)
6674 			kgem_add_bo(kgem, bo);
6675 		assert(bo->rq == MAKE_REQUEST(kgem->next_request, kgem->ring));
6676 		assert(RQ_RING(bo->rq) == kgem->ring);
6677 
6678 		if (kgem->gen < 040 && read_write_domain & KGEM_RELOC_FENCED) {
6679 			if (bo->tiling &&
6680 			    (bo->exec->flags & EXEC_OBJECT_NEEDS_FENCE) == 0) {
6681 				assert(bo->tiling == I915_TILING_X);
6682 				assert(kgem->nfence < kgem->fence_max);
6683 				kgem->aperture_fenced +=
6684 					kgem_bo_fenced_size(kgem, bo);
6685 				kgem->nfence++;
6686 			}
6687 			bo->exec->flags |= EXEC_OBJECT_NEEDS_FENCE;
6688 		}
6689 
6690 		kgem->reloc[index].delta = delta;
6691 		kgem->reloc[index].target_handle = bo->target_handle;
6692 		kgem->reloc[index].presumed_offset = bo->presumed_offset;
6693 
6694 		if (read_write_domain & 0x7fff && !bo->gpu_dirty) {
6695 			assert(!bo->snoop || kgem->can_blt_cpu);
6696 			__kgem_bo_mark_dirty(bo);
6697 		}
6698 
6699 		delta += bo->presumed_offset;
6700 	} else {
6701 		kgem->reloc[index].delta = delta;
6702 		kgem->reloc[index].target_handle = ~0U;
6703 		kgem->reloc[index].presumed_offset = 0;
6704 		if (kgem->nreloc__self < 256)
6705 			kgem->reloc__self[kgem->nreloc__self++] = index;
6706 	}
6707 	kgem->reloc[index].read_domains = read_write_domain >> 16;
6708 	kgem->reloc[index].write_domain = read_write_domain & 0x7fff;
6709 
6710 	return delta;
6711 }
6712 
6713 uint64_t kgem_add_reloc64(struct kgem *kgem,
6714 			  uint32_t pos,
6715 			  struct kgem_bo *bo,
6716 			  uint32_t read_write_domain,
6717 			  uint64_t delta)
6718 {
6719 	int index;
6720 
6721 	DBG(("%s: handle=%d, pos=%d, delta=%ld, domains=%08x\n",
6722 	     __FUNCTION__, bo ? bo->handle : 0, pos, (long)delta, read_write_domain));
6723 
6724 	assert(kgem->gen >= 0100);
6725 	assert((read_write_domain & 0x7fff) == 0 || bo != NULL);
6726 
6727 	index = kgem->nreloc++;
6728 	assert(index < ARRAY_SIZE(kgem->reloc));
6729 	kgem->reloc[index].offset = pos * sizeof(kgem->batch[0]);
6730 	if (bo) {
6731 		assert(kgem->mode != KGEM_NONE);
6732 		assert(bo->refcnt);
6733 		while (bo->proxy) {
6734 			DBG(("%s: adding proxy [delta=%ld] for handle=%d\n",
6735 			     __FUNCTION__, (long)bo->delta, bo->handle));
6736 			delta += bo->delta;
6737 			assert(bo->handle == bo->proxy->handle);
6738 			/* need to release the cache upon batch submit */
6739 			if (bo->exec == NULL) {
6740 				list_move_tail(&bo->request,
6741 					       &kgem->next_request->buffers);
6742 				bo->rq = MAKE_REQUEST(kgem->next_request,
6743 						      kgem->ring);
6744 				bo->exec = &_kgem_dummy_exec;
6745 				bo->domain = DOMAIN_GPU;
6746 			}
6747 
6748 			if (read_write_domain & 0x7fff && !bo->gpu_dirty)
6749 				__kgem_bo_mark_dirty(bo);
6750 
6751 			bo = bo->proxy;
6752 			assert(bo->refcnt);
6753 		}
6754 		assert(bo->refcnt);
6755 
6756 		if (bo->exec == NULL)
6757 			kgem_add_bo(kgem, bo);
6758 		assert(bo->rq == MAKE_REQUEST(kgem->next_request, kgem->ring));
6759 		assert(RQ_RING(bo->rq) == kgem->ring);
6760 
6761 		DBG(("%s[%d] = (delta=%ld, target handle=%d, presumed=%llx)\n",
6762 					__FUNCTION__, index, (long)delta, bo->target_handle, (long long)bo->presumed_offset));
6763 		kgem->reloc[index].delta = delta;
6764 		kgem->reloc[index].target_handle = bo->target_handle;
6765 		kgem->reloc[index].presumed_offset = bo->presumed_offset;
6766 
6767 		if (read_write_domain & 0x7fff && !bo->gpu_dirty) {
6768 			assert(!bo->snoop || kgem->can_blt_cpu);
6769 			__kgem_bo_mark_dirty(bo);
6770 		}
6771 
6772 		delta += bo->presumed_offset;
6773 	} else {
6774 		DBG(("%s[%d] = (delta=%ld, target handle=batch)\n",
6775 					__FUNCTION__, index, (long)delta));
6776 		kgem->reloc[index].delta = delta;
6777 		kgem->reloc[index].target_handle = ~0U;
6778 		kgem->reloc[index].presumed_offset = 0;
6779 		if (kgem->nreloc__self < 256)
6780 			kgem->reloc__self[kgem->nreloc__self++] = index;
6781 	}
6782 	kgem->reloc[index].read_domains = read_write_domain >> 16;
6783 	kgem->reloc[index].write_domain = read_write_domain & 0x7fff;
6784 
6785 	return delta;
6786 }
6787 
6788 static void kgem_trim_vma_cache(struct kgem *kgem, int type, int bucket)
6789 {
6790 	int i, j;
6791 
6792 	DBG(("%s: type=%d, count=%d (bucket: %d)\n",
6793 	     __FUNCTION__, type, kgem->vma[type].count, bucket));
6794 	if (kgem->vma[type].count <= 0)
6795 	       return;
6796 
6797 	if (kgem->need_purge)
6798 		kgem_purge_cache(kgem);
6799 
6800 	/* vma are limited on a per-process basis to around 64k.
6801 	 * This includes all malloc arenas as well as other file
6802 	 * mappings. In order to be fair and not hog the cache,
6803 	 * and more importantly not to exhaust that limit and to
6804 	 * start failing mappings, we keep our own number of open
6805 	 * vma to within a conservative value.
6806 	 */
6807 	i = 0;
6808 	while (kgem->vma[type].count > 0) {
6809 		struct kgem_bo *bo = NULL;
6810 
6811 		for (j = 0;
6812 		     bo == NULL && j < ARRAY_SIZE(kgem->vma[type].inactive);
6813 		     j++) {
6814 			struct list *head = &kgem->vma[type].inactive[i++%ARRAY_SIZE(kgem->vma[type].inactive)];
6815 			if (!list_is_empty(head))
6816 				bo = list_last_entry(head, struct kgem_bo, vma);
6817 		}
6818 		if (bo == NULL)
6819 			break;
6820 
6821 		DBG(("%s: discarding inactive %s vma cache for %d\n",
6822 		     __FUNCTION__, type ? "CPU" : "GTT", bo->handle));
6823 
6824 		assert(bo->rq == NULL);
6825 		if (type) {
6826 			VG(VALGRIND_MAKE_MEM_NOACCESS(MAP(bo->map__cpu), bytes(bo)));
6827 			munmap(MAP(bo->map__cpu), bytes(bo));
6828 			bo->map__cpu = NULL;
6829 		} else {
6830 			if (bo->map__wc) {
6831 				VG(VALGRIND_MAKE_MEM_NOACCESS(bo->map__wc, bytes(bo)));
6832 				munmap(bo->map__wc, bytes(bo));
6833 				bo->map__wc = NULL;
6834 			}
6835 			if (bo->map__gtt) {
6836 				munmap(bo->map__gtt, bytes(bo));
6837 				bo->map__gtt = NULL;
6838 			}
6839 		}
6840 
6841 		list_del(&bo->vma);
6842 		kgem->vma[type].count--;
6843 	}
6844 }
6845 
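/* Return a GPU-coherent mapping for the bo: tiled buffers (or kernels
 * without WC mmap support) go through the GTT, linear buffers use a WC
 * mmap.  The VMA cache is trimmed first to stay within the per-process
 * mapping limit.
 */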
6846 static void *__kgem_bo_map__gtt_or_wc(struct kgem *kgem, struct kgem_bo *bo)
6847 {
6848 	void *ptr;
6849 
6850 	DBG(("%s: handle=%d\n", __FUNCTION__, bo->handle));
6851 
6852 	assert(bo->proxy == NULL);
6853 	assert(!bo->snoop);
6854 
6855 	kgem_trim_vma_cache(kgem, MAP_GTT, bucket(bo));
6856 
6857 	if (bo->tiling || !kgem->has_wc_mmap) {
6858 		assert(kgem->gen != 021 || bo->tiling != I915_TILING_Y);
6859 		warn_unless(num_pages(bo) <= kgem->aperture_mappable / 2);
6860 
6861 		ptr = bo->map__gtt;
6862 		if (ptr == NULL)
6863 			ptr = __kgem_bo_map__gtt(kgem, bo);
6864 	} else {
6865 		ptr = bo->map__wc;
6866 		if (ptr == NULL)
6867 			ptr = __kgem_bo_map__wc(kgem, bo);
6868 	}
6869 
6870 	return ptr;
6871 }
6872 
6873 void *kgem_bo_map__async(struct kgem *kgem, struct kgem_bo *bo)
6874 {
6875 	DBG(("%s: handle=%d, offset=%ld, tiling=%d, map=%p:%p, domain=%d\n", __FUNCTION__,
6876 	     bo->handle, (long)bo->presumed_offset, bo->tiling, bo->map__gtt, bo->map__cpu, bo->domain));
6877 
6878 	assert(bo->proxy == NULL);
6879 	assert(list_is_empty(&bo->list));
6880 	assert_tiling(kgem, bo);
6881 	assert(!bo->purged || bo->reusable);
6882 
6883 	if (bo->tiling == I915_TILING_NONE && !bo->scanout && kgem->has_llc) {
6884 		DBG(("%s: converting request for GTT map into CPU map\n",
6885 		     __FUNCTION__));
6886 		return kgem_bo_map__cpu(kgem, bo);
6887 	}
6888 
6889 	return __kgem_bo_map__gtt_or_wc(kgem, bo);
6890 }
6891 
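/* Map the bo for direct access and move it into the appropriate domain:
 * linear bos with LLC (or already in the CPU domain) get a CPU map plus a
 * CPU set-domain, everything else a GTT/WC map followed by a GTT
 * set-domain.  kgem_bo_map__async() above returns the same mapping but
 * skips the domain synchronisation.
 */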
6892 void *kgem_bo_map(struct kgem *kgem, struct kgem_bo *bo)
6893 {
6894 	void *ptr;
6895 
6896 	DBG(("%s: handle=%d, offset=%ld, tiling=%d, map=%p:%p, domain=%d\n", __FUNCTION__,
6897 	     bo->handle, (long)bo->presumed_offset, bo->tiling, bo->map__gtt, bo->map__cpu, bo->domain));
6898 
6899 	assert(bo->proxy == NULL);
6900 	assert(list_is_empty(&bo->list));
6901 	assert(bo->exec == NULL);
6902 	assert_tiling(kgem, bo);
6903 	assert(!bo->purged || bo->reusable);
6904 
6905 	if (bo->tiling == I915_TILING_NONE && !bo->scanout &&
6906 	    (kgem->has_llc || bo->domain == DOMAIN_CPU)) {
6907 		DBG(("%s: converting request for GTT map into CPU map\n",
6908 		     __FUNCTION__));
6909 		ptr = kgem_bo_map__cpu(kgem, bo);
6910 		if (ptr)
6911 			kgem_bo_sync__cpu(kgem, bo);
6912 		return ptr;
6913 	}
6914 
6915 	ptr = __kgem_bo_map__gtt_or_wc(kgem, bo);
6916 
6917 	if (bo->domain != DOMAIN_GTT || FORCE_MMAP_SYNC & (1 << DOMAIN_GTT)) {
6918 		struct drm_i915_gem_set_domain set_domain;
6919 
6920 		DBG(("%s: sync: needs_flush? %d, domain? %d, busy? %d\n", __FUNCTION__,
6921 		     bo->needs_flush, bo->domain, __kgem_busy(kgem, bo->handle)));
6922 
6923 		/* XXX use PROT_READ to avoid the write flush? */
6924 
6925 		VG_CLEAR(set_domain);
6926 		set_domain.handle = bo->handle;
6927 		set_domain.read_domains = I915_GEM_DOMAIN_GTT;
6928 		set_domain.write_domain = I915_GEM_DOMAIN_GTT;
6929 		if (do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain)) {
6930 			DBG(("%s: sync: GPU hang detected\n", __FUNCTION__));
6931 			kgem_throttle(kgem);
6932 		}
6933 		bo->needs_flush = false;
6934 		kgem_bo_retire(kgem, bo);
6935 		bo->domain = DOMAIN_GTT;
6936 		bo->gtt_dirty = true;
6937 	}
6938 
6939 	return ptr;
6940 }
6941 
6942 void *kgem_bo_map__gtt(struct kgem *kgem, struct kgem_bo *bo)
6943 {
6944 	DBG(("%s: handle=%d, offset=%ld, tiling=%d, map=%p:%p, domain=%d\n", __FUNCTION__,
6945 	     bo->handle, (long)bo->presumed_offset, bo->tiling, bo->map__gtt, bo->map__cpu, bo->domain));
6946 
6947 	assert(bo->proxy == NULL);
6948 	assert(bo->exec == NULL);
6949 	assert(list_is_empty(&bo->list));
6950 	assert_tiling(kgem, bo);
6951 	assert(!bo->purged || bo->reusable);
6952 
6953 	return __kgem_bo_map__gtt_or_wc(kgem, bo);
6954 }
6955 
6956 void *kgem_bo_map__wc(struct kgem *kgem, struct kgem_bo *bo)
6957 {
6958 	DBG(("%s: handle=%d, offset=%ld, tiling=%d, map=%p:%p, domain=%d\n", __FUNCTION__,
6959 	     bo->handle, (long)bo->presumed_offset, bo->tiling, bo->map__gtt, bo->map__cpu, bo->domain));
6960 
6961 	assert(bo->proxy == NULL);
6962 	assert(list_is_empty(&bo->list));
6963 	assert_tiling(kgem, bo);
6964 	assert(!bo->purged || bo->reusable);
6965 
6966 	if (bo->map__wc)
6967 		return bo->map__wc;
6968 	if (!kgem->has_wc_mmap)
6969 		return NULL;
6970 
6971 	kgem_trim_vma_cache(kgem, MAP_GTT, bucket(bo));
6972 	return __kgem_bo_map__wc(kgem, bo);
6973 }
6974 
6975 void *kgem_bo_map__cpu(struct kgem *kgem, struct kgem_bo *bo)
6976 {
6977 	DBG(("%s(handle=%d, size=%d, map=%p:%p)\n",
6978 	     __FUNCTION__, bo->handle, bytes(bo), bo->map__gtt, bo->map__cpu));
6979 	assert(!bo->purged);
6980 	assert(list_is_empty(&bo->list));
6981 	assert(bo->proxy == NULL);
6982 	assert_tiling(kgem, bo);
6983 
6984 	if (bo->map__cpu)
6985 		return MAP(bo->map__cpu);
6986 
6987 	kgem_trim_vma_cache(kgem, MAP_CPU, bucket(bo));
6988 
6989 	return __kgem_bo_map__cpu(kgem, bo);
6990 }
6991 
6992 void *kgem_bo_map__debug(struct kgem *kgem, struct kgem_bo *bo)
6993 {
6994 	void *ptr;
6995 
6996 	if (bo->tiling == I915_TILING_NONE && kgem->has_llc) {
6997 		ptr = MAP(bo->map__cpu);
6998 		if (ptr == NULL)
6999 			ptr = __kgem_bo_map__cpu(kgem, bo);
7000 	} else if (bo->tiling || !kgem->has_wc_mmap) {
7001 		ptr = bo->map__gtt;
7002 		if (ptr == NULL)
7003 			ptr = __kgem_bo_map__gtt(kgem, bo);
7004 	} else {
7005 		ptr = bo->map__wc;
7006 		if (ptr == NULL)
7007 			ptr = __kgem_bo_map__wc(kgem, bo);
7008 	}
7009 
7010 	return ptr;
7011 }
7012 
7013 
7014 uint32_t kgem_bo_flink(struct kgem *kgem, struct kgem_bo *bo)
7015 {
7016 	struct drm_gem_flink flink;
7017 
7018 	assert(kgem_bo_is_fenced(kgem, bo));
7019 
7020 	VG_CLEAR(flink);
7021 	flink.handle = bo->handle;
7022 	if (do_ioctl(kgem->fd, DRM_IOCTL_GEM_FLINK, &flink))
7023 		return 0;
7024 
7025 	DBG(("%s: flinked handle=%d to name=%d, marking non-reusable\n",
7026 	     __FUNCTION__, flink.handle, flink.name));
7027 
7028 	/* Ordinarily, giving the buffer a global name makes it non-reusable.
7029 	 * However, we track the lifetime of all clients and their hold
7030 	 * on the buffer, and *presuming* they do not pass it on to a third
7031 	 * party, we track the lifetime accurately.
7032 	 */
7033 	bo->reusable = false;
7034 	kgem_bo_unclean(kgem, bo);
7035 
7036 	return flink.name;
7037 }
7038 
7039 static bool probe(struct kgem *kgem, uint32_t handle)
7040 {
7041 	struct drm_i915_gem_set_domain arg = {
7042 		.handle = handle,
7043 		.read_domains = I915_GEM_DOMAIN_CPU,
7044 	};
7045 
7046 	return do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &arg) == 0;
7047 }
7048 
7049 static uint32_t probe_userptr(struct kgem *kgem,
7050 			      void *ptr, size_t size, int read_only)
7051 {
7052 	uint32_t handle;
7053 
7054 	handle = gem_userptr(kgem->fd, ptr, size, read_only);
7055 	if (handle && !probe(kgem, handle)) {
7056 		gem_close(kgem->fd, handle);
7057 		handle = 0;
7058 	}
7059 
7060 	return handle;
7061 }
7062 
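/* Wrap existing user memory in a bo using userptr.  The range handed to
 * the kernel is expanded to page boundaries; if ptr was not page aligned,
 * a proxy covering just [ptr, ptr+size) is returned instead.  A rejected
 * read-only import may be retried as a writable one.
 */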
7063 struct kgem_bo *kgem_create_map(struct kgem *kgem,
7064 				void *ptr, uint32_t size,
7065 				bool read_only)
7066 {
7067 	struct kgem_bo *bo;
7068 	uintptr_t first_page, last_page;
7069 	uint32_t handle;
7070 
7071 	assert(MAP(ptr) == ptr);
7072 
7073 	DBG(("%s(%p size=%d, read-only?=%d) - has_userptr?=%d\n", __FUNCTION__,
7074 	     ptr, size, read_only, kgem->has_userptr));
7075 	if (!kgem->has_userptr)
7076 		return NULL;
7077 
7078 	first_page = (uintptr_t)ptr;
7079 	last_page = first_page + size + PAGE_SIZE - 1;
7080 
7081 	first_page &= ~(uintptr_t)(PAGE_SIZE-1);
7082 	last_page &= ~(uintptr_t)(PAGE_SIZE-1);
7083 	assert(last_page > first_page);
7084 
7085 	handle = probe_userptr(kgem,
7086 			       (void *)first_page, last_page-first_page,
7087 			       read_only);
7088 	if (handle == 0 && read_only && kgem->has_wc_mmap)
7089 		handle = probe_userptr(kgem,
7090 				       (void *)first_page, last_page-first_page,
7091 				       false);
7092 	if (handle == 0) {
7093 		DBG(("%s: import failed, errno=%d\n", __FUNCTION__, errno));
7094 		return NULL;
7095 	}
7096 
7097 	bo = __kgem_bo_alloc(handle, (last_page - first_page) / PAGE_SIZE);
7098 	if (bo == NULL) {
7099 		gem_close(kgem->fd, handle);
7100 		return NULL;
7101 	}
7102 
7103 	bo->unique_id = kgem_get_unique_id(kgem);
7104 	bo->snoop = !kgem->has_llc;
7105 	debug_alloc__bo(kgem, bo);
7106 
7107 	if (first_page != (uintptr_t)ptr) {
7108 		struct kgem_bo *proxy;
7109 
7110 		proxy = kgem_create_proxy(kgem, bo,
7111 					  (uintptr_t)ptr - first_page, size);
7112 		kgem_bo_destroy(kgem, bo);
7113 		if (proxy == NULL)
7114 			return NULL;
7115 
7116 		bo = proxy;
7117 	}
7118 
7119 	bo->map__cpu = MAKE_USER_MAP(ptr);
7120 
7121 	DBG(("%s(ptr=%p, size=%d, pages=%d, read_only=%d) => handle=%d (proxy? %d)\n",
7122 	     __FUNCTION__, ptr, size, NUM_PAGES(size), read_only, handle, bo->proxy != NULL));
7123 	return bo;
7124 }
7125 
7126 void kgem_bo_sync__cpu(struct kgem *kgem, struct kgem_bo *bo)
7127 {
7128 	DBG(("%s: handle=%d\n", __FUNCTION__, bo->handle));
7129 	assert(!bo->scanout);
7130 	assert_tiling(kgem, bo);
7131 
7132 	kgem_bo_submit(kgem, bo);
7133 
7134 	/* SHM pixmaps use proxies for subpage offsets */
7135 	assert(!bo->purged);
7136 	while (bo->proxy)
7137 		bo = bo->proxy;
7138 	assert(!bo->purged);
7139 
7140 	if (bo->domain != DOMAIN_CPU || FORCE_MMAP_SYNC & (1 << DOMAIN_CPU)) {
7141 		struct drm_i915_gem_set_domain set_domain;
7142 
7143 		DBG(("%s: SYNC: handle=%d, needs_flush? %d, domain? %d, busy? %d\n",
7144 		     __FUNCTION__, bo->handle,
7145 		     bo->needs_flush, bo->domain,
7146 		     __kgem_busy(kgem, bo->handle)));
7147 
7148 		VG_CLEAR(set_domain);
7149 		set_domain.handle = bo->handle;
7150 		set_domain.read_domains = I915_GEM_DOMAIN_CPU;
7151 		set_domain.write_domain = I915_GEM_DOMAIN_CPU;
7152 
7153 		if (do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain)) {
7154 			DBG(("%s: sync: GPU hang detected\n", __FUNCTION__));
7155 			kgem_throttle(kgem);
7156 		}
7157 		bo->needs_flush = false;
7158 		kgem_bo_retire(kgem, bo);
7159 		bo->domain = DOMAIN_CPU;
7160 		bo->gtt_dirty = true;
7161 	}
7162 }
7163 
7164 void kgem_bo_sync__cpu_full(struct kgem *kgem, struct kgem_bo *bo, bool write)
7165 {
7166 	DBG(("%s: handle=%d\n", __FUNCTION__, bo->handle));
7167 	assert(!bo->scanout || !write);
7168 	assert_tiling(kgem, bo);
7169 
7170 	if (write || bo->needs_flush)
7171 		kgem_bo_submit(kgem, bo);
7172 
7173 	/* SHM pixmaps use proxies for subpage offsets */
7174 	assert(!bo->purged);
7175 	assert(bo->refcnt);
7176 	while (bo->proxy)
7177 		bo = bo->proxy;
7178 	assert(bo->refcnt);
7179 	assert(!bo->purged);
7180 
7181 	if (bo->rq == NULL && (kgem->has_llc || bo->snoop) && !write)
7182 		return;
7183 
7184 	if (bo->domain != DOMAIN_CPU || FORCE_MMAP_SYNC & (1 << DOMAIN_CPU)) {
7185 		struct drm_i915_gem_set_domain set_domain;
7186 
7187 		DBG(("%s: SYNC: handle=%d, needs_flush? %d, domain? %d, busy? %d\n",
7188 		     __FUNCTION__, bo->handle,
7189 		     bo->needs_flush, bo->domain,
7190 		     __kgem_busy(kgem, bo->handle)));
7191 
7192 		VG_CLEAR(set_domain);
7193 		set_domain.handle = bo->handle;
7194 		set_domain.read_domains = I915_GEM_DOMAIN_CPU;
7195 		set_domain.write_domain = write ? I915_GEM_DOMAIN_CPU : 0;
7196 
7197 		if (do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain)) {
7198 			DBG(("%s: sync: GPU hang detected\n", __FUNCTION__));
7199 			kgem_throttle(kgem);
7200 		}
7201 		bo->needs_flush = false;
7202 		if (write) {
7203 			kgem_bo_retire(kgem, bo);
7204 			bo->domain = DOMAIN_CPU;
7205 			bo->gtt_dirty = true;
7206 		} else {
7207 			if (bo->exec == NULL)
7208 				kgem_bo_maybe_retire(kgem, bo);
7209 			bo->domain = DOMAIN_NONE;
7210 		}
7211 	}
7212 }
7213 
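/* Make a bo coherent for access through its GTT (or WC) mmap.  The pending
 * batch is submitted and, unless the bo is already in the GTT domain and the
 * GTT mapping is known to be coherent, a SET_DOMAIN(GTT, GTT) ioctl waits for
 * the GPU before the CPU touches the pages through the aperture.
 */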
7214 void kgem_bo_sync__gtt(struct kgem *kgem, struct kgem_bo *bo)
7215 {
7216 	DBG(("%s: handle=%d\n", __FUNCTION__, bo->handle));
7217 	assert(bo->refcnt);
7218 	assert(bo->proxy == NULL);
7219 	assert_tiling(kgem, bo);
7220 	assert(!bo->snoop);
7221 
7222 	kgem_bo_submit(kgem, bo);
7223 
7224 	if (bo->domain != DOMAIN_GTT ||
7225 	    !kgem->has_coherent_mmap_gtt ||
7226 	    FORCE_MMAP_SYNC & (1 << DOMAIN_GTT)) {
7227 		struct drm_i915_gem_set_domain set_domain;
7228 
7229 		DBG(("%s: SYNC: handle=%d, needs_flush? %d, domain? %d, busy? %d\n",
7230 		     __FUNCTION__, bo->handle,
7231 		     bo->needs_flush, bo->domain,
7232 		     __kgem_busy(kgem, bo->handle)));
7233 
7234 		VG_CLEAR(set_domain);
7235 		set_domain.handle = bo->handle;
7236 		set_domain.read_domains = I915_GEM_DOMAIN_GTT;
7237 		set_domain.write_domain = I915_GEM_DOMAIN_GTT;
7238 
7239 		if (do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain)) {
7240 			DBG(("%s: sync: GPU hang detected\n", __FUNCTION__));
7241 			kgem_throttle(kgem);
7242 		}
7243 		bo->needs_flush = false;
7244 		kgem_bo_retire(kgem, bo);
7245 		bo->domain = DOMAIN_GTT;
7246 		bo->gtt_dirty = true;
7247 	}
7248 }
7249 
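/* Forget the write-hazard tracking for the buffers of the batch being built.
 * Dirty bos are kept at the head of the request's buffer list, so the walk
 * can stop at the first clean entry (hence the early break).
 */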
7250 void kgem_clear_dirty(struct kgem *kgem)
7251 {
7252 	struct list * const buffers = &kgem->next_request->buffers;
7253 	struct kgem_bo *bo;
7254 
7255 	list_for_each_entry(bo, buffers, request) {
7256 		if (!bo->gpu_dirty)
7257 			break;
7258 
7259 		bo->gpu_dirty = false;
7260 	}
7261 }
7262 
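/* Create a view onto a sub-range of the target bo.  The proxy shares the
 * target's GEM handle and inherits its tiling, pitch and cache attributes;
 * delta records the byte offset of the view.  Proxies are never reusable,
 * and if the target is part of the batch currently being built the proxy is
 * tracked on the same request so busyness queries stay accurate.
 */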
7263 struct kgem_bo *kgem_create_proxy(struct kgem *kgem,
7264 				  struct kgem_bo *target,
7265 				  int offset, int length)
7266 {
7267 	struct kgem_bo *bo;
7268 
7269 	DBG(("%s: target handle=%d [proxy? %d], offset=%d, length=%d, io=%d\n",
7270 	     __FUNCTION__, target->handle, target->proxy ? target->proxy->delta : -1,
7271 	     offset, length, target->io));
7272 
7273 	bo = __kgem_bo_alloc(target->handle, length);
7274 	if (bo == NULL)
7275 		return NULL;
7276 
7277 	bo->unique_id = kgem_get_unique_id(kgem);
7278 	bo->reusable = false;
7279 	bo->size.bytes = length;
7280 
7281 	bo->io = target->io && target->proxy == NULL;
7282 	bo->gpu_dirty = target->gpu_dirty;
7283 	bo->tiling = target->tiling;
7284 	bo->pitch = target->pitch;
7285 	bo->flush = target->flush;
7286 	bo->snoop = target->snoop;
7287 
7288 	assert(!bo->scanout);
7289 	bo->proxy = kgem_bo_reference(target);
7290 	bo->delta = offset;
7291 
7292 	/* Proxies are only tracked for busyness on the current rq */
7293 	if (target->exec && !bo->io) {
7294 		assert(RQ(target->rq) == kgem->next_request);
7295 		list_move_tail(&bo->request, &kgem->next_request->buffers);
7296 		bo->exec = &_kgem_dummy_exec;
7297 		bo->rq = target->rq;
7298 	}
7299 
7300 	return bo;
7301 }
7302 
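/* The upload-buffer helpers below wrap a kgem_bo in a struct kgem_buffer.
 * buffer_alloc() allocates just the header for a buffer whose storage will
 * be mmapped, while buffer_alloc_with_data() over-allocates so that bo->mem
 * points at an UPLOAD_ALIGNMENT-aligned shadow inside the same malloc block,
 * for the pwrite fallback where no mapping is available.
 */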
7303 static struct kgem_buffer *
7304 buffer_alloc(void)
7305 {
7306 	struct kgem_buffer *bo;
7307 
7308 	bo = malloc(sizeof(*bo));
7309 	if (bo == NULL)
7310 		return NULL;
7311 
7312 	bo->mem = NULL;
7313 	bo->need_io = false;
7314 	bo->mmapped = MMAPPED_CPU;
7315 
7316 	return bo;
7317 }
7318 
7319 static struct kgem_buffer *
7320 buffer_alloc_with_data(int num_pages)
7321 {
7322 	struct kgem_buffer *bo;
7323 
7324 	bo = malloc(sizeof(*bo) + 2*UPLOAD_ALIGNMENT + num_pages * PAGE_SIZE);
7325 	if (bo == NULL)
7326 		return NULL;
7327 
7328 	bo->mem = (void *)ALIGN((uintptr_t)bo + sizeof(*bo), UPLOAD_ALIGNMENT);
7329 	bo->mmapped = MMAPPED_NONE;
7330 	return bo;
7331 }
7332 
7333 static inline bool
7334 use_snoopable_buffer(struct kgem *kgem, uint32_t flags)
7335 {
7336 	if ((flags & KGEM_BUFFER_WRITE) == 0)
7337 		return kgem->gen >= 030;
7338 
7339 	return true;
7340 }
7341 
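/* Transplant a recycled linear bo into the embedded base of a kgem_buffer:
 * the struct contents are copied, the intrusive request/vma list links are
 * re-stitched to point at the new location, and the old allocation is freed.
 * The resulting buffer starts with a single reference.
 */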
7342 static void
7343 init_buffer_from_bo(struct kgem_buffer *bo, struct kgem_bo *old)
7344 {
7345 	DBG(("%s: reusing handle=%d for buffer\n",
7346 	     __FUNCTION__, old->handle));
7347 
7348 	assert(old->proxy == NULL);
7349 	assert(list_is_empty(&old->list));
7350 
7351 	memcpy(&bo->base, old, sizeof(*old));
7352 	if (old->rq)
7353 		list_replace(&old->request, &bo->base.request);
7354 	else
7355 		list_init(&bo->base.request);
7356 	list_replace(&old->vma, &bo->base.vma);
7357 	list_init(&bo->base.list);
7358 	free(old);
7359 
7360 	assert(bo->base.tiling == I915_TILING_NONE);
7361 
7362 	bo->base.refcnt = 1;
7363 }
7364 
7365 static struct kgem_buffer *
7366 search_snoopable_buffer(struct kgem *kgem, unsigned alloc)
7367 {
7368 	struct kgem_buffer *bo;
7369 	struct kgem_bo *old;
7370 
7371 	old = search_snoop_cache(kgem, alloc, 0);
7372 	if (old) {
7373 		if (!old->io) {
7374 			bo = buffer_alloc();
7375 			if (bo == NULL)
7376 				return NULL;
7377 
7378 			init_buffer_from_bo(bo, old);
7379 		} else {
7380 			bo = (struct kgem_buffer *)old;
7381 			bo->base.refcnt = 1;
7382 		}
7383 
7384 		DBG(("%s: created CPU handle=%d for buffer, size %d\n",
7385 		     __FUNCTION__, bo->base.handle, num_pages(&bo->base)));
7386 
7387 		assert(bo->base.snoop);
7388 		assert(bo->base.tiling == I915_TILING_NONE);
7389 		assert(num_pages(&bo->base) >= alloc);
7390 		assert(bo->mmapped == MMAPPED_CPU);
7391 		assert(bo->need_io == false);
7392 
7393 		bo->mem = kgem_bo_map__cpu(kgem, &bo->base);
7394 		if (bo->mem == NULL) {
7395 			bo->base.refcnt = 0;
7396 			kgem_bo_free(kgem, &bo->base);
7397 			bo = NULL;
7398 		}
7399 
7400 		return bo;
7401 	}
7402 
7403 	return NULL;
7404 }
7405 
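/* Allocate a CPU-cacheable (snooped) upload buffer, trying the cheapest
 * mechanism the hardware and kernel offer: on LLC platforms a plain
 * gem_create whose CPU mmap is already coherent, then set_caching(SNOOPED)
 * on a fresh or recycled bo, and finally a userptr wrapped around a
 * page-aligned malloc.  Returns NULL if none of these is available.
 */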
7406 static struct kgem_buffer *
7407 create_snoopable_buffer(struct kgem *kgem, unsigned alloc)
7408 {
7409 	struct kgem_buffer *bo;
7410 	uint32_t handle;
7411 
7412 	if (kgem->has_llc) {
7413 		struct kgem_bo *old;
7414 
7415 		bo = buffer_alloc();
7416 		if (bo == NULL)
7417 			return NULL;
7418 
7419 		old = search_linear_cache(kgem, alloc,
7420 					 CREATE_INACTIVE | CREATE_CPU_MAP | CREATE_EXACT);
7421 		if (old) {
7422 			init_buffer_from_bo(bo, old);
7423 		} else {
7424 			handle = gem_create(kgem->fd, alloc);
7425 			if (handle == 0) {
7426 				free(bo);
7427 				return NULL;
7428 			}
7429 
7430 			__kgem_bo_init(&bo->base, handle, alloc);
7431 			debug_alloc__bo(kgem, &bo->base);
7432 			DBG(("%s: created CPU (LLC) handle=%d for buffer, size %d\n",
7433 			     __FUNCTION__, bo->base.handle, alloc));
7434 		}
7435 
7436 		assert(bo->base.refcnt == 1);
7437 		assert(bo->mmapped == MMAPPED_CPU);
7438 		assert(bo->need_io == false);
7439 
7440 		bo->mem = kgem_bo_map__cpu(kgem, &bo->base);
7441 		if (bo->mem != NULL)
7442 			return bo;
7443 
7444 		bo->base.refcnt = 0; /* for valgrind */
7445 		kgem_bo_free(kgem, &bo->base);
7446 	}
7447 
7448 	if (kgem->has_caching) {
7449 		struct kgem_bo *old;
7450 
7451 		bo = buffer_alloc();
7452 		if (bo == NULL)
7453 			return NULL;
7454 
7455 		old = search_linear_cache(kgem, alloc,
7456 					 CREATE_INACTIVE | CREATE_CPU_MAP | CREATE_EXACT);
7457 		if (old) {
7458 			init_buffer_from_bo(bo, old);
7459 		} else {
7460 			handle = gem_create(kgem->fd, alloc);
7461 			if (handle == 0) {
7462 				free(bo);
7463 				return NULL;
7464 			}
7465 
7466 			__kgem_bo_init(&bo->base, handle, alloc);
7467 			debug_alloc__bo(kgem, &bo->base);
7468 			DBG(("%s: created CPU handle=%d for buffer, size %d\n",
7469 			     __FUNCTION__, bo->base.handle, alloc));
7470 		}
7471 
7472 		assert(bo->base.refcnt == 1);
7473 		assert(bo->mmapped == MMAPPED_CPU);
7474 		assert(bo->need_io == false);
7475 		assert(!__kgem_busy(kgem, bo->base.handle));
7476 
7477 		if (!gem_set_caching(kgem->fd, bo->base.handle, SNOOPED))
7478 			goto free_caching;
7479 
7480 		bo->base.snoop = true;
7481 
7482 		bo->mem = kgem_bo_map__cpu(kgem, &bo->base);
7483 		if (bo->mem == NULL)
7484 			goto free_caching;
7485 
7486 		return bo;
7487 
7488 free_caching:
7489 		bo->base.refcnt = 0; /* for valgrind */
7490 		kgem_bo_free(kgem, &bo->base);
7491 	}
7492 
7493 	if (kgem->has_userptr) {
7494 		bo = buffer_alloc();
7495 		if (bo == NULL)
7496 			return NULL;
7497 
7498 		//if (posix_memalign(&ptr, 64, ALIGN(size, 64)))
7499 		if (posix_memalign(&bo->mem, PAGE_SIZE, alloc * PAGE_SIZE)) {
7500 			free(bo);
7501 			return NULL;
7502 		}
7503 
7504 		handle = gem_userptr(kgem->fd, bo->mem, alloc * PAGE_SIZE, false);
7505 		if (handle == 0) {
7506 			free(bo->mem);
7507 			free(bo);
7508 			return NULL;
7509 		}
7510 
7511 		__kgem_bo_init(&bo->base, handle, alloc);
7512 		debug_alloc__bo(kgem, &bo->base);
7513 		DBG(("%s: created snoop handle=%d for buffer\n",
7514 		     __FUNCTION__, bo->base.handle));
7515 
7516 		assert(bo->mmapped == MMAPPED_CPU);
7517 		assert(bo->need_io == false);
7518 
7519 		bo->base.refcnt = 1;
7520 		bo->base.snoop = true;
7521 		bo->base.map__cpu = MAKE_USER_MAP(bo->mem);
7522 
7523 		return bo;
7524 	}
7525 
7526 	return NULL;
7527 }
7528 
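/* The central allocator for transient upload/download buffers.  The caller
 * receives a proxy bo into a larger io buffer plus, through *ret, the CPU
 * address at which to read or write the data.  Partially filled buffers on
 * kgem->batch_buffers and kgem->active_buffers are reused where the flags
 * allow; otherwise a new backing buffer is created, preferring an LLC CPU
 * map, a GTT/WC map for WRITE | INPLACE, a snoopable buffer, and finally a
 * malloc-backed staging buffer flushed with pwrite at submit (need_io).
 *
 * A rough usage sketch (hypothetical caller):
 *
 *	void *ptr;
 *	struct kgem_bo *bo;
 *
 *	bo = kgem_create_buffer(kgem, size, KGEM_BUFFER_WRITE, &ptr);
 *	if (bo) {
 *		memcpy(ptr, data, size);
 *		... emit commands reading from bo ...
 *		kgem_bo_destroy(kgem, bo);
 *	}
 */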
7529 struct kgem_bo *kgem_create_buffer(struct kgem *kgem,
7530 				   uint32_t size, uint32_t flags,
7531 				   void **ret)
7532 {
7533 	struct kgem_buffer *bo;
7534 	unsigned offset, alloc;
7535 	struct kgem_bo *old;
7536 
7537 	DBG(("%s: size=%d, flags=%x [write?=%d, inplace?=%d, last?=%d]\n",
7538 	     __FUNCTION__, size, flags,
7539 	     !!(flags & KGEM_BUFFER_WRITE),
7540 	     !!(flags & KGEM_BUFFER_INPLACE),
7541 	     !!(flags & KGEM_BUFFER_LAST)));
7542 	assert(size);
7543 	/* we should never be asked to create anything TOO large */
7544 	assert(size <= kgem->max_object_size);
7545 
7546 #if !DBG_NO_UPLOAD_CACHE
7547 	list_for_each_entry(bo, &kgem->batch_buffers, base.list) {
7548 		assert(bo->base.io);
7549 		assert(bo->base.refcnt >= 1);
7550 
7551 		/* We can reuse any write buffer which we can fit */
7552 		if (flags == KGEM_BUFFER_LAST &&
7553 		    bo->write == KGEM_BUFFER_WRITE &&
7554 		    bo->base.refcnt == 1 &&
7555 		    bo->mmapped == MMAPPED_NONE &&
7556 		    size <= bytes(&bo->base)) {
7557 			DBG(("%s: reusing write buffer for read of %d bytes? used=%d, total=%d\n",
7558 			     __FUNCTION__, size, bo->used, bytes(&bo->base)));
7559 			gem_write__cachealigned(kgem->fd, bo->base.handle,
7560 						0, bo->used, bo->mem);
7561 			assert(list_is_empty(&bo->base.vma));
7562 			bo->need_io = 0;
7563 			bo->write = 0;
7564 			offset = 0;
7565 			bo->used = size;
7566 			goto done;
7567 		}
7568 
7569 		if (flags & KGEM_BUFFER_WRITE) {
7570 			if ((bo->write & KGEM_BUFFER_WRITE) == 0 ||
7571 			    (((bo->write & ~flags) & KGEM_BUFFER_INPLACE) &&
7572 			     !bo->base.snoop)) {
7573 				DBG(("%s: skip write %x buffer, need %x\n",
7574 				     __FUNCTION__, bo->write, flags));
7575 				continue;
7576 			}
7577 			assert(bo->mmapped || bo->need_io);
7578 		} else {
7579 			if (bo->write & KGEM_BUFFER_WRITE) {
7580 				DBG(("%s: skip write %x buffer, need %x\n",
7581 				     __FUNCTION__, bo->write, flags));
7582 				continue;
7583 			}
7584 		}
7585 
7586 		if (bo->used + size <= bytes(&bo->base)) {
7587 			DBG(("%s: reusing buffer? used=%d + size=%d, total=%d\n",
7588 			     __FUNCTION__, bo->used, size, bytes(&bo->base)));
7589 			offset = bo->used;
7590 			bo->used += size;
7591 			goto done;
7592 		}
7593 	}
7594 
7595 	if (flags & KGEM_BUFFER_WRITE) {
7596 		list_for_each_entry(bo, &kgem->active_buffers, base.list) {
7597 			assert(bo->base.io);
7598 			assert(bo->base.refcnt >= 1);
7599 			assert(bo->base.exec == NULL);
7600 			assert(bo->mmapped);
7601 			assert(bo->mmapped == MMAPPED_GTT || kgem->has_llc || bo->base.snoop);
7602 
7603 			if ((bo->write & ~flags) & KGEM_BUFFER_INPLACE && !bo->base.snoop) {
7604 				DBG(("%s: skip write %x buffer, need %x\n",
7605 				     __FUNCTION__, bo->write, flags));
7606 				continue;
7607 			}
7608 
7609 			if (bo->used + size <= bytes(&bo->base)) {
7610 				DBG(("%s: reusing buffer? used=%d + size=%d, total=%d\n",
7611 				     __FUNCTION__, bo->used, size, bytes(&bo->base)));
7612 				offset = bo->used;
7613 				bo->used += size;
7614 				list_move(&bo->base.list, &kgem->batch_buffers);
7615 				goto done;
7616 			}
7617 
7618 			if (bo->base.refcnt == 1 &&
7619 			    size <= bytes(&bo->base) &&
7620 			    (bo->base.rq == NULL ||
7621 			     !__kgem_busy(kgem, bo->base.handle))) {
7622 				DBG(("%s: reusing whole buffer? size=%d, total=%d\n",
7623 				     __FUNCTION__, size, bytes(&bo->base)));
7624 				__kgem_bo_clear_busy(&bo->base);
7625 				assert(list_is_empty(&bo->base.vma));
7626 
7627 				switch (bo->mmapped) {
7628 				case MMAPPED_CPU:
7629 					kgem_bo_sync__cpu(kgem, &bo->base);
7630 					break;
7631 				case MMAPPED_GTT:
7632 					kgem_bo_sync__gtt(kgem, &bo->base);
7633 					break;
7634 				}
7635 
7636 				offset = 0;
7637 				bo->used = size;
7638 				list_move(&bo->base.list, &kgem->batch_buffers);
7639 				goto done;
7640 			}
7641 		}
7642 	}
7643 #endif
7644 
7645 #if !DBG_NO_MAP_UPLOAD
7646 	/* Be a little more generous and hope to hold fewer mmappings */
7647 	alloc = ALIGN(2*size, kgem->buffer_size);
7648 	if (alloc > MAX_CACHE_SIZE)
7649 		alloc = ALIGN(size, kgem->buffer_size);
7650 	if (alloc > MAX_CACHE_SIZE)
7651 		alloc = PAGE_ALIGN(size);
7652 	assert(alloc);
7653 
7654 	alloc /= PAGE_SIZE;
7655 	if (alloc > kgem->aperture_mappable / 4 && !kgem->has_wc_mmap)
7656 		flags &= ~KGEM_BUFFER_INPLACE;
7657 
7658 	if (kgem->has_llc &&
7659 	    (flags & KGEM_BUFFER_WRITE_INPLACE) != KGEM_BUFFER_WRITE_INPLACE) {
7660 		bo = buffer_alloc();
7661 		if (bo == NULL)
7662 			goto skip_llc;
7663 
7664 		old = NULL;
7665 		if ((flags & KGEM_BUFFER_WRITE) == 0)
7666 			old = search_linear_cache(kgem, alloc, CREATE_CPU_MAP);
7667 		if (old == NULL)
7668 			old = search_linear_cache(kgem, alloc, CREATE_INACTIVE | CREATE_CPU_MAP);
7669 		if (old == NULL)
7670 			old = search_linear_cache(kgem, NUM_PAGES(size), CREATE_INACTIVE | CREATE_CPU_MAP);
7671 		if (old) {
7672 			DBG(("%s: found LLC handle=%d for buffer\n",
7673 			     __FUNCTION__, old->handle));
7674 
7675 			init_buffer_from_bo(bo, old);
7676 		} else {
7677 			uint32_t handle = gem_create(kgem->fd, alloc);
7678 			if (handle == 0) {
7679 				free(bo);
7680 				goto skip_llc;
7681 			}
7682 			__kgem_bo_init(&bo->base, handle, alloc);
7683 			debug_alloc__bo(kgem, &bo->base);
7684 			DBG(("%s: created LLC handle=%d for buffer\n",
7685 			     __FUNCTION__, bo->base.handle));
7686 		}
7687 
7688 		assert(bo->mmapped);
7689 		assert(!bo->need_io);
7690 
7691 		bo->mem = kgem_bo_map__cpu(kgem, &bo->base);
7692 		if (bo->mem) {
7693 			if (flags & KGEM_BUFFER_WRITE)
7694 				kgem_bo_sync__cpu(kgem, &bo->base);
7695 			flags &= ~KGEM_BUFFER_INPLACE;
7696 			goto init;
7697 		} else {
7698 			bo->base.refcnt = 0; /* for valgrind */
7699 			kgem_bo_free(kgem, &bo->base);
7700 		}
7701 	}
7702 skip_llc:
7703 
7704 	if ((flags & KGEM_BUFFER_WRITE_INPLACE) == KGEM_BUFFER_WRITE_INPLACE) {
7705 		/* The issue with using a GTT upload buffer is that we may
7706 		 * cause eviction-stalls in order to free up some GTT space.
7707 		 * An is-mappable? ioctl, or some per-page magic in the kernel,
7708 		 * could help us detect when we are about to block.
7709 		 *
7710 		 * XXX This is especially noticeable on memory-constrained
7711 		 * devices like gen2, or with a relatively slow GPU such as the i3.
7712 		 */
7713 		DBG(("%s: searching for an inactive GTT map for upload\n",
7714 		     __FUNCTION__));
7715 		old = search_linear_cache(kgem, alloc,
7716 					  CREATE_EXACT | CREATE_INACTIVE | CREATE_GTT_MAP);
7717 #if HAVE_I915_GEM_BUFFER_INFO
7718 		if (old) {
7719 			struct drm_i915_gem_buffer_info info;
7720 
7721 			/* An example of how such a non-blocking ioctl might be used: */
7722 
7723 			VG_CLEAR(info);
7724 			info.handle = old->handle;
7725 			if (do_ioctl(kgem->fd,
7726 				     DRM_IOCTL_I915_GEM_BUFFER_INFO,
7727 				     &info) == 0) {
7728 				old->presumed_offset = info.addr;
7729 				if ((info.flags & I915_GEM_MAPPABLE) == 0) {
7730 					kgem_bo_move_to_inactive(kgem, old);
7731 					old = NULL;
7732 				}
7733 			}
7734 		}
7735 #endif
7736 		if (old == NULL)
7737 			old = search_linear_cache(kgem, NUM_PAGES(size),
7738 						  CREATE_EXACT | CREATE_INACTIVE | CREATE_GTT_MAP);
7739 		if (old == NULL) {
7740 			old = search_linear_cache(kgem, alloc, CREATE_INACTIVE);
7741 			if (old && !kgem_bo_can_map(kgem, old)) {
7742 				_kgem_bo_destroy(kgem, old);
7743 				old = NULL;
7744 			}
7745 		}
7746 		if (old) {
7747 			DBG(("%s: reusing handle=%d for buffer\n",
7748 			     __FUNCTION__, old->handle));
7749 			assert(kgem_bo_can_map(kgem, old));
7750 			assert(!old->snoop);
7751 			assert(old->rq == NULL);
7752 
7753 			bo = buffer_alloc();
7754 			if (bo == NULL)
7755 				return NULL;
7756 
7757 			init_buffer_from_bo(bo, old);
7758 			assert(num_pages(&bo->base) >= NUM_PAGES(size));
7759 
7760 			assert(bo->mmapped);
7761 			assert(bo->base.refcnt == 1);
7762 
7763 			bo->mem = kgem_bo_map(kgem, &bo->base);
7764 			if (bo->mem) {
7765 				if (bo->mem == MAP(bo->base.map__cpu))
7766 					flags &= ~KGEM_BUFFER_INPLACE;
7767 				else
7768 					bo->mmapped = MMAPPED_GTT;
7769 				goto init;
7770 			} else {
7771 				bo->base.refcnt = 0;
7772 				kgem_bo_free(kgem, &bo->base);
7773 			}
7774 		}
7775 	}
7776 #else
7777 	flags &= ~KGEM_BUFFER_INPLACE;
7778 #endif
7779 	/* Be more parsimonious with pwrite/pread/cacheable buffers */
7780 	if ((flags & KGEM_BUFFER_INPLACE) == 0)
7781 		alloc = NUM_PAGES(size);
7782 
7783 	if (use_snoopable_buffer(kgem, flags)) {
7784 		bo = search_snoopable_buffer(kgem, alloc);
7785 		if (bo) {
7786 			if (flags & KGEM_BUFFER_WRITE)
7787 				kgem_bo_sync__cpu(kgem, &bo->base);
7788 			flags &= ~KGEM_BUFFER_INPLACE;
7789 			goto init;
7790 		}
7791 
7792 		if ((flags & KGEM_BUFFER_INPLACE) == 0) {
7793 			bo = create_snoopable_buffer(kgem, alloc);
7794 			if (bo)
7795 				goto init;
7796 		}
7797 	}
7798 
7799 	flags &= ~KGEM_BUFFER_INPLACE;
7800 
7801 	old = NULL;
7802 	if ((flags & KGEM_BUFFER_WRITE) == 0)
7803 		old = search_linear_cache(kgem, alloc, 0);
7804 	if (old == NULL)
7805 		old = search_linear_cache(kgem, alloc, CREATE_INACTIVE);
7806 	if (old) {
7807 		DBG(("%s: reusing ordinary handle %d for io\n",
7808 		     __FUNCTION__, old->handle));
7809 		bo = buffer_alloc_with_data(num_pages(old));
7810 		if (bo == NULL)
7811 			return NULL;
7812 
7813 		init_buffer_from_bo(bo, old);
7814 		bo->need_io = flags & KGEM_BUFFER_WRITE;
7815 	} else {
7816 		unsigned hint;
7817 
7818 		if (use_snoopable_buffer(kgem, flags)) {
7819 			bo = create_snoopable_buffer(kgem, alloc);
7820 			if (bo)
7821 				goto init;
7822 		}
7823 
7824 		bo = buffer_alloc();
7825 		if (bo == NULL)
7826 			return NULL;
7827 
7828 		hint = CREATE_INACTIVE;
7829 		if (flags & KGEM_BUFFER_WRITE)
7830 			hint |= CREATE_CPU_MAP;
7831 		old = search_linear_cache(kgem, alloc, hint);
7832 		if (old) {
7833 			DBG(("%s: reusing handle=%d for buffer\n",
7834 			     __FUNCTION__, old->handle));
7835 
7836 			init_buffer_from_bo(bo, old);
7837 		} else {
7838 			uint32_t handle = gem_create(kgem->fd, alloc);
7839 			if (handle == 0) {
7840 				free(bo);
7841 				return NULL;
7842 			}
7843 
7844 			DBG(("%s: created handle=%d for buffer\n",
7845 			     __FUNCTION__, handle));
7846 
7847 			__kgem_bo_init(&bo->base, handle, alloc);
7848 			debug_alloc__bo(kgem, &bo->base);
7849 		}
7850 
7851 		assert(bo->mmapped);
7852 		assert(!bo->need_io);
7853 		assert(bo->base.refcnt == 1);
7854 
7855 		if (flags & KGEM_BUFFER_WRITE) {
7856 			bo->mem = kgem_bo_map__cpu(kgem, &bo->base);
7857 			if (bo->mem != NULL) {
7858 				kgem_bo_sync__cpu(kgem, &bo->base);
7859 				goto init;
7860 			}
7861 		}
7862 
7863 		DBG(("%s: falling back to new pwrite buffer\n", __FUNCTION__));
7864 		old = &bo->base;
7865 		bo = buffer_alloc_with_data(num_pages(old));
7866 		if (bo == NULL) {
7867 			old->refcnt = 0;
7868 			kgem_bo_free(kgem, old);
7869 			return NULL;
7870 		}
7871 
7872 		init_buffer_from_bo(bo, old);
7873 
7874 		assert(bo->mem);
7875 		assert(!bo->mmapped);
7876 		assert(bo->base.refcnt == 1);
7877 
7878 		bo->need_io = flags & KGEM_BUFFER_WRITE;
7879 	}
7880 init:
7881 	bo->base.io = true;
7882 	assert(bo->base.refcnt == 1);
7883 	assert(num_pages(&bo->base) >= NUM_PAGES(size));
7884 	assert(!bo->need_io || !bo->base.needs_flush);
7885 	assert(!bo->need_io || bo->base.domain != DOMAIN_GPU);
7886 	assert(bo->mem);
7887 	assert(bo->mmapped != MMAPPED_GTT || bo->base.map__gtt == bo->mem || bo->base.map__wc == bo->mem);
7888 	assert(bo->mmapped != MMAPPED_CPU || MAP(bo->base.map__cpu) == bo->mem);
7889 
7890 	bo->used = size;
7891 	bo->write = flags & KGEM_BUFFER_WRITE_INPLACE;
7892 	offset = 0;
7893 
7894 	assert(list_is_empty(&bo->base.list));
7895 	list_add(&bo->base.list, &kgem->batch_buffers);
7896 
7897 	DBG(("%s(pages=%d [%d]) new handle=%d, used=%d, write=%d\n",
7898 	     __FUNCTION__, num_pages(&bo->base), alloc, bo->base.handle, bo->used, bo->write));
7899 
7900 done:
7901 	bo->used = ALIGN(bo->used, UPLOAD_ALIGNMENT);
7902 	assert(bo->used && bo->used <= bytes(&bo->base));
7903 	assert(bo->mem);
7904 	*ret = (char *)bo->mem + offset;
7905 	return kgem_create_proxy(kgem, &bo->base, offset, size);
7906 }
7907 
7908 bool kgem_buffer_is_inplace(struct kgem_bo *_bo)
7909 {
7910 	struct kgem_buffer *bo = (struct kgem_buffer *)_bo->proxy;
7911 	return bo->write & KGEM_BUFFER_WRITE_INPLACE;
7912 }
7913 
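/* 2D wrapper around kgem_create_buffer(): computes a suitably aligned stride,
 * pads the allocation to an even number of rows, and afterwards trims the
 * padding row again so the space can be reused by other uploads.  The
 * returned proxy carries the pitch, and the caller writes pixels through the
 * pointer returned in *ret.
 */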
7914 struct kgem_bo *kgem_create_buffer_2d(struct kgem *kgem,
7915 				      int width, int height, int bpp,
7916 				      uint32_t flags,
7917 				      void **ret)
7918 {
7919 	struct kgem_bo *bo;
7920 	int stride;
7921 
7922 	assert(width > 0 && height > 0);
7923 	assert(ret != NULL);
7924 	stride = ALIGN(width, 2) * bpp >> 3;
7925 	stride = ALIGN(stride, kgem->gen >= 0100 ? 32 : 4);
7926 
7927 	DBG(("%s: %dx%d, %d bpp, stride=%d\n",
7928 	     __FUNCTION__, width, height, bpp, stride));
7929 
7930 	bo = kgem_create_buffer(kgem, stride * ALIGN(height, 2), flags, ret);
7931 	if (bo == NULL) {
7932 		DBG(("%s: allocation failure for upload buffer\n",
7933 		     __FUNCTION__));
7934 		return NULL;
7935 	}
7936 	assert(*ret != NULL);
7937 	assert(bo->proxy != NULL);
7938 
7939 	if (height & 1) {
7940 		struct kgem_buffer *io = (struct kgem_buffer *)bo->proxy;
7941 		int min;
7942 
7943 		assert(io->used);
7944 
7945 		/* Having padded this surface to ensure that accesses to
7946 		 * the last pair of rows are valid, remove the padding so
7947 		 * that it can be allocated to other pixmaps.
7948 		 */
7949 		min = bo->delta + height * stride;
7950 		min = ALIGN(min, UPLOAD_ALIGNMENT);
7951 		if (io->used != min) {
7952 			DBG(("%s: trimming buffer from %d to %d\n",
7953 			     __FUNCTION__, io->used, min));
7954 			io->used = min;
7955 		}
7956 		bo->size.bytes -= stride;
7957 	}
7958 
7959 	bo->map__cpu = *ret;
7960 	bo->pitch = stride;
7961 	bo->unique_id = kgem_get_unique_id(kgem);
7962 	return bo;
7963 }
7964 
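/* Copy a box of client pixels into a freshly allocated 2D upload buffer so
 * it can be used as a blit/composite source.  The copy runs under
 * sigtrap_get()/sigtrap_put() so that a fault while reading the source data
 * (e.g. a vanished SHM segment) is caught and the buffer released instead of
 * crashing the server.
 */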
7965 struct kgem_bo *kgem_upload_source_image(struct kgem *kgem,
7966 					 const void *data,
7967 					 const BoxRec *box,
7968 					 int stride, int bpp)
7969 {
7970 	int width  = box->x2 - box->x1;
7971 	int height = box->y2 - box->y1;
7972 	struct kgem_bo *bo;
7973 	void *dst;
7974 
7975 	if (!kgem_can_create_2d(kgem, width, height, bpp))
7976 		return NULL;
7977 
7978 	DBG(("%s : (%d, %d), (%d, %d), stride=%d, bpp=%d\n",
7979 	     __FUNCTION__, box->x1, box->y1, box->x2, box->y2, stride, bpp));
7980 
7981 	assert(data);
7982 	assert(width > 0);
7983 	assert(height > 0);
7984 	assert(stride);
7985 	assert(bpp);
7986 
7987 	bo = kgem_create_buffer_2d(kgem,
7988 				   width, height, bpp,
7989 				   KGEM_BUFFER_WRITE_INPLACE, &dst);
7990 	if (bo == NULL)
7991 		return NULL;
7992 
7993 	if (sigtrap_get()) {
7994 		kgem_bo_destroy(kgem, bo);
7995 		return NULL;
7996 	}
7997 
7998 	memcpy_blt(data, dst, bpp,
7999 		   stride, bo->pitch,
8000 		   box->x1, box->y1,
8001 		   0, 0,
8002 		   width, height);
8003 
8004 	sigtrap_put();
8005 	return bo;
8006 }
8007 
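/* Hand out an extra reference to a proxy and remember where the caller
 * stores it: proxies never have a real GTT mapping, so map__gtt is reused
 * here to record the owner's pointer slot, presumably so that the owner's
 * pointer can be cleared when the proxy is eventually torn down.
 */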
8008 void kgem_proxy_bo_attach(struct kgem_bo *bo,
8009 			  struct kgem_bo **ptr)
8010 {
8011 	DBG(("%s: handle=%d\n", __FUNCTION__, bo->handle));
8012 	assert(bo->map__gtt == NULL);
8013 	assert(bo->proxy);
8014 	list_add(&bo->vma, &bo->proxy->vma);
8015 	bo->map__gtt = ptr;
8016 	*ptr = kgem_bo_reference(bo);
8017 }
8018 
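/* Wait for a download into an io buffer to complete and make the data
 * visible through bo->mem.  For mmapped buffers this is a SET_DOMAIN to the
 * CPU or GTT domain matching the mapping; for the pwrite/pread fallback the
 * pages are read back into the malloc shadow with gem_read().  The caller is
 * expected to have already submitted the batch that produced the data.
 */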
8019 void kgem_buffer_read_sync(struct kgem *kgem, struct kgem_bo *_bo)
8020 {
8021 	struct kgem_buffer *bo;
8022 	uint32_t offset = _bo->delta, length = _bo->size.bytes;
8023 
8024 	/* We expect the caller to have already submitted the batch */
8025 	assert(_bo->io);
8026 	assert(_bo->exec == NULL);
8027 	assert(_bo->rq == NULL);
8028 	assert(_bo->proxy);
8029 
8030 	_bo = _bo->proxy;
8031 	assert(_bo->proxy == NULL);
8032 	assert(_bo->exec == NULL);
8033 
8034 	bo = (struct kgem_buffer *)_bo;
8035 
8036 	DBG(("%s(offset=%d, length=%d, snooped=%d)\n", __FUNCTION__,
8037 	     offset, length, bo->base.snoop));
8038 
8039 	if (bo->mmapped) {
8040 		struct drm_i915_gem_set_domain set_domain;
8041 
8042 		DBG(("%s: sync: needs_flush? %d, domain? %d, busy? %d\n",
8043 		     __FUNCTION__,
8044 		     bo->base.needs_flush,
8045 		     bo->base.domain,
8046 		     __kgem_busy(kgem, bo->base.handle)));
8047 
8048 		assert(bo->mmapped == MMAPPED_GTT || bo->base.snoop || kgem->has_llc);
8049 
8050 		VG_CLEAR(set_domain);
8051 		set_domain.handle = bo->base.handle;
8052 		set_domain.write_domain = 0;
8053 		set_domain.read_domains =
8054 			bo->mmapped == MMAPPED_CPU ? I915_GEM_DOMAIN_CPU : I915_GEM_DOMAIN_GTT;
8055 
8056 		if (do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain)) {
8057 			DBG(("%s: sync: GPU hang detected\n", __FUNCTION__));
8058 			kgem_throttle(kgem);
8059 		}
8060 	} else {
8061 		if (gem_read(kgem->fd,
8062 			     bo->base.handle, (char *)bo->mem+offset,
8063 			     offset, length))
8064 			return;
8065 	}
8066 	kgem_bo_maybe_retire(kgem, &bo->base);
8067 	bo->base.domain = DOMAIN_NONE;
8068 }
8069 
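/* Per-bo cache of surface binding offsets keyed by format, so that an
 * already-emitted surface state can be reused.  The first entry lives inline
 * in the bo; further entries form a small singly-linked overflow list.  An
 * offset of zero marks an unused slot, terminating the search in
 * kgem_bo_get_binding() and providing reuse points for kgem_bo_set_binding().
 */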
8070 uint32_t kgem_bo_get_binding(struct kgem_bo *bo, uint32_t format)
8071 {
8072 	struct kgem_bo_binding *b;
8073 
8074 	assert(bo->refcnt);
8075 
8076 	for (b = &bo->binding; b && b->offset; b = b->next)
8077 		if (format == b->format)
8078 			return b->offset;
8079 
8080 	return 0;
8081 }
8082 
8083 void kgem_bo_set_binding(struct kgem_bo *bo, uint32_t format, uint16_t offset)
8084 {
8085 	struct kgem_bo_binding *b;
8086 
8087 	assert(bo->refcnt);
8088 
8089 	for (b = &bo->binding; b; b = b->next) {
8090 		if (b->offset)
8091 			continue;
8092 
8093 		b->offset = offset;
8094 		b->format = format;
8095 
8096 		if (b->next)
8097 			b->next->offset = 0;
8098 
8099 		return;
8100 	}
8101 
8102 	b = malloc(sizeof(*b));
8103 	if (b) {
8104 		b->next = bo->binding.next;
8105 		b->format = format;
8106 		b->offset = offset;
8107 		bo->binding.next = b;
8108 	}
8109 }
8110 
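/* Copy the contents of src into a new linear bo with the requested pitch,
 * using an XY_SRC_COPY_BLT on the BLT ring.  The destination is taken from
 * the linear cache when possible, otherwise freshly created.  Gen8+ emits
 * the 10-dword variant with 64-bit relocations, earlier generations the
 * 8-dword form.
 */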
8111 struct kgem_bo *
8112 kgem_replace_bo(struct kgem *kgem,
8113 		struct kgem_bo *src,
8114 		uint32_t width,
8115 		uint32_t height,
8116 		uint32_t pitch,
8117 		uint32_t bpp)
8118 {
8119 	struct kgem_bo *dst;
8120 	uint32_t br00, br13;
8121 	uint32_t handle;
8122 	uint32_t size;
8123 	uint32_t *b;
8124 
8125 	DBG(("%s: replacing bo handle=%d, size=%dx%d pitch=%d, with pitch=%d\n",
8126 	     __FUNCTION__, src->handle,  width, height, src->pitch, pitch));
8127 
8128 	/* We only expect to be called to fix up small buffers, which is
8129 	 * why we only attempt to allocate a linear bo.
8130 	 */
8131 	assert(src->tiling == I915_TILING_NONE);
8132 	assert(kgem_bo_can_blt(kgem, src));
8133 
8134 	size = height * pitch;
8135 	size = NUM_PAGES(size);
8136 
8137 	dst = search_linear_cache(kgem, size, 0);
8138 	if (dst == NULL)
8139 		dst = search_linear_cache(kgem, size, CREATE_INACTIVE);
8140 	if (dst == NULL) {
8141 		handle = gem_create(kgem->fd, size);
8142 		if (handle == 0)
8143 			return NULL;
8144 
8145 		dst = __kgem_bo_alloc(handle, size);
8146 		if (dst == NULL) {
8147 			gem_close(kgem->fd, handle);
8148 			return NULL;
8149 		}
8150 
8151 		debug_alloc__bo(kgem, dst);
8152 	}
8153 	dst->pitch = pitch;
8154 	dst->unique_id = kgem_get_unique_id(kgem);
8155 	dst->refcnt = 1;
8156 	assert(dst->tiling == I915_TILING_NONE);
8157 	assert(kgem_bo_can_blt(kgem, dst));
8158 
8159 	kgem_set_mode(kgem, KGEM_BLT, dst);
8160 	if (!kgem_check_batch(kgem, 10) ||
8161 	    !kgem_check_reloc(kgem, 2) ||
8162 	    !kgem_check_many_bo_fenced(kgem, src, dst, NULL)) {
8163 		kgem_submit(kgem);
8164 		if (!kgem_check_many_bo_fenced(kgem, src, dst, NULL)) {
8165 			kgem_bo_destroy(kgem, dst);
8166 			return NULL;
8167 		}
8168 		_kgem_set_mode(kgem, KGEM_BLT);
8169 	}
8170 	kgem_bcs_set_tiling(kgem, src, dst);
8171 
8172 	br00 = XY_SRC_COPY_BLT_CMD;
8173 	br13 = pitch;
8174 	pitch = src->pitch;
8175 	if (kgem->gen >= 040 && src->tiling) {
8176 		br00 |= BLT_SRC_TILED;
8177 		pitch >>= 2;
8178 	}
8179 
8180 	br13 |= 0xcc << 16;
8181 	br13 |= sna_br13_color_depth(bpp);
8182 	if (bpp == 32)
8183 		br00 |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
8184 
8185 	b = kgem->batch + kgem->nbatch;
8186 	if (kgem->gen >= 0100) {
8187 		b[0] = br00 | 8;
8188 		b[1] = br13;
8189 		b[2] = 0;
8190 		b[3] = height << 16 | width;
8191 		*(uint64_t *)(b+4) =
8192 			kgem_add_reloc64(kgem, kgem->nbatch + 4, dst,
8193 					 I915_GEM_DOMAIN_RENDER << 16 |
8194 					 I915_GEM_DOMAIN_RENDER |
8195 					 KGEM_RELOC_FENCED,
8196 					 0);
8197 		b[6] = 0;
8198 		b[7] = pitch;
8199 		*(uint64_t *)(b+8) =
8200 			kgem_add_reloc64(kgem, kgem->nbatch + 8, src,
8201 					 I915_GEM_DOMAIN_RENDER << 16 |
8202 					 KGEM_RELOC_FENCED,
8203 					 0);
8204 		kgem->nbatch += 10;
8205 	} else {
8206 		b[0] = br00 | 6;
8207 		b[1] = br13;
8208 		b[2] = 0;
8209 		b[3] = height << 16 | width;
8210 		b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, dst,
8211 				      I915_GEM_DOMAIN_RENDER << 16 |
8212 				      I915_GEM_DOMAIN_RENDER |
8213 				      KGEM_RELOC_FENCED,
8214 				      0);
8215 		b[5] = 0;
8216 		b[6] = pitch;
8217 		b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, src,
8218 				      I915_GEM_DOMAIN_RENDER << 16 |
8219 				      KGEM_RELOC_FENCED,
8220 				      0);
8221 		kgem->nbatch += 8;
8222 	}
8223 
8224 	return dst;
8225 }
8226 
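/* Decide whether a CPU (snooped) bo may now be used for GPU rendering.  On
 * LLC hardware this is always fine; PRIME and scanout conversions are
 * refused here, as is a busy bo when the caller only hinted at an async
 * move.  Otherwise the pending batch is flushed and the snoop flag dropped
 * by switching the object back to UNCACHED.
 */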
8227 bool kgem_bo_convert_to_gpu(struct kgem *kgem,
8228 			    struct kgem_bo *bo,
8229 			    unsigned flags)
8230 {
8231 	DBG(("%s: converting handle=%d from CPU to GPU, flags=%x, busy?=%d\n",
8232 	     __FUNCTION__, bo->handle, flags, __kgem_bo_is_busy(kgem, bo)));
8233 	assert(bo->tiling == I915_TILING_NONE);
8234 
8235 	if (flags & (__MOVE_PRIME | __MOVE_SCANOUT))
8236 		return false;
8237 
8238 	if (kgem->has_llc)
8239 		return true;
8240 
8241 	if (flags & MOVE_ASYNC_HINT && __kgem_bo_is_busy(kgem, bo))
8242 		return false;
8243 
8244 	assert(bo->snoop);
8245 
8246 	kgem_bo_submit(kgem, bo);
8247 
8248 	if (!gem_set_caching(kgem->fd, bo->handle, UNCACHED))
8249 		return false;
8250 
8251 	bo->snoop = false;
8252 	return true;
8253 }
8254