1 /*
2 * Copyright (c) 2011 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Authors:
24 * Chris Wilson <chris@chris-wilson.co.uk>
25 *
26 */
27
28 #ifdef HAVE_CONFIG_H
29 #include "config.h"
30 #endif
31
32 #include "sna.h"
33 #include "sna_reg.h"
34
35 #include <unistd.h>
36 #include <sys/ioctl.h>
37 #include <sys/mman.h>
38 #include <sys/stat.h>
39 #include <time.h>
40 #include <sched.h>
41 #include <errno.h>
42 #include <fcntl.h>
43
44 #include <xf86drm.h>
45
46 #ifdef HAVE_VALGRIND
47 #include <valgrind.h>
48 #include <memcheck.h>
49 #endif
50
51 #ifdef HAVE_STRUCT_SYSINFO_TOTALRAM
52 #include <sys/sysinfo.h>
53 #endif
54
55 #include "sna_cpuid.h"
56
57 static struct kgem_bo *
58 search_linear_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags);
59
60 static struct kgem_bo *
61 search_snoop_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags);
62
63 #define DBG_NO_HW 0
64 #define DBG_NO_EXEC 0
65 #define DBG_NO_TILING 0
66 #define DBG_NO_CACHE 0
67 #define DBG_NO_SNOOP_CACHE 0
68 #define DBG_NO_CACHE_LEVEL 0
69 #define DBG_NO_CPU 0
70 #define DBG_NO_CREATE2 0
71 #define DBG_NO_USERPTR 0
72 #define DBG_NO_UNSYNCHRONIZED_USERPTR 0
73 #define DBG_NO_COHERENT_MMAP_GTT 0
74 #define DBG_NO_LLC 1
75 #define DBG_NO_SEMAPHORES 0
76 #define DBG_NO_MADV 0
77 #define DBG_NO_UPLOAD_CACHE 0
78 #define DBG_NO_UPLOAD_ACTIVE 0
79 #define DBG_NO_MAP_UPLOAD 0
80 #define DBG_NO_RELAXED_FENCING 0
81 #define DBG_NO_SECURE_BATCHES 0
82 #define DBG_NO_PINNED_BATCHES 0
83 #define DBG_NO_SHRINK_BATCHES 0
84 #define DBG_NO_FAST_RELOC 0
85 #define DBG_NO_HANDLE_LUT 0
86 #define DBG_NO_WT 0
87 #define DBG_NO_WC_MMAP 0
88 #define DBG_NO_BLT_Y 0
89 #define DBG_NO_SCANOUT_Y 0
90 #define DBG_NO_DIRTYFB 0
91 #define DBG_NO_DETILING 0
92 #define DBG_DUMP 0
93 #define DBG_NO_MALLOC_CACHE 0
94
95 #define FORCE_MMAP_SYNC 0 /* ((1 << DOMAIN_CPU) | (1 << DOMAIN_GTT)) */
96
97 #ifndef DEBUG_SYNC
98 #define DEBUG_SYNC 0
99 #endif
100
101 #define SHOW_BATCH_BEFORE 0
102 #define SHOW_BATCH_AFTER 0
103
104 #if 0
105 #define ASSERT_IDLE(kgem__, handle__) assert(!__kgem_busy(kgem__, handle__))
106 #define ASSERT_MAYBE_IDLE(kgem__, handle__, expect__) assert(!(expect__) || !__kgem_busy(kgem__, handle__))
107 #else
108 #define ASSERT_IDLE(kgem__, handle__)
109 #define ASSERT_MAYBE_IDLE(kgem__, handle__, expect__)
110 #endif
111
112 /* Worst case seems to be 965gm where we cannot write within a cacheline that
113  * is simultaneously being read by the GPU, or within the sampler
114 * prefetch. In general, the chipsets seem to have a requirement that sampler
115 * offsets be aligned to a cacheline (64 bytes).
116 *
117 * Actually, it turns out the BLT color pattern (BR15) has the most severe
118 * alignment restrictions, 64 bytes for 8-bpp, 128 bytes for 16-bpp and 256
119 * bytes for 32-bpp.
120 */
121 #define UPLOAD_ALIGNMENT 256
122
123 #define PAGE_ALIGN(x) ALIGN(x, PAGE_SIZE)
124 #define NUM_PAGES(x) (((x) + PAGE_SIZE-1) / PAGE_SIZE)
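/* Illustrative example (4096-byte pages assumed): PAGE_ALIGN(4097) == 8192 and
 * NUM_PAGES(4097) == 2, while NUM_PAGES(4096) == 1 -- object sizes are always
 * rounded up to whole pages.
 */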
125
126 #define MAX_GTT_VMA_CACHE 512
127 #define MAX_CPU_VMA_CACHE INT16_MAX
128 #define MAP_PRESERVE_TIME 10
129
130 #define MAKE_USER_MAP(ptr) ((void*)((uintptr_t)(ptr) | 1))
131 #define IS_USER_MAP(ptr) ((uintptr_t)(ptr) & 1)
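/* Map pointers are at least page aligned, so the low bit is free to act as a
 * tag marking mappings that wrap a user-supplied pointer (userptr).
 */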
132
133 #define LOCAL_I915_PARAM_HAS_BLT 11
134 #define LOCAL_I915_PARAM_HAS_RELAXED_FENCING 12
135 #define LOCAL_I915_PARAM_HAS_RELAXED_DELTA 15
136 #define LOCAL_I915_PARAM_HAS_LLC 17
137 #define LOCAL_I915_PARAM_HAS_SEMAPHORES 20
138 #define LOCAL_I915_PARAM_HAS_SECURE_BATCHES 23
139 #define LOCAL_I915_PARAM_HAS_PINNED_BATCHES 24
140 #define LOCAL_I915_PARAM_HAS_NO_RELOC 25
141 #define LOCAL_I915_PARAM_HAS_HANDLE_LUT 26
142 #define LOCAL_I915_PARAM_HAS_WT 27
143 #define LOCAL_I915_PARAM_MMAP_VERSION 30
144 #define LOCAL_I915_PARAM_MMAP_GTT_COHERENT 52
145
146 #define LOCAL_I915_EXEC_IS_PINNED (1<<10)
147 #define LOCAL_I915_EXEC_NO_RELOC (1<<11)
148 #define LOCAL_I915_EXEC_HANDLE_LUT (1<<12)
149
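/* Local copies of newer i915 uABI structures and ioctl numbers follow, so the
 * driver can build against older kernel/libdrm headers; kernels that lack a
 * given ioctl simply fail it at runtime and the corresponding feature is
 * disabled by the test_has_*() probes below.
 */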
150 #define LOCAL_I915_GEM_CREATE2 0x34
151 #define LOCAL_IOCTL_I915_GEM_CREATE2 DRM_IOWR (DRM_COMMAND_BASE + LOCAL_I915_GEM_CREATE2, struct local_i915_gem_create2)
152 struct local_i915_gem_create2 {
153 uint64_t size;
154 uint32_t placement;
155 #define LOCAL_I915_CREATE_PLACEMENT_SYSTEM 0
156 #define LOCAL_I915_CREATE_PLACEMENT_STOLEN 1 /* Cannot use CPU mmaps or pread/pwrite */
157 uint32_t domain;
158 uint32_t caching;
159 uint32_t tiling_mode;
160 uint32_t stride;
161 uint32_t flags;
162 uint32_t pad;
163 uint32_t handle;
164 };
165
166 #define LOCAL_I915_GEM_USERPTR 0x33
167 #define LOCAL_IOCTL_I915_GEM_USERPTR DRM_IOWR (DRM_COMMAND_BASE + LOCAL_I915_GEM_USERPTR, struct local_i915_gem_userptr)
168 struct local_i915_gem_userptr {
169 uint64_t user_ptr;
170 uint64_t user_size;
171 uint32_t flags;
172 #define I915_USERPTR_READ_ONLY 0x1
173 #define I915_USERPTR_UNSYNCHRONIZED 0x80000000
174 uint32_t handle;
175 };
176
177 #define UNCACHED 0
178 #define SNOOPED 1
179 #define DISPLAY 2
180
181 struct local_i915_gem_caching {
182 uint32_t handle;
183 uint32_t caching;
184 };
185
186 #define LOCAL_I915_GEM_SET_CACHING 0x2f
187 #define LOCAL_I915_GEM_GET_CACHING 0x30
188 #define LOCAL_IOCTL_I915_GEM_SET_CACHING DRM_IOW(DRM_COMMAND_BASE + LOCAL_I915_GEM_SET_CACHING, struct local_i915_gem_caching)
189 #define LOCAL_IOCTL_I915_GEM_GET_CACHING DRM_IOW(DRM_COMMAND_BASE + LOCAL_I915_GEM_GET_CACHING, struct local_i915_gem_caching)
190
191 struct local_i915_gem_mmap {
192 uint32_t handle;
193 uint32_t pad;
194 uint64_t offset;
195 uint64_t size;
196 uint64_t addr_ptr;
197 };
198 #define LOCAL_IOCTL_I915_GEM_MMAP DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_MMAP, struct local_i915_gem_mmap)
199
200 struct local_i915_gem_mmap2 {
201 uint32_t handle;
202 uint32_t pad;
203 uint64_t offset;
204 uint64_t size;
205 uint64_t addr_ptr;
206 uint64_t flags;
207 #define I915_MMAP_WC 0x1
208 };
209 #define LOCAL_IOCTL_I915_GEM_MMAP_v2 DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_MMAP, struct local_i915_gem_mmap2)
210
211 struct kgem_buffer {
212 struct kgem_bo base;
213 void *mem;
214 uint32_t used;
215 uint32_t need_io : 1;
216 uint32_t write : 2;
217 uint32_t mmapped : 2;
218 };
219 enum {
220 MMAPPED_NONE,
221 MMAPPED_GTT,
222 MMAPPED_CPU
223 };
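/* kgem_buffer.mmapped records what 'mem' points at: a GTT or CPU mapping of
 * the underlying bo, or MMAPPED_NONE when it is ordinary system memory.
 */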
224
225 static struct kgem_bo *__kgem_freed_bo;
226 static struct kgem_request *__kgem_freed_request;
227 static struct drm_i915_gem_exec_object2 _kgem_dummy_exec;
228
229 static inline struct sna *__to_sna(struct kgem *kgem)
230 {
231 /* minor layering violations */
232 return container_of(kgem, struct sna, kgem);
233 }
234
235 static inline int bytes(struct kgem_bo *bo)
236 {
237 return __kgem_bo_size(bo);
238 }
239
240 #define bucket(B) (B)->size.pages.bucket
241 #define num_pages(B) (B)->size.pages.count
242
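/* ioctl wrappers: do_ioctl() tries the call once inline; on failure the
 * out-of-line __do_ioctl() retries while errno is EINTR or EAGAIN (yielding
 * the CPU on EAGAIN) and returns -errno for any other error.
 */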
243 static int __do_ioctl(int fd, unsigned long req, void *arg)
244 {
245 do {
246 int err;
247
248 switch ((err = errno)) {
249 case EAGAIN:
250 sched_yield();
251 case EINTR:
252 break;
253 default:
254 return -err;
255 }
256
257 if (likely(ioctl(fd, req, arg) == 0))
258 return 0;
259 } while (1);
260 }
261
262 inline static int do_ioctl(int fd, unsigned long req, void *arg)
263 {
264 if (likely(ioctl(fd, req, arg) == 0))
265 return 0;
266
267 return __do_ioctl(fd, req, arg);
268 }
269
270 #ifdef DEBUG_MEMORY
271 static void debug_alloc(struct kgem *kgem, size_t size)
272 {
273 kgem->debug_memory.bo_allocs++;
274 kgem->debug_memory.bo_bytes += size;
275 }
276 static void debug_alloc__bo(struct kgem *kgem, struct kgem_bo *bo)
277 {
278 debug_alloc(kgem, bytes(bo));
279 }
280 #else
281 #define debug_alloc__bo(k, b)
282 #endif
283
284 #ifndef NDEBUG
285 static void assert_tiling(struct kgem *kgem, struct kgem_bo *bo)
286 {
287 struct drm_i915_gem_get_tiling tiling;
288
289 assert(bo);
290
291 if (!kgem->can_fence && kgem->gen >= 040 && bo->tiling)
292 return; /* lies */
293
294 VG_CLEAR(tiling);
295 tiling.handle = bo->handle;
296 tiling.tiling_mode = bo->tiling;
297 (void)do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_GET_TILING, &tiling);
298 assert(tiling.tiling_mode == bo->tiling);
299 }
300
301 static void assert_caching(struct kgem *kgem, struct kgem_bo *bo)
302 {
303 struct local_i915_gem_caching arg;
304 int expect = kgem->has_llc ? SNOOPED : UNCACHED;
305
306 VG_CLEAR(arg);
307 arg.handle = bo->handle;
308 arg.caching = expect;
309
310 (void)do_ioctl(kgem->fd, LOCAL_IOCTL_I915_GEM_GET_CACHING, &arg);
311
312 assert(arg.caching == expect);
313 }
314
315 static void assert_bo_retired(struct kgem_bo *bo)
316 {
317 DBG(("%s: handle=%d, domain: %d exec? %d, rq? %d\n", __FUNCTION__,
318 bo->handle, bo->domain, bo->exec != NULL, bo->rq != NULL));
319 assert(bo->refcnt);
320 assert(bo->rq == NULL);
321 assert(bo->exec == NULL);
322 assert(!bo->needs_flush);
323 assert(list_is_empty(&bo->request));
324 }
325 #else
326 #define assert_tiling(kgem, bo)
327 #define assert_caching(kgem, bo)
328 #define assert_bo_retired(bo)
329 #endif
330
331 static int __find_debugfs(struct kgem *kgem)
332 {
333 int i;
334
335 for (i = 0; i < DRM_MAX_MINOR; i++) {
336 char path[80];
337
338 sprintf(path, "/sys/kernel/debug/dri/%d/i915_wedged", i);
339 if (access(path, R_OK) == 0)
340 return i;
341
342 sprintf(path, "/debug/dri/%d/i915_wedged", i);
343 if (access(path, R_OK) == 0)
344 return i;
345 }
346
347 return -1;
348 }
349
350 static int kgem_get_minor(struct kgem *kgem)
351 {
352 struct stat st;
353
354 if (fstat(kgem->fd, &st))
355 return __find_debugfs(kgem);
356
357 if (!S_ISCHR(st.st_mode))
358 return __find_debugfs(kgem);
359
360 return st.st_rdev & 0x63;
361 }
362
363 static bool find_hang_state(struct kgem *kgem, char *path, int maxlen)
364 {
365 int minor = kgem_get_minor(kgem);
366
367 /* Search for our hang state in a few canonical locations.
368 * In the unlikely event of having multiple devices, we
369 * will need to check which minor actually corresponds to ours.
370 */
371
372 snprintf(path, maxlen, "/sys/class/drm/card%d/error", minor);
373 if (access(path, R_OK) == 0)
374 return true;
375
376 snprintf(path, maxlen, "/sys/kernel/debug/dri/%d/i915_error_state", minor);
377 if (access(path, R_OK) == 0)
378 return true;
379
380 snprintf(path, maxlen, "/debug/dri/%d/i915_error_state", minor);
381 if (access(path, R_OK) == 0)
382 return true;
383
384 path[0] = '\0';
385 return false;
386 }
387
388 static bool has_error_state(struct kgem *kgem, char *path)
389 {
390 bool ret = false;
391 char no;
392 int fd;
393
394 fd = open(path, O_RDONLY);
395 if (fd >= 0) {
396 ret = read(fd, &no, 1) == 1 && no != 'N';
397 close(fd);
398 }
399
400 return ret;
401 }
402
403 static int kgem_get_screen_index(struct kgem *kgem)
404 {
405 return __to_sna(kgem)->scrn->scrnIndex;
406 }
407
408 static void
409 __kgem_set_wedged(struct kgem *kgem)
410 {
411 static int once;
412 char path[256];
413
414 if (kgem->wedged)
415 return;
416
417 if (!once &&
418 find_hang_state(kgem, path, sizeof(path)) &&
419 has_error_state(kgem, path)) {
420 xf86DrvMsg(kgem_get_screen_index(kgem), X_ERROR,
421 "When reporting this, please include %s and the full dmesg.\n",
422 path);
423 once = 1;
424 }
425
426 kgem->wedged = true;
427 sna_render_mark_wedged(__to_sna(kgem));
428 }
429
430 static void kgem_sna_reset(struct kgem *kgem)
431 {
432 struct sna *sna = __to_sna(kgem);
433
434 sna->render.reset(sna);
435 sna->blt_state.fill_bo = 0;
436 }
437
438 static void kgem_sna_flush(struct kgem *kgem)
439 {
440 struct sna *sna = __to_sna(kgem);
441
442 sna->render.flush(sna);
443
444 if (sna->render.solid_cache.dirty)
445 sna_render_flush_solid(sna);
446 }
447
448 static bool kgem_bo_rmfb(struct kgem *kgem, struct kgem_bo *bo)
449 {
450 if (bo->scanout && bo->delta) {
451 DBG(("%s: releasing fb=%d for handle=%d\n",
452 __FUNCTION__, bo->delta, bo->handle));
453 /* XXX will leak if we are not DRM_MASTER. *shrug* */
454 do_ioctl(kgem->fd, DRM_IOCTL_MODE_RMFB, &bo->delta);
455 bo->delta = 0;
456 return true;
457 } else
458 return false;
459 }
460
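/* Ask the kernel to (re)tile a bo. EINTR/EAGAIN are retried; EBUSY is retried
 * after removing any framebuffer still attached to the bo. Returns true only
 * if the requested tiling mode was accepted and the resulting pitch is usable.
 */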
461 static bool kgem_set_tiling(struct kgem *kgem, struct kgem_bo *bo,
462 int tiling, int stride)
463 {
464 struct drm_i915_gem_set_tiling set_tiling;
465 int err;
466
467 if (tiling == bo->tiling) {
468 if (tiling == I915_TILING_NONE) {
469 bo->pitch = stride;
470 return true;
471 }
472 if (stride == bo->pitch)
473 return true;
474 }
475
476 if (DBG_NO_TILING)
477 return false;
478
479 VG_CLEAR(set_tiling);
480 restart:
481 set_tiling.handle = bo->handle;
482 set_tiling.tiling_mode = tiling;
483 set_tiling.stride = tiling ? stride : 0;
484
485 if (ioctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_TILING, &set_tiling) == 0) {
486 bo->tiling = set_tiling.tiling_mode;
487 bo->pitch = set_tiling.tiling_mode ? set_tiling.stride : stride;
488 DBG(("%s: handle=%d, tiling=%d [%d], pitch=%d [%d]: %d\n",
489 __FUNCTION__, bo->handle,
490 bo->tiling, tiling,
491 bo->pitch, stride,
492 set_tiling.tiling_mode == tiling));
493 return set_tiling.tiling_mode == tiling && bo->pitch >= stride;
494 }
495
496 err = errno;
497 if (err == EINTR)
498 goto restart;
499
500 if (err == EAGAIN) {
501 sched_yield();
502 goto restart;
503 }
504
505 if (err == EBUSY && kgem_bo_rmfb(kgem, bo))
506 goto restart;
507
508 ERR(("%s: failed to set-tiling(tiling=%d, pitch=%d) for handle=%d: %d\n",
509 __FUNCTION__, tiling, stride, bo->handle, err));
510 return false;
511 }
512
513 static bool gem_set_caching(int fd, uint32_t handle, int caching)
514 {
515 struct local_i915_gem_caching arg;
516
517 VG_CLEAR(arg);
518 arg.handle = handle;
519 arg.caching = caching;
520 return do_ioctl(fd, LOCAL_IOCTL_I915_GEM_SET_CACHING, &arg) == 0;
521 }
522
523 static uint32_t gem_userptr(int fd, void *ptr, size_t size, int read_only)
524 {
525 struct local_i915_gem_userptr arg;
526
527 VG_CLEAR(arg);
528 arg.user_ptr = (uintptr_t)ptr;
529 arg.user_size = size;
530 arg.flags = I915_USERPTR_UNSYNCHRONIZED;
531 if (read_only)
532 arg.flags |= I915_USERPTR_READ_ONLY;
533
534 if (DBG_NO_UNSYNCHRONIZED_USERPTR ||
535 do_ioctl(fd, LOCAL_IOCTL_I915_GEM_USERPTR, &arg)) {
536 arg.flags &= ~I915_USERPTR_UNSYNCHRONIZED;
537 if (do_ioctl(fd, LOCAL_IOCTL_I915_GEM_USERPTR, &arg)) {
538 DBG(("%s: failed to map %p + %d bytes: %d\n",
539 __FUNCTION__, ptr, size, errno));
540 return 0;
541 }
542 }
543
544 return arg.handle;
545 }
546
547 static bool __kgem_throttle(struct kgem *kgem, bool harder)
548 {
549 /* Let this be woken up by sigtimer so that we don't block here
550 * too much and completely starve X. We will sleep again shortly,
551 * and so catch up or detect the hang.
552 */
553 do {
554 if (ioctl(kgem->fd, DRM_IOCTL_I915_GEM_THROTTLE) == 0) {
555 kgem->need_throttle = 0;
556 return false;
557 }
558
559 if (errno == EIO)
560 return true;
561 } while (harder);
562
563 return false;
564 }
565
566 static bool __kgem_throttle_retire(struct kgem *kgem, unsigned flags)
567 {
568 if (flags & CREATE_NO_RETIRE || !kgem->need_retire) {
569 DBG(("%s: not retiring\n", __FUNCTION__));
570 return false;
571 }
572
573 if (kgem_retire(kgem))
574 return true;
575
576 if (flags & CREATE_NO_THROTTLE || !kgem->need_throttle) {
577 DBG(("%s: not throttling\n", __FUNCTION__));
578 return false;
579 }
580
581 __kgem_throttle(kgem, false);
582 return kgem_retire(kgem);
583 }
584
585 static void *__kgem_bo_map__gtt(struct kgem *kgem, struct kgem_bo *bo)
586 {
587 struct drm_i915_gem_mmap_gtt gtt;
588 void *ptr;
589 int err;
590
591 DBG(("%s(handle=%d, size=%d)\n", __FUNCTION__,
592 bo->handle, bytes(bo)));
593
594 if (bo->tiling && !kgem->can_fence)
595 return NULL;
596
597 VG_CLEAR(gtt);
598 retry_gtt:
599 gtt.handle = bo->handle;
600 	if ((err = do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &gtt))) {
601 DBG(("%s: failed %d, throttling/cleaning caches\n",
602 __FUNCTION__, err));
603 assert(err != EINVAL);
604
605 (void)__kgem_throttle_retire(kgem, 0);
606 if (kgem_expire_cache(kgem))
607 goto retry_gtt;
608
609 if (kgem_cleanup_cache(kgem))
610 goto retry_gtt;
611
612 ERR(("%s: failed to retrieve GTT offset for handle=%d: %d\n",
613 __FUNCTION__, bo->handle, -err));
614 return NULL;
615 }
616
617 retry_mmap:
618 ptr = mmap(0, bytes(bo), PROT_READ | PROT_WRITE, MAP_SHARED,
619 kgem->fd, gtt.offset);
620 if (ptr == MAP_FAILED) {
621 err = errno;
622 DBG(("%s: failed %d, throttling/cleaning caches\n",
623 __FUNCTION__, err));
624 assert(err != EINVAL);
625
626 if (__kgem_throttle_retire(kgem, 0))
627 goto retry_mmap;
628
629 if (kgem_cleanup_cache(kgem))
630 goto retry_mmap;
631
632 ERR(("%s: failed to mmap handle=%d, %d bytes, into GTT domain: %d\n",
633 __FUNCTION__, bo->handle, bytes(bo), err));
634 ptr = NULL;
635 }
636
637 /* Cache this mapping to avoid the overhead of an
638 * excruciatingly slow GTT pagefault. This is more an
639 * issue with compositing managers which need to
640 * frequently flush CPU damage to their GPU bo.
641 */
642 return bo->map__gtt = ptr;
643 }
644
645 static void *__kgem_bo_map__wc(struct kgem *kgem, struct kgem_bo *bo)
646 {
647 struct local_i915_gem_mmap2 wc;
648 int err;
649
650 DBG(("%s(handle=%d, size=%d)\n", __FUNCTION__,
651 bo->handle, bytes(bo)));
652 assert(kgem->has_wc_mmap);
653
654 VG_CLEAR(wc);
655
656 retry_wc:
657 wc.handle = bo->handle;
658 wc.offset = 0;
659 wc.size = bytes(bo);
660 wc.flags = I915_MMAP_WC;
661 if ((err = do_ioctl(kgem->fd, LOCAL_IOCTL_I915_GEM_MMAP_v2, &wc))) {
662 DBG(("%s: failed %d, throttling/cleaning caches\n",
663 __FUNCTION__, err));
664 assert(err != EINVAL);
665
666 if (__kgem_throttle_retire(kgem, 0))
667 goto retry_wc;
668
669 if (kgem_cleanup_cache(kgem))
670 goto retry_wc;
671
672 ERR(("%s: failed to mmap handle=%d, %d bytes, into CPU(wc) domain: %d\n",
673 __FUNCTION__, bo->handle, bytes(bo), -err));
674 return NULL;
675 }
676
677 VG(VALGRIND_MAKE_MEM_DEFINED(wc.addr_ptr, bytes(bo)));
678
679 DBG(("%s: caching CPU(wc) vma for %d\n", __FUNCTION__, bo->handle));
680 return bo->map__wc = (void *)(uintptr_t)wc.addr_ptr;
681 }
682
683 static void *__kgem_bo_map__cpu(struct kgem *kgem, struct kgem_bo *bo)
684 {
685 struct local_i915_gem_mmap arg;
686 int err;
687
688 VG_CLEAR(arg);
689 arg.offset = 0;
690
691 retry:
692 arg.handle = bo->handle;
693 arg.size = bytes(bo);
694 if ((err = do_ioctl(kgem->fd, LOCAL_IOCTL_I915_GEM_MMAP, &arg))) {
695 DBG(("%s: failed %d, throttling/cleaning caches\n",
696 __FUNCTION__, err));
697 assert(err != -EINVAL || bo->prime);
698
699 if (__kgem_throttle_retire(kgem, 0))
700 goto retry;
701
702 if (kgem_cleanup_cache(kgem))
703 goto retry;
704
705 ERR(("%s: failed to mmap handle=%d (prime? %d), %d bytes, into CPU domain: %d\n",
706 __FUNCTION__, bo->handle, bo->prime, bytes(bo), -err));
707 bo->purged = 1;
708 return NULL;
709 }
710
711 VG(VALGRIND_MAKE_MEM_DEFINED(arg.addr_ptr, bytes(bo)));
712
713 DBG(("%s: caching CPU vma for %d\n", __FUNCTION__, bo->handle));
714 return bo->map__cpu = (void *)(uintptr_t)arg.addr_ptr;
715 }
716
717 static int gem_write(int fd, uint32_t handle,
718 int offset, int length,
719 const void *src)
720 {
721 struct drm_i915_gem_pwrite pwrite;
722
723 DBG(("%s(handle=%d, offset=%d, len=%d)\n", __FUNCTION__,
724 handle, offset, length));
725
726 VG_CLEAR(pwrite);
727 pwrite.handle = handle;
728 pwrite.offset = offset;
729 pwrite.size = length;
730 pwrite.data_ptr = (uintptr_t)src;
731 return do_ioctl(fd, DRM_IOCTL_I915_GEM_PWRITE, &pwrite);
732 }
733
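/* Illustrative example of the cacheline rounding below (64-byte lines
 * assumed): offset=100, length=8 gives pwrite.offset=64,
 * pwrite.size=ALIGN(108,64)-64=64 and data_ptr=src-36, i.e. the kernel may
 * copy from up to 63 bytes before src -- the "fortuitously safe" case noted
 * in the code.
 */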
734 static int gem_write__cachealigned(int fd, uint32_t handle,
735 int offset, int length,
736 const void *src)
737 {
738 struct drm_i915_gem_pwrite pwrite;
739
740 DBG(("%s(handle=%d, offset=%d, len=%d)\n", __FUNCTION__,
741 handle, offset, length));
742
743 VG_CLEAR(pwrite);
744 pwrite.handle = handle;
745 /* align the transfer to cachelines; fortuitously this is safe! */
746 if ((offset | length) & 63) {
747 pwrite.offset = offset & ~63;
748 pwrite.size = ALIGN(offset+length, 64) - pwrite.offset;
749 pwrite.data_ptr = (uintptr_t)src + pwrite.offset - offset;
750 } else {
751 pwrite.offset = offset;
752 pwrite.size = length;
753 pwrite.data_ptr = (uintptr_t)src;
754 }
755 return do_ioctl(fd, DRM_IOCTL_I915_GEM_PWRITE, &pwrite);
756 }
757
758 static int gem_read(int fd, uint32_t handle, const void *dst,
759 int offset, int length)
760 {
761 struct drm_i915_gem_pread pread;
762 int ret;
763
764 DBG(("%s(handle=%d, len=%d)\n", __FUNCTION__,
765 handle, length));
766
767 VG_CLEAR(pread);
768 pread.handle = handle;
769 pread.offset = offset;
770 pread.size = length;
771 pread.data_ptr = (uintptr_t)dst;
772 ret = do_ioctl(fd, DRM_IOCTL_I915_GEM_PREAD, &pread);
773 if (ret) {
774 DBG(("%s: failed, errno=%d\n", __FUNCTION__, -ret));
775 return ret;
776 }
777
778 VG(VALGRIND_MAKE_MEM_DEFINED(dst, length));
779 return 0;
780 }
781
782 bool __kgem_busy(struct kgem *kgem, int handle)
783 {
784 struct drm_i915_gem_busy busy;
785
786 VG_CLEAR(busy);
787 busy.handle = handle;
788 busy.busy = !kgem->wedged;
789 (void)do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_BUSY, &busy);
790 DBG(("%s: handle=%d, busy=%d, wedged=%d\n",
791 __FUNCTION__, handle, busy.busy, kgem->wedged));
792
793 return busy.busy;
794 }
795
796 static void kgem_bo_retire(struct kgem *kgem, struct kgem_bo *bo)
797 {
798 DBG(("%s: retiring bo handle=%d (needed flush? %d), rq? %d [busy?=%d]\n",
799 __FUNCTION__, bo->handle, bo->needs_flush, bo->rq != NULL,
800 __kgem_busy(kgem, bo->handle)));
801 assert(bo->exec == NULL);
802 assert(list_is_empty(&bo->vma));
803
804 if (bo->rq)
805 __kgem_retire_requests_upto(kgem, bo);
806 ASSERT_IDLE(kgem, bo->handle);
807 assert_bo_retired(bo);
808 }
809
810 static void kgem_bo_maybe_retire(struct kgem *kgem, struct kgem_bo *bo)
811 {
812 DBG(("%s: retiring bo handle=%d (needed flush? %d), rq? %d [busy?=%d]\n",
813 __FUNCTION__, bo->handle, bo->needs_flush, bo->rq != NULL,
814 __kgem_busy(kgem, bo->handle)));
815 assert(bo->exec == NULL);
816 assert(list_is_empty(&bo->vma));
817
818 if (bo->rq) {
819 if (!__kgem_busy(kgem, bo->handle))
820 __kgem_retire_requests_upto(kgem, bo);
821 } else {
822 assert(!bo->needs_flush);
823 ASSERT_IDLE(kgem, bo->handle);
824 }
825 }
826
827 bool kgem_bo_write(struct kgem *kgem, struct kgem_bo *bo,
828 const void *data, int length)
829 {
830 void *ptr;
831 int err;
832
833 assert(bo->refcnt);
834 assert(bo->proxy == NULL);
835 ASSERT_IDLE(kgem, bo->handle);
836
837 assert(length <= bytes(bo));
838 retry:
839 ptr = NULL;
840 if (bo->domain == DOMAIN_CPU || (kgem->has_llc && !bo->scanout)) {
841 ptr = bo->map__cpu;
842 if (ptr == NULL)
843 ptr = __kgem_bo_map__cpu(kgem, bo);
844 } else if (kgem->has_wc_mmap) {
845 ptr = bo->map__wc;
846 if (ptr == NULL)
847 ptr = __kgem_bo_map__wc(kgem, bo);
848 }
849 if (ptr) {
850 /* XXX unsynchronized? */
851 memcpy(ptr, data, length);
852 return true;
853 }
854
855 if ((err = gem_write(kgem->fd, bo->handle, 0, length, data))) {
856 DBG(("%s: failed %d, throttling/cleaning caches\n",
857 __FUNCTION__, err));
858 assert(err != EINVAL);
859
860 (void)__kgem_throttle_retire(kgem, 0);
861 if (kgem_expire_cache(kgem))
862 goto retry;
863
864 if (kgem_cleanup_cache(kgem))
865 goto retry;
866
867 ERR(("%s: failed to write %d bytes into BO handle=%d: %d\n",
868 __FUNCTION__, length, bo->handle, -err));
869 return false;
870 }
871
872 DBG(("%s: flush=%d, domain=%d\n", __FUNCTION__, bo->flush, bo->domain));
873 if (bo->exec == NULL)
874 kgem_bo_maybe_retire(kgem, bo);
875 bo->domain = DOMAIN_NONE;
876 bo->gtt_dirty = true;
877 return true;
878 }
879
880 static uint32_t gem_create(int fd, int num_pages)
881 {
882 struct drm_i915_gem_create create;
883
884 VG_CLEAR(create);
885 create.handle = 0;
886 create.size = PAGE_SIZE * num_pages;
887 (void)do_ioctl(fd, DRM_IOCTL_I915_GEM_CREATE, &create);
888
889 return create.handle;
890 }
891
892 static void
893 kgem_bo_set_purgeable(struct kgem *kgem, struct kgem_bo *bo)
894 {
895 #if !DBG_NO_MADV
896 struct drm_i915_gem_madvise madv;
897
898 assert(bo->exec == NULL);
899
900 VG_CLEAR(madv);
901 madv.handle = bo->handle;
902 madv.madv = I915_MADV_DONTNEED;
903 if (do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_MADVISE, &madv) == 0) {
904 bo->purged = true;
905 kgem->need_purge |= !madv.retained && bo->domain != DOMAIN_CPU;
906 }
907 #endif
908 }
909
910 static bool
911 kgem_bo_is_retained(struct kgem *kgem, struct kgem_bo *bo)
912 {
913 #if DBG_NO_MADV
914 return true;
915 #else
916 struct drm_i915_gem_madvise madv;
917
918 if (!bo->purged)
919 return true;
920
921 VG_CLEAR(madv);
922 madv.handle = bo->handle;
923 madv.madv = I915_MADV_DONTNEED;
924 if (do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_MADVISE, &madv) == 0)
925 return madv.retained;
926
927 return false;
928 #endif
929 }
930
931 static bool
932 kgem_bo_clear_purgeable(struct kgem *kgem, struct kgem_bo *bo)
933 {
934 #if DBG_NO_MADV
935 return true;
936 #else
937 struct drm_i915_gem_madvise madv;
938
939 assert(bo->purged);
940
941 VG_CLEAR(madv);
942 madv.handle = bo->handle;
943 madv.madv = I915_MADV_WILLNEED;
944 if (do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_MADVISE, &madv) == 0) {
945 bo->purged = !madv.retained;
946 kgem->need_purge |= !madv.retained && bo->domain != DOMAIN_CPU;
947 return madv.retained;
948 }
949
950 return false;
951 #endif
952 }
953
954 static void gem_close(int fd, uint32_t handle)
955 {
956 struct drm_gem_close close;
957
958 VG_CLEAR(close);
959 close.handle = handle;
960 (void)do_ioctl(fd, DRM_IOCTL_GEM_CLOSE, &close);
961 }
962
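/* __fls() returns the index of the highest set bit, so cache_bucket() groups
 * bos by power-of-two size: e.g. a 5-page (20KiB) bo falls into bucket
 * __fls(5) == 2, which holds everything from 4 to 7 pages.
 */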
963 constant inline static unsigned long __fls(unsigned long word)
964 {
965 #if defined(__GNUC__) && (defined(__i386__) || defined(__x86__) || defined(__x86_64__))
966 asm("bsr %1,%0"
967 : "=r" (word)
968 : "rm" (word));
969 return word;
970 #else
971 unsigned int v = 0;
972
973 while (word >>= 1)
974 v++;
975
976 return v;
977 #endif
978 }
979
980 constant inline static int cache_bucket(int num_pages)
981 {
982 return __fls(num_pages);
983 }
984
985 static struct kgem_bo *__kgem_bo_init(struct kgem_bo *bo,
986 int handle, int num_pages)
987 {
988 DBG(("%s(handle=%d, num_pages=%d)\n", __FUNCTION__, handle, num_pages));
989
990 assert(num_pages);
991 memset(bo, 0, sizeof(*bo));
992
993 bo->refcnt = 1;
994 bo->handle = handle;
995 bo->target_handle = -1;
996 num_pages(bo) = num_pages;
997 bucket(bo) = cache_bucket(num_pages);
998 bo->reusable = true;
999 bo->domain = DOMAIN_CPU;
1000 list_init(&bo->request);
1001 list_init(&bo->list);
1002 list_init(&bo->vma);
1003
1004 return bo;
1005 }
1006
1007 static struct kgem_bo *__kgem_bo_alloc(int handle, int num_pages)
1008 {
1009 struct kgem_bo *bo;
1010
1011 if (__kgem_freed_bo) {
1012 bo = __kgem_freed_bo;
1013 __kgem_freed_bo = *(struct kgem_bo **)bo;
1014 } else {
1015 bo = malloc(sizeof(*bo));
1016 if (bo == NULL)
1017 return NULL;
1018 }
1019
1020 return __kgem_bo_init(bo, handle, num_pages);
1021 }
1022
1023 static struct kgem_request *__kgem_request_alloc(struct kgem *kgem)
1024 {
1025 struct kgem_request *rq;
1026
1027 if (unlikely(kgem->wedged)) {
1028 rq = &kgem->static_request;
1029 } else {
1030 rq = __kgem_freed_request;
1031 if (rq) {
1032 __kgem_freed_request = *(struct kgem_request **)rq;
1033 } else {
1034 rq = malloc(sizeof(*rq));
1035 if (rq == NULL)
1036 rq = &kgem->static_request;
1037 }
1038 }
1039
1040 list_init(&rq->buffers);
1041 rq->bo = NULL;
1042 rq->ring = 0;
1043
1044 return rq;
1045 }
1046
1047 static void __kgem_request_free(struct kgem_request *rq)
1048 {
1049 _list_del(&rq->list);
1050 if (DBG_NO_MALLOC_CACHE) {
1051 free(rq);
1052 } else {
1053 *(struct kgem_request **)rq = __kgem_freed_request;
1054 __kgem_freed_request = rq;
1055 }
1056 }
1057
1058 static struct list *inactive(struct kgem *kgem, int num_pages)
1059 {
1060 assert(num_pages < MAX_CACHE_SIZE / PAGE_SIZE);
1061 assert(cache_bucket(num_pages) < NUM_CACHE_BUCKETS);
1062 return &kgem->inactive[cache_bucket(num_pages)];
1063 }
1064
1065 static struct list *active(struct kgem *kgem, int num_pages, int tiling)
1066 {
1067 assert(num_pages < MAX_CACHE_SIZE / PAGE_SIZE);
1068 assert(cache_bucket(num_pages) < NUM_CACHE_BUCKETS);
1069 return &kgem->active[cache_bucket(num_pages)][tiling];
1070 }
1071
1072 static size_t
1073 agp_aperture_size(struct pci_device *dev, unsigned gen)
1074 {
1075 /* XXX assume that only future chipsets are unknown and follow
1076 * the post gen2 PCI layout.
1077 */
1078 return dev->regions[gen < 030 ? 0 : 2].size;
1079 }
1080
1081 static size_t
1082 total_ram_size(void)
1083 {
1084 #ifdef HAVE_STRUCT_SYSINFO_TOTALRAM
1085 struct sysinfo info;
1086 if (sysinfo(&info) == 0)
1087 return (size_t)info.totalram * info.mem_unit;
1088 #endif
1089
1090 #ifdef _SC_PHYS_PAGES
1091 return (size_t)sysconf(_SC_PHYS_PAGES) * sysconf(_SC_PAGE_SIZE);
1092 #endif
1093
1094 return 0;
1095 }
1096
1097 static unsigned
1098 cpu_cache_size__cpuid4(void)
1099 {
1100 /* "Deterministic Cache Parameters (Function 04h)":
1101 * When EAX is initialized to a value of 4, the CPUID instruction
1102 * returns deterministic cache information in the EAX, EBX, ECX
1103 * and EDX registers. This function requires ECX be initialized
1104 * with an index which indicates which cache to return information
1105 * about. The OS is expected to call this function (CPUID.4) with
1106 * ECX = 0, 1, 2, until EAX[4:0] == 0, indicating no more caches.
1107 * The order in which the caches are returned is not specified
1108 * and may change at Intel's discretion.
1109 *
1110 * Calculating the Cache Size in bytes:
1111 * = (Ways +1) * (Partitions +1) * (Line Size +1) * (Sets +1)
1112 */
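	/* Worked example: a 16-way cache with 1 partition, 64-byte lines and
	 * 8192 sets reports (15+1) * (0+1) * (63+1) * (8191+1) = 8MiB.
	 */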
1113
1114 unsigned int eax, ebx, ecx, edx;
1115 unsigned int llc_size = 0;
1116 int cnt;
1117
1118 if (__get_cpuid_max(BASIC_CPUID, NULL) < 4)
1119 return 0;
1120
1121 cnt = 0;
1122 do {
1123 unsigned associativity, line_partitions, line_size, sets;
1124
1125 __cpuid_count(4, cnt++, eax, ebx, ecx, edx);
1126
1127 if ((eax & 0x1f) == 0)
1128 break;
1129
1130 associativity = ((ebx >> 22) & 0x3ff) + 1;
1131 line_partitions = ((ebx >> 12) & 0x3ff) + 1;
1132 line_size = (ebx & 0xfff) + 1;
1133 sets = ecx + 1;
1134
1135 llc_size = associativity * line_partitions * line_size * sets;
1136 } while (1);
1137
1138 return llc_size;
1139 }
1140
1141 static unsigned
1142 cpu_cache_size(void)
1143 {
1144 unsigned size;
1145 FILE *file;
1146
1147 size = cpu_cache_size__cpuid4();
1148 if (size)
1149 return size;
1150
1151 file = fopen("/proc/cpuinfo", "r");
1152 if (file) {
1153 size_t len = 0;
1154 char *line = NULL;
1155 while (getline(&line, &len, file) != -1) {
1156 int kb;
1157 if (sscanf(line, "cache size : %d KB", &kb) == 1) {
1158 /* Paranoid check against gargantuan caches */
1159 if (kb <= 1<<20)
1160 size = kb * 1024;
1161 break;
1162 }
1163 }
1164 free(line);
1165 fclose(file);
1166 }
1167
1168 if (size == 0)
1169 size = 64 * 1024;
1170
1171 return size;
1172 }
1173
1174 static int gem_param(struct kgem *kgem, int name)
1175 {
1176 drm_i915_getparam_t gp;
1177 int v = -1; /* No param uses the sign bit, reserve it for errors */
1178
1179 VG_CLEAR(gp);
1180 gp.param = name;
1181 gp.value = &v;
1182 if (do_ioctl(kgem->fd, DRM_IOCTL_I915_GETPARAM, &gp))
1183 return -1;
1184
1185 VG(VALGRIND_MAKE_MEM_DEFINED(&v, sizeof(v)));
1186 return v;
1187 }
1188
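/* Probe for execbuffer2 by issuing it with buffer_count=1 but a NULL buffer
 * list: a kernel that implements the ioctl gets as far as copying the list
 * and fails with -EFAULT, whereas an unaware kernel rejects the ioctl itself.
 */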
1189 static bool test_has_execbuffer2(struct kgem *kgem)
1190 {
1191 struct drm_i915_gem_execbuffer2 execbuf;
1192 int ret;
1193
1194 memset(&execbuf, 0, sizeof(execbuf));
1195 execbuf.buffer_count = 1;
1196
1197 ret = do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf);
1198 #ifdef __FreeBSD__
1199 /* XXX FreeBSD returns ENOENT instead of EFAULT. */
1200 if (ret == -ENOENT)
1201 return true;
1202 #endif
1203 return ret == -EFAULT;
1204 }
1205
1206 static bool test_has_no_reloc(struct kgem *kgem)
1207 {
1208 if (DBG_NO_FAST_RELOC)
1209 return false;
1210
1211 return gem_param(kgem, LOCAL_I915_PARAM_HAS_NO_RELOC) > 0;
1212 }
1213
1214 static bool test_has_handle_lut(struct kgem *kgem)
1215 {
1216 if (DBG_NO_HANDLE_LUT)
1217 return false;
1218
1219 return gem_param(kgem, LOCAL_I915_PARAM_HAS_HANDLE_LUT) > 0;
1220 }
1221
1222 static bool test_has_wt(struct kgem *kgem)
1223 {
1224 if (DBG_NO_WT)
1225 return false;
1226
1227 return gem_param(kgem, LOCAL_I915_PARAM_HAS_WT) > 0;
1228 }
1229
1230 static bool test_has_semaphores_enabled(struct kgem *kgem)
1231 {
1232 FILE *file;
1233 bool detected = false;
1234 int ret;
1235
1236 if (DBG_NO_SEMAPHORES)
1237 return false;
1238
1239 ret = gem_param(kgem, LOCAL_I915_PARAM_HAS_SEMAPHORES);
1240 if (ret != -1)
1241 return ret > 0;
1242
1243 file = fopen("/sys/module/i915/parameters/semaphores", "r");
1244 if (file) {
1245 int value;
1246 if (fscanf(file, "%d", &value) == 1)
1247 detected = value != 0;
1248 fclose(file);
1249 }
1250
1251 return detected;
1252 }
1253
1254 static bool is_hw_supported(struct kgem *kgem,
1255 struct pci_device *dev)
1256 {
1257 if (DBG_NO_HW)
1258 return false;
1259
1260 if (!test_has_execbuffer2(kgem))
1261 return false;
1262
1263 if (kgem->gen == (unsigned)-1) /* unknown chipset, assume future gen */
1264 return kgem->has_blt;
1265
1266 	/* Although the GMCH is fubar on pre-855gm, it mostly works. So
1267 * let the user decide through "NoAccel" whether or not to risk
1268 * hw acceleration.
1269 */
1270
1271 if (kgem->gen == 060 && dev && dev->revision < 8) {
1272 /* pre-production SNB with dysfunctional BLT */
1273 return false;
1274 }
1275
1276 if (kgem->gen >= 060) /* Only if the kernel supports the BLT ring */
1277 return kgem->has_blt;
1278
1279 return true;
1280 }
1281
1282 static bool test_has_relaxed_fencing(struct kgem *kgem)
1283 {
1284 if (kgem->gen < 040) {
1285 if (DBG_NO_RELAXED_FENCING)
1286 return false;
1287
1288 return gem_param(kgem, LOCAL_I915_PARAM_HAS_RELAXED_FENCING) > 0;
1289 } else
1290 return true;
1291 }
1292
1293 static bool test_has_coherent_mmap_gtt(struct kgem *kgem)
1294 {
1295 if (DBG_NO_COHERENT_MMAP_GTT)
1296 return false;
1297
1298 return gem_param(kgem, LOCAL_I915_PARAM_MMAP_GTT_COHERENT) > 0;
1299 }
1300
1301 static bool test_has_llc(struct kgem *kgem)
1302 {
1303 int has_llc = -1;
1304
1305 if (DBG_NO_LLC)
1306 return false;
1307
1308 has_llc = gem_param(kgem, LOCAL_I915_PARAM_HAS_LLC);
1309 if (has_llc == -1) {
1310 DBG(("%s: no kernel/drm support for HAS_LLC, assuming support for LLC based on GPU generation\n", __FUNCTION__));
1311 has_llc = kgem->gen >= 060;
1312 }
1313
1314 return has_llc;
1315 }
1316
1317 static bool test_has_wc_mmap(struct kgem *kgem)
1318 {
1319 struct local_i915_gem_mmap2 wc;
1320 bool ret;
1321
1322 if (DBG_NO_WC_MMAP)
1323 return false;
1324
1325 /* XXX See https://bugs.freedesktop.org/show_bug.cgi?id=90841 */
1326 if (kgem->gen < 033)
1327 return false;
1328
1329 if (gem_param(kgem, LOCAL_I915_PARAM_MMAP_VERSION) < 1)
1330 return false;
1331
1332 VG_CLEAR(wc);
1333 wc.handle = gem_create(kgem->fd, 1);
1334 wc.offset = 0;
1335 wc.size = 4096;
1336 wc.flags = I915_MMAP_WC;
1337 ret = do_ioctl(kgem->fd, LOCAL_IOCTL_I915_GEM_MMAP_v2, &wc) == 0;
1338 gem_close(kgem->fd, wc.handle);
1339
1340 return ret;
1341 }
1342
1343 static bool test_has_caching(struct kgem *kgem)
1344 {
1345 uint32_t handle;
1346 bool ret;
1347
1348 if (DBG_NO_CACHE_LEVEL)
1349 return false;
1350
1351 /* Incoherent blt and sampler hangs the GPU */
1352 if (kgem->gen == 040)
1353 return false;
1354
1355 handle = gem_create(kgem->fd, 1);
1356 if (handle == 0)
1357 return false;
1358
1359 ret = gem_set_caching(kgem->fd, handle, UNCACHED);
1360 gem_close(kgem->fd, handle);
1361 return ret;
1362 }
1363
1364 static bool test_has_userptr(struct kgem *kgem)
1365 {
1366 struct local_i915_gem_userptr arg;
1367 void *ptr;
1368
1369 if (DBG_NO_USERPTR)
1370 return false;
1371
1372 /* Incoherent blt and sampler hangs the GPU */
1373 if (kgem->gen == 040)
1374 return false;
1375
1376 if (posix_memalign(&ptr, PAGE_SIZE, PAGE_SIZE))
1377 return false;
1378
1379 VG_CLEAR(arg);
1380 arg.user_ptr = (uintptr_t)ptr;
1381 arg.user_size = PAGE_SIZE;
1382 arg.flags = I915_USERPTR_UNSYNCHRONIZED;
1383
1384 if (DBG_NO_UNSYNCHRONIZED_USERPTR ||
1385 do_ioctl(kgem->fd, LOCAL_IOCTL_I915_GEM_USERPTR, &arg)) {
1386 arg.flags &= ~I915_USERPTR_UNSYNCHRONIZED;
1387 if (do_ioctl(kgem->fd, LOCAL_IOCTL_I915_GEM_USERPTR, &arg))
1388 arg.handle = 0;
1389 /* Leak the userptr bo to keep the mmu_notifier alive */
1390 } else {
1391 gem_close(kgem->fd, arg.handle);
1392 free(ptr);
1393 }
1394
1395 return arg.handle != 0;
1396 }
1397
1398 static bool test_has_create2(struct kgem *kgem)
1399 {
1400 #if defined(USE_CREATE2)
1401 struct local_i915_gem_create2 args;
1402
1403 if (DBG_NO_CREATE2)
1404 return false;
1405
1406 memset(&args, 0, sizeof(args));
1407 args.size = PAGE_SIZE;
1408 args.caching = DISPLAY;
1409 if (do_ioctl(kgem->fd, LOCAL_IOCTL_I915_GEM_CREATE2, &args) == 0)
1410 gem_close(kgem->fd, args.handle);
1411
1412 return args.handle != 0;
1413 #else
1414 return false;
1415 #endif
1416 }
1417
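/* To check for Y-tiled blits, submit a tiny batch that sets and then clears
 * the Y-tiling bits in BCS_SWCTRL via MI_LOAD_REGISTER_IMM; the feature is
 * enabled only if that batch is accepted and executes successfully.
 */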
1418 static bool test_can_blt_y(struct kgem *kgem)
1419 {
1420 struct drm_i915_gem_exec_object2 object;
1421 uint32_t batch[] = {
1422 #define MI_LOAD_REGISTER_IMM (0x22<<23 | (3-2))
1423 #define BCS_SWCTRL 0x22200
1424 #define BCS_SRC_Y (1 << 0)
1425 #define BCS_DST_Y (1 << 1)
1426 MI_LOAD_REGISTER_IMM,
1427 BCS_SWCTRL,
1428 (BCS_SRC_Y | BCS_DST_Y) << 16 | (BCS_SRC_Y | BCS_DST_Y),
1429
1430 MI_LOAD_REGISTER_IMM,
1431 BCS_SWCTRL,
1432 (BCS_SRC_Y | BCS_DST_Y) << 16,
1433
1434 MI_BATCH_BUFFER_END,
1435 0,
1436 };
1437 int ret;
1438
1439 if (DBG_NO_BLT_Y)
1440 return false;
1441
1442 if (kgem->gen < 060)
1443 return false;
1444
1445 memset(&object, 0, sizeof(object));
1446 object.handle = gem_create(kgem->fd, 1);
1447
1448 ret = gem_write(kgem->fd, object.handle, 0, sizeof(batch), batch);
1449 if (ret == 0) {
1450 struct drm_i915_gem_execbuffer2 execbuf;
1451
1452 memset(&execbuf, 0, sizeof(execbuf));
1453 execbuf.buffers_ptr = (uintptr_t)&object;
1454 execbuf.buffer_count = 1;
1455 execbuf.flags = KGEM_BLT;
1456
1457 ret = do_ioctl(kgem->fd,
1458 DRM_IOCTL_I915_GEM_EXECBUFFER2,
1459 &execbuf);
1460 }
1461 gem_close(kgem->fd, object.handle);
1462
1463 return ret == 0;
1464 }
1465
1466 static bool gem_set_tiling(int fd, uint32_t handle, int tiling, int stride)
1467 {
1468 struct drm_i915_gem_set_tiling set_tiling;
1469
1470 if (DBG_NO_TILING)
1471 return false;
1472
1473 VG_CLEAR(set_tiling);
1474 set_tiling.handle = handle;
1475 set_tiling.tiling_mode = tiling;
1476 set_tiling.stride = stride;
1477
1478 if (ioctl(fd, DRM_IOCTL_I915_GEM_SET_TILING, &set_tiling) == 0)
1479 return set_tiling.tiling_mode == tiling;
1480
1481 return false;
1482 }
1483
1484 static bool test_can_scanout_y(struct kgem *kgem)
1485 {
1486 struct drm_mode_fb_cmd arg;
1487 bool ret = false;
1488
1489 if (DBG_NO_SCANOUT_Y)
1490 return false;
1491
1492 VG_CLEAR(arg);
1493 arg.width = 32;
1494 arg.height = 32;
1495 arg.pitch = 4*32;
1496 arg.bpp = 32;
1497 arg.depth = 24;
1498 arg.handle = gem_create(kgem->fd, 1);
1499
1500 if (gem_set_tiling(kgem->fd, arg.handle, I915_TILING_Y, arg.pitch))
1501 ret = do_ioctl(kgem->fd, DRM_IOCTL_MODE_ADDFB, &arg) == 0;
1502 if (!ret) {
1503 struct local_mode_fb_cmd2 {
1504 uint32_t fb_id;
1505 uint32_t width, height;
1506 uint32_t pixel_format;
1507 uint32_t flags;
1508
1509 uint32_t handles[4];
1510 uint32_t pitches[4];
1511 uint32_t offsets[4];
1512 uint64_t modifiers[4];
1513 } f;
1514 #define LOCAL_IOCTL_MODE_ADDFB2 DRM_IOWR(0xb8, struct local_mode_fb_cmd2)
1515 memset(&f, 0, sizeof(f));
1516 f.width = arg.width;
1517 f.height = arg.height;
1518 f.handles[0] = arg.handle;
1519 f.pitches[0] = arg.pitch;
1520 f.modifiers[0] = (uint64_t)1 << 56 | 2; /* MOD_Y_TILED */
1521 f.pixel_format = 'X' | 'R' << 8 | '2' << 16 | '4' << 24; /* XRGB8888 */
1522 f.flags = 1 << 1; /* + modifier */
1523 if (drmIoctl(kgem->fd, LOCAL_IOCTL_MODE_ADDFB2, &f) == 0) {
1524 ret = true;
1525 arg.fb_id = f.fb_id;
1526 }
1527 }
1528 do_ioctl(kgem->fd, DRM_IOCTL_MODE_RMFB, &arg.fb_id);
1529 gem_close(kgem->fd, arg.handle);
1530
1531 return ret;
1532 }
1533
1534 static bool test_has_dirtyfb(struct kgem *kgem)
1535 {
1536 struct drm_mode_fb_cmd create;
1537 bool ret = false;
1538
1539 if (DBG_NO_DIRTYFB)
1540 return false;
1541
1542 VG_CLEAR(create);
1543 create.width = 32;
1544 create.height = 32;
1545 create.pitch = 4*32;
1546 create.bpp = 32;
1547 create.depth = 24; /* {bpp:32, depth:24} -> x8r8g8b8 */
1548 create.handle = gem_create(kgem->fd, 1);
1549 if (create.handle == 0)
1550 return false;
1551
1552 if (drmIoctl(kgem->fd, DRM_IOCTL_MODE_ADDFB, &create) == 0) {
1553 struct drm_mode_fb_dirty_cmd dirty;
1554
1555 memset(&dirty, 0, sizeof(dirty));
1556 dirty.fb_id = create.fb_id;
1557 ret = drmIoctl(kgem->fd,
1558 DRM_IOCTL_MODE_DIRTYFB,
1559 &dirty) == 0;
1560
1561 /* XXX There may be multiple levels of DIRTYFB, depending on
1562 * whether the kernel thinks tracking dirty regions is
1563 * beneficial vs flagging the whole fb as dirty.
1564 */
1565
1566 drmIoctl(kgem->fd,
1567 DRM_IOCTL_MODE_RMFB,
1568 &create.fb_id);
1569 }
1570 gem_close(kgem->fd, create.handle);
1571
1572 return ret;
1573 }
1574
1575 static bool test_has_secure_batches(struct kgem *kgem)
1576 {
1577 if (DBG_NO_SECURE_BATCHES)
1578 return false;
1579
1580 return gem_param(kgem, LOCAL_I915_PARAM_HAS_SECURE_BATCHES) > 0;
1581 }
1582
1583 static bool test_has_pinned_batches(struct kgem *kgem)
1584 {
1585 if (DBG_NO_PINNED_BATCHES)
1586 return false;
1587
1588 return gem_param(kgem, LOCAL_I915_PARAM_HAS_PINNED_BATCHES) > 0;
1589 }
1590
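/* Pre-pin two pools of batch buffers (16 one-page and 4 four-page bos) for the
 * 830gm/845g workaround noted in the error path below. If the PIN ioctl is
 * unavailable, fall back to a single unpinned bo per pool.
 */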
1591 static bool kgem_init_pinned_batches(struct kgem *kgem)
1592 {
1593 int count[2] = { 16, 4 };
1594 int size[2] = { 1, 4 };
1595 int ret = 0;
1596 int n, i;
1597
1598 if (unlikely(kgem->wedged))
1599 return true;
1600
1601 for (n = 0; n < ARRAY_SIZE(count); n++) {
1602 for (i = 0; i < count[n]; i++) {
1603 struct drm_i915_gem_pin pin;
1604 struct kgem_bo *bo;
1605
1606 VG_CLEAR(pin);
1607
1608 pin.handle = gem_create(kgem->fd, size[n]);
1609 if (pin.handle == 0)
1610 goto err;
1611
1612 DBG(("%s: new handle=%d, num_pages=%d\n",
1613 __FUNCTION__, pin.handle, size[n]));
1614
1615 bo = __kgem_bo_alloc(pin.handle, size[n]);
1616 if (bo == NULL) {
1617 gem_close(kgem->fd, pin.handle);
1618 goto err;
1619 }
1620
1621 pin.alignment = 0;
1622 ret = do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_PIN, &pin);
1623 if (ret) {
1624 gem_close(kgem->fd, pin.handle);
1625 free(bo);
1626 goto err;
1627 }
1628 bo->presumed_offset = pin.offset;
1629 debug_alloc__bo(kgem, bo);
1630 list_add(&bo->list, &kgem->pinned_batches[n]);
1631 }
1632 }
1633
1634 return true;
1635
1636 err:
1637 for (n = 0; n < ARRAY_SIZE(kgem->pinned_batches); n++) {
1638 while (!list_is_empty(&kgem->pinned_batches[n])) {
1639 kgem_bo_destroy(kgem,
1640 list_first_entry(&kgem->pinned_batches[n],
1641 struct kgem_bo, list));
1642 }
1643 }
1644
1645 /* If we fail to pin some memory for 830gm/845g, we need to disable
1646 * acceleration as otherwise the machine will eventually fail. However,
1647 * the kernel started arbitrarily rejecting PIN, so hope for the best
1648 * if the ioctl no longer works.
1649 */
1650 if (ret != -ENODEV && kgem->gen == 020)
1651 return false;
1652
1653 kgem->has_pinned_batches = false;
1654
1655 /* For simplicity populate the lists with a single unpinned bo */
1656 for (n = 0; n < ARRAY_SIZE(count); n++) {
1657 struct kgem_bo *bo;
1658 uint32_t handle;
1659
1660 handle = gem_create(kgem->fd, size[n]);
1661 if (handle == 0)
1662 return false;
1663
1664 bo = __kgem_bo_alloc(handle, size[n]);
1665 if (bo == NULL) {
1666 gem_close(kgem->fd, handle);
1667 return false;
1668 }
1669
1670 debug_alloc__bo(kgem, bo);
1671 list_add(&bo->list, &kgem->pinned_batches[n]);
1672 }
1673 return true;
1674 }
1675
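/* Create a throwaway X-tiled bo to discover the platform's bit-6 swizzling.
 * kgem->can_fence is only set when the swizzle mode is known, and the detected
 * swizzle also selects a matching memcpy_tiled_x for software (de)tiling.
 */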
1676 static void kgem_init_swizzling(struct kgem *kgem)
1677 {
1678 struct local_i915_gem_get_tiling_v2 {
1679 uint32_t handle;
1680 uint32_t tiling_mode;
1681 uint32_t swizzle_mode;
1682 uint32_t phys_swizzle_mode;
1683 } tiling;
1684 #define LOCAL_IOCTL_I915_GEM_GET_TILING DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_GET_TILING, struct local_i915_gem_get_tiling_v2)
1685
1686 memset(&tiling, 0, sizeof(tiling));
1687 tiling.handle = gem_create(kgem->fd, 1);
1688 if (!tiling.handle)
1689 return;
1690
1691 if (!gem_set_tiling(kgem->fd, tiling.handle, I915_TILING_X, 512))
1692 goto out;
1693
1694 if (do_ioctl(kgem->fd, LOCAL_IOCTL_I915_GEM_GET_TILING, &tiling))
1695 goto out;
1696
1697 DBG(("%s: swizzle_mode=%d, phys_swizzle_mode=%d\n",
1698 __FUNCTION__, tiling.swizzle_mode, tiling.phys_swizzle_mode));
1699
1700 kgem->can_fence =
1701 !DBG_NO_TILING &&
1702 tiling.swizzle_mode != I915_BIT_6_SWIZZLE_UNKNOWN;
1703
1704 if (kgem->gen < 050 && tiling.phys_swizzle_mode != tiling.swizzle_mode)
1705 goto out;
1706
1707 if (!DBG_NO_DETILING)
1708 choose_memcpy_tiled_x(kgem,
1709 tiling.swizzle_mode,
1710 __to_sna(kgem)->cpu_features);
1711 out:
1712 gem_close(kgem->fd, tiling.handle);
1713 DBG(("%s: can fence?=%d\n", __FUNCTION__, kgem->can_fence));
1714 }
1715
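/* Patch up self-relocations once the final batch bo (and so its presumed
 * offset) is known: fill in target_handle/presumed_offset for every reloc that
 * pointed back into the batch, write the address directly into the batch
 * dwords (two dwords on gen8+ for 64-bit addresses), and bias the deltas and
 * reloc offsets when the batch was shrunk.
 */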
1716 static void kgem_fixup_relocs(struct kgem *kgem, struct kgem_bo *bo, int shrink)
1717 {
1718 int n;
1719
1720 bo->target_handle = kgem->has_handle_lut ? kgem->nexec : bo->handle;
1721
1722 assert(kgem->nreloc__self <= 256);
1723 if (kgem->nreloc__self == 0)
1724 return;
1725
1726 DBG(("%s: fixing up %d%s self-relocations to handle=%p, presumed-offset=%llx\n",
1727 __FUNCTION__, kgem->nreloc__self,
1728 kgem->nreloc__self == 256 ? "+" : "",
1729 bo->handle, (long long)bo->presumed_offset));
1730 for (n = 0; n < kgem->nreloc__self; n++) {
1731 int i = kgem->reloc__self[n];
1732 uint64_t addr;
1733
1734 assert(kgem->reloc[i].target_handle == ~0U);
1735 kgem->reloc[i].target_handle = bo->target_handle;
1736 kgem->reloc[i].presumed_offset = bo->presumed_offset;
1737
1738 if (kgem->reloc[i].read_domains == I915_GEM_DOMAIN_INSTRUCTION) {
1739 DBG(("%s: moving base of self-reloc[%d:%d] %d -> %d\n",
1740 __FUNCTION__, n, i,
1741 kgem->reloc[i].delta,
1742 kgem->reloc[i].delta - shrink));
1743
1744 kgem->reloc[i].delta -= shrink;
1745 }
1746 addr = (int)kgem->reloc[i].delta + bo->presumed_offset;
1747 kgem->batch[kgem->reloc[i].offset/sizeof(uint32_t)] = addr;
1748 if (kgem->gen >= 0100)
1749 kgem->batch[kgem->reloc[i].offset/sizeof(uint32_t) + 1] = addr >> 32;
1750 }
1751
1752 if (n == 256) {
1753 for (n = kgem->reloc__self[255]; n < kgem->nreloc; n++) {
1754 if (kgem->reloc[n].target_handle == ~0U) {
1755 uint64_t addr;
1756
1757 kgem->reloc[n].target_handle = bo->target_handle;
1758 kgem->reloc[n].presumed_offset = bo->presumed_offset;
1759
1760 if (kgem->reloc[n].read_domains == I915_GEM_DOMAIN_INSTRUCTION) {
1761 DBG(("%s: moving base of reloc[%d] %d -> %d\n",
1762 __FUNCTION__, n,
1763 kgem->reloc[n].delta,
1764 kgem->reloc[n].delta - shrink));
1765 kgem->reloc[n].delta -= shrink;
1766 }
1767
1768 addr = (int)kgem->reloc[n].delta + bo->presumed_offset;
1769 kgem->batch[kgem->reloc[n].offset/sizeof(uint32_t)] = addr;
1770 if (kgem->gen >= 0100)
1771 kgem->batch[kgem->reloc[n].offset/sizeof(uint32_t) + 1] = addr >> 32;
1772 }
1773 }
1774 }
1775
1776 if (shrink) {
1777 DBG(("%s: shrinking by %d\n", __FUNCTION__, shrink));
1778 for (n = 0; n < kgem->nreloc; n++) {
1779 if (kgem->reloc[n].offset >= sizeof(uint32_t)*kgem->nbatch)
1780 kgem->reloc[n].offset -= shrink;
1781 }
1782 }
1783 }
1784
1785 static int kgem_bo_wait(struct kgem *kgem, struct kgem_bo *bo)
1786 {
1787 struct local_i915_gem_wait {
1788 uint32_t handle;
1789 uint32_t flags;
1790 int64_t timeout;
1791 } wait;
1792 #define LOCAL_I915_GEM_WAIT 0x2c
1793 #define LOCAL_IOCTL_I915_GEM_WAIT DRM_IOWR(DRM_COMMAND_BASE + LOCAL_I915_GEM_WAIT, struct local_i915_gem_wait)
1794 int ret;
1795
1796 DBG(("%s: waiting for handle=%d\n", __FUNCTION__, bo->handle));
1797 if (bo->rq == NULL)
1798 return 0;
1799
1800 VG_CLEAR(wait);
1801 wait.handle = bo->handle;
1802 wait.flags = 0;
1803 wait.timeout = -1;
1804 ret = do_ioctl(kgem->fd, LOCAL_IOCTL_I915_GEM_WAIT, &wait);
1805 if (ret) {
1806 struct drm_i915_gem_set_domain set_domain;
1807
1808 VG_CLEAR(set_domain);
1809 set_domain.handle = bo->handle;
1810 set_domain.read_domains = I915_GEM_DOMAIN_GTT;
1811 set_domain.write_domain = I915_GEM_DOMAIN_GTT;
1812 ret = do_ioctl(kgem->fd,
1813 DRM_IOCTL_I915_GEM_SET_DOMAIN,
1814 &set_domain);
1815 }
1816
1817 if (ret == 0)
1818 __kgem_retire_requests_upto(kgem, bo);
1819
1820 return ret;
1821 }
1822
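/* Called around batch submission: fix up the outgoing batch's self-relocations,
 * then allocate and CPU-map a fresh batch bo. On failure, wait for the oldest
 * request on the current ring, then drop CREATE_NO_THROTTLE and clean the
 * caches, and as a last resort fall back to a malloc'ed batch (wedging the
 * driver if even that fails). Returns the previous batch bo, if any.
 */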
1823 static struct kgem_bo *kgem_new_batch(struct kgem *kgem)
1824 {
1825 struct kgem_bo *last;
1826 unsigned flags;
1827
1828 last = kgem->batch_bo;
1829 if (last) {
1830 kgem_fixup_relocs(kgem, last, 0);
1831 kgem->batch = NULL;
1832 }
1833
1834 if (kgem->batch) {
1835 assert(last == NULL);
1836 return NULL;
1837 }
1838
1839 flags = CREATE_CPU_MAP | CREATE_NO_THROTTLE;
1840 if (!kgem->has_llc)
1841 flags |= CREATE_UNCACHED;
1842
1843 restart:
1844 kgem->batch_bo = kgem_create_linear(kgem,
1845 sizeof(uint32_t)*kgem->batch_size,
1846 flags);
1847 if (kgem->batch_bo)
1848 kgem->batch = kgem_bo_map__cpu(kgem, kgem->batch_bo);
1849 if (kgem->batch == NULL) {
1850 int ring = kgem->ring == KGEM_BLT;
1851 assert(ring < ARRAY_SIZE(kgem->requests));
1852
1853 if (kgem->batch_bo) {
1854 kgem_bo_destroy(kgem, kgem->batch_bo);
1855 kgem->batch_bo = NULL;
1856 }
1857
1858 if (!list_is_empty(&kgem->requests[ring])) {
1859 struct kgem_request *rq;
1860
1861 rq = list_first_entry(&kgem->requests[ring],
1862 struct kgem_request, list);
1863 assert(rq->ring == ring);
1864 assert(rq->bo);
1865 assert(RQ(rq->bo->rq) == rq);
1866 if (kgem_bo_wait(kgem, rq->bo) == 0)
1867 goto restart;
1868 }
1869
1870 if (flags & CREATE_NO_THROTTLE) {
1871 flags &= ~CREATE_NO_THROTTLE;
1872 if (kgem_cleanup_cache(kgem))
1873 goto restart;
1874 }
1875
1876 DBG(("%s: unable to map batch bo, mallocing(size=%d)\n",
1877 __FUNCTION__, sizeof(uint32_t)*kgem->batch_size));
1878 if (posix_memalign((void **)&kgem->batch, PAGE_SIZE,
1879 ALIGN(sizeof(uint32_t) * kgem->batch_size, PAGE_SIZE))) {
1880 ERR(("%s: batch allocation failed, disabling acceleration\n", __FUNCTION__));
1881 __kgem_set_wedged(kgem);
1882 }
1883 } else {
1884 DBG(("%s: allocated and mapped batch handle=%d [size=%d]\n",
1885 __FUNCTION__, kgem->batch_bo->handle,
1886 sizeof(uint32_t)*kgem->batch_size));
1887 kgem_bo_sync__cpu(kgem, kgem->batch_bo);
1888 }
1889
1890 DBG(("%s: using last batch handle=%d\n",
1891 __FUNCTION__, last ? last->handle : 0));
1892 return last;
1893 }
1894
1895 static void
1896 no_retire(struct kgem *kgem)
1897 {
1898 (void)kgem;
1899 }
1900
1901 static void
1902 no_expire(struct kgem *kgem)
1903 {
1904 (void)kgem;
1905 }
1906
1907 static void
1908 no_context_switch(struct kgem *kgem, int new_mode)
1909 {
1910 (void)kgem;
1911 (void)new_mode;
1912 }
1913
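/* Query the usable GTT size: prefer the per-context GTT_SIZE parameter, fall
 * back to GET_APERTURE, then to a 64MiB guess, and clamp the result to 3GiB
 * so later arithmetic stays within 32 bits.
 */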
1914 static uint64_t get_gtt_size(int fd)
1915 {
1916 struct drm_i915_gem_get_aperture aperture;
1917 struct local_i915_gem_context_param {
1918 uint32_t context;
1919 uint32_t size;
1920 uint64_t param;
1921 #define LOCAL_CONTEXT_PARAM_BAN_PERIOD 0x1
1922 #define LOCAL_CONTEXT_PARAM_NO_ZEROMAP 0x2
1923 #define LOCAL_CONTEXT_PARAM_GTT_SIZE 0x3
1924 uint64_t value;
1925 } p;
1926 #define LOCAL_I915_GEM_CONTEXT_GETPARAM 0x34
1927 #define LOCAL_IOCTL_I915_GEM_CONTEXT_GETPARAM DRM_IOWR (DRM_COMMAND_BASE + LOCAL_I915_GEM_CONTEXT_GETPARAM, struct local_i915_gem_context_param)
1928
1929 memset(&aperture, 0, sizeof(aperture));
1930
1931 memset(&p, 0, sizeof(p));
1932 p.param = LOCAL_CONTEXT_PARAM_GTT_SIZE;
1933 if (drmIoctl(fd, LOCAL_IOCTL_I915_GEM_CONTEXT_GETPARAM, &p) == 0)
1934 aperture.aper_size = p.value;
1935 if (aperture.aper_size == 0)
1936 (void)drmIoctl(fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture);
1937 if (aperture.aper_size == 0)
1938 aperture.aper_size = 64*1024*1024;
1939
1940 DBG(("%s: aperture size %lld, available now %lld\n",
1941 __FUNCTION__,
1942 (long long)aperture.aper_size,
1943 (long long)aperture.aper_available_size));
1944
1945 /* clamp aperture to uint32_t for simplicity */
1946 if (aperture.aper_size > 0xc0000000)
1947 aperture.aper_size = 0xc0000000;
1948
1949 return aperture.aper_size;
1950 }
1951
1952 static int get_gtt_type(int fd)
1953 {
1954 struct drm_i915_getparam p;
1955 int val = 0;
1956
1957 memset(&p, 0, sizeof(p));
1958 p.param = I915_PARAM_HAS_ALIASING_PPGTT;
1959 p.value = &val;
1960
1961 drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &p);
1962 return val;
1963 }
1964
1965 void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, unsigned gen)
1966 {
1967 size_t totalram;
1968 unsigned half_gpu_max;
1969 unsigned int i, j;
1970 uint64_t gtt_size;
1971
1972 DBG(("%s: fd=%d, gen=%d\n", __FUNCTION__, fd, gen));
1973
1974 kgem->fd = fd;
1975 kgem->gen = gen;
1976
1977 kgem->retire = no_retire;
1978 kgem->expire = no_expire;
1979 kgem->context_switch = no_context_switch;
1980
1981 list_init(&kgem->requests[0]);
1982 list_init(&kgem->requests[1]);
1983 list_init(&kgem->batch_buffers);
1984 list_init(&kgem->active_buffers);
1985 list_init(&kgem->flushing);
1986 list_init(&kgem->large);
1987 list_init(&kgem->large_inactive);
1988 list_init(&kgem->snoop);
1989 list_init(&kgem->scanout);
1990 for (i = 0; i < ARRAY_SIZE(kgem->pinned_batches); i++)
1991 list_init(&kgem->pinned_batches[i]);
1992 for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++)
1993 list_init(&kgem->inactive[i]);
1994 for (i = 0; i < ARRAY_SIZE(kgem->active); i++) {
1995 for (j = 0; j < ARRAY_SIZE(kgem->active[i]); j++)
1996 list_init(&kgem->active[i][j]);
1997 }
1998 for (i = 0; i < ARRAY_SIZE(kgem->vma); i++) {
1999 for (j = 0; j < ARRAY_SIZE(kgem->vma[i].inactive); j++)
2000 list_init(&kgem->vma[i].inactive[j]);
2001 }
2002 kgem->vma[MAP_GTT].count = -MAX_GTT_VMA_CACHE;
2003 kgem->vma[MAP_CPU].count = -MAX_CPU_VMA_CACHE;
2004
2005 kgem->has_blt = gem_param(kgem, LOCAL_I915_PARAM_HAS_BLT) > 0;
2006 DBG(("%s: has BLT ring? %d\n", __FUNCTION__,
2007 kgem->has_blt));
2008
2009 kgem->has_relaxed_delta =
2010 gem_param(kgem, LOCAL_I915_PARAM_HAS_RELAXED_DELTA) > 0;
2011 DBG(("%s: has relaxed delta? %d\n", __FUNCTION__,
2012 kgem->has_relaxed_delta));
2013
2014 kgem->has_relaxed_fencing = test_has_relaxed_fencing(kgem);
2015 DBG(("%s: has relaxed fencing? %d\n", __FUNCTION__,
2016 kgem->has_relaxed_fencing));
2017
2018 kgem->has_coherent_mmap_gtt = test_has_coherent_mmap_gtt(kgem);
2019 DBG(("%s: has coherent writes into GTT maps? %d\n", __FUNCTION__,
2020 kgem->has_coherent_mmap_gtt));
2021
2022 kgem->has_llc = test_has_llc(kgem);
2023 DBG(("%s: has shared last-level-cache? %d\n", __FUNCTION__,
2024 kgem->has_llc));
2025
2026 kgem->has_wt = test_has_wt(kgem);
2027 DBG(("%s: has write-through caching for scanouts? %d\n", __FUNCTION__,
2028 kgem->has_wt));
2029
2030 kgem->has_wc_mmap = test_has_wc_mmap(kgem);
2031 DBG(("%s: has wc-mmapping? %d\n", __FUNCTION__,
2032 kgem->has_wc_mmap));
2033
2034 kgem->has_caching = test_has_caching(kgem);
2035 DBG(("%s: has set-cache-level? %d\n", __FUNCTION__,
2036 kgem->has_caching));
2037
2038 kgem->has_userptr = test_has_userptr(kgem);
2039 DBG(("%s: has userptr? %d\n", __FUNCTION__,
2040 kgem->has_userptr));
2041
2042 kgem->has_create2 = test_has_create2(kgem);
2043 DBG(("%s: has create2? %d\n", __FUNCTION__,
2044 kgem->has_create2));
2045
2046 kgem->has_no_reloc = test_has_no_reloc(kgem);
2047 DBG(("%s: has no-reloc? %d\n", __FUNCTION__,
2048 kgem->has_no_reloc));
2049
2050 kgem->has_handle_lut = test_has_handle_lut(kgem);
2051 DBG(("%s: has handle-lut? %d\n", __FUNCTION__,
2052 kgem->has_handle_lut));
2053
2054 kgem->has_semaphores = false;
2055 if (kgem->has_blt && test_has_semaphores_enabled(kgem))
2056 kgem->has_semaphores = true;
2057 DBG(("%s: semaphores enabled? %d\n", __FUNCTION__,
2058 kgem->has_semaphores));
2059
2060 kgem->can_blt_cpu = gen >= 030;
2061 DBG(("%s: can blt to cpu? %d\n", __FUNCTION__,
2062 kgem->can_blt_cpu));
2063
2064 kgem->can_blt_y = test_can_blt_y(kgem);
2065 DBG(("%s: can blit to Y-tiled surfaces? %d\n", __FUNCTION__,
2066 kgem->can_blt_y));
2067
2068 kgem->can_render_y = gen != 021 && (gen >> 3) != 4;
2069 DBG(("%s: can render to Y-tiled surfaces? %d\n", __FUNCTION__,
2070 kgem->can_render_y));
2071
2072 kgem->can_scanout_y = test_can_scanout_y(kgem);
2073 DBG(("%s: can scanout Y-tiled surfaces? %d\n", __FUNCTION__,
2074 kgem->can_scanout_y));
2075
2076 kgem->has_dirtyfb = test_has_dirtyfb(kgem);
2077 DBG(("%s: has dirty fb? %d\n", __FUNCTION__, kgem->has_dirtyfb));
2078
2079 kgem->has_secure_batches = test_has_secure_batches(kgem);
2080 DBG(("%s: can use privileged batchbuffers? %d\n", __FUNCTION__,
2081 kgem->has_secure_batches));
2082
2083 kgem->has_pinned_batches = test_has_pinned_batches(kgem);
2084 DBG(("%s: can use pinned batchbuffers (to avoid CS w/a)? %d\n", __FUNCTION__,
2085 kgem->has_pinned_batches));
2086
2087 if (!is_hw_supported(kgem, dev)) {
2088 xf86DrvMsg(kgem_get_screen_index(kgem), X_WARNING,
2089 "Detected unsupported/dysfunctional hardware, disabling acceleration.\n");
2090 __kgem_set_wedged(kgem);
2091 } else if (__kgem_throttle(kgem, false)) {
2092 xf86DrvMsg(kgem_get_screen_index(kgem), X_WARNING,
2093 "Detected a hung GPU, disabling acceleration.\n");
2094 __kgem_set_wedged(kgem);
2095 }
2096
2097 kgem->batch_size = UINT16_MAX & ~7;
2098 if (gen == 020 && !kgem->has_pinned_batches)
2099 /* Limited to what we can pin */
2100 kgem->batch_size = 4*1024;
2101 if (gen == 022)
2102 /* 865g cannot handle a batch spanning multiple pages */
2103 kgem->batch_size = PAGE_SIZE / sizeof(uint32_t);
2104 if (gen >= 070)
2105 kgem->batch_size = 16*1024;
2106 if (!kgem->has_relaxed_delta && kgem->batch_size > 4*1024)
2107 kgem->batch_size = 4*1024;
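/* To make the batch-size choice above concrete: the default is ~64K dwords
 * (about 256KiB); gen2 without pinned batches drops to 4096 dwords (16KiB),
 * 865g to a single page (1024 dwords), gen7+ uses 16384 dwords (64KiB), and
 * without relaxed-delta relocations we cap at 4096 dwords. These are the
 * values chosen here, not hardware maxima.
 */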
2108
2109 if (!kgem_init_pinned_batches(kgem)) {
2110 xf86DrvMsg(kgem_get_screen_index(kgem), X_WARNING,
2111 "Unable to reserve memory for GPU, disabling acceleration.\n");
2112 __kgem_set_wedged(kgem);
2113 }
2114
2115 DBG(("%s: maximum batch size? %d\n", __FUNCTION__,
2116 kgem->batch_size));
2117 kgem_new_batch(kgem);
2118
2119 kgem->half_cpu_cache_pages = cpu_cache_size() >> 13;
2120 DBG(("%s: last-level cache size: %d bytes, threshold in pages: %d\n",
2121 __FUNCTION__, cpu_cache_size(), kgem->half_cpu_cache_pages));
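/* half_cpu_cache_pages is half the last-level cache expressed in 4KiB pages
 * (size/2/4096 == size >> 13); e.g. a 3MiB cache gives 384 pages (~1.5MiB).
 * It is used below to cap buffer_size and similar streaming thresholds.
 */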
2122
2123 kgem->next_request = __kgem_request_alloc(kgem);
2124
2125 DBG(("%s: cpu bo enabled %d: llc? %d, set-cache-level? %d, userptr? %d\n", __FUNCTION__,
2126 !DBG_NO_CPU && (kgem->has_llc | kgem->has_userptr | kgem->has_caching),
2127 kgem->has_llc, kgem->has_caching, kgem->has_userptr));
2128
2129 kgem->has_full_ppgtt = get_gtt_type(fd) > 1;
2130
2131 gtt_size = get_gtt_size(fd);
2132 kgem->aperture_total = gtt_size;
2133 kgem->aperture_high = gtt_size * 3/4;
2134 kgem->aperture_low = gtt_size * 1/3;
2135 if (gen < 033) {
2136 /* Severe alignment penalties */
2137 kgem->aperture_high /= 2;
2138 kgem->aperture_low /= 2;
2139 }
2140 DBG(("%s: aperture low=%u [%u], high=%u [%u]\n", __FUNCTION__,
2141 kgem->aperture_low, kgem->aperture_low / (1024*1024),
2142 kgem->aperture_high, kgem->aperture_high / (1024*1024)));
2143
2144 kgem->aperture_mappable = 256 * 1024 * 1024;
2145 if (dev != NULL)
2146 kgem->aperture_mappable = agp_aperture_size(dev, gen);
2147 if (kgem->aperture_mappable == 0 || kgem->aperture_mappable > gtt_size)
2148 kgem->aperture_mappable = gtt_size;
2149 DBG(("%s: aperture mappable=%d [%d MiB]\n", __FUNCTION__,
2150 kgem->aperture_mappable, kgem->aperture_mappable / (1024*1024)));
2151
2152 kgem->aperture_fenceable = MIN(256*1024*1024, kgem->aperture_mappable);
2153 DBG(("%s: aperture fenceable=%d [%d MiB]\n", __FUNCTION__,
2154 kgem->aperture_fenceable, kgem->aperture_fenceable / (1024*1024)));
2155
2156 kgem->buffer_size = 64 * 1024;
2157 while (kgem->buffer_size < kgem->aperture_mappable >> 10)
2158 kgem->buffer_size *= 2;
2159 if (kgem->buffer_size >> 12 > kgem->half_cpu_cache_pages)
2160 kgem->buffer_size = kgem->half_cpu_cache_pages << 12;
2161 kgem->buffer_size = 1 << __fls(kgem->buffer_size);
2162 DBG(("%s: buffer size=%d [%d KiB]\n", __FUNCTION__,
2163 kgem->buffer_size, kgem->buffer_size / 1024));
2164 assert(kgem->buffer_size);
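/* Worked example of the buffer_size heuristic above, assuming a 256MiB
 * mappable aperture and a 3MiB LLC: start at 64KiB and double while below
 * aperture/1024 (256KiB), giving 256KiB; the half-LLC cap (~1.5MiB) does not
 * bite, and rounding down to a power of two leaves 256KiB per upload buffer.
 */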
2165
2166 kgem->max_object_size = 3 * (kgem->aperture_high >> 12) << 10;
2167 kgem->max_gpu_size = kgem->max_object_size;
2168 if (!kgem->has_llc && kgem->max_gpu_size > MAX_CACHE_SIZE)
2169 kgem->max_gpu_size = MAX_CACHE_SIZE;
2170
2171 totalram = total_ram_size();
2172 if (totalram == 0) {
2173 DBG(("%s: total ram size unknown, assuming maximum of total aperture\n",
2174 __FUNCTION__));
2175 totalram = kgem->aperture_total;
2176 }
2177 DBG(("%s: total ram=%lld\n", __FUNCTION__, (long long)totalram));
2178 if (kgem->max_object_size > totalram / 2)
2179 kgem->max_object_size = totalram / 2;
2180 if (kgem->max_gpu_size > totalram / 4)
2181 kgem->max_gpu_size = totalram / 4;
2182
2183 if (kgem->aperture_high > totalram / 2) {
2184 kgem->aperture_high = totalram / 2;
2185 kgem->aperture_low = kgem->aperture_high / 4;
2186 DBG(("%s: reduced aperture watermarks to fit into ram; low=%d [%d], high=%d [%d]\n", __FUNCTION__,
2187 kgem->aperture_low, kgem->aperture_low / (1024*1024),
2188 kgem->aperture_high, kgem->aperture_high / (1024*1024)));
2189 }
2190
2191 kgem->max_cpu_size = kgem->max_object_size;
2192
2193 half_gpu_max = kgem->max_gpu_size / 2;
2194 kgem->max_copy_tile_size = (MAX_CACHE_SIZE + 1)/2;
2195 if (kgem->max_copy_tile_size > half_gpu_max)
2196 kgem->max_copy_tile_size = half_gpu_max;
2197
2198 if (kgem->has_llc)
2199 kgem->max_upload_tile_size = kgem->max_copy_tile_size;
2200 else
2201 kgem->max_upload_tile_size = kgem->aperture_fenceable / 4;
2202 if (kgem->max_upload_tile_size > half_gpu_max)
2203 kgem->max_upload_tile_size = half_gpu_max;
2204 if (kgem->max_upload_tile_size > kgem->aperture_high/2)
2205 kgem->max_upload_tile_size = kgem->aperture_high/2;
2206 if (kgem->max_upload_tile_size > kgem->aperture_low)
2207 kgem->max_upload_tile_size = kgem->aperture_low;
2208 if (kgem->max_upload_tile_size < 16*PAGE_SIZE)
2209 kgem->max_upload_tile_size = 16*PAGE_SIZE;
2210
2211 kgem->large_object_size = MAX_CACHE_SIZE;
2212 if (kgem->large_object_size > half_gpu_max)
2213 kgem->large_object_size = half_gpu_max;
2214 if (kgem->max_copy_tile_size > kgem->aperture_high/2)
2215 kgem->max_copy_tile_size = kgem->aperture_high/2;
2216 if (kgem->max_copy_tile_size > kgem->aperture_low)
2217 kgem->max_copy_tile_size = kgem->aperture_low;
2218 if (kgem->max_copy_tile_size < 16*PAGE_SIZE)
2219 kgem->max_copy_tile_size = 16*PAGE_SIZE;
2220
2221 if (kgem->has_llc | kgem->has_caching | kgem->has_userptr) {
2222 if (kgem->large_object_size > kgem->max_cpu_size)
2223 kgem->large_object_size = kgem->max_cpu_size;
2224 } else
2225 kgem->max_cpu_size = 0;
2226 if (DBG_NO_CPU)
2227 kgem->max_cpu_size = 0;
2228
2229 DBG(("%s: maximum object size=%u\n",
2230 __FUNCTION__, kgem->max_object_size));
2231 DBG(("%s: large object threshold=%u\n",
2232 __FUNCTION__, kgem->large_object_size));
2233 DBG(("%s: max object sizes (gpu=%u, cpu=%u, tile upload=%u, copy=%u)\n",
2234 __FUNCTION__,
2235 kgem->max_gpu_size, kgem->max_cpu_size,
2236 kgem->max_upload_tile_size, kgem->max_copy_tile_size));
2237
2238 /* Convert the aperture thresholds to pages */
2239 kgem->aperture_mappable /= PAGE_SIZE;
2240 kgem->aperture_fenceable /= PAGE_SIZE;
2241 kgem->aperture_low /= PAGE_SIZE;
2242 kgem->aperture_high /= PAGE_SIZE;
2243 kgem->aperture_total /= PAGE_SIZE;
2244
2245 kgem->fence_max = gem_param(kgem, I915_PARAM_NUM_FENCES_AVAIL) - 2;
2246 if ((int)kgem->fence_max < 0)
2247 kgem->fence_max = 5; /* minimum safe value for all hw */
2248 DBG(("%s: max fences=%d\n", __FUNCTION__, kgem->fence_max));
2249
2250 kgem->batch_flags_base = 0;
2251 if (kgem->has_no_reloc)
2252 kgem->batch_flags_base |= LOCAL_I915_EXEC_NO_RELOC;
2253 if (kgem->has_handle_lut)
2254 kgem->batch_flags_base |= LOCAL_I915_EXEC_HANDLE_LUT;
2255 if (kgem->has_pinned_batches)
2256 kgem->batch_flags_base |= LOCAL_I915_EXEC_IS_PINNED;
2257
2258 kgem_init_swizzling(kgem);
2259 }
2260
2261 /* XXX hopefully a good approximation */
2262 static uint32_t kgem_get_unique_id(struct kgem *kgem)
2263 {
2264 uint32_t id;
2265 id = ++kgem->unique_id;
2266 if (id == 0)
2267 id = ++kgem->unique_id;
2268 return id;
2269 }
2270
2271 inline static uint32_t kgem_pitch_alignment(struct kgem *kgem, unsigned flags)
2272 {
2273 if (flags & CREATE_PRIME)
2274 return 256;
2275 if (flags & CREATE_SCANOUT)
2276 return 64;
2277 if (kgem->gen >= 0100)
2278 return 32;
2279 return 8;
2280 }
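/* Rough illustration of the pitch alignments above as applied by
 * kgem_surface_size(): a 1366-pixel XRGB row (5464 bytes) is padded to a
 * 64-byte multiple (5504) for a scanout, but to a 256-byte multiple (5632)
 * when shared via PRIME; the 8-byte default presumably just keeps rows
 * qword aligned.
 */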
2281
2282 void kgem_get_tile_size(struct kgem *kgem, int tiling, int pitch,
2283 int *tile_width, int *tile_height, int *tile_size)
2284 {
2285 if (kgem->gen <= 030) {
2286 if (tiling) {
2287 if (kgem->gen < 030) {
2288 *tile_width = 128;
2289 *tile_height = 16;
2290 *tile_size = 2048;
2291 } else {
2292 *tile_width = 512;
2293 *tile_height = 8;
2294 *tile_size = 4096;
2295 }
2296 } else {
2297 *tile_width = 1;
2298 *tile_height = 1;
2299 *tile_size = 1;
2300 }
2301 } else switch (tiling) {
2302 default:
2303 case I915_TILING_NONE:
2304 *tile_width = 1;
2305 *tile_height = 1;
2306 *tile_size = 1;
2307 break;
2308 case I915_TILING_X:
2309 *tile_width = 512;
2310 *tile_height = 8;
2311 *tile_size = 4096;
2312 break;
2313 case I915_TILING_Y:
2314 *tile_width = 128;
2315 *tile_height = 32;
2316 *tile_size = 4096;
2317 break;
2318 }
2319
2320 /* Force offset alignment to tile-row */
2321 if (tiling && kgem->gen < 033)
2322 *tile_width = pitch;
2323 }
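/* Summary of the tile geometries reported above (bytes per row x rows, with
 * the fence granularity as the tile size): gen2 tiled 128x16 (2KiB),
 * gen3 tiled 512x8 (4KiB), gen4+ X 512x8 (4KiB), gen4+ Y 128x32 (4KiB).
 * Older parts (gen < 033 here) additionally force the offset alignment to a
 * full tile row by reporting the pitch itself as the tile width.
 */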
2324
2325 static uint32_t kgem_surface_size(struct kgem *kgem,
2326 bool relaxed_fencing,
2327 unsigned flags,
2328 uint32_t width,
2329 uint32_t height,
2330 uint32_t bpp,
2331 uint32_t tiling,
2332 uint32_t *pitch)
2333 {
2334 uint32_t tile_width, tile_height;
2335 uint32_t size;
2336
2337 assert(width <= MAXSHORT);
2338 assert(height <= MAXSHORT);
2339 assert(bpp >= 8);
2340
2341 if (kgem->gen <= 030) {
2342 if (tiling) {
2343 if (kgem->gen < 030) {
2344 tile_width = 128;
2345 tile_height = 16;
2346 } else {
2347 tile_width = 512;
2348 tile_height = 8;
2349 }
2350 } else {
2351 tile_width = 2 * bpp >> 3;
2352 tile_width = ALIGN(tile_width,
2353 kgem_pitch_alignment(kgem, flags));
2354 tile_height = 1;
2355 }
2356 } else switch (tiling) {
2357 default:
2358 case I915_TILING_NONE:
2359 tile_width = 2 * bpp >> 3;
2360 tile_width = ALIGN(tile_width,
2361 kgem_pitch_alignment(kgem, flags));
2362 tile_height = 1;
2363 break;
2364
2365 case I915_TILING_X:
2366 tile_width = 512;
2367 tile_height = 8;
2368 break;
2369 case I915_TILING_Y:
2370 tile_width = 128;
2371 tile_height = 32;
2372 break;
2373 }
2374 /* XXX align to an even tile row */
2375 if (!kgem->has_relaxed_fencing)
2376 tile_height *= 2;
2377
2378 *pitch = ALIGN(width * bpp / 8, tile_width);
2379 height = ALIGN(height, tile_height);
2380 DBG(("%s: tile_width=%d, tile_height=%d => aligned pitch=%d, height=%d\n",
2381 __FUNCTION__, tile_width, tile_height, *pitch, height));
2382
2383 if (kgem->gen >= 040)
2384 return PAGE_ALIGN(*pitch * height);
2385
2386 /* If it is too wide for the blitter, don't even bother. */
2387 if (tiling != I915_TILING_NONE) {
2388 if (*pitch > 8192) {
2389 DBG(("%s: too wide for tiled surface (pitch=%d, limit=%d)\n",
2390 __FUNCTION__, *pitch, 8192));
2391 return 0;
2392 }
2393
2394 for (size = tile_width; size < *pitch; size <<= 1)
2395 ;
2396 *pitch = size;
2397 } else {
2398 if (*pitch >= 32768) {
2399 DBG(("%s: too wide for linear surface (pitch=%d, limit=%d)\n",
2400 __FUNCTION__, *pitch, 32767));
2401 return 0;
2402 }
2403 }
2404
2405 size = *pitch * height;
2406 if (relaxed_fencing || tiling == I915_TILING_NONE)
2407 return PAGE_ALIGN(size);
2408
2409 /* We need to allocate a pot fence region for a tiled buffer. */
2410 if (kgem->gen < 030)
2411 tile_width = 512 * 1024;
2412 else
2413 tile_width = 1024 * 1024;
2414 while (tile_width < size)
2415 tile_width *= 2;
2416 return tile_width;
2417 }
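/* Example of the sizing above, assuming relaxed fencing (so tile_height is
 * not doubled): a 1920x1080 XRGB surface, X-tiled on gen6, gets
 * pitch = ALIGN(1920*4, 512) = 7680 and height = ALIGN(1080, 8) = 1080,
 * i.e. PAGE_ALIGN(7680*1080) = 8294400 bytes (2025 pages). On pre-gen4 a
 * tiled pitch is additionally rounded up to a power of two and, without
 * relaxed fencing, the object must fill a power-of-two fence region.
 */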
2418
2419 bool kgem_check_surface_size(struct kgem *kgem,
2420 uint32_t width,
2421 uint32_t height,
2422 uint32_t bpp,
2423 uint32_t tiling,
2424 uint32_t pitch,
2425 uint32_t size)
2426 {
2427 uint32_t min_size, min_pitch;
2428 int tile_width, tile_height, tile_size;
2429
2430 DBG(("%s(width=%d, height=%d, bpp=%d, tiling=%d, pitch=%d, size=%d)\n",
2431 __FUNCTION__, width, height, bpp, tiling, pitch, size));
2432
2433 if (pitch & 3)
2434 return false;
2435
2436 min_size = kgem_surface_size(kgem, kgem->has_relaxed_fencing, 0,
2437 width, height, bpp, tiling,
2438 &min_pitch);
2439
2440 DBG(("%s: min_pitch=%d, min_size=%d\n", __FUNCTION__, min_pitch, min_size));
2441
2442 if (size < min_size)
2443 return false;
2444
2445 if (pitch < min_pitch)
2446 return false;
2447
2448 kgem_get_tile_size(kgem, tiling, min_pitch,
2449 &tile_width, &tile_height, &tile_size);
2450
2451 DBG(("%s: tile_width=%d, tile_size=%d\n", __FUNCTION__, tile_width, tile_size));
2452 if (pitch & (tile_width - 1))
2453 return false;
2454 if (size & (tile_size - 1))
2455 return false;
2456
2457 return true;
2458 }
2459
2460 static uint32_t kgem_aligned_height(struct kgem *kgem,
2461 uint32_t height, uint32_t tiling)
2462 {
2463 uint32_t tile_height;
2464
2465 if (kgem->gen <= 030) {
2466 tile_height = tiling ? kgem->gen < 030 ? 16 : 8 : 1;
2467 } else switch (tiling) {
2468 /* XXX align to an even tile row */
2469 default:
2470 case I915_TILING_NONE:
2471 tile_height = 1;
2472 break;
2473 case I915_TILING_X:
2474 tile_height = 8;
2475 break;
2476 case I915_TILING_Y:
2477 tile_height = 32;
2478 break;
2479 }
2480
2481 /* XXX align to an even tile row */
2482 if (!kgem->has_relaxed_fencing)
2483 tile_height *= 2;
2484
2485 return ALIGN(height, tile_height);
2486 }
2487
2488 static struct drm_i915_gem_exec_object2 *
2489 kgem_add_handle(struct kgem *kgem, struct kgem_bo *bo)
2490 {
2491 struct drm_i915_gem_exec_object2 *exec;
2492
2493 DBG(("%s: handle=%d, index=%d\n",
2494 __FUNCTION__, bo->handle, kgem->nexec));
2495
2496 assert(kgem->nexec < ARRAY_SIZE(kgem->exec));
2497 bo->target_handle = kgem->has_handle_lut ? kgem->nexec : bo->handle;
2498 exec = memset(&kgem->exec[kgem->nexec++], 0, sizeof(*exec));
2499 exec->handle = bo->handle;
2500 exec->offset = bo->presumed_offset;
2501
2502 kgem->aperture += num_pages(bo);
2503
2504 return exec;
2505 }
2506
2507 static void kgem_add_bo(struct kgem *kgem, struct kgem_bo *bo)
2508 {
2509 assert(bo->refcnt);
2510 assert(bo->proxy == NULL);
2511
2512 bo->exec = kgem_add_handle(kgem, bo);
2513 bo->rq = MAKE_REQUEST(kgem->next_request, kgem->ring);
2514
2515 list_move_tail(&bo->request, &kgem->next_request->buffers);
2516 if (bo->io && !list_is_empty(&bo->list))
2517 list_move(&bo->list, &kgem->batch_buffers);
2518
2519 /* XXX is it worth working around gcc here? */
2520 kgem->flush |= bo->flush;
2521 }
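/* Adding a bo to the batch records an exec object (addressed either by its
 * real handle or, with HANDLE_LUT, by its index in the exec array), charges
 * its pages against the aperture estimate, and ties the bo to the request
 * that will carry this batch so that retirement can find it again later.
 */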
2522
2523 static void kgem_clear_swctrl(struct kgem *kgem)
2524 {
2525 uint32_t *b;
2526
2527 if (kgem->bcs_state == 0)
2528 return;
2529
2530 DBG(("%s: clearing SWCTRL LRI from %x\n",
2531 __FUNCTION__, kgem->bcs_state));
2532
2533 b = kgem->batch + kgem->nbatch;
2534 kgem->nbatch += 7;
2535
2536 *b++ = MI_FLUSH_DW;
2537 *b++ = 0;
2538 *b++ = 0;
2539 *b++ = 0;
2540
2541 *b++ = MI_LOAD_REGISTER_IMM;
2542 *b++ = BCS_SWCTRL;
2543 *b++ = (BCS_SRC_Y | BCS_DST_Y) << 16;
2544
2545 kgem->bcs_state = 0;
2546 }
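/* BCS_SWCTRL appears to be a masked register: the high 16 bits of the LRI
 * payload select which bits to update, so writing (BCS_SRC_Y|BCS_DST_Y)<<16
 * with the low bits clear should switch both source and destination tiling
 * modes back to their defaults, and the MI_FLUSH_DW beforehand presumably
 * drains outstanding blits before the mode change takes effect.
 */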
2547
2548 static uint32_t kgem_end_batch(struct kgem *kgem)
2549 {
2550 kgem_clear_swctrl(kgem);
2551 kgem->batch[kgem->nbatch++] = MI_BATCH_BUFFER_END;
2552 if (kgem->nbatch & 1)
2553 kgem->batch[kgem->nbatch++] = MI_NOOP;
2554
2555 return kgem->nbatch;
2556 }
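/* The ring expects the batch to be qword aligned, hence the MI_NOOP pad
 * whenever appending MI_BATCH_BUFFER_END leaves an odd dword count; e.g.
 * END written at batch[11] needs no pad, while END at batch[12] is followed
 * by a NOOP at batch[13].
 */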
2557
2558 static void kgem_bo_binding_free(struct kgem *kgem, struct kgem_bo *bo)
2559 {
2560 struct kgem_bo_binding *b;
2561
2562 b = bo->binding.next;
2563 while (b) {
2564 struct kgem_bo_binding *next = b->next;
2565 free(b);
2566 b = next;
2567 }
2568 }
2569
2570 static void kgem_bo_free(struct kgem *kgem, struct kgem_bo *bo)
2571 {
2572 DBG(("%s: handle=%d, size=%d\n", __FUNCTION__, bo->handle, bytes(bo)));
2573 assert(bo->refcnt == 0);
2574 assert(bo->proxy == NULL);
2575 assert(bo->exec == NULL);
2576 assert(!bo->snoop || bo->rq == NULL);
2577
2578 #ifdef DEBUG_MEMORY
2579 kgem->debug_memory.bo_allocs--;
2580 kgem->debug_memory.bo_bytes -= bytes(bo);
2581 #endif
2582
2583 kgem_bo_binding_free(kgem, bo);
2584 kgem_bo_rmfb(kgem, bo);
2585
2586 if (IS_USER_MAP(bo->map__cpu)) {
2587 assert(bo->rq == NULL);
2588 assert(!__kgem_busy(kgem, bo->handle));
2589 assert(MAP(bo->map__cpu) != bo || bo->io || bo->flush);
2590 if (!(bo->io || bo->flush)) {
2591 DBG(("%s: freeing snooped base\n", __FUNCTION__));
2592 assert(bo != MAP(bo->map__cpu));
2593 free(MAP(bo->map__cpu));
2594 }
2595 bo->map__cpu = NULL;
2596 }
2597
2598 DBG(("%s: releasing %p:%p vma for handle=%d, count=%d\n",
2599 __FUNCTION__, bo->map__gtt, bo->map__cpu,
2600 bo->handle, list_is_empty(&bo->vma) ? 0 : kgem->vma[bo->map__gtt == NULL && bo->map__wc == NULL].count));
2601
2602 if (!list_is_empty(&bo->vma)) {
2603 _list_del(&bo->vma);
2604 kgem->vma[bo->map__gtt == NULL && bo->map__wc == NULL].count--;
2605 }
2606
2607 if (bo->map__gtt)
2608 munmap(bo->map__gtt, bytes(bo));
2609 if (bo->map__wc) {
2610 VG(VALGRIND_MAKE_MEM_NOACCESS(bo->map__wc, bytes(bo)));
2611 munmap(bo->map__wc, bytes(bo));
2612 }
2613 if (bo->map__cpu) {
2614 VG(VALGRIND_MAKE_MEM_NOACCESS(MAP(bo->map__cpu), bytes(bo)));
2615 munmap(MAP(bo->map__cpu), bytes(bo));
2616 }
2617
2618 _list_del(&bo->list);
2619 _list_del(&bo->request);
2620 gem_close(kgem->fd, bo->handle);
2621
2622 if (!bo->io && !DBG_NO_MALLOC_CACHE) {
2623 *(struct kgem_bo **)bo = __kgem_freed_bo;
2624 __kgem_freed_bo = bo;
2625 } else
2626 free(bo);
2627 }
2628
2629 inline static void kgem_bo_move_to_inactive(struct kgem *kgem,
2630 struct kgem_bo *bo)
2631 {
2632 DBG(("%s: moving handle=%d to inactive\n", __FUNCTION__, bo->handle));
2633
2634 assert(bo->refcnt == 0);
2635 assert(bo->reusable);
2636 assert(bo->rq == NULL);
2637 assert(bo->exec == NULL);
2638 assert(bo->domain != DOMAIN_GPU);
2639 assert(!bo->proxy);
2640 assert(!bo->io);
2641 assert(!bo->scanout);
2642 assert(!bo->snoop);
2643 assert(!bo->flush);
2644 assert(!bo->needs_flush);
2645 assert(!bo->delta);
2646 assert(list_is_empty(&bo->vma));
2647 assert_tiling(kgem, bo);
2648 assert_caching(kgem, bo);
2649 ASSERT_IDLE(kgem, bo->handle);
2650
2651 if (bucket(bo) >= NUM_CACHE_BUCKETS) {
2652 if (bo->map__gtt) {
2653 DBG(("%s: relinquishing large GTT mapping for handle=%d\n",
2654 __FUNCTION__, bo->handle));
2655 munmap(bo->map__gtt, bytes(bo));
2656 bo->map__gtt = NULL;
2657 }
2658
2659 list_move(&bo->list, &kgem->large_inactive);
2660 } else {
2661 assert(bo->flush == false);
2662 assert(list_is_empty(&bo->vma));
2663 list_move(&bo->list, &kgem->inactive[bucket(bo)]);
2664 if (bo->map__gtt && !kgem_bo_can_map(kgem, bo)) {
2665 DBG(("%s: relinquishing old GTT mapping for handle=%d\n",
2666 __FUNCTION__, bo->handle));
2667 munmap(bo->map__gtt, bytes(bo));
2668 bo->map__gtt = NULL;
2669 }
2670 if (bo->map__gtt || (bo->map__wc && !bo->tiling)) {
2671 list_add(&bo->vma, &kgem->vma[0].inactive[bucket(bo)]);
2672 kgem->vma[0].count++;
2673 }
2674 if (bo->map__cpu && list_is_empty(&bo->vma)) {
2675 list_add(&bo->vma, &kgem->vma[1].inactive[bucket(bo)]);
2676 kgem->vma[1].count++;
2677 }
2678 }
2679
2680 kgem->need_expire = true;
2681 }
2682
2683 static struct kgem_bo *kgem_bo_replace_io(struct kgem_bo *bo)
2684 {
2685 struct kgem_bo *base;
2686
2687 if (!bo->io)
2688 return bo;
2689
2690 assert(!bo->snoop);
2691 assert(!bo->purged);
2692 assert(!bo->scanout);
2693 assert(!bo->delta);
2694
2695 if (__kgem_freed_bo) {
2696 base = __kgem_freed_bo;
2697 __kgem_freed_bo = *(struct kgem_bo **)base;
2698 } else
2699 base = malloc(sizeof(*base));
2700 if (base) {
2701 DBG(("%s: transferring io handle=%d to bo\n",
2702 __FUNCTION__, bo->handle));
2703 /* transfer the handle to a minimum bo */
2704 memcpy(base, bo, sizeof(*base));
2705 base->io = false;
2706 list_init(&base->list);
2707 list_replace(&bo->request, &base->request);
2708 list_replace(&bo->vma, &base->vma);
2709 free(bo);
2710 bo = base;
2711 } else
2712 bo->reusable = false;
2713
2714 return bo;
2715 }
2716
2717 inline static void kgem_bo_remove_from_inactive(struct kgem *kgem,
2718 struct kgem_bo *bo)
2719 {
2720 DBG(("%s: removing handle=%d from inactive\n", __FUNCTION__, bo->handle));
2721
2722 list_del(&bo->list);
2723 assert(bo->rq == NULL);
2724 assert(bo->exec == NULL);
2725 assert(!bo->purged);
2726 if (!list_is_empty(&bo->vma)) {
2727 assert(bo->map__gtt || bo->map__wc || bo->map__cpu);
2728 list_del(&bo->vma);
2729 kgem->vma[bo->map__gtt == NULL && bo->map__wc == NULL].count--;
2730 }
2731 }
2732
2733 inline static void kgem_bo_remove_from_active(struct kgem *kgem,
2734 struct kgem_bo *bo)
2735 {
2736 DBG(("%s: removing handle=%d from active\n", __FUNCTION__, bo->handle));
2737
2738 list_del(&bo->list);
2739 assert(bo->rq != NULL);
2740 if (RQ(bo->rq) == (void *)kgem) {
2741 assert(bo->exec == NULL);
2742 list_del(&bo->request);
2743 }
2744 assert(list_is_empty(&bo->vma));
2745 }
2746
2747 static void _kgem_bo_delete_buffer(struct kgem *kgem, struct kgem_bo *bo)
2748 {
2749 struct kgem_buffer *io = (struct kgem_buffer *)bo->proxy;
2750
2751 DBG(("%s: size=%d, offset=%d, parent used=%d\n",
2752 __FUNCTION__, bo->size.bytes, bo->delta, io->used));
2753
2754 if (ALIGN(bo->delta + bo->size.bytes, UPLOAD_ALIGNMENT) == io->used)
2755 io->used = bo->delta;
2756 }
2757
2758 static bool check_scanout_size(struct kgem *kgem,
2759 struct kgem_bo *bo,
2760 int width, int height)
2761 {
2762 struct drm_mode_fb_cmd info;
2763
2764 assert(bo->scanout);
2765
2766 VG_CLEAR(info);
2767 info.fb_id = bo->delta;
2768
2769 if (do_ioctl(kgem->fd, DRM_IOCTL_MODE_GETFB, &info))
2770 return false;
2771
2772 gem_close(kgem->fd, info.handle);
2773
2774 if (width > info.width || height > info.height) {
2775 DBG(("%s: not using scanout %d (%dx%d), want (%dx%d)\n",
2776 __FUNCTION__,
2777 info.fb_id, info.width, info.height,
2778 width, height));
2779 return false;
2780 }
2781
2782 return true;
2783 }
2784
2785 static void kgem_bo_move_to_scanout(struct kgem *kgem, struct kgem_bo *bo)
2786 {
2787 assert(bo->refcnt == 0);
2788 assert(bo->scanout);
2789 assert(!bo->flush);
2790 assert(!bo->snoop);
2791 assert(!bo->io);
2792
2793 if (bo->purged) { /* for stolen fb */
2794 if (!bo->exec) {
2795 DBG(("%s: discarding purged scanout - stolen?\n",
2796 __FUNCTION__));
2797 kgem_bo_free(kgem, bo);
2798 }
2799 return;
2800 }
2801
2802 DBG(("%s: moving %d [fb %d] to scanout cache, active? %d\n",
2803 __FUNCTION__, bo->handle, bo->delta, bo->rq != NULL));
2804 if (bo->rq)
2805 list_move_tail(&bo->list, &kgem->scanout);
2806 else
2807 list_move(&bo->list, &kgem->scanout);
2808
2809 kgem->need_expire = true;
2810 }
2811
2812 static void kgem_bo_move_to_snoop(struct kgem *kgem, struct kgem_bo *bo)
2813 {
2814 assert(bo->reusable);
2815 assert(!bo->scanout);
2816 assert(!bo->flush);
2817 assert(!bo->needs_flush);
2818 assert(bo->refcnt == 0);
2819 assert(bo->exec == NULL);
2820 assert(!bo->purged);
2821 assert(!bo->delta);
2822
2823 if (DBG_NO_SNOOP_CACHE) {
2824 kgem_bo_free(kgem, bo);
2825 return;
2826 }
2827
2828 if (num_pages(bo) > kgem->max_cpu_size >> 13) {
2829 DBG(("%s handle=%d discarding large CPU buffer (%d > %d pages)\n",
2830 __FUNCTION__, bo->handle, num_pages(bo), kgem->max_cpu_size >> 13));
2831 kgem_bo_free(kgem, bo);
2832 return;
2833 }
2834
2835 assert(bo->tiling == I915_TILING_NONE);
2836 assert(bo->rq == NULL);
2837
2838 DBG(("%s: moving %d to snoop cache\n", __FUNCTION__, bo->handle));
2839 list_add(&bo->list, &kgem->snoop);
2840 kgem->need_expire = true;
2841 }
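/* The size check above compares the bo's pages against max_cpu_size >> 13,
 * i.e. half of max_cpu_size in 4KiB pages, so snooped bos larger than half
 * the CPU limit are freed rather than kept in the snoop cache.
 */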
2842
2843 static bool kgem_bo_move_to_cache(struct kgem *kgem, struct kgem_bo *bo)
2844 {
2845 bool retired = false;
2846
2847 DBG(("%s: release handle=%d\n", __FUNCTION__, bo->handle));
2848
2849 if (bo->prime) {
2850 DBG(("%s: discarding imported prime handle=%d\n",
2851 __FUNCTION__, bo->handle));
2852 kgem_bo_free(kgem, bo);
2853 } else if (bo->snoop) {
2854 kgem_bo_move_to_snoop(kgem, bo);
2855 } else if (bo->scanout) {
2856 kgem_bo_move_to_scanout(kgem, bo);
2857 } else if ((bo = kgem_bo_replace_io(bo))->reusable) {
2858 kgem_bo_move_to_inactive(kgem, bo);
2859 retired = true;
2860 } else
2861 kgem_bo_free(kgem, bo);
2862
2863 return retired;
2864 }
2865
2866 static struct kgem_bo *
2867 search_snoop_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags)
2868 {
2869 struct kgem_bo *bo, *first = NULL;
2870
2871 DBG(("%s: num_pages=%d, flags=%x\n", __FUNCTION__, num_pages, flags));
2872
2873 if ((kgem->has_caching | kgem->has_userptr) == 0)
2874 return NULL;
2875
2876 if (list_is_empty(&kgem->snoop)) {
2877 DBG(("%s: inactive and cache empty\n", __FUNCTION__));
2878 if (!__kgem_throttle_retire(kgem, flags)) {
2879 DBG(("%s: nothing retired\n", __FUNCTION__));
2880 return NULL;
2881 }
2882 }
2883
2884 list_for_each_entry(bo, &kgem->snoop, list) {
2885 assert(bo->refcnt == 0);
2886 assert(bo->snoop);
2887 assert(!bo->scanout);
2888 assert(!bo->purged);
2889 assert(bo->proxy == NULL);
2890 assert(bo->tiling == I915_TILING_NONE);
2891 assert(bo->rq == NULL);
2892 assert(bo->exec == NULL);
2893
2894 if (num_pages > num_pages(bo))
2895 continue;
2896
2897 if (num_pages(bo) > 2*num_pages) {
2898 if (first == NULL)
2899 first = bo;
2900 continue;
2901 }
2902
2903 list_del(&bo->list);
2904 bo->pitch = 0;
2905 bo->delta = 0;
2906
2907 DBG((" %s: found handle=%d (num_pages=%d) in snoop cache\n",
2908 __FUNCTION__, bo->handle, num_pages(bo)));
2909 return bo;
2910 }
2911
2912 if (first) {
2913 list_del(&first->list);
2914 first->pitch = 0;
2915 first->delta = 0;
2916
2917 DBG((" %s: found handle=%d (num_pages=%d) in snoop cache\n",
2918 __FUNCTION__, first->handle, num_pages(first)));
2919 return first;
2920 }
2921
2922 return NULL;
2923 }
2924
2925 void kgem_bo_undo(struct kgem *kgem, struct kgem_bo *bo)
2926 {
2927 if (kgem->nexec != 1 || bo->exec == NULL)
2928 return;
2929
2930 assert(bo);
2931 DBG(("%s: only handle in batch, discarding last operations for handle=%d\n",
2932 __FUNCTION__, bo->handle));
2933
2934 assert(bo->exec == &_kgem_dummy_exec || bo->exec == &kgem->exec[0]);
2935 assert(kgem->exec[0].handle == bo->handle);
2936 assert(RQ(bo->rq) == kgem->next_request);
2937
2938 bo->refcnt++;
2939 kgem_reset(kgem);
2940 bo->refcnt--;
2941
2942 assert(kgem->nreloc == 0);
2943 assert(kgem->nexec == 0);
2944 assert(bo->exec == NULL);
2945 }
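/* The temporary refcnt++ around kgem_reset() appears to be a guard:
 * resetting walks the request's buffer list and would otherwise see a zero
 * refcount and free (or cache) the very bo whose last operations we are
 * discarding.
 */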
2946
2947 void kgem_bo_pair_undo(struct kgem *kgem, struct kgem_bo *a, struct kgem_bo *b)
2948 {
2949 if (kgem->nexec > 2)
2950 return;
2951
2952 if (kgem->nexec == 1) {
2953 if (a)
2954 kgem_bo_undo(kgem, a);
2955 if (b)
2956 kgem_bo_undo(kgem, b);
2957 return;
2958 }
2959
2960 if (a == NULL || b == NULL)
2961 return;
2962 assert(a != b);
2963 if (a->exec == NULL || b->exec == NULL)
2964 return;
2965
2966 DBG(("%s: only handles in batch, discarding last operations for handle=%d (index=%d) and handle=%d (index=%d)\n",
2967 __FUNCTION__,
2968 a->handle, a->proxy ? -1 : a->exec - kgem->exec,
2969 b->handle, b->proxy ? -1 : b->exec - kgem->exec));
2970
2971 assert(a->exec == &_kgem_dummy_exec ||
2972 a->exec == &kgem->exec[0] ||
2973 a->exec == &kgem->exec[1]);
2974 assert(a->handle == kgem->exec[0].handle || a->handle == kgem->exec[1].handle);
2975 assert(RQ(a->rq) == kgem->next_request);
2976 assert(b->exec == &_kgem_dummy_exec ||
2977 b->exec == &kgem->exec[0] ||
2978 b->exec == &kgem->exec[1]);
2979 assert(b->handle == kgem->exec[0].handle || b->handle == kgem->exec[1].handle);
2980 assert(RQ(b->rq) == kgem->next_request);
2981
2982 a->refcnt++;
2983 b->refcnt++;
2984 kgem_reset(kgem);
2985 b->refcnt--;
2986 a->refcnt--;
2987
2988 assert(kgem->nreloc == 0);
2989 assert(kgem->nexec == 0);
2990 assert(a->exec == NULL);
2991 assert(b->exec == NULL);
2992 }
2993
2994 static void __kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo)
2995 {
2996 DBG(("%s: handle=%d, size=%d\n", __FUNCTION__, bo->handle, bytes(bo)));
2997
2998 assert(list_is_empty(&bo->list));
2999 assert(list_is_empty(&bo->vma));
3000 assert(bo->refcnt == 0);
3001 assert(bo->proxy == NULL);
3002 assert(bo->active_scanout == 0);
3003 assert_tiling(kgem, bo);
3004
3005 bo->binding.offset = 0;
3006
3007 if (DBG_NO_CACHE)
3008 goto destroy;
3009
3010 if (bo->prime)
3011 goto destroy;
3012
3013 if (bo->snoop && !bo->flush) {
3014 DBG(("%s: handle=%d is snooped\n", __FUNCTION__, bo->handle));
3015 assert(bo->reusable);
3016 assert(list_is_empty(&bo->list));
3017 if (bo->exec == NULL && bo->rq && !__kgem_busy(kgem, bo->handle))
3018 __kgem_bo_clear_busy(bo);
3019 if (bo->rq == NULL)
3020 kgem_bo_move_to_snoop(kgem, bo);
3021 return;
3022 }
3023 if (!IS_USER_MAP(bo->map__cpu))
3024 bo->flush = false;
3025
3026 if (bo->scanout) {
3027 kgem_bo_move_to_scanout(kgem, bo);
3028 return;
3029 }
3030
3031 if (bo->io)
3032 bo = kgem_bo_replace_io(bo);
3033 if (!bo->reusable) {
3034 DBG(("%s: handle=%d, not reusable\n",
3035 __FUNCTION__, bo->handle));
3036 goto destroy;
3037 }
3038
3039 assert(list_is_empty(&bo->vma));
3040 assert(list_is_empty(&bo->list));
3041 assert(bo->flush == false);
3042 assert(bo->snoop == false);
3043 assert(bo->io == false);
3044 assert(bo->scanout == false);
3045 assert_caching(kgem, bo);
3046
3047 kgem_bo_undo(kgem, bo);
3048 assert(bo->refcnt == 0);
3049
3050 if (bo->rq && bo->exec == NULL && !__kgem_busy(kgem, bo->handle))
3051 __kgem_bo_clear_busy(bo);
3052
3053 if (bo->rq) {
3054 struct list *cache;
3055
3056 DBG(("%s: handle=%d -> active\n", __FUNCTION__, bo->handle));
3057 if (bucket(bo) < NUM_CACHE_BUCKETS)
3058 cache = &kgem->active[bucket(bo)][bo->tiling];
3059 else
3060 cache = &kgem->large;
3061 list_add(&bo->list, cache);
3062 return;
3063 }
3064
3065 assert(bo->exec == NULL);
3066 assert(list_is_empty(&bo->request));
3067
3068 if (bo->map__cpu == NULL || bucket(bo) >= NUM_CACHE_BUCKETS) {
3069 if (!kgem->has_llc && bo->domain == DOMAIN_CPU)
3070 goto destroy;
3071
3072 DBG(("%s: handle=%d, purged\n",
3073 __FUNCTION__, bo->handle));
3074 }
3075
3076 kgem_bo_move_to_inactive(kgem, bo);
3077 return;
3078
3079 destroy:
3080 if (!bo->exec)
3081 kgem_bo_free(kgem, bo);
3082 }
3083
3084 static void kgem_bo_unref(struct kgem *kgem, struct kgem_bo *bo)
3085 {
3086 assert(bo->refcnt);
3087 if (--bo->refcnt == 0)
3088 __kgem_bo_destroy(kgem, bo);
3089 }
3090
3091 static void kgem_buffer_release(struct kgem *kgem, struct kgem_buffer *bo)
3092 {
3093 assert(bo->base.io);
3094 while (!list_is_empty(&bo->base.vma)) {
3095 struct kgem_bo *cached;
3096
3097 cached = list_first_entry(&bo->base.vma, struct kgem_bo, vma);
3098 assert(cached->proxy == &bo->base);
3099 assert(cached != &bo->base);
3100 list_del(&cached->vma);
3101
3102 assert(*(struct kgem_bo **)cached->map__gtt == cached);
3103 *(struct kgem_bo **)cached->map__gtt = NULL;
3104 cached->map__gtt = NULL;
3105
3106 kgem_bo_destroy(kgem, cached);
3107 }
3108 }
3109
3110 void kgem_retire__buffers(struct kgem *kgem)
3111 {
3112 while (!list_is_empty(&kgem->active_buffers)) {
3113 struct kgem_buffer *bo =
3114 list_last_entry(&kgem->active_buffers,
3115 struct kgem_buffer,
3116 base.list);
3117
3118 DBG(("%s: handle=%d, busy? %d [%d]\n",
3119 __FUNCTION__, bo->base.handle, bo->base.rq != NULL, bo->base.exec != NULL));
3120
3121 assert(bo->base.exec == NULL || RQ(bo->base.rq) == kgem->next_request);
3122 if (bo->base.rq)
3123 break;
3124
3125 DBG(("%s: releasing upload cache for handle=%d? %d\n",
3126 __FUNCTION__, bo->base.handle, !list_is_empty(&bo->base.vma)));
3127 list_del(&bo->base.list);
3128 kgem_buffer_release(kgem, bo);
3129 kgem_bo_unref(kgem, &bo->base);
3130 }
3131 }
3132
3133 static bool kgem_retire__flushing(struct kgem *kgem)
3134 {
3135 struct kgem_bo *bo, *next;
3136 bool retired = false;
3137
3138 list_for_each_entry_safe(bo, next, &kgem->flushing, request) {
3139 assert(RQ(bo->rq) == (void *)kgem);
3140 assert(bo->exec == NULL);
3141
3142 if (__kgem_busy(kgem, bo->handle))
3143 break;
3144
3145 __kgem_bo_clear_busy(bo);
3146
3147 if (bo->refcnt)
3148 continue;
3149
3150 retired |= kgem_bo_move_to_cache(kgem, bo);
3151 }
3152 #if HAS_DEBUG_FULL
3153 {
3154 int count = 0;
3155 list_for_each_entry(bo, &kgem->flushing, request)
3156 count++;
3157 DBG(("%s: %d bo on flushing list, retired? %d\n", __FUNCTION__, count, retired));
3158 }
3159 #endif
3160
3161 kgem->need_retire |= !list_is_empty(&kgem->flushing);
3162
3163 return retired;
3164 }
3165
3166 static bool __kgem_bo_flush(struct kgem *kgem, struct kgem_bo *bo)
3167 {
3168 struct drm_i915_gem_busy busy;
3169
3170 if (!bo->needs_flush)
3171 return false;
3172
3173 bo->needs_flush = false;
3174
3175 VG_CLEAR(busy);
3176 busy.handle = bo->handle;
3177 busy.busy = !kgem->wedged;
3178 (void)do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_BUSY, &busy);
3179 DBG(("%s: handle=%d, busy=%d, wedged=%d\n",
3180 __FUNCTION__, bo->handle, busy.busy, kgem->wedged));
3181
3182 if (busy.busy == 0)
3183 return false;
3184
3185 DBG(("%s: moving %d to flushing\n",
3186 __FUNCTION__, bo->handle));
3187 list_add(&bo->request, &kgem->flushing);
3188 bo->rq = MAKE_REQUEST(kgem, !!(busy.busy & ~0x1ffff));
3189 bo->needs_flush = busy.busy & 0xffff;
3190 kgem->need_retire = true;
3191 return true;
3192 }
3193
3194 static bool __kgem_retire_rq(struct kgem *kgem, struct kgem_request *rq)
3195 {
3196 bool retired = false;
3197
3198 DBG(("%s: request %d complete\n",
3199 __FUNCTION__, rq->bo->handle));
3200 assert(RQ(rq->bo->rq) == rq);
3201 assert(rq != (struct kgem_request *)kgem);
3202 assert(rq != &kgem->static_request);
3203
3204 if (rq == kgem->fence[rq->ring])
3205 kgem->fence[rq->ring] = NULL;
3206
3207 while (!list_is_empty(&rq->buffers)) {
3208 struct kgem_bo *bo;
3209
3210 bo = list_first_entry(&rq->buffers,
3211 struct kgem_bo,
3212 request);
3213
3214 assert(RQ(bo->rq) == rq);
3215 assert(bo->exec == NULL);
3216 assert(bo->domain == DOMAIN_GPU || bo->domain == DOMAIN_NONE);
3217
3218 list_del(&bo->request);
3219
3220 if (unlikely(__kgem_bo_flush(kgem, bo))) {
3221 assert(bo != rq->bo);
3222 DBG(("%s: moved %d to flushing\n",
3223 __FUNCTION__, bo->handle));
3224 continue;
3225 }
3226
3227 bo->domain = DOMAIN_NONE;
3228 bo->rq = NULL;
3229 if (bo->refcnt)
3230 continue;
3231
3232 retired |= kgem_bo_move_to_cache(kgem, bo);
3233 }
3234
3235 assert(rq->bo->rq == NULL);
3236 assert(rq->bo->exec == NULL);
3237 assert(list_is_empty(&rq->bo->request));
3238 assert(rq->bo->refcnt > 0);
3239
3240 if (--rq->bo->refcnt == 0) {
3241 kgem_bo_move_to_inactive(kgem, rq->bo);
3242 retired = true;
3243 }
3244
3245 __kgem_request_free(rq);
3246 return retired;
3247 }
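/* Retiring a request detaches every bo it referenced: anything still
 * needing a flush is parked on the flushing list, everything else loses its
 * rq pointer and, if unreferenced, falls back into the snoop/scanout/
 * inactive caches; finally the request's own batch bo is released and the
 * request recycled.
 */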
3248
3249 static bool kgem_retire__requests_ring(struct kgem *kgem, int ring)
3250 {
3251 bool retired = false;
3252
3253 assert(ring < ARRAY_SIZE(kgem->requests));
3254 while (!list_is_empty(&kgem->requests[ring])) {
3255 struct kgem_request *rq;
3256
3257 DBG(("%s: retiring ring %d\n", __FUNCTION__, ring));
3258
3259 rq = list_first_entry(&kgem->requests[ring],
3260 struct kgem_request,
3261 list);
3262 assert(rq->ring == ring);
3263 assert(rq->bo);
3264 assert(RQ(rq->bo->rq) == rq);
3265 if (__kgem_busy(kgem, rq->bo->handle))
3266 break;
3267
3268 retired |= __kgem_retire_rq(kgem, rq);
3269 }
3270
3271 #if HAS_DEBUG_FULL
3272 {
3273 struct kgem_bo *bo;
3274 int count = 0;
3275
3276 list_for_each_entry(bo, &kgem->requests[ring], request)
3277 count++;
3278
3279 bo = NULL;
3280 if (!list_is_empty(&kgem->requests[ring]))
3281 bo = list_first_entry(&kgem->requests[ring],
3282 struct kgem_request,
3283 list)->bo;
3284
3285 DBG(("%s: ring=%d, %d outstanding requests, oldest=%d, retired? %d\n",
3286 __FUNCTION__, ring, count, bo ? bo->handle : 0, retired));
3287 }
3288 #endif
3289
3290 return retired;
3291 }
3292
3293 static bool kgem_retire__requests(struct kgem *kgem)
3294 {
3295 bool retired = false;
3296 int n;
3297
3298 for (n = 0; n < ARRAY_SIZE(kgem->requests); n++) {
3299 retired |= kgem_retire__requests_ring(kgem, n);
3300 kgem->need_retire |= !list_is_empty(&kgem->requests[n]);
3301 }
3302
3303 return retired;
3304 }
3305
3306 bool kgem_retire(struct kgem *kgem)
3307 {
3308 bool retired = false;
3309
3310 DBG(("%s, need_retire?=%d\n", __FUNCTION__, kgem->need_retire));
3311
3312 kgem->need_retire = false;
3313
3314 retired |= kgem_retire__flushing(kgem);
3315 retired |= kgem_retire__requests(kgem);
3316
3317 DBG(("%s -- retired=%d, need_retire=%d\n",
3318 __FUNCTION__, retired, kgem->need_retire));
3319
3320 kgem->retire(kgem);
3321
3322 return retired;
3323 }
3324
3325 bool __kgem_ring_is_idle(struct kgem *kgem, int ring)
3326 {
3327 struct kgem_request *rq;
3328
3329 assert(ring < ARRAY_SIZE(kgem->requests));
3330 assert(!list_is_empty(&kgem->requests[ring]));
3331
3332 rq = kgem->fence[ring];
3333 if (rq) {
3334 struct kgem_request *tmp;
3335
3336 if (__kgem_busy(kgem, rq->bo->handle)) {
3337 DBG(("%s: last fence handle=%d still busy\n",
3338 __FUNCTION__, rq->bo->handle));
3339 return false;
3340 }
3341
3342 do {
3343 tmp = list_first_entry(&kgem->requests[ring],
3344 struct kgem_request,
3345 list);
3346 assert(tmp->ring == ring);
3347 __kgem_retire_rq(kgem, tmp);
3348 } while (tmp != rq);
3349
3350 assert(kgem->fence[ring] == NULL);
3351 if (list_is_empty(&kgem->requests[ring]))
3352 return true;
3353 }
3354
3355 rq = list_last_entry(&kgem->requests[ring],
3356 struct kgem_request, list);
3357 assert(rq->ring == ring);
3358 assert(rq->bo);
3359 assert(RQ(rq->bo->rq) == rq);
3360 if (__kgem_busy(kgem, rq->bo->handle)) {
3361 DBG(("%s: last request handle=%d still busy\n",
3362 __FUNCTION__, rq->bo->handle));
3363 kgem->fence[ring] = rq;
3364 return false;
3365 }
3366
3367 DBG(("%s: ring=%d idle (handle=%d)\n",
3368 __FUNCTION__, ring, rq->bo->handle));
3369
3370 while (!list_is_empty(&kgem->requests[ring])) {
3371 rq = list_first_entry(&kgem->requests[ring],
3372 struct kgem_request,
3373 list);
3374 assert(rq->ring == ring);
3375 __kgem_retire_rq(kgem, rq);
3376 }
3377
3378 return true;
3379 }
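/* Requests on a ring complete in submission order, so once the newest
 * request (remembered in kgem->fence[ring] while it was still busy) has
 * signalled, every older request on that list can be retired without any
 * further busy queries.
 */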
3380
3381 bool __kgem_retire_requests_upto(struct kgem *kgem, struct kgem_bo *bo)
3382 {
3383 struct kgem_request * const rq = RQ(bo->rq), *tmp;
3384 struct list *requests = &kgem->requests[rq->ring];
3385
3386 DBG(("%s(handle=%d, ring=%d)\n", __FUNCTION__, bo->handle, rq->ring));
3387
3388 assert(rq != &kgem->static_request);
3389 if (rq == (struct kgem_request *)kgem) {
3390 __kgem_bo_clear_busy(bo);
3391 return false;
3392 }
3393
3394 assert(rq->ring < ARRAY_SIZE(kgem->requests));
3395 do {
3396 tmp = list_first_entry(requests, struct kgem_request, list);
3397 assert(tmp->ring == rq->ring);
3398 __kgem_retire_rq(kgem, tmp);
3399 } while (tmp != rq);
3400
3401 assert(bo->needs_flush || bo->rq == NULL);
3402 assert(bo->needs_flush || list_is_empty(&bo->request));
3403 assert(bo->needs_flush || bo->domain == DOMAIN_NONE);
3404 return bo->rq;
3405 }
3406
3407 #if 0
3408 static void kgem_commit__check_reloc(struct kgem *kgem)
3409 {
3410 struct kgem_request *rq = kgem->next_request;
3411 struct kgem_bo *bo;
3412 bool has_64bit = kgem->gen >= 0100;
3413 int i;
3414
3415 for (i = 0; i < kgem->nreloc; i++) {
3416 list_for_each_entry(bo, &rq->buffers, request) {
3417 if (bo->target_handle == kgem->reloc[i].target_handle) {
3418 uint64_t value = 0;
3419 gem_read(kgem->fd, rq->bo->handle, &value, kgem->reloc[i].offset, has_64bit ? 8 : 4);
3420 assert(bo->exec->offset == -1 || value == bo->exec->offset + (int)kgem->reloc[i].delta);
3421 break;
3422 }
3423 }
3424 }
3425 }
3426 #else
3427 #define kgem_commit__check_reloc(kgem)
3428 #endif
3429
3430 #ifndef NDEBUG
3431 static void kgem_commit__check_buffers(struct kgem *kgem)
3432 {
3433 struct kgem_buffer *bo;
3434
3435 list_for_each_entry(bo, &kgem->active_buffers, base.list)
3436 assert(bo->base.exec == NULL);
3437 }
3438 #else
3439 #define kgem_commit__check_buffers(kgem)
3440 #endif
3441
3442 static void kgem_commit(struct kgem *kgem)
3443 {
3444 struct kgem_request *rq = kgem->next_request;
3445 struct kgem_bo *bo, *next;
3446
3447 kgem_commit__check_reloc(kgem);
3448
3449 list_for_each_entry_safe(bo, next, &rq->buffers, request) {
3450 assert(next->request.prev == &bo->request);
3451
3452 DBG(("%s: release handle=%d (proxy? %d), dirty? %d flush? %d, snoop? %d -> offset=%x\n",
3453 __FUNCTION__, bo->handle, bo->proxy != NULL,
3454 bo->gpu_dirty, bo->needs_flush, bo->snoop,
3455 (unsigned)bo->exec->offset));
3456
3457 assert(bo->exec);
3458 assert(bo->proxy == NULL || bo->exec == &_kgem_dummy_exec);
3459 assert(RQ(bo->rq) == rq || (RQ(bo->proxy->rq) == rq));
3460
3461 bo->presumed_offset = bo->exec->offset;
3462 bo->exec = NULL;
3463 bo->target_handle = -1;
3464
3465 if (!bo->refcnt && !bo->reusable) {
3466 assert(!bo->snoop);
3467 assert(!bo->proxy);
3468 kgem_bo_free(kgem, bo);
3469 continue;
3470 }
3471
3472 bo->binding.offset = 0;
3473 bo->domain = DOMAIN_GPU;
3474 bo->gpu_dirty = false;
3475 bo->gtt_dirty = false;
3476
3477 if (bo->proxy) {
3478 /* proxies are not used for domain tracking */
3479 __kgem_bo_clear_busy(bo);
3480 }
3481
3482 kgem->scanout_busy |= bo->scanout && bo->needs_flush;
3483 }
3484
3485 if (rq == &kgem->static_request) {
3486 struct drm_i915_gem_set_domain set_domain;
3487
3488 DBG(("%s: syncing due to allocation failure\n", __FUNCTION__));
3489
3490 VG_CLEAR(set_domain);
3491 set_domain.handle = rq->bo->handle;
3492 set_domain.read_domains = I915_GEM_DOMAIN_GTT;
3493 set_domain.write_domain = I915_GEM_DOMAIN_GTT;
3494 if (do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain)) {
3495 DBG(("%s: sync: GPU hang detected\n", __FUNCTION__));
3496 kgem_throttle(kgem);
3497 }
3498
3499 while (!list_is_empty(&rq->buffers)) {
3500 bo = list_first_entry(&rq->buffers,
3501 struct kgem_bo,
3502 request);
3503
3504 assert(RQ(bo->rq) == rq);
3505 assert(bo->exec == NULL);
3506 assert(bo->domain == DOMAIN_GPU);
3507
3508 list_del(&bo->request);
3509 bo->domain = DOMAIN_NONE;
3510 bo->rq = NULL;
3511
3512 if (bo->refcnt == 0)
3513 _kgem_bo_destroy(kgem, bo);
3514 }
3515
3516 kgem_retire(kgem);
3517 assert(list_is_empty(&rq->buffers));
3518
3519 assert(rq->bo->map__gtt == NULL);
3520 assert(rq->bo->map__wc == NULL);
3521 assert(rq->bo->map__cpu == NULL);
3522 gem_close(kgem->fd, rq->bo->handle);
3523 kgem_cleanup_cache(kgem);
3524 } else {
3525 assert(rq != (struct kgem_request *)kgem);
3526 assert(rq->ring < ARRAY_SIZE(kgem->requests));
3527 assert(rq->bo);
3528 list_add_tail(&rq->list, &kgem->requests[rq->ring]);
3529 kgem->need_throttle = kgem->need_retire = 1;
3530
3531 if (kgem->fence[rq->ring] == NULL &&
3532 __kgem_busy(kgem, rq->bo->handle))
3533 kgem->fence[rq->ring] = rq;
3534 }
3535
3536 kgem->next_request = NULL;
3537
3538 kgem_commit__check_buffers(kgem);
3539 }
3540
3541 static void kgem_close_list(struct kgem *kgem, struct list *head)
3542 {
3543 while (!list_is_empty(head))
3544 kgem_bo_free(kgem, list_first_entry(head, struct kgem_bo, list));
3545 }
3546
3547 static void kgem_close_inactive(struct kgem *kgem)
3548 {
3549 unsigned int i;
3550
3551 for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++) {
3552 kgem_close_list(kgem, &kgem->inactive[i]);
3553 assert(list_is_empty(&kgem->inactive[i]));
3554 }
3555 }
3556
3557 static void kgem_finish_buffers(struct kgem *kgem)
3558 {
3559 struct kgem_buffer *bo, *next;
3560
3561 list_for_each_entry_safe(bo, next, &kgem->batch_buffers, base.list) {
3562 DBG(("%s: buffer handle=%d, used=%d, exec?=%d, write=%d, mmapped=%s, refcnt=%d\n",
3563 __FUNCTION__, bo->base.handle, bo->used, bo->base.exec!=NULL,
3564 bo->write, bo->mmapped == MMAPPED_CPU ? "cpu" : bo->mmapped == MMAPPED_GTT ? "gtt" : "no",
3565 bo->base.refcnt));
3566
3567 assert(next->base.list.prev == &bo->base.list);
3568 assert(bo->base.io);
3569 assert(bo->base.refcnt >= 1);
3570
3571 if (bo->base.refcnt > 1 && !bo->base.exec) {
3572 DBG(("%s: skipping unattached handle=%d, used=%d, refcnt=%d\n",
3573 __FUNCTION__, bo->base.handle, bo->used, bo->base.refcnt));
3574 continue;
3575 }
3576
3577 if (!bo->write) {
3578 assert(bo->base.exec || bo->base.refcnt > 1);
3579 goto decouple;
3580 }
3581
3582 if (bo->mmapped) {
3583 uint32_t used;
3584
3585 assert(!bo->need_io);
3586
3587 used = ALIGN(bo->used, PAGE_SIZE);
3588 if (!DBG_NO_UPLOAD_ACTIVE &&
3589 used + PAGE_SIZE <= bytes(&bo->base) &&
3590 (kgem->has_llc || bo->mmapped == MMAPPED_GTT || bo->base.snoop)) {
3591 DBG(("%s: retaining upload buffer (%d/%d): used=%d, refcnt=%d\n",
3592 __FUNCTION__, bo->used, bytes(&bo->base), used, bo->base.refcnt));
3593 bo->used = used;
3594 list_move(&bo->base.list,
3595 &kgem->active_buffers);
3596 kgem->need_retire = true;
3597 continue;
3598 }
3599 DBG(("%s: discarding mmapped buffer, used=%d, map type=%d\n",
3600 __FUNCTION__, bo->used, bo->mmapped));
3601 goto decouple;
3602 }
3603
3604 if (!bo->used || !bo->base.exec) {
3605 /* Unless we replace the handle in the execbuffer,
3606 * then this bo will become active. So decouple it
3607 * from the buffer list and track it in the normal
3608 * manner.
3609 */
3610 goto decouple;
3611 }
3612
3613 assert(bo->need_io);
3614 assert(bo->base.rq == MAKE_REQUEST(kgem->next_request, kgem->ring));
3615 assert(bo->base.domain != DOMAIN_GPU);
3616
3617 if (bo->base.refcnt == 1 &&
3618 bo->base.size.pages.count > 1 &&
3619 bo->used < bytes(&bo->base) / 2) {
3620 struct kgem_bo *shrink;
3621 unsigned alloc = NUM_PAGES(bo->used);
3622
3623 shrink = search_snoop_cache(kgem, alloc,
3624 CREATE_INACTIVE | CREATE_NO_RETIRE);
3625 if (shrink) {
3626 void *map;
3627 int n;
3628
3629 DBG(("%s: used=%d, shrinking %d to %d, handle %d to %d\n",
3630 __FUNCTION__,
3631 bo->used, bytes(&bo->base), bytes(shrink),
3632 bo->base.handle, shrink->handle));
3633
3634 assert(bo->used <= bytes(shrink));
3635 map = kgem_bo_map__cpu(kgem, shrink);
3636 if (map) {
3637 memcpy(map, bo->mem, bo->used);
3638
3639 shrink->target_handle =
3640 kgem->has_handle_lut ? bo->base.target_handle : shrink->handle;
3641 for (n = 0; n < kgem->nreloc; n++) {
3642 if (kgem->reloc[n].target_handle == bo->base.target_handle) {
3643 uint64_t addr = (int)kgem->reloc[n].delta + shrink->presumed_offset;
3644 kgem->batch[kgem->reloc[n].offset/sizeof(kgem->batch[0])] = addr;
3645 if (kgem->gen >= 0100)
3646 kgem->batch[kgem->reloc[n].offset/sizeof(kgem->batch[0]) + 1] = addr >> 32;
3647
3648 kgem->reloc[n].target_handle = shrink->target_handle;
3649 kgem->reloc[n].presumed_offset = shrink->presumed_offset;
3650 }
3651 }
3652
3653 bo->base.exec->handle = shrink->handle;
3654 bo->base.exec->offset = shrink->presumed_offset;
3655 shrink->exec = bo->base.exec;
3656 shrink->rq = bo->base.rq;
3657 list_replace(&bo->base.request,
3658 &shrink->request);
3659 list_init(&bo->base.request);
3660 shrink->needs_flush = bo->base.gpu_dirty;
3661
3662 bo->base.exec = NULL;
3663 bo->base.rq = NULL;
3664 bo->base.gpu_dirty = false;
3665 bo->base.needs_flush = false;
3666 bo->used = 0;
3667
3668 goto decouple;
3669 }
3670
3671 __kgem_bo_destroy(kgem, shrink);
3672 }
3673
3674 shrink = search_linear_cache(kgem, alloc,
3675 CREATE_INACTIVE | CREATE_NO_RETIRE);
3676 if (shrink) {
3677 int n;
3678
3679 DBG(("%s: used=%d, shrinking %d to %d, handle %d to %d\n",
3680 __FUNCTION__,
3681 bo->used, bytes(&bo->base), bytes(shrink),
3682 bo->base.handle, shrink->handle));
3683
3684 assert(bo->used <= bytes(shrink));
3685 if (gem_write__cachealigned(kgem->fd, shrink->handle,
3686 0, bo->used, bo->mem) == 0) {
3687 shrink->target_handle =
3688 kgem->has_handle_lut ? bo->base.target_handle : shrink->handle;
3689 for (n = 0; n < kgem->nreloc; n++) {
3690 if (kgem->reloc[n].target_handle == bo->base.target_handle) {
3691 uint64_t addr = (int)kgem->reloc[n].delta + shrink->presumed_offset;
3692 kgem->batch[kgem->reloc[n].offset/sizeof(kgem->batch[0])] = addr;
3693 if (kgem->gen >= 0100)
3694 kgem->batch[kgem->reloc[n].offset/sizeof(kgem->batch[0]) + 1] = addr >> 32;
3695
3696 kgem->reloc[n].target_handle = shrink->target_handle;
3697 kgem->reloc[n].presumed_offset = shrink->presumed_offset;
3698 }
3699 }
3700
3701 bo->base.exec->handle = shrink->handle;
3702 bo->base.exec->offset = shrink->presumed_offset;
3703 shrink->exec = bo->base.exec;
3704 shrink->rq = bo->base.rq;
3705 list_replace(&bo->base.request,
3706 &shrink->request);
3707 list_init(&bo->base.request);
3708 shrink->needs_flush = bo->base.gpu_dirty;
3709
3710 bo->base.exec = NULL;
3711 bo->base.rq = NULL;
3712 bo->base.gpu_dirty = false;
3713 bo->base.needs_flush = false;
3714 bo->used = 0;
3715
3716 goto decouple;
3717 }
3718
3719 __kgem_bo_destroy(kgem, shrink);
3720 }
3721 }
3722
3723 DBG(("%s: handle=%d, uploading %d/%d\n",
3724 __FUNCTION__, bo->base.handle, bo->used, bytes(&bo->base)));
3725 ASSERT_IDLE(kgem, bo->base.handle);
3726 assert(bo->used <= bytes(&bo->base));
3727 gem_write__cachealigned(kgem->fd, bo->base.handle,
3728 0, bo->used, bo->mem);
3729 bo->need_io = 0;
3730
3731 decouple:
3732 DBG(("%s: releasing handle=%d\n",
3733 __FUNCTION__, bo->base.handle));
3734 list_del(&bo->base.list);
3735 kgem_bo_unref(kgem, &bo->base);
3736 }
3737 }
3738
3739 static void kgem_cleanup(struct kgem *kgem)
3740 {
3741 int n;
3742
3743 for (n = 0; n < ARRAY_SIZE(kgem->requests); n++) {
3744 while (!list_is_empty(&kgem->requests[n])) {
3745 struct kgem_request *rq;
3746
3747 rq = list_first_entry(&kgem->requests[n],
3748 struct kgem_request,
3749 list);
3750 assert(rq->ring == n);
3751 while (!list_is_empty(&rq->buffers)) {
3752 struct kgem_bo *bo;
3753
3754 bo = list_first_entry(&rq->buffers,
3755 struct kgem_bo,
3756 request);
3757
3758 bo->exec = NULL;
3759 bo->gpu_dirty = false;
3760 __kgem_bo_clear_busy(bo);
3761 if (bo->refcnt == 0)
3762 kgem_bo_free(kgem, bo);
3763 }
3764
3765 if (--rq->bo->refcnt == 0)
3766 kgem_bo_free(kgem, rq->bo);
3767
3768 __kgem_request_free(rq);
3769 }
3770 }
3771
3772 kgem_close_inactive(kgem);
3773 }
3774
3775 static int
3776 kgem_batch_write(struct kgem *kgem,
3777 struct kgem_bo *bo,
3778 uint32_t size)
3779 {
3780 char *ptr;
3781 int ret;
3782
3783 assert(bo->exec == NULL);
3784 assert(bo->rq == NULL);
3785 assert(!__kgem_busy(kgem, bo->handle));
3786
3787 #if DBG_NO_EXEC
3788 {
3789 uint32_t batch[] = { MI_BATCH_BUFFER_END, 0};
3790 return gem_write(kgem->fd, bo->handle, 0, sizeof(batch), batch);
3791 }
3792 #endif
3793
3794 assert(!bo->scanout);
3795 retry:
3796 ptr = NULL;
3797 if (bo->domain == DOMAIN_CPU || kgem->has_llc) {
3798 ptr = bo->map__cpu;
3799 if (ptr == NULL)
3800 ptr = __kgem_bo_map__cpu(kgem, bo);
3801 } else if (kgem->has_wc_mmap) {
3802 ptr = bo->map__wc;
3803 if (ptr == NULL)
3804 ptr = __kgem_bo_map__wc(kgem, bo);
3805 }
3806 if (ptr) {
3807 memcpy(ptr, kgem->batch, sizeof(uint32_t)*kgem->nbatch);
3808 if (kgem->surface != kgem->batch_size) {
3809 ret = PAGE_ALIGN(sizeof(uint32_t) * kgem->batch_size);
3810 ret -= sizeof(uint32_t) * kgem->surface;
3811 ptr += size - ret;
3812 memcpy(ptr, kgem->batch + kgem->surface,
3813 (kgem->batch_size - kgem->surface)*sizeof(uint32_t));
3814 }
3815 return 0;
3816 }
3817
3818 /* If there is no surface data, just upload the batch */
3819 if (kgem->surface == kgem->batch_size) {
3820 if ((ret = gem_write__cachealigned(kgem->fd, bo->handle,
3821 0, sizeof(uint32_t)*kgem->nbatch,
3822 kgem->batch)) == 0)
3823 return 0;
3824
3825 goto expire;
3826 }
3827
3828 /* Are the batch pages conjoint with the surface pages? */
3829 if (kgem->surface < kgem->nbatch + PAGE_SIZE/sizeof(uint32_t)) {
3830 assert(size == PAGE_ALIGN(kgem->batch_size*sizeof(uint32_t)));
3831 if ((ret = gem_write__cachealigned(kgem->fd, bo->handle,
3832 0, kgem->batch_size*sizeof(uint32_t),
3833 kgem->batch)) == 0)
3834 return 0;
3835
3836 goto expire;
3837 }
3838
3839 /* Disjoint surface/batch, upload separately */
3840 if ((ret = gem_write__cachealigned(kgem->fd, bo->handle,
3841 0, sizeof(uint32_t)*kgem->nbatch,
3842 kgem->batch)))
3843 goto expire;
3844
3845 ret = PAGE_ALIGN(sizeof(uint32_t) * kgem->batch_size);
3846 ret -= sizeof(uint32_t) * kgem->surface;
3847 assert(size-ret >= kgem->nbatch*sizeof(uint32_t));
3848 if (gem_write(kgem->fd, bo->handle,
3849 size - ret, (kgem->batch_size - kgem->surface)*sizeof(uint32_t),
3850 kgem->batch + kgem->surface))
3851 goto expire;
3852
3853 return 0;
3854
3855 expire:
3856 assert(ret != EINVAL);
3857
3858 (void)__kgem_throttle_retire(kgem, 0);
3859 if (kgem_expire_cache(kgem))
3860 goto retry;
3861
3862 if (kgem_cleanup_cache(kgem))
3863 goto retry;
3864
3865 ERR(("%s: failed to write batch (handle=%d): %d\n",
3866 __FUNCTION__, bo->handle, -ret));
3867 return ret;
3868 }
3869
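/* Abandon the batch currently being constructed: buffers attached to the
 * next_request are released (still-flushing buffers are parked on
 * kgem->flushing), all batch bookkeeping (relocations, exec objects,
 * fences, aperture estimates) is reset, and a fresh request is allocated.
 */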
3870 void kgem_reset(struct kgem *kgem)
3871 {
3872 if (kgem->next_request) {
3873 struct kgem_request *rq = kgem->next_request;
3874
3875 while (!list_is_empty(&rq->buffers)) {
3876 struct kgem_bo *bo =
3877 list_first_entry(&rq->buffers,
3878 struct kgem_bo,
3879 request);
3880 list_del(&bo->request);
3881
3882 assert(RQ(bo->rq) == rq);
3883
3884 bo->binding.offset = 0;
3885 bo->exec = NULL;
3886 bo->target_handle = -1;
3887 bo->gpu_dirty = false;
3888
3889 if (bo->needs_flush && __kgem_busy(kgem, bo->handle)) {
3890 assert(bo->domain == DOMAIN_GPU || bo->domain == DOMAIN_NONE);
3891 list_add(&bo->request, &kgem->flushing);
3892 bo->rq = (void *)kgem;
3893 kgem->need_retire = true;
3894 } else
3895 __kgem_bo_clear_busy(bo);
3896
3897 if (bo->refcnt || bo->rq)
3898 continue;
3899
3900 kgem_bo_move_to_cache(kgem, bo);
3901 }
3902
3903 if (rq != &kgem->static_request) {
3904 list_init(&rq->list);
3905 __kgem_request_free(rq);
3906 }
3907 }
3908
3909 kgem->nfence = 0;
3910 kgem->nexec = 0;
3911 kgem->nreloc = 0;
3912 kgem->nreloc__self = 0;
3913 kgem->aperture = 0;
3914 kgem->aperture_fenced = 0;
3915 kgem->aperture_max_fence = 0;
3916 kgem->nbatch = 0;
3917 kgem->surface = kgem->batch_size;
3918 kgem->mode = KGEM_NONE;
3919 kgem->needs_semaphore = false;
3920 kgem->needs_reservation = false;
3921 kgem->flush = 0;
3922 kgem->batch_flags = kgem->batch_flags_base;
3923 assert(kgem->batch);
3924
3925 kgem->next_request = __kgem_request_alloc(kgem);
3926
3927 kgem_sna_reset(kgem);
3928 }
3929
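/* Batch commands grow up from index 0 while surface state grows down from
 * batch_size; when the kernel supports relaxed delta relocations the unused
 * gap in the middle can be dropped. Returns the number of bytes to upload
 * and stores in *shrink the byte offset by which the surface block moves.
 */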
3930 static int compact_batch_surface(struct kgem *kgem, int *shrink)
3931 {
3932 int size, n;
3933
3934 if (!kgem->has_relaxed_delta)
3935 return kgem->batch_size * sizeof(uint32_t);
3936
3937 /* See if we can pack the contents into one or two pages */
3938 n = ALIGN(kgem->batch_size, 1024);
3939 size = n - kgem->surface + kgem->nbatch;
3940 size = ALIGN(size, 1024);
3941
3942 *shrink = (n - size) * sizeof(uint32_t);
3943 return size * sizeof(uint32_t);
3944 }
3945
3946 static struct kgem_bo *first_available(struct kgem *kgem, struct list *list)
3947 {
3948 struct kgem_bo *bo;
3949
3950 list_for_each_entry(bo, list, list) {
3951 assert(bo->refcnt > 0);
3952
3953 if (bo->rq) {
3954 assert(RQ(bo->rq)->bo == bo);
3955 if (__kgem_busy(kgem, bo->handle))
3956 break;
3957
3958 __kgem_retire_rq(kgem, RQ(bo->rq));
3959 assert(bo->rq == NULL);
3960 }
3961
3962 if (bo->refcnt > 1)
3963 continue;
3964
3965 list_move_tail(&bo->list, list);
3966 return kgem_bo_reference(bo);
3967 }
3968
3969 return NULL;
3970 }
3971
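/* Pick a bo to hold this batch: small batches reuse one of the pre-pinned
 * batch buffers if idle, gen2 (020) prefers cached allocations with a
 * fallback path relying on the kernel workaround, and otherwise a fresh
 * linear bo is created, falling back to kgem_new_batch(). The chosen bo
 * then has its relocations fixed up and the batch contents written into it.
 */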
3972 static struct kgem_bo *
3973 kgem_create_batch(struct kgem *kgem)
3974 {
3975 struct kgem_bo *bo;
3976 int size, shrink = 0;
3977
3978 #if !DBG_NO_SHRINK_BATCHES
3979 if (kgem->surface != kgem->batch_size)
3980 size = compact_batch_surface(kgem, &shrink);
3981 else
3982 size = kgem->nbatch * sizeof(uint32_t);
3983
3984 if (size <= 4096) {
3985 bo = first_available(kgem, &kgem->pinned_batches[0]);
3986 if (bo)
3987 goto write;
3988 }
3989
3990 if (size <= 16384) {
3991 bo = first_available(kgem, &kgem->pinned_batches[1]);
3992 if (bo)
3993 goto write;
3994 }
3995
3996 if (kgem->gen == 020) {
3997 bo = kgem_create_linear(kgem, size, CREATE_CACHED | CREATE_TEMPORARY);
3998 if (bo)
3999 goto write;
4000
4001 /* Nothing available for reuse, rely on the kernel workaround */
4002 if (kgem->has_pinned_batches) {
4003 bo = kgem_create_linear(kgem, size, CREATE_CACHED | CREATE_TEMPORARY);
4004 if (bo) {
4005 kgem->batch_flags &= ~LOCAL_I915_EXEC_IS_PINNED;
4006 goto write;
4007 }
4008 }
4009
4010 if (size < 16384) {
4011 bo = list_first_entry(&kgem->pinned_batches[size > 4096],
4012 struct kgem_bo,
4013 list);
4014 list_move_tail(&bo->list, &kgem->pinned_batches[size > 4096]);
4015
4016 DBG(("%s: syncing due to busy batches\n", __FUNCTION__));
4017 if (kgem_bo_wait(kgem, bo))
4018 return NULL;
4019
4020 kgem_retire(kgem);
4021 assert(bo->rq == NULL);
4022 bo = kgem_bo_reference(bo);
4023 goto write;
4024 }
4025 }
4026 #else
4027 if (kgem->surface != kgem->batch_size)
4028 size = kgem->batch_size * sizeof(uint32_t);
4029 else
4030 size = kgem->nbatch * sizeof(uint32_t);
4031 #endif
4032
4033 if (!kgem->batch_bo || !kgem->has_llc) {
4034 bo = kgem_create_linear(kgem, size, CREATE_NO_THROTTLE);
4035 if (bo) {
4036 write:
4037 kgem_fixup_relocs(kgem, bo, shrink);
4038 if (kgem_batch_write(kgem, bo, size)) {
4039 kgem_bo_destroy(kgem, bo);
4040 return NULL;
4041 }
4042 return bo;
4043 }
4044 }
4045
4046 return kgem_new_batch(kgem);
4047 }
4048
4049 #if !NDEBUG
4050 static bool dump_file(const char *path)
4051 {
4052 FILE *file;
4053 size_t len = 0;
4054 char *line = NULL;
4055
4056 file = fopen(path, "r");
4057 if (file == NULL)
4058 return false;
4059
4060 while (getline(&line, &len, file) != -1)
4061 ErrorF("%s", line);
4062
4063 free(line);
4064 fclose(file);
4065 return true;
4066 }
4067
4068 static void dump_debugfs(struct kgem *kgem, const char *name)
4069 {
4070 char path[80];
4071 int minor = kgem_get_minor(kgem);
4072
4073 if (minor < 0)
4074 return;
4075
4076 sprintf(path, "/sys/kernel/debug/dri/%d/%s", minor, name);
4077 if (dump_file(path))
4078 return;
4079
4080 sprintf(path, "/debug/dri/%d/%s", minor, name);
4081 if (dump_file(path))
4082 return;
4083 }
4084
4085 static void dump_gtt_info(struct kgem *kgem)
4086 {
4087 dump_debugfs(kgem, "i915_gem_gtt");
4088 }
4089
4090 static void dump_fence_regs(struct kgem *kgem)
4091 {
4092 dump_debugfs(kgem, "i915_gem_fence_regs");
4093 }
4094 #endif
4095
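/* Submit the execbuffer, progressively freeing memory on failure: first
 * retire and expire the caches and retry, then retry after a full cache
 * cleanup, and as a last resort temporarily disable all outputs to release
 * pinned framebuffers, sprites and cursors before one final attempt.
 */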
4096 static int do_execbuf(struct kgem *kgem, struct drm_i915_gem_execbuffer2 *execbuf)
4097 {
4098 int ret;
4099
4100 retry:
4101 ret = do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, execbuf);
4102 if (ret == 0)
4103 return 0;
4104
4105 DBG(("%s: failed ret=%d, throttling and discarding cache\n", __FUNCTION__, ret));
4106 (void)__kgem_throttle_retire(kgem, 0);
4107 if (kgem_expire_cache(kgem))
4108 goto retry;
4109
4110 if (kgem_cleanup_cache(kgem))
4111 goto retry;
4112
4113 /* last gasp */
4114 ret = do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, execbuf);
4115 if (ret != -ENOSPC)
4116 return ret;
4117
4118 /* One final trick up our sleeve for when we run out of space.
4119 * We turn everything off to free up our pinned framebuffers,
4120 * sprites and cursors, and try just one more time.
4121 */
4122
4123 xf86DrvMsg(kgem_get_screen_index(kgem), X_WARNING,
4124 "Failed to submit rendering commands, trying again with outputs disabled.\n");
4125
4126 if (sna_mode_disable(__to_sna(kgem))) {
4127 kgem_cleanup_cache(kgem);
4128 ret = do_ioctl(kgem->fd,
4129 DRM_IOCTL_I915_GEM_EXECBUFFER2,
4130 execbuf);
4131 DBG(("%s: last_gasp ret=%d\n", __FUNCTION__, ret));
4132 sna_mode_enable(__to_sna(kgem));
4133 }
4134
4135 return ret;
4136 }
4137
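/* Finalise and submit the current batch: close the batch, allocate a bo
 * for it, add that bo as the final exec object and call execbuffer2. On
 * failure the GPU is throttled and acceleration disabled (kgem marked
 * wedged), with diagnostics dumped in debug builds. Afterwards the request
 * is committed (or cleaned up when wedged) and the batch state reset.
 */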
4138 void _kgem_submit(struct kgem *kgem)
4139 {
4140 struct kgem_request *rq;
4141 uint32_t batch_end;
4142 int i, ret;
4143
4144 assert(!DBG_NO_HW);
4145 assert(!kgem->wedged);
4146
4147 assert(kgem->nbatch);
4148 assert(kgem->nbatch <= KGEM_BATCH_SIZE(kgem));
4149 assert(kgem->nbatch <= kgem->surface);
4150
4151 batch_end = kgem_end_batch(kgem);
4152 kgem_sna_flush(kgem);
4153
4154 DBG(("batch[%d/%d, flags=%x]: %d %d %d %d, nreloc=%d, nexec=%d, nfence=%d, aperture=%d [fenced=%d]\n",
4155 kgem->mode, kgem->ring, kgem->batch_flags,
4156 batch_end, kgem->nbatch, kgem->surface, kgem->batch_size,
4157 kgem->nreloc, kgem->nexec, kgem->nfence, kgem->aperture, kgem->aperture_fenced));
4158
4159 assert(kgem->nbatch <= kgem->batch_size);
4160 assert(kgem->nbatch <= kgem->surface);
4161 assert(kgem->nreloc <= ARRAY_SIZE(kgem->reloc));
4162 assert(kgem->nexec < ARRAY_SIZE(kgem->exec));
4163 assert(kgem->nfence <= kgem->fence_max);
4164
4165 kgem_finish_buffers(kgem);
4166
4167 #if SHOW_BATCH_BEFORE
4168 __kgem_batch_debug(kgem, batch_end);
4169 #endif
4170
4171 rq = kgem->next_request;
4172 assert(rq->bo == NULL);
4173
4174 rq->bo = kgem_create_batch(kgem);
4175 if (rq->bo) {
4176 struct drm_i915_gem_execbuffer2 execbuf;
4177
4178 assert(!rq->bo->needs_flush);
4179
4180 i = kgem->nexec++;
4181 kgem->exec[i].handle = rq->bo->handle;
4182 kgem->exec[i].relocation_count = kgem->nreloc;
4183 kgem->exec[i].relocs_ptr = (uintptr_t)kgem->reloc;
4184 kgem->exec[i].alignment = 0;
4185 kgem->exec[i].offset = rq->bo->presumed_offset;
4186 /* Make sure the kernel releases any fence, ignored if gen4+ */
4187 kgem->exec[i].flags = EXEC_OBJECT_NEEDS_FENCE;
4188 kgem->exec[i].rsvd1 = 0;
4189 kgem->exec[i].rsvd2 = 0;
4190
4191 rq->bo->exec = &kgem->exec[i];
4192 rq->bo->rq = MAKE_REQUEST(rq, kgem->ring); /* useful sanity check */
4193 list_add(&rq->bo->request, &rq->buffers);
4194 rq->ring = kgem->ring == KGEM_BLT;
4195
4196 memset(&execbuf, 0, sizeof(execbuf));
4197 execbuf.buffers_ptr = (uintptr_t)kgem->exec;
4198 execbuf.buffer_count = kgem->nexec;
4199 if (kgem->gen < 030)
4200 execbuf.batch_len = batch_end*sizeof(uint32_t);
4201 execbuf.flags = kgem->ring | kgem->batch_flags;
4202
4203 if (DBG_DUMP) {
4204 int fd = open("/tmp/i915-batchbuffers.dump",
4205 O_WRONLY | O_CREAT | O_APPEND,
4206 0666);
4207 if (fd != -1) {
4208 ret = write(fd, kgem->batch, batch_end*sizeof(uint32_t));
4209 fd = close(fd);
4210 }
4211 }
4212
4213 ret = do_execbuf(kgem, &execbuf);
4214 } else
4215 ret = -ENOMEM;
4216
4217 if (ret < 0) {
4218 kgem_throttle(kgem);
4219 if (!kgem->wedged) {
4220 xf86DrvMsg(kgem_get_screen_index(kgem), X_ERROR,
4221 "Failed to submit rendering commands (%s), disabling acceleration.\n",
4222 strerror(-ret));
4223 __kgem_set_wedged(kgem);
4224 }
4225
4226 #if !NDEBUG
4227 ErrorF("batch[%d/%d]: %d %d %d, nreloc=%d, nexec=%d, nfence=%d, aperture=%d, fenced=%d, high=%d,%d: errno=%d\n",
4228 kgem->mode, kgem->ring, batch_end, kgem->nbatch, kgem->surface,
4229 kgem->nreloc, kgem->nexec, kgem->nfence, kgem->aperture, kgem->aperture_fenced, kgem->aperture_high, kgem->aperture_total, -ret);
4230
4231 for (i = 0; i < kgem->nexec; i++) {
4232 struct kgem_bo *bo, *found = NULL;
4233
4234 list_for_each_entry(bo, &kgem->next_request->buffers, request) {
4235 if (bo->handle == kgem->exec[i].handle) {
4236 found = bo;
4237 break;
4238 }
4239 }
4240 ErrorF("exec[%d] = handle:%d, presumed offset: %x, size: %d, tiling %d, fenced %d, snooped %d, deleted %d\n",
4241 i,
4242 kgem->exec[i].handle,
4243 (int)kgem->exec[i].offset,
4244 found ? kgem_bo_size(found) : -1,
4245 found ? found->tiling : -1,
4246 (int)(kgem->exec[i].flags & EXEC_OBJECT_NEEDS_FENCE),
4247 found ? found->snoop : -1,
4248 found ? found->purged : -1);
4249 }
4250 for (i = 0; i < kgem->nreloc; i++) {
4251 ErrorF("reloc[%d] = pos:%d, target:%d, delta:%d, read:%x, write:%x, offset:%x\n",
4252 i,
4253 (int)kgem->reloc[i].offset,
4254 kgem->reloc[i].target_handle,
4255 kgem->reloc[i].delta,
4256 kgem->reloc[i].read_domains,
4257 kgem->reloc[i].write_domain,
4258 (int)kgem->reloc[i].presumed_offset);
4259 }
4260
4261 {
4262 struct drm_i915_gem_get_aperture aperture;
4263 if (do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture) == 0)
4264 ErrorF("Aperture size %lld, available %lld\n",
4265 (long long)aperture.aper_size,
4266 (long long)aperture.aper_available_size);
4267 }
4268
4269 if (ret == -ENOSPC)
4270 dump_gtt_info(kgem);
4271 if (ret == -EDEADLK)
4272 dump_fence_regs(kgem);
4273
4274 if (DEBUG_SYNC) {
4275 int fd = open("/tmp/batchbuffer", O_WRONLY | O_CREAT | O_APPEND, 0666);
4276 if (fd != -1) {
4277 int ignored = write(fd, kgem->batch, batch_end*sizeof(uint32_t));
4278 assert(ignored == batch_end*sizeof(uint32_t));
4279 close(fd);
4280 }
4281
4282 FatalError("SNA: failed to submit batchbuffer, errno=%d\n", -ret);
4283 }
4284 #endif
4285 } else {
4286 if (DEBUG_SYNC) {
4287 struct drm_i915_gem_set_domain set_domain;
4288
4289 VG_CLEAR(set_domain);
4290 set_domain.handle = rq->bo->handle;
4291 set_domain.read_domains = I915_GEM_DOMAIN_GTT;
4292 set_domain.write_domain = I915_GEM_DOMAIN_GTT;
4293
4294 ret = do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain);
4295 }
4296
4297 #if SHOW_BATCH_AFTER
4298 if (gem_read(kgem->fd, rq->bo->handle, kgem->batch, 0, batch_end*sizeof(uint32_t)) == 0)
4299 __kgem_batch_debug(kgem, batch_end);
4300 #endif
4301
4302 kgem_commit(kgem);
4303 }
4304
4305 if (unlikely(kgem->wedged))
4306 kgem_cleanup(kgem);
4307
4308 kgem_reset(kgem);
4309
4310 assert(kgem->next_request != NULL);
4311 }
4312
4313 void kgem_throttle(struct kgem *kgem)
4314 {
4315 if (unlikely(kgem->wedged))
4316 return;
4317
4318 if (__kgem_throttle(kgem, true)) {
4319 xf86DrvMsg(kgem_get_screen_index(kgem), X_ERROR,
4320 "Detected a hung GPU, disabling acceleration.\n");
4321 __kgem_set_wedged(kgem);
4322 kgem->need_throttle = false;
4323 }
4324 }
4325
4326 int kgem_is_wedged(struct kgem *kgem)
4327 {
4328 return __kgem_throttle(kgem, true);
4329 }
4330
4331 static void kgem_purge_cache(struct kgem *kgem)
4332 {
4333 struct kgem_bo *bo, *next;
4334 int i;
4335
4336 for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++) {
4337 list_for_each_entry_safe(bo, next, &kgem->inactive[i], list) {
4338 if (!kgem_bo_is_retained(kgem, bo)) {
4339 DBG(("%s: purging %d\n",
4340 __FUNCTION__, bo->handle));
4341 kgem_bo_free(kgem, bo);
4342 }
4343 }
4344 }
4345
4346 kgem->need_purge = false;
4347 }
4348
4349 void kgem_clean_scanout_cache(struct kgem *kgem)
4350 {
4351 while (!list_is_empty(&kgem->scanout)) {
4352 struct kgem_bo *bo;
4353
4354 bo = list_first_entry(&kgem->scanout, struct kgem_bo, list);
4355
4356 assert(bo->scanout);
4357 assert(!bo->refcnt);
4358 assert(!bo->prime);
4359 assert(bo->proxy == NULL);
4360
4361 if (bo->exec || __kgem_busy(kgem, bo->handle))
4362 break;
4363
4364 DBG(("%s: handle=%d, fb=%d (reusable=%d)\n",
4365 __FUNCTION__, bo->handle, bo->delta, bo->reusable));
4366 list_del(&bo->list);
4367
4368 kgem_bo_rmfb(kgem, bo);
4369 bo->scanout = false;
4370
4371 if (!bo->purged) {
4372 bo->reusable = true;
4373 if (kgem->has_llc &&
4374 !gem_set_caching(kgem->fd, bo->handle, SNOOPED))
4375 bo->reusable = false;
4376
4377 }
4378
4379 __kgem_bo_destroy(kgem, bo);
4380 }
4381 }
4382
4383 void kgem_clean_large_cache(struct kgem *kgem)
4384 {
4385 while (!list_is_empty(&kgem->large_inactive)) {
4386 kgem_bo_free(kgem,
4387 list_first_entry(&kgem->large_inactive,
4388 struct kgem_bo, list));
4389
4390 }
4391 }
4392
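/* Time-based eviction of the caches: free the deferred bo/request malloc
 * lists, then trim the scanout, snoop and inactive caches of buffers that
 * have been unused for longer than MAX_INACTIVE_TIME (half that for the
 * snoop cache), while preserving recently used CPU maps a little longer.
 * Returns true if anything was freed.
 */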
4393 bool kgem_expire_cache(struct kgem *kgem)
4394 {
4395 time_t now, expire;
4396 struct kgem_bo *bo;
4397 unsigned int size = 0, count = 0;
4398 bool idle;
4399 unsigned int i;
4400
4401 if (!time(&now))
4402 return false;
4403
4404 while (__kgem_freed_bo) {
4405 bo = __kgem_freed_bo;
4406 __kgem_freed_bo = *(struct kgem_bo **)bo;
4407 free(bo);
4408 }
4409
4410 while (__kgem_freed_request) {
4411 struct kgem_request *rq = __kgem_freed_request;
4412 __kgem_freed_request = *(struct kgem_request **)rq;
4413 free(rq);
4414 }
4415
4416 kgem_clean_large_cache(kgem);
4417 if (__to_sna(kgem)->scrn->vtSema)
4418 kgem_clean_scanout_cache(kgem);
4419
4420 expire = 0;
4421 list_for_each_entry(bo, &kgem->snoop, list) {
4422 if (bo->delta) {
4423 expire = now - MAX_INACTIVE_TIME/2;
4424 break;
4425 }
4426
4427 assert(now);
4428 bo->delta = now;
4429 }
4430 if (expire) {
4431 while (!list_is_empty(&kgem->snoop)) {
4432 bo = list_last_entry(&kgem->snoop, struct kgem_bo, list);
4433
4434 if (bo->delta > expire)
4435 break;
4436
4437 kgem_bo_free(kgem, bo);
4438 }
4439 }
4440 #ifdef DEBUG_MEMORY
4441 {
4442 long snoop_size = 0;
4443 int snoop_count = 0;
4444 list_for_each_entry(bo, &kgem->snoop, list)
4445 snoop_count++, snoop_size += bytes(bo);
4446 DBG(("%s: still allocated %d bo, %ld bytes, in snoop cache\n",
4447 __FUNCTION__, snoop_count, snoop_size));
4448 }
4449 #endif
4450
4451 kgem_retire(kgem);
4452 if (unlikely(kgem->wedged))
4453 kgem_cleanup(kgem);
4454
4455 kgem->expire(kgem);
4456
4457 if (kgem->need_purge)
4458 kgem_purge_cache(kgem);
4459
4460 if (kgem->need_retire)
4461 kgem_retire(kgem);
4462
4463 expire = 0;
4464 idle = true;
4465 for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++) {
4466 idle &= list_is_empty(&kgem->inactive[i]);
4467 list_for_each_entry(bo, &kgem->inactive[i], list) {
4468 if (bo->delta) {
4469 expire = now - MAX_INACTIVE_TIME;
4470 break;
4471 }
4472
4473 assert(now);
4474 kgem_bo_set_purgeable(kgem, bo);
4475 bo->delta = now;
4476 }
4477 }
4478 if (expire == 0) {
4479 DBG(("%s: idle? %d\n", __FUNCTION__, idle));
4480 kgem->need_expire = !idle;
4481 return false;
4482 }
4483
4484 idle = true;
4485 for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++) {
4486 struct list preserve;
4487
4488 list_init(&preserve);
4489 while (!list_is_empty(&kgem->inactive[i])) {
4490 bo = list_last_entry(&kgem->inactive[i],
4491 struct kgem_bo, list);
4492
4493 if (bo->delta > expire) {
4494 idle = false;
4495 break;
4496 }
4497
4498 if (bo->map__cpu && bo->delta + MAP_PRESERVE_TIME > expire) {
4499 idle = false;
4500 list_move_tail(&bo->list, &preserve);
4501 } else {
4502 count++;
4503 size += bytes(bo);
4504 kgem_bo_free(kgem, bo);
4505 DBG(("%s: expiring handle=%d\n",
4506 __FUNCTION__, bo->handle));
4507 }
4508 }
4509 list_splice_tail(&preserve, &kgem->inactive[i]);
4510 }
4511
4512 #ifdef DEBUG_MEMORY
4513 {
4514 long inactive_size = 0;
4515 int inactive_count = 0;
4516 for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++)
4517 list_for_each_entry(bo, &kgem->inactive[i], list)
4518 inactive_count++, inactive_size += bytes(bo);
4519 DBG(("%s: still allocated %d bo, %ld bytes, in inactive cache\n",
4520 __FUNCTION__, inactive_count, inactive_size));
4521 }
4522 #endif
4523
4524 DBG(("%s: expired %d objects, %d bytes, idle? %d\n",
4525 __FUNCTION__, count, size, idle));
4526
4527 kgem->need_expire = !idle;
4528 return count;
4529 (void)count;
4530 (void)size;
4531 }
4532
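/* Last-ditch memory release: wait for the most recent request on each
 * ring, retire everything, then free every bo held in the inactive, large,
 * scanout and snoop caches. Returns false if there was nothing to expire.
 */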
4533 bool kgem_cleanup_cache(struct kgem *kgem)
4534 {
4535 unsigned int i;
4536 int n;
4537
4538 DBG(("%s\n", __FUNCTION__));
4539
4540 /* sync to the most recent request */
4541 for (n = 0; n < ARRAY_SIZE(kgem->requests); n++) {
4542 if (!list_is_empty(&kgem->requests[n])) {
4543 struct kgem_request *rq;
4544
4545 rq = list_last_entry(&kgem->requests[n],
4546 struct kgem_request,
4547 list);
4548
4549 DBG(("%s: sync on cleanup\n", __FUNCTION__));
4550 assert(rq->ring == n);
4551 assert(rq->bo);
4552 assert(RQ(rq->bo->rq) == rq);
4553 kgem_bo_wait(kgem, rq->bo);
4554 }
4555 assert(list_is_empty(&kgem->requests[n]));
4556 }
4557
4558 kgem_retire(kgem);
4559 kgem_cleanup(kgem);
4560
4561 DBG(("%s: need_expire?=%d\n", __FUNCTION__, kgem->need_expire));
4562 if (!kgem->need_expire)
4563 return false;
4564
4565 for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++) {
4566 while (!list_is_empty(&kgem->inactive[i]))
4567 kgem_bo_free(kgem,
4568 list_last_entry(&kgem->inactive[i],
4569 struct kgem_bo, list));
4570 }
4571
4572 kgem_clean_large_cache(kgem);
4573 kgem_clean_scanout_cache(kgem);
4574
4575 while (!list_is_empty(&kgem->snoop))
4576 kgem_bo_free(kgem,
4577 list_last_entry(&kgem->snoop,
4578 struct kgem_bo, list));
4579
4580 while (__kgem_freed_bo) {
4581 struct kgem_bo *bo = __kgem_freed_bo;
4582 __kgem_freed_bo = *(struct kgem_bo **)bo;
4583 free(bo);
4584 }
4585
4586 kgem->need_purge = false;
4587 kgem->need_expire = false;
4588
4589 DBG(("%s: complete\n", __FUNCTION__));
4590 return true;
4591 }
4592
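/* Search the caches for a reusable linear (untiled) bo of at least
 * num_pages. Very large requests go to the large active/inactive lists;
 * otherwise, depending on the CREATE_* flags, the inactive vma cache is
 * preferred for mappable buffers, falling back to the active or inactive
 * bucket lists with a "first" near-miss kept as a last resort.
 */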
4593 static struct kgem_bo *
4594 search_linear_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags)
4595 {
4596 struct kgem_bo *bo, *first = NULL;
4597 bool use_active = (flags & CREATE_INACTIVE) == 0;
4598 struct list *cache;
4599
4600 DBG(("%s: num_pages=%d, flags=%x, use_active? %d, use_large=%d [max=%d]\n",
4601 __FUNCTION__, num_pages, flags, use_active,
4602 num_pages >= MAX_CACHE_SIZE / PAGE_SIZE,
4603 MAX_CACHE_SIZE / PAGE_SIZE));
4604
4605 assert(num_pages);
4606
4607 if (num_pages >= MAX_CACHE_SIZE / PAGE_SIZE) {
4608 DBG(("%s: searching large buffers\n", __FUNCTION__));
4609 retry_large:
4610 cache = use_active ? &kgem->large : &kgem->large_inactive;
4611 list_for_each_entry_safe(bo, first, cache, list) {
4612 assert(bo->refcnt == 0);
4613 assert(bo->reusable);
4614 assert(!bo->scanout);
4615
4616 if (num_pages > num_pages(bo))
4617 goto discard;
4618
4619 if (bo->tiling != I915_TILING_NONE) {
4620 if (use_active && kgem->gen < 040)
4621 goto discard;
4622
4623 if (!kgem_set_tiling(kgem, bo,
4624 I915_TILING_NONE, 0))
4625 goto discard;
4626 }
4627 assert(bo->tiling == I915_TILING_NONE);
4628 bo->pitch = 0;
4629
4630 if (bo->purged && !kgem_bo_clear_purgeable(kgem, bo))
4631 goto discard;
4632
4633 list_del(&bo->list);
4634 if (RQ(bo->rq) == (void *)kgem) {
4635 assert(bo->exec == NULL);
4636 list_del(&bo->request);
4637 }
4638
4639 bo->delta = 0;
4640 assert_tiling(kgem, bo);
4641 return bo;
4642
4643 discard:
4644 if (!use_active)
4645 kgem_bo_free(kgem, bo);
4646 }
4647
4648 if (use_active) {
4649 use_active = false;
4650 goto retry_large;
4651 }
4652
4653 if (__kgem_throttle_retire(kgem, flags))
4654 goto retry_large;
4655
4656 return NULL;
4657 }
4658
4659 if (!use_active && list_is_empty(inactive(kgem, num_pages))) {
4660 DBG(("%s: inactive and cache bucket empty\n",
4661 __FUNCTION__));
4662
4663 if (flags & CREATE_NO_RETIRE) {
4664 DBG(("%s: can not retire\n", __FUNCTION__));
4665 return NULL;
4666 }
4667
4668 if (list_is_empty(active(kgem, num_pages, I915_TILING_NONE))) {
4669 DBG(("%s: active cache bucket empty\n", __FUNCTION__));
4670 return NULL;
4671 }
4672
4673 if (!__kgem_throttle_retire(kgem, flags)) {
4674 DBG(("%s: nothing retired\n", __FUNCTION__));
4675 return NULL;
4676 }
4677
4678 if (list_is_empty(inactive(kgem, num_pages))) {
4679 DBG(("%s: inactive cache bucket still empty after retire\n",
4680 __FUNCTION__));
4681 return NULL;
4682 }
4683 }
4684
4685 if (!use_active && flags & (CREATE_CPU_MAP | CREATE_GTT_MAP)) {
4686 int for_cpu = !!(flags & CREATE_CPU_MAP);
4687 DBG(("%s: searching for inactive %s map\n",
4688 __FUNCTION__, for_cpu ? "cpu" : "gtt"));
4689 cache = &kgem->vma[for_cpu].inactive[cache_bucket(num_pages)];
4690 list_for_each_entry(bo, cache, vma) {
4691 assert(for_cpu ? !!bo->map__cpu : (bo->map__gtt || bo->map__wc));
4692 assert(bucket(bo) == cache_bucket(num_pages));
4693 assert(bo->proxy == NULL);
4694 assert(bo->rq == NULL);
4695 assert(bo->exec == NULL);
4696 assert(!bo->scanout);
4697
4698 if (num_pages > num_pages(bo)) {
4699 DBG(("inactive too small: %d < %d\n",
4700 num_pages(bo), num_pages));
4701 continue;
4702 }
4703
4704 if (bo->purged && !kgem_bo_clear_purgeable(kgem, bo)) {
4705 kgem_bo_free(kgem, bo);
4706 break;
4707 }
4708
4709 if (!kgem_set_tiling(kgem, bo, I915_TILING_NONE, 0)) {
4710 kgem_bo_free(kgem, bo);
4711 break;
4712 }
4713
4714 kgem_bo_remove_from_inactive(kgem, bo);
4715 assert(list_is_empty(&bo->vma));
4716 assert(list_is_empty(&bo->list));
4717
4718 assert(bo->tiling == I915_TILING_NONE);
4719 assert(bo->pitch == 0);
4720 bo->delta = 0;
4721 DBG((" %s: found handle=%d (num_pages=%d) in linear vma cache\n",
4722 __FUNCTION__, bo->handle, num_pages(bo)));
4723 assert(use_active || bo->domain != DOMAIN_GPU);
4724 assert(!bo->needs_flush);
4725 assert_tiling(kgem, bo);
4726 ASSERT_MAYBE_IDLE(kgem, bo->handle, !use_active);
4727 return bo;
4728 }
4729
4730 if (flags & CREATE_EXACT)
4731 return NULL;
4732
4733 if (flags & CREATE_CPU_MAP && !kgem->has_llc)
4734 return NULL;
4735 }
4736
4737 cache = use_active ? active(kgem, num_pages, I915_TILING_NONE) : inactive(kgem, num_pages);
4738 list_for_each_entry(bo, cache, list) {
4739 assert(bo->refcnt == 0);
4740 assert(bo->reusable);
4741 assert(!!bo->rq == !!use_active);
4742 assert(bo->proxy == NULL);
4743 assert(!bo->scanout);
4744
4745 if (num_pages > num_pages(bo))
4746 continue;
4747
4748 if (use_active &&
4749 kgem->gen <= 040 &&
4750 bo->tiling != I915_TILING_NONE)
4751 continue;
4752
4753 if (bo->purged && !kgem_bo_clear_purgeable(kgem, bo)) {
4754 kgem_bo_free(kgem, bo);
4755 break;
4756 }
4757
4758 if (I915_TILING_NONE != bo->tiling) {
4759 if (flags & (CREATE_CPU_MAP | CREATE_GTT_MAP))
4760 continue;
4761
4762 if (first)
4763 continue;
4764
4765 if (!kgem_set_tiling(kgem, bo, I915_TILING_NONE, 0))
4766 continue;
4767 }
4768 assert(bo->tiling == I915_TILING_NONE);
4769 bo->pitch = 0;
4770
4771 if (bo->map__gtt || bo->map__wc || bo->map__cpu) {
4772 if (flags & (CREATE_CPU_MAP | CREATE_GTT_MAP)) {
4773 int for_cpu = !!(flags & CREATE_CPU_MAP);
4774 if (for_cpu ? !!bo->map__cpu : (bo->map__gtt || bo->map__wc)){
4775 if (first != NULL)
4776 break;
4777
4778 first = bo;
4779 continue;
4780 }
4781 } else {
4782 if (first != NULL)
4783 break;
4784
4785 first = bo;
4786 continue;
4787 }
4788 } else {
4789 if (flags & CREATE_GTT_MAP && !kgem_bo_can_map(kgem, bo))
4790 continue;
4791
4792 if (flags & (CREATE_CPU_MAP | CREATE_GTT_MAP)) {
4793 if (first != NULL)
4794 break;
4795
4796 first = bo;
4797 continue;
4798 }
4799 }
4800
4801 if (use_active)
4802 kgem_bo_remove_from_active(kgem, bo);
4803 else
4804 kgem_bo_remove_from_inactive(kgem, bo);
4805
4806 assert(bo->tiling == I915_TILING_NONE);
4807 assert(bo->pitch == 0);
4808 bo->delta = 0;
4809 DBG((" %s: found handle=%d (num_pages=%d) in linear %s cache\n",
4810 __FUNCTION__, bo->handle, num_pages(bo),
4811 use_active ? "active" : "inactive"));
4812 assert(list_is_empty(&bo->list));
4813 assert(list_is_empty(&bo->vma));
4814 assert(use_active || bo->domain != DOMAIN_GPU);
4815 assert(!bo->needs_flush || use_active);
4816 assert_tiling(kgem, bo);
4817 ASSERT_MAYBE_IDLE(kgem, bo->handle, !use_active);
4818 return bo;
4819 }
4820
4821 if (first) {
4822 assert(first->tiling == I915_TILING_NONE);
4823
4824 if (use_active)
4825 kgem_bo_remove_from_active(kgem, first);
4826 else
4827 kgem_bo_remove_from_inactive(kgem, first);
4828
4829 first->pitch = 0;
4830 first->delta = 0;
4831 DBG((" %s: found handle=%d (near-miss) (num_pages=%d) in linear %s cache\n",
4832 __FUNCTION__, first->handle, num_pages(first),
4833 use_active ? "active" : "inactive"));
4834 assert(list_is_empty(&first->list));
4835 assert(list_is_empty(&first->vma));
4836 assert(use_active || first->domain != DOMAIN_GPU);
4837 assert(!first->needs_flush || use_active);
4838 ASSERT_MAYBE_IDLE(kgem, first->handle, !use_active);
4839 return first;
4840 }
4841
4842 return NULL;
4843 }
4844
4845 struct kgem_bo *kgem_create_for_name(struct kgem *kgem, uint32_t name)
4846 {
4847 struct drm_gem_open open_arg;
4848 struct drm_i915_gem_get_tiling tiling;
4849 struct kgem_bo *bo;
4850
4851 DBG(("%s(name=%d)\n", __FUNCTION__, name));
4852
4853 VG_CLEAR(open_arg);
4854 open_arg.name = name;
4855 if (do_ioctl(kgem->fd, DRM_IOCTL_GEM_OPEN, &open_arg))
4856 return NULL;
4857
4858 DBG(("%s: new handle=%d\n", __FUNCTION__, open_arg.handle));
4859
4860 VG_CLEAR(tiling);
4861 tiling.handle = open_arg.handle;
4862 if (do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_GET_TILING, &tiling)) {
4863 DBG(("%s(name=%d) get-tiling failed, ret=%d\n", __FUNCTION__, name, errno));
4864 gem_close(kgem->fd, open_arg.handle);
4865 return NULL;
4866 }
4867
4868 DBG(("%s: handle=%d, tiling=%d\n", __FUNCTION__, tiling.handle, tiling.tiling_mode));
4869
4870 bo = __kgem_bo_alloc(open_arg.handle, open_arg.size / PAGE_SIZE);
4871 if (bo == NULL) {
4872 gem_close(kgem->fd, open_arg.handle);
4873 return NULL;
4874 }
4875
4876 bo->unique_id = kgem_get_unique_id(kgem);
4877 bo->tiling = tiling.tiling_mode;
4878 bo->prime = true;
4879 bo->reusable = false;
4880 kgem_bo_unclean(kgem, bo);
4881
4882 debug_alloc__bo(kgem, bo);
4883 return bo;
4884 }
4885
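/* Import a dma-buf fd via PRIME: convert the fd to a GEM handle, read back
 * its tiling and actual size (via lseek on the fd), and wrap it in a
 * non-reusable kgem_bo. The reported caching level is used to guess whether
 * the foreign object is a scanout or a snooped CPU buffer; snooped buffers
 * with tiling are rejected.
 */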
4886 struct kgem_bo *kgem_create_for_prime(struct kgem *kgem, int name, uint32_t size)
4887 {
4888 #ifdef DRM_IOCTL_PRIME_FD_TO_HANDLE
4889 struct drm_prime_handle args;
4890 struct drm_i915_gem_get_tiling tiling;
4891 struct local_i915_gem_caching caching;
4892 struct kgem_bo *bo;
4893 off_t seek;
4894
4895 DBG(("%s(name=%d)\n", __FUNCTION__, name));
4896
4897 VG_CLEAR(args);
4898 args.fd = name;
4899 args.flags = 0;
4900 if (do_ioctl(kgem->fd, DRM_IOCTL_PRIME_FD_TO_HANDLE, &args)) {
4901 DBG(("%s(name=%d) fd-to-handle failed, ret=%d\n", __FUNCTION__, name, errno));
4902 return NULL;
4903 }
4904
4905 VG_CLEAR(tiling);
4906 tiling.handle = args.handle;
4907 if (do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_GET_TILING, &tiling)) {
4908 DBG(("%s(name=%d) get-tiling failed, ret=%d\n", __FUNCTION__, name, errno));
4909 gem_close(kgem->fd, args.handle);
4910 return NULL;
4911 }
4912
4913 /* Query actual size, overriding specified if available */
4914 seek = lseek(args.fd, 0, SEEK_END);
4915 DBG(("%s: estimated size=%ld, actual=%lld\n",
4916 __FUNCTION__, (long)size, (long long)seek));
4917 if (seek != -1) {
4918 if (size > seek) {
4919 DBG(("%s(name=%d) estimated required size [%d] is larger than actual [%ld]\n", __FUNCTION__, name, size, (long)seek));
4920 gem_close(kgem->fd, args.handle);
4921 return NULL;
4922 }
4923 size = seek;
4924 }
4925
4926 DBG(("%s: new handle=%d, tiling=%d\n", __FUNCTION__,
4927 args.handle, tiling.tiling_mode));
4928 bo = __kgem_bo_alloc(args.handle, NUM_PAGES(size));
4929 if (bo == NULL) {
4930 gem_close(kgem->fd, args.handle);
4931 return NULL;
4932 }
4933
4934 bo->unique_id = kgem_get_unique_id(kgem);
4935 bo->tiling = tiling.tiling_mode;
4936 bo->reusable = false;
4937 bo->prime = true;
4938 bo->domain = DOMAIN_NONE;
4939
4940 /* is this a special bo (e.g. scanout or CPU coherent)? */
4941
4942 VG_CLEAR(caching);
4943 caching.handle = args.handle;
4944 caching.caching = kgem->has_llc;
4945 (void)drmIoctl(kgem->fd, LOCAL_IOCTL_I915_GEM_GET_CACHING, &caching);
4946 DBG(("%s: imported handle=%d has caching %d\n", __FUNCTION__, args.handle, caching.caching));
4947 switch (caching.caching) {
4948 case 0:
4949 if (kgem->has_llc) {
4950 DBG(("%s: interpreting handle=%d as a foreign scanout\n",
4951 __FUNCTION__, args.handle));
4952 bo->scanout = true;
4953 }
4954 break;
4955 case 1:
4956 if (!kgem->has_llc) {
4957 DBG(("%s: interpreting handle=%d as a foreign snooped buffer\n",
4958 __FUNCTION__, args.handle));
4959 bo->snoop = true;
4960 if (bo->tiling) {
4961 DBG(("%s: illegal snooped tiled buffer\n", __FUNCTION__));
4962 kgem_bo_free(kgem, bo);
4963 return NULL;
4964 }
4965 }
4966 break;
4967 case 2:
4968 DBG(("%s: interpreting handle=%d as a foreign scanout\n",
4969 __FUNCTION__, args.handle));
4970 bo->scanout = true;
4971 break;
4972 }
4973
4974 debug_alloc__bo(kgem, bo);
4975 return bo;
4976 #else
4977 return NULL;
4978 #endif
4979 }
4980
4981 int kgem_bo_export_to_prime(struct kgem *kgem, struct kgem_bo *bo)
4982 {
4983 #if defined(DRM_IOCTL_PRIME_HANDLE_TO_FD) && defined(O_CLOEXEC)
4984 struct drm_prime_handle args;
4985
4986 assert(kgem_bo_is_fenced(kgem, bo));
4987
4988 VG_CLEAR(args);
4989 args.handle = bo->handle;
4990 args.flags = O_CLOEXEC;
4991
4992 if (do_ioctl(kgem->fd, DRM_IOCTL_PRIME_HANDLE_TO_FD, &args))
4993 return -1;
4994
4995 bo->reusable = false;
4996 return args.fd;
4997 #else
4998 return -1;
4999 #endif
5000 }
5001
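/* Allocate a linear bo of at least size bytes (rounded up to pages),
 * preferring an inactive cache hit unless CREATE_UNCACHED is set;
 * CREATE_CACHED requests return NULL on a cache miss rather than
 * allocating fresh memory.
 */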
5002 struct kgem_bo *kgem_create_linear(struct kgem *kgem, int size, unsigned flags)
5003 {
5004 struct kgem_bo *bo;
5005 uint32_t handle;
5006
5007 DBG(("%s(%d)\n", __FUNCTION__, size));
5008 assert(size);
5009
5010 if (flags & CREATE_GTT_MAP && kgem->has_llc) {
5011 flags &= ~CREATE_GTT_MAP;
5012 flags |= CREATE_CPU_MAP;
5013 }
5014
5015 size = NUM_PAGES(size);
5016 if ((flags & CREATE_UNCACHED) == 0) {
5017 bo = search_linear_cache(kgem, size, CREATE_INACTIVE | flags);
5018 if (bo) {
5019 assert(!bo->purged);
5020 assert(!bo->delta);
5021 assert(bo->domain != DOMAIN_GPU);
5022 ASSERT_IDLE(kgem, bo->handle);
5023 bo->refcnt = 1;
5024 return bo;
5025 }
5026
5027 if (flags & CREATE_CACHED)
5028 return NULL;
5029 }
5030
5031 handle = gem_create(kgem->fd, size);
5032 if (handle == 0)
5033 return NULL;
5034
5035 DBG(("%s: new handle=%d, num_pages=%d\n", __FUNCTION__, handle, size));
5036 bo = __kgem_bo_alloc(handle, size);
5037 if (bo == NULL) {
5038 gem_close(kgem->fd, handle);
5039 return NULL;
5040 }
5041
5042 debug_alloc__bo(kgem, bo);
5043 return bo;
5044 }
5045
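/* Heuristics for picking a tiling mode given the surface dimensions and
 * the generation's fence/pitch limits. A negative result (or a negative
 * tiling passed in) denotes a mandatory tiling the caller must honour
 * exactly; otherwise the suggestion may be relaxed to X or NONE for
 * surfaces that are too small, too thin or too short to benefit.
 */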
5046 int kgem_choose_tiling(struct kgem *kgem, int tiling, int width, int height, int bpp)
5047 {
5048 if (DBG_NO_TILING)
5049 return tiling < 0 ? tiling : I915_TILING_NONE;
5050
5051 if (kgem->gen < 040) {
5052 if (tiling && width * bpp > 8192 * 8) {
5053 DBG(("%s: pitch too large for tiling [%d]\n",
5054 __FUNCTION__, width*bpp/8));
5055 tiling = I915_TILING_NONE;
5056 goto done;
5057 }
5058 } else {
5059 if (width*bpp > (MAXSHORT-512) * 8) {
5060 if (tiling > 0)
5061 tiling = -tiling;
5062 else if (tiling == 0)
5063 tiling = -I915_TILING_X;
5064 DBG(("%s: large pitch [%d], forcing TILING [%d]\n",
5065 __FUNCTION__, width*bpp/8, tiling));
5066 } else if (tiling && (width|height) > 8192) {
5067 DBG(("%s: large tiled buffer [%dx%d], forcing TILING_X\n",
5068 __FUNCTION__, width, height));
5069 tiling = -I915_TILING_X;
5070 }
5071
5072 /* fences limited to 128k (256k on ivb) */
5073 assert(width * bpp <= 128 * 1024 * 8);
5074 }
5075
5076 if (tiling < 0)
5077 return tiling;
5078
5079 if (tiling == I915_TILING_Y && !kgem->can_render_y)
5080 tiling = I915_TILING_X;
5081
5082 if (tiling && (height == 1 || width == 1)) {
5083 DBG(("%s: disabling tiling [%dx%d] for single row/col\n",
5084 __FUNCTION__,width, height));
5085 tiling = I915_TILING_NONE;
5086 goto done;
5087 }
5088 if (tiling == I915_TILING_Y && height <= 16) {
5089 DBG(("%s: too short [%d] for TILING_Y\n",
5090 __FUNCTION__,height));
5091 tiling = I915_TILING_X;
5092 }
5093 if (tiling && width * bpp > 8 * (4096 - 64)) {
5094 DBG(("%s: TLB miss between lines %dx%d (pitch=%d), forcing tiling %d\n",
5095 __FUNCTION__,
5096 width, height, width*bpp/8,
5097 tiling));
5098 return -tiling;
5099 }
5100 if (tiling == I915_TILING_X && height < 4) {
5101 DBG(("%s: too short [%d] for TILING_X\n",
5102 __FUNCTION__, height));
5103 tiling = I915_TILING_NONE;
5104 goto done;
5105 }
5106
5107 if (tiling == I915_TILING_X && width * bpp <= 8*512) {
5108 DBG(("%s: too thin [width %d, %d bpp] for TILING_X\n",
5109 __FUNCTION__, width, bpp));
5110 tiling = I915_TILING_NONE;
5111 goto done;
5112 }
5113 if (tiling == I915_TILING_Y && width * bpp < 8*128) {
5114 DBG(("%s: too thin [%d] for TILING_Y\n",
5115 __FUNCTION__, width));
5116 tiling = I915_TILING_NONE;
5117 goto done;
5118 }
5119
5120 if (tiling && ALIGN(height, 2) * ALIGN(width*bpp, 8*64) <= 4096 * 8) {
5121 DBG(("%s: too small [%d bytes] for TILING_%c\n", __FUNCTION__,
5122 ALIGN(height, 2) * ALIGN(width*bpp, 8*64) / 8,
5123 tiling == I915_TILING_X ? 'X' : 'Y'));
5124 tiling = I915_TILING_NONE;
5125 goto done;
5126 }
5127
5128 if (tiling && width * bpp >= 8 * 4096 / 2) {
5129 DBG(("%s: TLB near-miss between lines %dx%d (pitch=%d), forcing tiling %d\n",
5130 __FUNCTION__,
5131 width, height, width*bpp/8,
5132 tiling));
5133 return -tiling;
5134 }
5135
5136 done:
5137 DBG(("%s: %dx%d -> %d\n", __FUNCTION__, width, height, tiling));
5138 return tiling;
5139 }
5140
5141 static int bits_per_pixel(int depth)
5142 {
5143 switch (depth) {
5144 case 8: return 8;
5145 case 15:
5146 case 16: return 16;
5147 case 24:
5148 case 30:
5149 case 32: return 32;
5150 default: return 0;
5151 }
5152 }
5153
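/* Report which placements are possible for a width x height x depth pixmap
 * as a mask of KGEM_CAN_CREATE_* flags: CPU and GPU bos, GTT mappings,
 * tiled surfaces, and whether the object is large enough to need special
 * handling. Returns 0 if the depth or size cannot be handled at all.
 */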
5154 unsigned kgem_can_create_2d(struct kgem *kgem,
5155 int width, int height, int depth)
5156 {
5157 uint32_t pitch, size;
5158 unsigned flags = 0;
5159 int tiling;
5160 int bpp;
5161
5162 DBG(("%s: %dx%d @ %d\n", __FUNCTION__, width, height, depth));
5163
5164 bpp = bits_per_pixel(depth);
5165 if (bpp == 0) {
5166 DBG(("%s: unhandled depth %d\n", __FUNCTION__, depth));
5167 return 0;
5168 }
5169
5170 if (width > MAXSHORT || height > MAXSHORT) {
5171 DBG(("%s: unhandled size %dx%d\n",
5172 __FUNCTION__, width, height));
5173 return 0;
5174 }
5175
5176 size = kgem_surface_size(kgem, false, 0,
5177 width, height, bpp,
5178 I915_TILING_NONE, &pitch);
5179 DBG(("%s: untiled size=%d\n", __FUNCTION__, size));
5180 if (size > 0) {
5181 if (size <= kgem->max_cpu_size)
5182 flags |= KGEM_CAN_CREATE_CPU;
5183 if (size > 4096 && size <= kgem->max_gpu_size)
5184 flags |= KGEM_CAN_CREATE_GPU;
5185 if (size <= PAGE_SIZE*kgem->aperture_mappable/4 || kgem->has_wc_mmap)
5186 flags |= KGEM_CAN_CREATE_GTT;
5187 if (size > kgem->large_object_size)
5188 flags |= KGEM_CAN_CREATE_LARGE;
5189 if (size > kgem->max_object_size) {
5190 DBG(("%s: too large (untiled) %d > %d\n",
5191 __FUNCTION__, size, kgem->max_object_size));
5192 return 0;
5193 }
5194 }
5195
5196 tiling = kgem_choose_tiling(kgem, I915_TILING_X,
5197 width, height, bpp);
5198 if (tiling != I915_TILING_NONE) {
5199 size = kgem_surface_size(kgem, false, 0,
5200 width, height, bpp, tiling,
5201 &pitch);
5202 DBG(("%s: tiled[%d] size=%d\n", __FUNCTION__, tiling, size));
5203 if (size > 0 && size <= kgem->max_gpu_size)
5204 flags |= KGEM_CAN_CREATE_GPU | KGEM_CAN_CREATE_TILED;
5205 if (size > 0 && size <= PAGE_SIZE*kgem->aperture_mappable/4)
5206 flags |= KGEM_CAN_CREATE_GTT;
5207 if (size > PAGE_SIZE*kgem->aperture_mappable/4)
5208 flags &= ~KGEM_CAN_CREATE_GTT;
5209 if (size > kgem->large_object_size)
5210 flags |= KGEM_CAN_CREATE_LARGE;
5211 if (size > kgem->max_object_size) {
5212 DBG(("%s: too large (tiled) %d > %d\n",
5213 __FUNCTION__, size, kgem->max_object_size));
5214 return 0;
5215 }
5216 if (kgem->gen < 040) {
5217 int fence_size = 1024 * 1024;
5218 while (fence_size < size)
5219 fence_size <<= 1;
5220 if (fence_size > kgem->max_gpu_size)
5221 flags &= ~(KGEM_CAN_CREATE_GPU | KGEM_CAN_CREATE_TILED);
5222 if (fence_size > PAGE_SIZE*kgem->aperture_fenceable/4)
5223 flags &= ~KGEM_CAN_CREATE_GTT;
5224 }
5225 }
5226
5227 return flags;
5228 }
5229
5230 inline int kgem_bo_fenced_size(struct kgem *kgem, struct kgem_bo *bo)
5231 {
5232 unsigned int size;
5233
5234 assert(bo->tiling);
5235 assert_tiling(kgem, bo);
5236 assert(kgem->gen < 040);
5237
5238 if (kgem->gen < 030)
5239 size = 512 * 1024 / PAGE_SIZE;
5240 else
5241 size = 1024 * 1024 / PAGE_SIZE;
5242 while (size < num_pages(bo))
5243 size <<= 1;
5244
5245 return size;
5246 }
5247
5248 static struct kgem_bo *
5249 __kgem_bo_create_as_display(struct kgem *kgem, int size, int tiling, int pitch)
5250 {
5251 struct local_i915_gem_create2 args;
5252 struct kgem_bo *bo;
5253
5254 if (!kgem->has_create2)
5255 return NULL;
5256
5257 memset(&args, 0, sizeof(args));
5258 args.size = size * PAGE_SIZE;
5259 args.placement = LOCAL_I915_CREATE_PLACEMENT_STOLEN;
5260 args.caching = DISPLAY;
5261 args.tiling_mode = tiling;
5262 args.stride = pitch;
5263
5264 if (do_ioctl(kgem->fd, LOCAL_IOCTL_I915_GEM_CREATE2, &args)) {
5265 args.placement = LOCAL_I915_CREATE_PLACEMENT_SYSTEM;
5266 if (do_ioctl(kgem->fd, LOCAL_IOCTL_I915_GEM_CREATE2, &args))
5267 return NULL;
5268 }
5269
5270 bo = __kgem_bo_alloc(args.handle, size);
5271 if (bo == NULL) {
5272 gem_close(kgem->fd, args.handle);
5273 return NULL;
5274 }
5275
5276 bo->unique_id = kgem_get_unique_id(kgem);
5277 bo->tiling = tiling;
5278 bo->pitch = pitch;
5279 if (args.placement == LOCAL_I915_CREATE_PLACEMENT_STOLEN) {
5280 bo->purged = true; /* for asserts against CPU access */
5281 }
5282 bo->reusable = false; /* so that unclaimed scanouts are freed */
5283 bo->domain = DOMAIN_NONE;
5284
5285 if (__kgem_busy(kgem, bo->handle)) {
5286 assert(bo->exec == NULL);
5287 list_add(&bo->request, &kgem->flushing);
5288 bo->rq = (void *)kgem;
5289 kgem->need_retire = true;
5290 }
5291
5292 assert_tiling(kgem, bo);
5293 debug_alloc__bo(kgem, bo);
5294
5295 return bo;
5296 }
5297
5298 static void __kgem_bo_make_scanout(struct kgem *kgem,
5299 struct kgem_bo *bo,
5300 int width, int height)
5301 {
5302 ScrnInfoPtr scrn = __to_sna(kgem)->scrn;
5303 struct drm_mode_fb_cmd arg;
5304
5305 assert(bo->proxy == NULL);
5306
5307 if (!scrn->vtSema)
5308 return;
5309
5310 DBG(("%s: create fb %dx%d@%d/%d\n",
5311 __FUNCTION__, width, height, scrn->depth, scrn->bitsPerPixel));
5312
5313 VG_CLEAR(arg);
5314 arg.width = width;
5315 arg.height = height;
5316 arg.pitch = bo->pitch;
5317 arg.bpp = scrn->bitsPerPixel;
5318 arg.depth = scrn->depth;
5319 arg.handle = bo->handle;
5320
5321 /* First move the scanout out of cached memory */
5322 if (kgem->has_llc) {
5323 if (!gem_set_caching(kgem->fd, bo->handle, DISPLAY) &&
5324 !gem_set_caching(kgem->fd, bo->handle, UNCACHED))
5325 return;
5326 }
5327
5328 bo->scanout = true;
5329
5330 /* Then pre-emptively move the object into the mappable
5331 * portion to avoid rebinding later when busy.
5332 */
5333 if (bo->map__gtt == NULL)
5334 bo->map__gtt = __kgem_bo_map__gtt(kgem, bo);
5335 if (bo->map__gtt) {
5336 if (sigtrap_get() == 0) {
5337 *(uint32_t *)bo->map__gtt = 0;
5338 sigtrap_put();
5339 }
5340 bo->domain = DOMAIN_GTT;
5341 }
5342
5343 if (do_ioctl(kgem->fd, DRM_IOCTL_MODE_ADDFB, &arg) == 0) {
5344 DBG(("%s: attached fb=%d to handle=%d\n",
5345 __FUNCTION__, arg.fb_id, arg.handle));
5346 bo->delta = arg.fb_id;
5347 }
5348 }
5349
5350 static bool tiling_changed(struct kgem_bo *bo, int tiling, int pitch)
5351 {
5352 if (tiling != bo->tiling)
5353 return true;
5354
5355 return tiling != I915_TILING_NONE && pitch != bo->pitch;
5356 }
5357
5358 static void set_gpu_tiling(struct kgem *kgem,
5359 struct kgem_bo *bo,
5360 int tiling, int pitch)
5361 {
5362 DBG(("%s: handle=%d, tiling=%d, pitch=%d\n",
5363 __FUNCTION__, bo->handle, tiling, pitch));
5364
5365 if (tiling_changed(bo, tiling, pitch) && bo->map__gtt) {
5366 if (!list_is_empty(&bo->vma)) {
5367 list_del(&bo->vma);
5368 kgem->vma[0].count--;
5369 }
5370 munmap(bo->map__gtt, bytes(bo));
5371 bo->map__gtt = NULL;
5372 }
5373
5374 bo->tiling = tiling;
5375 bo->pitch = pitch;
5376 }
5377
5378 bool kgem_bo_is_fenced(struct kgem *kgem, struct kgem_bo *bo)
5379 {
5380 struct drm_i915_gem_get_tiling tiling;
5381
5382 assert(kgem);
5383 assert(bo);
5384
5385 VG_CLEAR(tiling);
5386 tiling.handle = bo->handle;
5387 tiling.tiling_mode = bo->tiling;
5388 (void)do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_GET_TILING, &tiling);
5389 return tiling.tiling_mode == bo->tiling; /* assume pitch is fine! */
5390 }
5391
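/* The main 2D allocator. In rough order of preference it reuses an idle
 * scanout, creates a display (stolen) buffer for scanouts, then scans the
 * mappable vma cache, the active caches (exact bucket first, then nearby
 * buckets and near-miss tilings) and finally the inactive caches, before
 * falling back to creating a new object via the create path further below.
 */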
5392 struct kgem_bo *kgem_create_2d(struct kgem *kgem,
5393 int width,
5394 int height,
5395 int bpp,
5396 int tiling,
5397 uint32_t flags)
5398 {
5399 struct list *cache;
5400 struct kgem_bo *bo;
5401 uint32_t pitch, tiled_height, size;
5402 uint32_t handle;
5403 int i, bucket, retry;
5404 bool exact = flags & (CREATE_EXACT | CREATE_SCANOUT);
5405
5406 if (tiling < 0)
5407 exact = true, tiling = -tiling;
5408
5409 DBG(("%s(%dx%d, bpp=%d, tiling=%d, exact=%d, inactive=%d, cpu-mapping=%d, gtt-mapping=%d, scanout?=%d, prime?=%d, temp?=%d)\n", __FUNCTION__,
5410 width, height, bpp, tiling, exact,
5411 !!(flags & CREATE_INACTIVE),
5412 !!(flags & CREATE_CPU_MAP),
5413 !!(flags & CREATE_GTT_MAP),
5414 !!(flags & CREATE_SCANOUT),
5415 !!(flags & CREATE_PRIME),
5416 !!(flags & CREATE_TEMPORARY)));
5417
5418 size = kgem_surface_size(kgem, kgem->has_relaxed_fencing, flags,
5419 width, height, bpp, tiling, &pitch);
5420 if (size == 0) {
5421 DBG(("%s: invalid surface size (too large?)\n", __FUNCTION__));
5422 return NULL;
5423 }
5424
5425 size /= PAGE_SIZE;
5426 bucket = cache_bucket(size);
5427
5428 if (flags & CREATE_SCANOUT) {
5429 struct kgem_bo *last = NULL;
5430
5431 list_for_each_entry_reverse(bo, &kgem->scanout, list) {
5432 assert(bo->scanout);
5433 assert(!bo->flush);
5434 assert(!bo->refcnt);
5435 assert_tiling(kgem, bo);
5436
5437 if (size > num_pages(bo) || num_pages(bo) > 2*size)
5438 continue;
5439
5440 if (bo->tiling != tiling || bo->pitch != pitch)
5441 /* No tiling/pitch without recreating fb */
5442 continue;
5443
5444 if (bo->delta && !check_scanout_size(kgem, bo, width, height))
5445 kgem_bo_rmfb(kgem, bo);
5446
5447 if (flags & CREATE_INACTIVE && bo->rq) {
5448 last = bo;
5449 continue;
5450 }
5451
5452 list_del(&bo->list);
5453
5454 bo->unique_id = kgem_get_unique_id(kgem);
5455 DBG((" 1:from scanout: pitch=%d, tiling=%d, handle=%d, id=%d\n",
5456 bo->pitch, bo->tiling, bo->handle, bo->unique_id));
5457 assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo));
5458 assert_tiling(kgem, bo);
5459 bo->refcnt = 1;
5460 return bo;
5461 }
5462
5463 if (last) {
5464 list_del(&last->list);
5465
5466 last->unique_id = kgem_get_unique_id(kgem);
5467 DBG((" 1:from scanout: pitch=%d, tiling=%d, handle=%d, id=%d\n",
5468 last->pitch, last->tiling, last->handle, last->unique_id));
5469 assert(last->pitch*kgem_aligned_height(kgem, height, last->tiling) <= kgem_bo_size(last));
5470 assert_tiling(kgem, last);
5471 last->refcnt = 1;
5472 return last;
5473 }
5474
5475 if (__to_sna(kgem)->scrn->vtSema) {
5476 ScrnInfoPtr scrn = __to_sna(kgem)->scrn;
5477
5478 list_for_each_entry_reverse(bo, &kgem->scanout, list) {
5479 struct drm_mode_fb_cmd arg;
5480
5481 assert(bo->scanout);
5482 assert(!bo->refcnt);
5483
5484 if (size > num_pages(bo) || num_pages(bo) > 2*size)
5485 continue;
5486
5487 if (flags & CREATE_INACTIVE && bo->rq)
5488 continue;
5489
5490 list_del(&bo->list);
5491
5492 if (bo->tiling != tiling || bo->pitch != pitch) {
5493 if (bo->delta) {
5494 kgem_bo_rmfb(kgem, bo);
5495 bo->delta = 0;
5496 }
5497
5498 if (!kgem_set_tiling(kgem, bo,
5499 tiling, pitch)) {
5500 bo->scanout = false;
5501 __kgem_bo_destroy(kgem, bo);
5502 break;
5503 }
5504 }
5505
5506 VG_CLEAR(arg);
5507 arg.width = width;
5508 arg.height = height;
5509 arg.pitch = bo->pitch;
5510 arg.bpp = scrn->bitsPerPixel;
5511 arg.depth = scrn->depth;
5512 arg.handle = bo->handle;
5513
5514 if (do_ioctl(kgem->fd, DRM_IOCTL_MODE_ADDFB, &arg)) {
5515 bo->scanout = false;
5516 __kgem_bo_destroy(kgem, bo);
5517 break;
5518 }
5519
5520 bo->delta = arg.fb_id;
5521 bo->unique_id = kgem_get_unique_id(kgem);
5522
5523 DBG((" 2:from scanout: pitch=%d, tiling=%d, handle=%d, id=%d\n",
5524 bo->pitch, bo->tiling, bo->handle, bo->unique_id));
5525 assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo));
5526 assert_tiling(kgem, bo);
5527 bo->refcnt = 1;
5528 return bo;
5529 }
5530 }
5531
5532 if (flags & CREATE_CACHED)
5533 return NULL;
5534
5535 bo = __kgem_bo_create_as_display(kgem, size, tiling, pitch);
5536 if (bo)
5537 return bo;
5538
5539 flags |= CREATE_INACTIVE;
5540 }
5541
5542 if (bucket >= NUM_CACHE_BUCKETS) {
5543 DBG(("%s: large bo num pages=%d, bucket=%d\n",
5544 __FUNCTION__, size, bucket));
5545
5546 if (flags & CREATE_INACTIVE)
5547 goto large_inactive;
5548
5549 tiled_height = kgem_aligned_height(kgem, height, tiling);
5550
5551 list_for_each_entry(bo, &kgem->large, list) {
5552 assert(!bo->purged);
5553 assert(!bo->scanout);
5554 assert(bo->refcnt == 0);
5555 assert(bo->reusable);
5556 assert_tiling(kgem, bo);
5557
5558 if (kgem->gen < 040) {
5559 if (bo->pitch < pitch) {
5560 DBG(("tiled and pitch too small: tiling=%d, (want %d), pitch=%d, need %d\n",
5561 bo->tiling, tiling,
5562 bo->pitch, pitch));
5563 continue;
5564 }
5565
5566 if (bo->pitch * tiled_height > bytes(bo))
5567 continue;
5568 } else {
5569 if (num_pages(bo) < size)
5570 continue;
5571
5572 if (!kgem_set_tiling(kgem, bo, tiling, pitch)) {
5573 if (exact) {
5574 DBG(("tiled and pitch not exact: tiling=%d, (want %d), pitch=%d, need %d\n",
5575 bo->tiling, tiling,
5576 bo->pitch, pitch));
5577 continue;
5578 }
5579
5580 set_gpu_tiling(kgem, bo, tiling, pitch);
5581 }
5582 }
5583
5584 kgem_bo_remove_from_active(kgem, bo);
5585
5586 bo->unique_id = kgem_get_unique_id(kgem);
5587 bo->delta = 0;
5588 DBG((" 1:from active: pitch=%d, tiling=%d, handle=%d, id=%d\n",
5589 bo->pitch, bo->tiling, bo->handle, bo->unique_id));
5590 assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo));
5591 assert_tiling(kgem, bo);
5592 bo->refcnt = 1;
5593 return bo;
5594 }
5595
5596 large_inactive:
5597 __kgem_throttle_retire(kgem, flags);
5598 list_for_each_entry(bo, &kgem->large_inactive, list) {
5599 assert(bo->refcnt == 0);
5600 assert(bo->reusable);
5601 assert(!bo->scanout);
5602 assert_tiling(kgem, bo);
5603
5604 if (size > num_pages(bo))
5605 continue;
5606
5607 if (!kgem_set_tiling(kgem, bo, tiling, pitch)) {
5608 if (kgem->gen >= 040 && !exact)
5609 set_gpu_tiling(kgem, bo, tiling, pitch);
5610 else
5611 continue;
5612 }
5613
5614 if (bo->purged && !kgem_bo_clear_purgeable(kgem, bo)) {
5615 kgem_bo_free(kgem, bo);
5616 break;
5617 }
5618
5619 list_del(&bo->list);
5620
5621 assert(bo->domain != DOMAIN_GPU);
5622 bo->unique_id = kgem_get_unique_id(kgem);
5623 bo->delta = 0;
5624 DBG((" 1:from large inactive: pitch=%d, tiling=%d, handle=%d, id=%d\n",
5625 bo->pitch, bo->tiling, bo->handle, bo->unique_id));
5626 assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo));
5627 assert_tiling(kgem, bo);
5628 bo->refcnt = 1;
5629
5630 if (flags & CREATE_SCANOUT)
5631 __kgem_bo_make_scanout(kgem, bo, width, height);
5632
5633 return bo;
5634 }
5635
5636 goto create;
5637 }
5638
5639 if (flags & (CREATE_CPU_MAP | CREATE_GTT_MAP)) {
5640 int for_cpu = !!(flags & CREATE_CPU_MAP);
5641 if (kgem->has_llc && tiling == I915_TILING_NONE)
5642 for_cpu = 1;
5643 /* We presume that we will need to upload to this bo,
5644 * and so would prefer to have an active VMA.
5645 */
5646 cache = &kgem->vma[for_cpu].inactive[bucket];
5647 do {
5648 list_for_each_entry(bo, cache, vma) {
5649 assert(bucket(bo) == bucket);
5650 assert(bo->refcnt == 0);
5651 assert(!bo->scanout);
5652 assert(for_cpu ? !!bo->map__cpu : (bo->map__gtt || bo->map__wc));
5653 assert(bo->rq == NULL);
5654 assert(bo->exec == NULL);
5655 assert(list_is_empty(&bo->request));
5656 assert(bo->flush == false);
5657 assert_tiling(kgem, bo);
5658
5659 if (size > num_pages(bo)) {
5660 DBG(("inactive too small: %d < %d\n",
5661 num_pages(bo), size));
5662 continue;
5663 }
5664
5665 if (flags & UNCACHED && !kgem->has_llc && bo->domain != DOMAIN_CPU)
5666 continue;
5667
5668 if (bo->tiling != tiling ||
5669 (tiling != I915_TILING_NONE && bo->pitch != pitch)) {
5670 if (bo->map__gtt ||
5671 !kgem_set_tiling(kgem, bo,
5672 tiling, pitch)) {
5673 DBG(("inactive GTT vma with wrong tiling: %d < %d\n",
5674 bo->tiling, tiling));
5675 kgem_bo_free(kgem, bo);
5676 break;
5677 }
5678 }
5679
5680 if (bo->purged && !kgem_bo_clear_purgeable(kgem, bo)) {
5681 kgem_bo_free(kgem, bo);
5682 break;
5683 }
5684
5685 if (tiling == I915_TILING_NONE)
5686 bo->pitch = pitch;
5687
5688 assert(bo->tiling == tiling);
5689 assert(bo->pitch >= pitch);
5690 bo->delta = 0;
5691 bo->unique_id = kgem_get_unique_id(kgem);
5692
5693 kgem_bo_remove_from_inactive(kgem, bo);
5694 assert(list_is_empty(&bo->list));
5695 assert(list_is_empty(&bo->vma));
5696
5697 DBG((" from inactive vma: pitch=%d, tiling=%d: handle=%d, id=%d\n",
5698 bo->pitch, bo->tiling, bo->handle, bo->unique_id));
5699 assert(bo->reusable);
5700 assert(bo->domain != DOMAIN_GPU);
5701 ASSERT_IDLE(kgem, bo->handle);
5702 assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo));
5703 assert_tiling(kgem, bo);
5704 bo->refcnt = 1;
5705 return bo;
5706 }
5707 } while (!list_is_empty(cache) &&
5708 __kgem_throttle_retire(kgem, flags));
5709
5710 if (flags & CREATE_CPU_MAP && !kgem->has_llc) {
5711 if (list_is_empty(&kgem->active[bucket][tiling]) &&
5712 list_is_empty(&kgem->inactive[bucket]))
5713 flags &= ~CREATE_CACHED;
5714
5715 goto create;
5716 }
5717 }
5718
5719 if (flags & CREATE_INACTIVE)
5720 goto skip_active_search;
5721
5722 /* Best active match */
5723 retry = NUM_CACHE_BUCKETS - bucket;
5724 if (retry > 3 && (flags & CREATE_TEMPORARY) == 0)
5725 retry = 3;
5726 search_active:
5727 assert(bucket < NUM_CACHE_BUCKETS);
5728 cache = &kgem->active[bucket][tiling];
5729 if (tiling) {
5730 tiled_height = kgem_aligned_height(kgem, height, tiling);
5731 list_for_each_entry(bo, cache, list) {
5732 assert(!bo->purged);
5733 assert(bo->refcnt == 0);
5734 assert(bucket(bo) == bucket);
5735 assert(bo->reusable);
5736 assert(bo->tiling == tiling);
5737 assert(bo->flush == false);
5738 assert(!bo->scanout);
5739 assert_tiling(kgem, bo);
5740
5741 if (kgem->gen < 040) {
5742 if (bo->pitch < pitch) {
5743 DBG(("tiled and pitch too small: tiling=%d, (want %d), pitch=%d, need %d\n",
5744 bo->tiling, tiling,
5745 bo->pitch, pitch));
5746 continue;
5747 }
5748
5749 if (bo->pitch * tiled_height > bytes(bo))
5750 continue;
5751 } else {
5752 if (num_pages(bo) < size)
5753 continue;
5754
5755 if (!kgem_set_tiling(kgem, bo, tiling, pitch)) {
5756 if (exact) {
5757 DBG(("tiled and pitch not exact: tiling=%d, (want %d), pitch=%d, need %d\n",
5758 bo->tiling, tiling,
5759 bo->pitch, pitch));
5760 continue;
5761 }
5762
5763 set_gpu_tiling(kgem, bo, tiling, pitch);
5764 }
5765 }
5766 assert(bo->tiling == tiling);
5767 assert(bo->pitch >= pitch);
5768
5769 kgem_bo_remove_from_active(kgem, bo);
5770
5771 bo->unique_id = kgem_get_unique_id(kgem);
5772 bo->delta = 0;
5773 DBG((" 1:from active: pitch=%d, tiling=%d, handle=%d, id=%d\n",
5774 bo->pitch, bo->tiling, bo->handle, bo->unique_id));
5775 assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo));
5776 assert_tiling(kgem, bo);
5777 bo->refcnt = 1;
5778 return bo;
5779 }
5780 } else {
5781 list_for_each_entry(bo, cache, list) {
5782 assert(bucket(bo) == bucket);
5783 assert(!bo->purged);
5784 assert(bo->refcnt == 0);
5785 assert(bo->reusable);
5786 assert(!bo->scanout);
5787 assert(bo->tiling == tiling);
5788 assert(bo->flush == false);
5789 assert_tiling(kgem, bo);
5790
5791 if (num_pages(bo) < size)
5792 continue;
5793
5794 kgem_bo_remove_from_active(kgem, bo);
5795
5796 bo->pitch = pitch;
5797 bo->unique_id = kgem_get_unique_id(kgem);
5798 bo->delta = 0;
5799 DBG((" 1:from active: pitch=%d, tiling=%d, handle=%d, id=%d\n",
5800 bo->pitch, bo->tiling, bo->handle, bo->unique_id));
5801 assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo));
5802 assert_tiling(kgem, bo);
5803 bo->refcnt = 1;
5804 return bo;
5805 }
5806 }
5807
5808 if (kgem->gen >= 040) {
5809 for (i = I915_TILING_Y; i >= I915_TILING_NONE; i--) {
5810 cache = &kgem->active[bucket][i];
5811 list_for_each_entry(bo, cache, list) {
5812 assert(!bo->purged);
5813 assert(bo->refcnt == 0);
5814 assert(bo->reusable);
5815 assert(!bo->scanout);
5816 assert(bo->flush == false);
5817 assert_tiling(kgem, bo);
5818
5819 if (num_pages(bo) < size)
5820 continue;
5821
5822 if (!kgem_set_tiling(kgem, bo, tiling, pitch)) {
5823 if (exact || kgem->gen < 040)
5824 continue;
5825
5826 set_gpu_tiling(kgem, bo, tiling, pitch);
5827 }
5828 assert(bo->tiling == tiling);
5829 assert(bo->pitch >= pitch);
5830
5831 kgem_bo_remove_from_active(kgem, bo);
5832
5833 bo->unique_id = kgem_get_unique_id(kgem);
5834 bo->delta = 0;
5835 DBG((" 1:from active: pitch=%d, tiling=%d, handle=%d, id=%d\n",
5836 bo->pitch, bo->tiling, bo->handle, bo->unique_id));
5837 assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo));
5838 assert_tiling(kgem, bo);
5839 bo->refcnt = 1;
5840 return bo;
5841 }
5842 }
5843 } else if (!exact) { /* allow an active near-miss? */
5844 for (i = tiling; i >= I915_TILING_NONE; i--) {
5845 tiled_height = kgem_surface_size(kgem, kgem->has_relaxed_fencing, flags,
5846 width, height, bpp, tiling, &pitch);
5847 cache = active(kgem, tiled_height / PAGE_SIZE, i);
5848 tiled_height = kgem_aligned_height(kgem, height, i);
5849 list_for_each_entry(bo, cache, list) {
5850 assert(!bo->purged);
5851 assert(bo->refcnt == 0);
5852 assert(bo->reusable);
5853 assert(!bo->scanout);
5854 assert(bo->flush == false);
5855 assert_tiling(kgem, bo);
5856
5857 if (bo->tiling) {
5858 if (bo->pitch < pitch) {
5859 DBG(("tiled and pitch too small: tiling=%d, (want %d), pitch=%d, need %d\n",
5860 bo->tiling, tiling,
5861 bo->pitch, pitch));
5862 continue;
5863 }
5864 } else
5865 bo->pitch = pitch;
5866
5867 if (bo->pitch * tiled_height > bytes(bo))
5868 continue;
5869
5870 kgem_bo_remove_from_active(kgem, bo);
5871
5872 bo->unique_id = kgem_get_unique_id(kgem);
5873 bo->delta = 0;
5874 DBG((" 1:from active: pitch=%d, tiling=%d, handle=%d, id=%d\n",
5875 bo->pitch, bo->tiling, bo->handle, bo->unique_id));
5876 assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo));
5877 assert_tiling(kgem, bo);
5878 bo->refcnt = 1;
5879 return bo;
5880 }
5881 }
5882 }
5883
5884 if (--retry) {
5885 bucket++;
5886 goto search_active;
5887 }
5888
5889 skip_active_search:
5890 bucket = cache_bucket(size);
5891 retry = NUM_CACHE_BUCKETS - bucket;
5892 if (retry > 3)
5893 retry = 3;
5894 search_inactive:
5895 /* Now just look for a close match and prefer any currently active */
5896 assert(bucket < NUM_CACHE_BUCKETS);
5897 cache = &kgem->inactive[bucket];
5898 list_for_each_entry(bo, cache, list) {
5899 assert(bucket(bo) == bucket);
5900 assert(bo->reusable);
5901 assert(!bo->scanout);
5902 assert(bo->flush == false);
5903 assert_tiling(kgem, bo);
5904
5905 if (size > num_pages(bo)) {
5906 DBG(("inactive too small: %d < %d\n",
5907 num_pages(bo), size));
5908 continue;
5909 }
5910
5911 if (!kgem_set_tiling(kgem, bo, tiling, pitch)) {
5912 if (exact || kgem->gen < 040) {
5913 kgem_bo_free(kgem, bo);
5914 break;
5915 }
5916
5917 set_gpu_tiling(kgem, bo, tiling, pitch);
5918 }
5919
5920 if (bo->purged && !kgem_bo_clear_purgeable(kgem, bo)) {
5921 kgem_bo_free(kgem, bo);
5922 break;
5923 }
5924
5925 kgem_bo_remove_from_inactive(kgem, bo);
5926 assert(list_is_empty(&bo->list));
5927 assert(list_is_empty(&bo->vma));
5928 assert(bo->tiling == tiling);
5929 assert(bo->pitch >= pitch);
5930
5931 bo->delta = 0;
5932 bo->unique_id = kgem_get_unique_id(kgem);
5933 assert(bo->pitch);
5934 DBG((" from inactive: pitch=%d, tiling=%d: handle=%d, id=%d\n",
5935 bo->pitch, bo->tiling, bo->handle, bo->unique_id));
5936 assert(bo->refcnt == 0);
5937 assert(bo->reusable);
5938 assert((flags & CREATE_INACTIVE) == 0 || bo->domain != DOMAIN_GPU);
5939 ASSERT_MAYBE_IDLE(kgem, bo->handle, flags & CREATE_INACTIVE);
5940 assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo));
5941 assert_tiling(kgem, bo);
5942 bo->refcnt = 1;
5943
5944 if (flags & CREATE_SCANOUT)
5945 __kgem_bo_make_scanout(kgem, bo, width, height);
5946
5947 return bo;
5948 }
5949
5950 if ((flags & CREATE_NO_RETIRE) == 0) {
5951 list_for_each_entry_reverse(bo, &kgem->active[bucket][tiling], list) {
5952 if (bo->exec)
5953 break;
5954
5955 if (size > num_pages(bo))
5956 continue;
5957
5958 if (__kgem_busy(kgem, bo->handle)) {
5959 if (flags & CREATE_NO_THROTTLE)
5960 goto no_retire;
5961
5962 do {
5963 if (!kgem->need_throttle) {
5964 DBG(("%s: not throttling for active handle=%d\n", __FUNCTION__, bo->handle));
5965 goto no_retire;
5966 }
5967
5968 __kgem_throttle(kgem, false);
5969 } while (__kgem_busy(kgem, bo->handle));
5970 }
5971
5972 DBG(("%s: flushed active handle=%d\n", __FUNCTION__, bo->handle));
5973
5974 kgem_bo_remove_from_active(kgem, bo);
5975 __kgem_bo_clear_busy(bo);
5976
5977 if (!kgem_set_tiling(kgem, bo, tiling, pitch)) {
5978 if (exact || kgem->gen < 040)
5979 goto no_retire;
5980
5981 set_gpu_tiling(kgem, bo, tiling, pitch);
5982 }
5983 assert(bo->tiling == tiling);
5984 assert(bo->pitch >= pitch);
5985
5986 bo->unique_id = kgem_get_unique_id(kgem);
5987 bo->delta = 0;
5988 DBG((" 2:from active: pitch=%d, tiling=%d, handle=%d, id=%d\n",
5989 bo->pitch, bo->tiling, bo->handle, bo->unique_id));
5990 assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo));
5991 assert_tiling(kgem, bo);
5992 bo->refcnt = 1;
5993
5994 if (flags & CREATE_SCANOUT)
5995 __kgem_bo_make_scanout(kgem, bo, width, height);
5996
5997 return bo;
5998 }
5999 no_retire:
6000 flags |= CREATE_NO_RETIRE;
6001 }
6002
6003 if (--retry) {
6004 bucket++;
6005 goto search_inactive;
6006 }
6007
6008 create:
6009 if (flags & CREATE_CACHED) {
6010 DBG(("%s: no cached bo found, requested not to create a new bo\n", __FUNCTION__));
6011 return NULL;
6012 }
6013
6014 if (bucket >= NUM_CACHE_BUCKETS)
6015 size = ALIGN(size, 1024);
6016 handle = gem_create(kgem->fd, size);
6017 if (handle == 0) {
6018 DBG(("%s: kernel allocation (gem_create) failure\n", __FUNCTION__));
6019 return NULL;
6020 }
6021
6022 bo = __kgem_bo_alloc(handle, size);
6023 if (!bo) {
6024 DBG(("%s: malloc failed\n", __FUNCTION__));
6025 gem_close(kgem->fd, handle);
6026 return NULL;
6027 }
6028
6029 bo->unique_id = kgem_get_unique_id(kgem);
6030 if (kgem_set_tiling(kgem, bo, tiling, pitch)) {
6031 if (flags & CREATE_SCANOUT)
6032 __kgem_bo_make_scanout(kgem, bo, width, height);
6033 } else {
6034 if (kgem->gen >= 040) {
6035 assert(!kgem->can_fence);
6036 bo->tiling = tiling;
6037 bo->pitch = pitch;
6038 } else {
6039 if (flags & CREATE_EXACT) {
6040 DBG(("%s: failed to set exact tiling (gem_set_tiling)\n", __FUNCTION__));
6041 gem_close(kgem->fd, handle);
6042 free(bo);
6043 return NULL;
6044 }
6045 }
6046 }
6047
6048 assert(bytes(bo) >= bo->pitch * kgem_aligned_height(kgem, height, bo->tiling));
6049 assert_tiling(kgem, bo);
6050
6051 debug_alloc__bo(kgem, bo);
6052
6053 DBG((" new pitch=%d, tiling=%d, handle=%d, id=%d, num_pages=%d [%d], bucket=%d\n",
6054 bo->pitch, bo->tiling, bo->handle, bo->unique_id,
6055 size, num_pages(bo), bucket(bo)));
6056 return bo;
6057 }
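/* Illustrative usage only (a sketch, not taken from this file): a typical
 * caller asks for a tiled GPU surface, e.g.
 *
 *	struct kgem_bo *bo = kgem_create_2d(kgem, width, height, 32,
 *					    I915_TILING_X, CREATE_INACTIVE);
 *
 * The search above reuses cached buffers of a suitable size and tiling
 * wherever possible and only falls back to a fresh gem_create() at the
 * end.
 */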
6058
6059 struct kgem_bo *kgem_create_cpu_2d(struct kgem *kgem,
6060 int width,
6061 int height,
6062 int bpp,
6063 uint32_t flags)
6064 {
6065 struct kgem_bo *bo;
6066 int stride, size;
6067
6068 if (DBG_NO_CPU)
6069 return NULL;
6070
6071 DBG(("%s(%dx%d, bpp=%d)\n", __FUNCTION__, width, height, bpp));
6072
6073 if (kgem->has_llc) {
6074 bo = kgem_create_2d(kgem, width, height, bpp,
6075 I915_TILING_NONE, flags);
6076 if (bo == NULL)
6077 return bo;
6078
6079 assert(bo->tiling == I915_TILING_NONE);
6080 assert_tiling(kgem, bo);
6081
6082 if (kgem_bo_map__cpu(kgem, bo) == NULL) {
6083 kgem_bo_destroy(kgem, bo);
6084 return NULL;
6085 }
6086
6087 return bo;
6088 }
6089
6090 assert(width > 0 && height > 0);
6091 stride = ALIGN(width, 2) * bpp >> 3;
6092 stride = ALIGN(stride, 4);
6093 size = stride * ALIGN(height, 2);
6094 assert(size >= PAGE_SIZE);
6095
6096 DBG(("%s: %dx%d, %d bpp, stride=%d\n",
6097 __FUNCTION__, width, height, bpp, stride));
6098
6099 bo = search_snoop_cache(kgem, NUM_PAGES(size), 0);
6100 if (bo) {
6101 assert(bo->tiling == I915_TILING_NONE);
6102 assert_tiling(kgem, bo);
6103 assert(bo->snoop);
6104 bo->refcnt = 1;
6105 bo->pitch = stride;
6106 bo->unique_id = kgem_get_unique_id(kgem);
6107 return bo;
6108 }
6109
6110 if (kgem->has_caching) {
6111 bo = kgem_create_linear(kgem, size, flags);
6112 if (bo == NULL)
6113 return NULL;
6114
6115 assert(bo->tiling == I915_TILING_NONE);
6116 assert_tiling(kgem, bo);
6117
6118 assert(!__kgem_busy(kgem, bo->handle));
6119 if (!gem_set_caching(kgem->fd, bo->handle, SNOOPED)) {
6120 kgem_bo_destroy(kgem, bo);
6121 return NULL;
6122 }
6123 bo->snoop = true;
6124
6125 if (kgem_bo_map__cpu(kgem, bo) == NULL) {
6126 kgem_bo_destroy(kgem, bo);
6127 return NULL;
6128 }
6129
6130 bo->pitch = stride;
6131 bo->unique_id = kgem_get_unique_id(kgem);
6132 return bo;
6133 }
6134
6135 if (kgem->has_userptr) {
6136 void *ptr;
6137
6138 /* XXX */
6139 //if (posix_memalign(&ptr, 64, ALIGN(size, 64)))
6140 if (posix_memalign(&ptr, PAGE_SIZE, ALIGN(size, PAGE_SIZE)))
6141 return NULL;
6142
6143 bo = kgem_create_map(kgem, ptr, size, false);
6144 if (bo == NULL) {
6145 free(ptr);
6146 return NULL;
6147 }
6148
6149 bo->pitch = stride;
6150 bo->unique_id = kgem_get_unique_id(kgem);
6151 return bo;
6152 }
6153
6154 return NULL;
6155 }
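/* Illustrative usage only (a minimal sketch, not taken from the driver):
 * a caller wanting a CPU-writable staging surface would do something like
 *
 *	struct kgem_bo *bo = kgem_create_cpu_2d(kgem, width, height, 32, 0);
 *	if (bo) {
 *		uint8_t *ptr = kgem_bo_map__cpu(kgem, bo);
 *		if (ptr)
 *			memset(ptr, 0, bo->pitch * height);
 *		kgem_bo_destroy(kgem, bo);
 *	}
 *
 * On LLC hardware this is just a linear bo mapped through the CPU cache;
 * otherwise the snoop cache, gem_set_caching(SNOOPED) or userptr paths
 * above supply the coherent mapping.
 */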
6156
6157 void _kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo)
6158 {
6159 DBG(("%s: handle=%d, proxy? %d\n",
6160 __FUNCTION__, bo->handle, bo->proxy != NULL));
6161
6162 if (bo->proxy) {
6163 assert(!bo->reusable);
6164 kgem_bo_binding_free(kgem, bo);
6165
6166 assert(list_is_empty(&bo->list));
6167 _list_del(&bo->vma);
6168 _list_del(&bo->request);
6169
6170 if (bo->io && bo->domain == DOMAIN_CPU)
6171 _kgem_bo_delete_buffer(kgem, bo);
6172
6173 kgem_bo_unref(kgem, bo->proxy);
6174
6175 if (DBG_NO_MALLOC_CACHE) {
6176 free(bo);
6177 } else {
6178 *(struct kgem_bo **)bo = __kgem_freed_bo;
6179 __kgem_freed_bo = bo;
6180 }
6181 } else
6182 __kgem_bo_destroy(kgem, bo);
6183 }
6184
6185 static void __kgem_flush(struct kgem *kgem, struct kgem_bo *bo)
6186 {
6187 assert(bo->rq);
6188 assert(bo->exec == NULL);
6189 assert(bo->needs_flush);
6190
6191 /* The kernel will emit a flush *and* update its own flushing lists. */
6192 if (!__kgem_busy(kgem, bo->handle))
6193 __kgem_bo_clear_busy(bo);
6194
6195 DBG(("%s: handle=%d, busy?=%d\n",
6196 __FUNCTION__, bo->handle, bo->rq != NULL));
6197 }
6198
6199 void kgem_scanout_flush(struct kgem *kgem, struct kgem_bo *bo)
6200 {
6201 if (!bo->needs_flush && !bo->gtt_dirty)
6202 return;
6203
6204 kgem_bo_submit(kgem, bo);
6205
6206 /* If the kernel fails to emit the flush, then it will be forced when
6207 * we assume direct access. And as the usual failure is EIO, we do
6208 * not actually care.
6209 */
6210 assert(bo->exec == NULL);
6211 if (bo->rq)
6212 __kgem_flush(kgem, bo);
6213
6214 if (bo->scanout && kgem->needs_dirtyfb) {
6215 struct drm_mode_fb_dirty_cmd cmd;
6216 memset(&cmd, 0, sizeof(cmd));
6217 cmd.fb_id = bo->delta;
6218 (void)drmIoctl(kgem->fd, DRM_IOCTL_MODE_DIRTYFB, &cmd);
6219 }
6220
6221 /* Whatever actually happens, we can regard the GTT write domain
6222 * as being flushed.
6223 */
6224 __kgem_bo_clear_dirty(bo);
6225 }
6226
6227 inline static bool nearly_idle(struct kgem *kgem)
6228 {
6229 int ring = kgem->ring == KGEM_BLT;
6230
6231 assert(ring < ARRAY_SIZE(kgem->requests));
6232 if (list_is_singular(&kgem->requests[ring]))
6233 return true;
6234
6235 return __kgem_ring_is_idle(kgem, ring);
6236 }
6237
6238 inline static bool needs_semaphore(struct kgem *kgem, struct kgem_bo *bo)
6239 {
6240 if (kgem->needs_semaphore)
6241 return false;
6242
6243 if (bo->rq == NULL || RQ_RING(bo->rq) == kgem->ring)
6244 return false;
6245
6246 kgem->needs_semaphore = true;
6247 return true;
6248 }
6249
6250 inline static bool needs_reservation(struct kgem *kgem, struct kgem_bo *bo)
6251 {
6252 if (kgem->needs_reservation)
6253 return false;
6254
6255 if (bo->presumed_offset)
6256 return false;
6257
6258 kgem->needs_reservation = true;
6259 return nearly_idle(kgem);
6260 }
6261
6262 inline static bool needs_batch_flush(struct kgem *kgem, struct kgem_bo *bo)
6263 {
6264 bool flush = false;
6265
6266 if (needs_semaphore(kgem, bo)) {
6267 DBG(("%s: flushing before handle=%d for required semaphore\n", __FUNCTION__, bo->handle));
6268 flush = true;
6269 }
6270
6271 if (needs_reservation(kgem, bo)) {
6272 DBG(("%s: flushing before handle=%d for new reservation\n", __FUNCTION__, bo->handle));
6273 flush = true;
6274 }
6275
6276 return kgem->nreloc ? flush : false;
6277 }
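/* Note that needs_batch_flush() only requests a flush once the batch
 * already contains relocations (kgem->nreloc != 0); with an empty batch
 * there is nothing queued that would have to be submitted before this bo
 * can be used.
 */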
6278
6279 static bool aperture_check(struct kgem *kgem, unsigned num_pages)
6280 {
6281 struct drm_i915_gem_get_aperture aperture;
6282 int reserve;
6283
6284 if (kgem->aperture)
6285 return false;
6286
6287 /* Leave some space in case of alignment issues */
6288 reserve = kgem->aperture_mappable / 2;
6289 if (kgem->gen < 033 && reserve < kgem->aperture_max_fence)
6290 reserve = kgem->aperture_max_fence;
6291 if (!kgem->has_llc)
6292 reserve += kgem->nexec * PAGE_SIZE * 2;
6293
6294 DBG(("%s: num_pages=%d, holding %d pages in reserve, total aperture %d\n",
6295 __FUNCTION__, num_pages, reserve, kgem->aperture_total));
6296 num_pages += reserve;
6297
6298 VG_CLEAR(aperture);
6299 aperture.aper_available_size = kgem->aperture_total;
6300 aperture.aper_available_size *= PAGE_SIZE;
6301 (void)do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture);
6302
6303 DBG(("%s: aperture required %ld bytes, available %ld bytes\n",
6304 __FUNCTION__,
6305 (long)num_pages * PAGE_SIZE,
6306 (long)aperture.aper_available_size));
6307
6308 return num_pages <= aperture.aper_available_size / PAGE_SIZE;
6309 }
6310
6311 static inline bool kgem_flush(struct kgem *kgem, bool flush)
6312 {
6313 if (unlikely(kgem->wedged))
6314 return false;
6315
6316 if (kgem->nreloc == 0)
6317 return true;
6318
6319 if (__to_sna(kgem)->flags & SNA_POWERSAVE)
6320 return true;
6321
6322 if (kgem->flush == flush && kgem->aperture < kgem->aperture_low)
6323 return true;
6324
6325 DBG(("%s: opportunistic flushing? flush=%d,%d, aperture=%d/%d, idle?=%d\n",
6326 __FUNCTION__, kgem->flush, flush, kgem->aperture, kgem->aperture_low, kgem_ring_is_idle(kgem, kgem->ring)));
6327 return !kgem_ring_is_idle(kgem, kgem->ring);
6328 }
6329
6330 bool kgem_check_bo(struct kgem *kgem, ...)
6331 {
6332 va_list ap;
6333 struct kgem_bo *bo;
6334 int num_exec = 0;
6335 int num_pages = 0;
6336 bool flush = false;
6337 bool busy = true;
6338
6339 va_start(ap, kgem);
6340 while ((bo = va_arg(ap, struct kgem_bo *))) {
6341 while (bo->proxy)
6342 bo = bo->proxy;
6343 if (bo->exec)
6344 continue;
6345
6346 if (needs_batch_flush(kgem, bo)) {
6347 va_end(ap);
6348 return false;
6349 }
6350
6351 num_pages += num_pages(bo);
6352 num_exec++;
6353
6354 flush |= bo->flush;
6355 busy &= bo->rq != NULL;
6356 }
6357 va_end(ap);
6358
6359 DBG(("%s: num_pages=+%d, num_exec=+%d\n",
6360 __FUNCTION__, num_pages, num_exec));
6361
6362 if (!num_pages)
6363 return true;
6364
6365 if (kgem->nexec + num_exec >= KGEM_EXEC_SIZE(kgem)) {
6366 DBG(("%s: out of exec slots (%d + %d / %d)\n", __FUNCTION__,
6367 kgem->nexec, num_exec, KGEM_EXEC_SIZE(kgem)));
6368 return false;
6369 }
6370
6371 if (num_pages + kgem->aperture > kgem->aperture_high) {
6372 DBG(("%s: final aperture usage (%d + %d) is greater than high water mark (%d)\n",
6373 __FUNCTION__, kgem->aperture, num_pages, kgem->aperture_high));
6374 return aperture_check(kgem, num_pages);
6375 }
6376
6377 if (busy)
6378 return true;
6379
6380 return kgem_flush(kgem, flush);
6381 }
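/* Illustrative usage only (a sketch of the calling convention, not code
 * from this file): the argument list is NULL-terminated, and a failed
 * check is normally answered by submitting the current batch, e.g.
 *
 *	if (!kgem_check_bo(kgem, dst_bo, src_bo, NULL)) {
 *		_kgem_submit(kgem);
 *		_kgem_set_mode(kgem, KGEM_BLT);
 *	}
 *
 * where dst_bo/src_bo stand in for whatever buffers the operation needs.
 */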
6382
6383 bool kgem_check_bo_fenced(struct kgem *kgem, struct kgem_bo *bo)
6384 {
6385 assert(bo->refcnt);
6386 while (bo->proxy)
6387 bo = bo->proxy;
6388 assert(bo->refcnt);
6389
6390 if (bo->exec) {
6391 if (kgem->gen < 040 &&
6392 bo->tiling != I915_TILING_NONE &&
6393 (bo->exec->flags & EXEC_OBJECT_NEEDS_FENCE) == 0) {
6394 uint32_t size;
6395
6396 assert(bo->tiling == I915_TILING_X);
6397
6398 if (kgem->nfence >= kgem->fence_max)
6399 return false;
6400
6401 if (kgem->aperture_fenced) {
6402 size = 3*kgem->aperture_fenced;
6403 if (kgem->aperture_total == kgem->aperture_mappable)
6404 size += kgem->aperture;
6405 if (size > kgem->aperture_fenceable &&
6406 kgem_ring_is_idle(kgem, kgem->ring)) {
6407 DBG(("%s: opportunistic fence flush\n", __FUNCTION__));
6408 return false;
6409 }
6410 }
6411
6412 size = kgem_bo_fenced_size(kgem, bo);
6413 if (size > kgem->aperture_max_fence)
6414 kgem->aperture_max_fence = size;
6415 size += kgem->aperture_fenced;
6416 if (kgem->gen < 033 && size < 2 * kgem->aperture_max_fence)
6417 size = 2 * kgem->aperture_max_fence;
6418 if (kgem->aperture_total == kgem->aperture_mappable)
6419 size += kgem->aperture;
6420 if (size > kgem->aperture_fenceable) {
6421 DBG(("%s: estimated fence space required %d (fenced=%d, max_fence=%d, aperture=%d) exceeds fenceable aperture %d\n",
6422 __FUNCTION__, size, kgem->aperture_fenced, kgem->aperture_max_fence, kgem->aperture, kgem->aperture_fenceable));
6423 return false;
6424 }
6425 }
6426
6427 return true;
6428 }
6429
6430 if (kgem->nexec >= KGEM_EXEC_SIZE(kgem) - 1)
6431 return false;
6432
6433 if (needs_batch_flush(kgem, bo))
6434 return false;
6435
6436 assert_tiling(kgem, bo);
6437 if (kgem->gen < 040 && bo->tiling != I915_TILING_NONE) {
6438 uint32_t size;
6439
6440 assert(bo->tiling == I915_TILING_X);
6441
6442 if (kgem->nfence >= kgem->fence_max)
6443 return false;
6444
6445 if (kgem->aperture_fenced) {
6446 size = 3*kgem->aperture_fenced;
6447 if (kgem->aperture_total == kgem->aperture_mappable)
6448 size += kgem->aperture;
6449 if (size > kgem->aperture_fenceable &&
6450 kgem_ring_is_idle(kgem, kgem->ring)) {
6451 DBG(("%s: opportunistic fence flush\n", __FUNCTION__));
6452 return false;
6453 }
6454 }
6455
6456 size = kgem_bo_fenced_size(kgem, bo);
6457 if (size > kgem->aperture_max_fence)
6458 kgem->aperture_max_fence = size;
6459 size += kgem->aperture_fenced;
6460 if (kgem->gen < 033 && size < 2 * kgem->aperture_max_fence)
6461 size = 2 * kgem->aperture_max_fence;
6462 if (kgem->aperture_total == kgem->aperture_mappable)
6463 size += kgem->aperture;
6464 if (size > kgem->aperture_fenceable) {
6465 DBG(("%s: estimated fence space required %d (fenced=%d, max_fence=%d, aperture=%d) exceeds fenceable aperture %d\n",
6466 __FUNCTION__, size, kgem->aperture_fenced, kgem->aperture_max_fence, kgem->aperture, kgem->aperture_fenceable));
6467 return false;
6468 }
6469 }
6470
6471 if (kgem->aperture + kgem->aperture_fenced + num_pages(bo) > kgem->aperture_high) {
6472 DBG(("%s: final aperture usage (%d + %d) is greater than high water mark (%d)\n",
6473 __FUNCTION__, kgem->aperture, num_pages(bo), kgem->aperture_high));
6474 return aperture_check(kgem, num_pages(bo));
6475 }
6476
6477 if (bo->rq)
6478 return true;
6479
6480 return kgem_flush(kgem, bo->flush);
6481 }
6482
6483 bool kgem_check_many_bo_fenced(struct kgem *kgem, ...)
6484 {
6485 va_list ap;
6486 struct kgem_bo *bo;
6487 int num_fence = 0;
6488 int num_exec = 0;
6489 int num_pages = 0;
6490 int fenced_size = 0;
6491 bool flush = false;
6492 bool busy = true;
6493
6494 va_start(ap, kgem);
6495 while ((bo = va_arg(ap, struct kgem_bo *))) {
6496 assert(bo->refcnt);
6497 while (bo->proxy)
6498 bo = bo->proxy;
6499 assert(bo->refcnt);
6500 if (bo->exec) {
6501 if (kgem->gen >= 040 || bo->tiling == I915_TILING_NONE)
6502 continue;
6503
6504 if ((bo->exec->flags & EXEC_OBJECT_NEEDS_FENCE) == 0) {
6505 fenced_size += kgem_bo_fenced_size(kgem, bo);
6506 num_fence++;
6507 }
6508
6509 continue;
6510 }
6511
6512 if (needs_batch_flush(kgem, bo)) {
6513 va_end(ap);
6514 return false;
6515 }
6516
6517 assert_tiling(kgem, bo);
6518 num_pages += num_pages(bo);
6519 num_exec++;
6520 if (kgem->gen < 040 && bo->tiling) {
6521 uint32_t size = kgem_bo_fenced_size(kgem, bo);
6522 if (size > kgem->aperture_max_fence)
6523 kgem->aperture_max_fence = size;
6524 fenced_size += size;
6525 num_fence++;
6526 }
6527
6528 flush |= bo->flush;
6529 busy &= bo->rq != NULL;
6530 }
6531 va_end(ap);
6532
6533 if (num_fence) {
6534 uint32_t size;
6535
6536 if (kgem->nfence + num_fence > kgem->fence_max)
6537 return false;
6538
6539 if (kgem->aperture_fenced) {
6540 size = 3*kgem->aperture_fenced;
6541 if (kgem->aperture_total == kgem->aperture_mappable)
6542 size += kgem->aperture;
6543 if (size > kgem->aperture_fenceable &&
6544 kgem_ring_is_idle(kgem, kgem->ring)) {
6545 DBG(("%s: opportunistic fence flush\n", __FUNCTION__));
6546 return false;
6547 }
6548 }
6549
6550 size = kgem->aperture_fenced;
6551 size += fenced_size;
6552 if (kgem->gen < 033 && size < 2 * kgem->aperture_max_fence)
6553 size = 2 * kgem->aperture_max_fence;
6554 if (kgem->aperture_total == kgem->aperture_mappable)
6555 size += kgem->aperture;
6556 if (size > kgem->aperture_fenceable) {
6557 DBG(("%s: estimated fence space required %d (fenced=%d, max_fence=%d, aperture=%d) exceeds fenceable aperture %d\n",
6558 __FUNCTION__, size, kgem->aperture_fenced, kgem->aperture_max_fence, kgem->aperture, kgem->aperture_fenceable));
6559 return false;
6560 }
6561 }
6562
6563 if (num_pages == 0)
6564 return true;
6565
6566 if (kgem->nexec + num_exec >= KGEM_EXEC_SIZE(kgem))
6567 return false;
6568
6569 if (num_pages + kgem->aperture > kgem->aperture_high - kgem->aperture_fenced) {
6570 DBG(("%s: final aperture usage (%d + %d + %d) is greater than high water mark (%d)\n",
6571 __FUNCTION__, kgem->aperture, kgem->aperture_fenced, num_pages, kgem->aperture_high));
6572 return aperture_check(kgem, num_pages);
6573 }
6574
6575 if (busy)
6576 return true;
6577
6578 return kgem_flush(kgem, flush);
6579 }
6580
6581 void __kgem_bcs_set_tiling(struct kgem *kgem,
6582 struct kgem_bo *src,
6583 struct kgem_bo *dst)
6584 {
6585 uint32_t state, *b;
6586
6587 DBG(("%s: src handle=%d:tiling=%d, dst handle=%d:tiling=%d\n",
6588 __FUNCTION__,
6589 src ? src->handle : 0, src ? src->tiling : 0,
6590 dst ? dst->handle : 0, dst ? dst->tiling : 0));
6591 assert(kgem->mode == KGEM_BLT);
6592 assert(dst == NULL || kgem_bo_can_blt(kgem, dst));
6593 assert(src == NULL || kgem_bo_can_blt(kgem, src));
6594
6595 state = 0;
6596 if (dst && dst->tiling == I915_TILING_Y)
6597 state |= BCS_DST_Y;
6598 if (src && src->tiling == I915_TILING_Y)
6599 state |= BCS_SRC_Y;
6600
6601 if (kgem->bcs_state == state)
6602 return;
6603
6604 DBG(("%s: updating SWCTRL %x -> %x\n", __FUNCTION__,
6605 kgem->bcs_state, state));
6606
6607 /* Over-estimate space in case we need to re-emit the cmd packet */
6608 if (!kgem_check_batch(kgem, 24)) {
6609 _kgem_submit(kgem);
6610 _kgem_set_mode(kgem, KGEM_BLT);
6611 if (state == 0)
6612 return;
6613 }
6614
6615 b = kgem->batch + kgem->nbatch;
6616 if (kgem->nbatch) {
6617 *b++ = MI_FLUSH_DW;
6618 *b++ = 0;
6619 *b++ = 0;
6620 *b++ = 0;
6621 }
6622 *b++ = MI_LOAD_REGISTER_IMM;
6623 *b++ = BCS_SWCTRL;
6624 *b++ = (BCS_SRC_Y | BCS_DST_Y) << 16 | state;
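	/* The high halfword written above acts as the write-enable mask for
	 * BCS_SWCTRL (assuming the usual masked-register convention), so
	 * only the two tiling-select bits are actually modified.
	 */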
6625 kgem->nbatch = b - kgem->batch;
6626
6627 kgem->bcs_state = state;
6628 }
6629
6630 uint32_t kgem_add_reloc(struct kgem *kgem,
6631 uint32_t pos,
6632 struct kgem_bo *bo,
6633 uint32_t read_write_domain,
6634 uint32_t delta)
6635 {
6636 int index;
6637
6638 DBG(("%s: handle=%d, pos=%d, delta=%d, domains=%08x\n",
6639 __FUNCTION__, bo ? bo->handle : 0, pos, delta, read_write_domain));
6640
6641 assert(kgem->gen < 0100);
6642 assert((read_write_domain & 0x7fff) == 0 || bo != NULL);
6643
6644 index = kgem->nreloc++;
6645 assert(index < ARRAY_SIZE(kgem->reloc));
6646 kgem->reloc[index].offset = pos * sizeof(kgem->batch[0]);
6647 if (bo) {
6648 assert(kgem->mode != KGEM_NONE);
6649 assert(bo->refcnt);
6650 while (bo->proxy) {
6651 DBG(("%s: adding proxy [delta=%d] for handle=%d\n",
6652 __FUNCTION__, bo->delta, bo->handle));
6653 delta += bo->delta;
6654 assert(bo->handle == bo->proxy->handle);
6655 /* need to release the cache upon batch submit */
6656 if (bo->exec == NULL) {
6657 list_move_tail(&bo->request,
6658 &kgem->next_request->buffers);
6659 bo->rq = MAKE_REQUEST(kgem->next_request,
6660 kgem->ring);
6661 bo->exec = &_kgem_dummy_exec;
6662 bo->domain = DOMAIN_GPU;
6663 }
6664
6665 if (read_write_domain & 0x7fff && !bo->gpu_dirty)
6666 __kgem_bo_mark_dirty(bo);
6667
6668 bo = bo->proxy;
6669 assert(bo->refcnt);
6670 }
6671 assert(bo->refcnt);
6672
6673 if (bo->exec == NULL)
6674 kgem_add_bo(kgem, bo);
6675 assert(bo->rq == MAKE_REQUEST(kgem->next_request, kgem->ring));
6676 assert(RQ_RING(bo->rq) == kgem->ring);
6677
6678 if (kgem->gen < 040 && read_write_domain & KGEM_RELOC_FENCED) {
6679 if (bo->tiling &&
6680 (bo->exec->flags & EXEC_OBJECT_NEEDS_FENCE) == 0) {
6681 assert(bo->tiling == I915_TILING_X);
6682 assert(kgem->nfence < kgem->fence_max);
6683 kgem->aperture_fenced +=
6684 kgem_bo_fenced_size(kgem, bo);
6685 kgem->nfence++;
6686 }
6687 bo->exec->flags |= EXEC_OBJECT_NEEDS_FENCE;
6688 }
6689
6690 kgem->reloc[index].delta = delta;
6691 kgem->reloc[index].target_handle = bo->target_handle;
6692 kgem->reloc[index].presumed_offset = bo->presumed_offset;
6693
6694 if (read_write_domain & 0x7fff && !bo->gpu_dirty) {
6695 assert(!bo->snoop || kgem->can_blt_cpu);
6696 __kgem_bo_mark_dirty(bo);
6697 }
6698
6699 delta += bo->presumed_offset;
6700 } else {
6701 kgem->reloc[index].delta = delta;
6702 kgem->reloc[index].target_handle = ~0U;
6703 kgem->reloc[index].presumed_offset = 0;
6704 if (kgem->nreloc__self < 256)
6705 kgem->reloc__self[kgem->nreloc__self++] = index;
6706 }
6707 kgem->reloc[index].read_domains = read_write_domain >> 16;
6708 kgem->reloc[index].write_domain = read_write_domain & 0x7fff;
6709
6710 return delta;
6711 }
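/* A note on the encoding (inferred from the masks above): read_write_domain
 * packs the whole relocation request into one word, with bits 31:16 holding
 * the read domains, bit 15 the KGEM_RELOC_FENCED request and bits 14:0 the
 * write domain.  A render-target relocation would therefore pass something
 * like
 *
 *	I915_GEM_DOMAIN_RENDER << 16 | I915_GEM_DOMAIN_RENDER
 *
 * while a read-only source passes just the domain in the high halfword.
 */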
6712
6713 uint64_t kgem_add_reloc64(struct kgem *kgem,
6714 uint32_t pos,
6715 struct kgem_bo *bo,
6716 uint32_t read_write_domain,
6717 uint64_t delta)
6718 {
6719 int index;
6720
6721 DBG(("%s: handle=%d, pos=%d, delta=%ld, domains=%08x\n",
6722 __FUNCTION__, bo ? bo->handle : 0, pos, (long)delta, read_write_domain));
6723
6724 assert(kgem->gen >= 0100);
6725 assert((read_write_domain & 0x7fff) == 0 || bo != NULL);
6726
6727 index = kgem->nreloc++;
6728 assert(index < ARRAY_SIZE(kgem->reloc));
6729 kgem->reloc[index].offset = pos * sizeof(kgem->batch[0]);
6730 if (bo) {
6731 assert(kgem->mode != KGEM_NONE);
6732 assert(bo->refcnt);
6733 while (bo->proxy) {
6734 DBG(("%s: adding proxy [delta=%ld] for handle=%d\n",
6735 __FUNCTION__, (long)bo->delta, bo->handle));
6736 delta += bo->delta;
6737 assert(bo->handle == bo->proxy->handle);
6738 /* need to release the cache upon batch submit */
6739 if (bo->exec == NULL) {
6740 list_move_tail(&bo->request,
6741 &kgem->next_request->buffers);
6742 bo->rq = MAKE_REQUEST(kgem->next_request,
6743 kgem->ring);
6744 bo->exec = &_kgem_dummy_exec;
6745 bo->domain = DOMAIN_GPU;
6746 }
6747
6748 if (read_write_domain & 0x7fff && !bo->gpu_dirty)
6749 __kgem_bo_mark_dirty(bo);
6750
6751 bo = bo->proxy;
6752 assert(bo->refcnt);
6753 }
6754 assert(bo->refcnt);
6755
6756 if (bo->exec == NULL)
6757 kgem_add_bo(kgem, bo);
6758 assert(bo->rq == MAKE_REQUEST(kgem->next_request, kgem->ring));
6759 assert(RQ_RING(bo->rq) == kgem->ring);
6760
6761 DBG(("%s[%d] = (delta=%d, target handle=%d, presumed=%llx)\n",
6762 __FUNCTION__, index, delta, bo->target_handle, (long long)bo->presumed_offset));
6763 kgem->reloc[index].delta = delta;
6764 kgem->reloc[index].target_handle = bo->target_handle;
6765 kgem->reloc[index].presumed_offset = bo->presumed_offset;
6766
6767 if (read_write_domain & 0x7fff && !bo->gpu_dirty) {
6768 assert(!bo->snoop || kgem->can_blt_cpu);
6769 __kgem_bo_mark_dirty(bo);
6770 }
6771
6772 delta += bo->presumed_offset;
6773 } else {
6774 DBG(("%s[%d] = (delta=%d, target handle=batch)\n",
6775 __FUNCTION__, index, delta));
6776 kgem->reloc[index].delta = delta;
6777 kgem->reloc[index].target_handle = ~0U;
6778 kgem->reloc[index].presumed_offset = 0;
6779 if (kgem->nreloc__self < 256)
6780 kgem->reloc__self[kgem->nreloc__self++] = index;
6781 }
6782 kgem->reloc[index].read_domains = read_write_domain >> 16;
6783 kgem->reloc[index].write_domain = read_write_domain & 0x7fff;
6784
6785 return delta;
6786 }
6787
6788 static void kgem_trim_vma_cache(struct kgem *kgem, int type, int bucket)
6789 {
6790 int i, j;
6791
6792 DBG(("%s: type=%d, count=%d (bucket: %d)\n",
6793 __FUNCTION__, type, kgem->vma[type].count, bucket));
6794 if (kgem->vma[type].count <= 0)
6795 return;
6796
6797 if (kgem->need_purge)
6798 kgem_purge_cache(kgem);
6799
6800 /* vma are limited on a per-process basis to around 64k.
6801 * This includes all malloc arenas as well as other file
6802 * mappings. In order to be fair and not hog the cache,
6803 * and more importantly not to exhaust that limit and to
6804 * start failing mappings, we keep our own number of open
6805 * vma to within a conservative value.
6806 */
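	/* On Linux that limit is the vm.max_map_count sysctl, 65530 by
	 * default, which is where the "around 64k" figure above comes from;
	 * every GTT/WC/CPU mapping created by the helpers below occupies one
	 * slot until it is unmapped here.
	 */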
6807 i = 0;
6808 while (kgem->vma[type].count > 0) {
6809 struct kgem_bo *bo = NULL;
6810
6811 for (j = 0;
6812 bo == NULL && j < ARRAY_SIZE(kgem->vma[type].inactive);
6813 j++) {
6814 struct list *head = &kgem->vma[type].inactive[i++%ARRAY_SIZE(kgem->vma[type].inactive)];
6815 if (!list_is_empty(head))
6816 bo = list_last_entry(head, struct kgem_bo, vma);
6817 }
6818 if (bo == NULL)
6819 break;
6820
6821 DBG(("%s: discarding inactive %s vma cache for %d\n",
6822 __FUNCTION__, type ? "CPU" : "GTT", bo->handle));
6823
6824 assert(bo->rq == NULL);
6825 if (type) {
6826 VG(VALGRIND_MAKE_MEM_NOACCESS(MAP(bo->map__cpu), bytes(bo)));
6827 munmap(MAP(bo->map__cpu), bytes(bo));
6828 bo->map__cpu = NULL;
6829 } else {
6830 if (bo->map__wc) {
6831 VG(VALGRIND_MAKE_MEM_NOACCESS(bo->map__wc, bytes(bo)));
6832 munmap(bo->map__wc, bytes(bo));
6833 bo->map__wc = NULL;
6834 }
6835 if (bo->map__gtt) {
6836 munmap(bo->map__gtt, bytes(bo));
6837 bo->map__gtt = NULL;
6838 }
6839 }
6840
6841 list_del(&bo->vma);
6842 kgem->vma[type].count--;
6843 }
6844 }
6845
6846 static void *__kgem_bo_map__gtt_or_wc(struct kgem *kgem, struct kgem_bo *bo)
6847 {
6848 void *ptr;
6849
6850 DBG(("%s: handle=%d\n", __FUNCTION__, bo->handle));
6851
6852 assert(bo->proxy == NULL);
6853 assert(!bo->snoop);
6854
6855 kgem_trim_vma_cache(kgem, MAP_GTT, bucket(bo));
6856
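	/* Tiled objects have to go through the (fenced) GTT mapping so that
	 * the CPU sees them detiled; only linear objects may use the plain
	 * WC mmap, and then only when the kernel advertises it.
	 */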
6857 if (bo->tiling || !kgem->has_wc_mmap) {
6858 assert(kgem->gen != 021 || bo->tiling != I915_TILING_Y);
6859 warn_unless(num_pages(bo) <= kgem->aperture_mappable / 2);
6860
6861 ptr = bo->map__gtt;
6862 if (ptr == NULL)
6863 ptr = __kgem_bo_map__gtt(kgem, bo);
6864 } else {
6865 ptr = bo->map__wc;
6866 if (ptr == NULL)
6867 ptr = __kgem_bo_map__wc(kgem, bo);
6868 }
6869
6870 return ptr;
6871 }
6872
6873 void *kgem_bo_map__async(struct kgem *kgem, struct kgem_bo *bo)
6874 {
6875 DBG(("%s: handle=%d, offset=%ld, tiling=%d, map=%p:%p, domain=%d\n", __FUNCTION__,
6876 bo->handle, (long)bo->presumed_offset, bo->tiling, bo->map__gtt, bo->map__cpu, bo->domain));
6877
6878 assert(bo->proxy == NULL);
6879 assert(list_is_empty(&bo->list));
6880 assert_tiling(kgem, bo);
6881 assert(!bo->purged || bo->reusable);
6882
6883 if (bo->tiling == I915_TILING_NONE && !bo->scanout && kgem->has_llc) {
6884 DBG(("%s: converting request for GTT map into CPU map\n",
6885 __FUNCTION__));
6886 return kgem_bo_map__cpu(kgem, bo);
6887 }
6888
6889 return __kgem_bo_map__gtt_or_wc(kgem, bo);
6890 }
6891
6892 void *kgem_bo_map(struct kgem *kgem, struct kgem_bo *bo)
6893 {
6894 void *ptr;
6895
6896 DBG(("%s: handle=%d, offset=%ld, tiling=%d, map=%p:%p, domain=%d\n", __FUNCTION__,
6897 bo->handle, (long)bo->presumed_offset, bo->tiling, bo->map__gtt, bo->map__cpu, bo->domain));
6898
6899 assert(bo->proxy == NULL);
6900 assert(list_is_empty(&bo->list));
6901 assert(bo->exec == NULL);
6902 assert_tiling(kgem, bo);
6903 assert(!bo->purged || bo->reusable);
6904
6905 if (bo->tiling == I915_TILING_NONE && !bo->scanout &&
6906 (kgem->has_llc || bo->domain == DOMAIN_CPU)) {
6907 DBG(("%s: converting request for GTT map into CPU map\n",
6908 __FUNCTION__));
6909 ptr = kgem_bo_map__cpu(kgem, bo);
6910 if (ptr)
6911 kgem_bo_sync__cpu(kgem, bo);
6912 return ptr;
6913 }
6914
6915 ptr = __kgem_bo_map__gtt_or_wc(kgem, bo);
6916
6917 if (bo->domain != DOMAIN_GTT || FORCE_MMAP_SYNC & (1 << DOMAIN_GTT)) {
6918 struct drm_i915_gem_set_domain set_domain;
6919
6920 DBG(("%s: sync: needs_flush? %d, domain? %d, busy? %d\n", __FUNCTION__,
6921 bo->needs_flush, bo->domain, __kgem_busy(kgem, bo->handle)));
6922
6923 /* XXX use PROT_READ to avoid the write flush? */
6924
6925 VG_CLEAR(set_domain);
6926 set_domain.handle = bo->handle;
6927 set_domain.read_domains = I915_GEM_DOMAIN_GTT;
6928 set_domain.write_domain = I915_GEM_DOMAIN_GTT;
6929 if (do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain)) {
6930 DBG(("%s: sync: GPU hang detected\n", __FUNCTION__));
6931 kgem_throttle(kgem);
6932 }
6933 bo->needs_flush = false;
6934 kgem_bo_retire(kgem, bo);
6935 bo->domain = DOMAIN_GTT;
6936 bo->gtt_dirty = true;
6937 }
6938
6939 return ptr;
6940 }
6941
6942 void *kgem_bo_map__gtt(struct kgem *kgem, struct kgem_bo *bo)
6943 {
6944 DBG(("%s: handle=%d, offset=%ld, tiling=%d, map=%p:%p, domain=%d\n", __FUNCTION__,
6945 bo->handle, (long)bo->presumed_offset, bo->tiling, bo->map__gtt, bo->map__cpu, bo->domain));
6946
6947 assert(bo->proxy == NULL);
6948 assert(bo->exec == NULL);
6949 assert(list_is_empty(&bo->list));
6950 assert_tiling(kgem, bo);
6951 assert(!bo->purged || bo->reusable);
6952
6953 return __kgem_bo_map__gtt_or_wc(kgem, bo);
6954 }
6955
6956 void *kgem_bo_map__wc(struct kgem *kgem, struct kgem_bo *bo)
6957 {
6958 DBG(("%s: handle=%d, offset=%ld, tiling=%d, map=%p:%p, domain=%d\n", __FUNCTION__,
6959 bo->handle, (long)bo->presumed_offset, bo->tiling, bo->map__gtt, bo->map__cpu, bo->domain));
6960
6961 assert(bo->proxy == NULL);
6962 assert(list_is_empty(&bo->list));
6963 assert_tiling(kgem, bo);
6964 assert(!bo->purged || bo->reusable);
6965
6966 if (bo->map__wc)
6967 return bo->map__wc;
6968 if (!kgem->has_wc_mmap)
6969 return NULL;
6970
6971 kgem_trim_vma_cache(kgem, MAP_GTT, bucket(bo));
6972 return __kgem_bo_map__wc(kgem, bo);
6973 }
6974
6975 void *kgem_bo_map__cpu(struct kgem *kgem, struct kgem_bo *bo)
6976 {
6977 DBG(("%s(handle=%d, size=%d, map=%p:%p)\n",
6978 __FUNCTION__, bo->handle, bytes(bo), bo->map__gtt, bo->map__cpu));
6979 assert(!bo->purged);
6980 assert(list_is_empty(&bo->list));
6981 assert(bo->proxy == NULL);
6982 assert_tiling(kgem, bo);
6983
6984 if (bo->map__cpu)
6985 return MAP(bo->map__cpu);
6986
6987 kgem_trim_vma_cache(kgem, MAP_CPU, bucket(bo));
6988
6989 return __kgem_bo_map__cpu(kgem, bo);
6990 }
6991
6992 void *kgem_bo_map__debug(struct kgem *kgem, struct kgem_bo *bo)
6993 {
6994 void *ptr;
6995
6996 if (bo->tiling == I915_TILING_NONE && kgem->has_llc) {
6997 ptr = MAP(bo->map__cpu);
6998 if (ptr == NULL)
6999 ptr = __kgem_bo_map__cpu(kgem, bo);
7000 } else if (bo->tiling || !kgem->has_wc_mmap) {
7001 ptr = bo->map__gtt;
7002 if (ptr == NULL)
7003 ptr = __kgem_bo_map__gtt(kgem, bo);
7004 } else {
7005 ptr = bo->map__wc;
7006 if (ptr == NULL)
7007 ptr = __kgem_bo_map__wc(kgem, bo);
7008 }
7009
7010 return ptr;
7011 }
7012
7013
7014 uint32_t kgem_bo_flink(struct kgem *kgem, struct kgem_bo *bo)
7015 {
7016 struct drm_gem_flink flink;
7017
7018 assert(kgem_bo_is_fenced(kgem, bo));
7019
7020 VG_CLEAR(flink);
7021 flink.handle = bo->handle;
7022 if (do_ioctl(kgem->fd, DRM_IOCTL_GEM_FLINK, &flink))
7023 return 0;
7024
7025 DBG(("%s: flinked handle=%d to name=%d, marking non-reusable\n",
7026 __FUNCTION__, flink.handle, flink.name));
7027
7028 	/* Ordinarily giving the name away makes the buffer non-reusable.
7029 	 * However, we track the lifetime of all clients and their hold
7030 	 * on the buffer, and *presuming* they do not pass it on to a third
7031 	 * party, that tracking remains accurate.
7032 	 */
7033 bo->reusable = false;
7034 kgem_bo_unclean(kgem, bo);
7035
7036 return flink.name;
7037 }
7038
7039 static bool probe(struct kgem *kgem, uint32_t handle)
7040 {
7041 struct drm_i915_gem_set_domain arg = {
7042 .handle = handle,
7043 .read_domains = I915_GEM_DOMAIN_CPU,
7044 };
7045
7046 return do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &arg) == 0;
7047 }
7048
7049 static uint32_t probe_userptr(struct kgem *kgem,
7050 void *ptr, size_t size, int read_only)
7051 {
7052 uint32_t handle;
7053
7054 handle = gem_userptr(kgem->fd, ptr, size, read_only);
7055 if (handle && !probe(kgem, handle)) {
7056 gem_close(kgem->fd, handle);
7057 handle = 0;
7058 }
7059
7060 return handle;
7061 }
7062
7063 struct kgem_bo *kgem_create_map(struct kgem *kgem,
7064 void *ptr, uint32_t size,
7065 bool read_only)
7066 {
7067 struct kgem_bo *bo;
7068 uintptr_t first_page, last_page;
7069 uint32_t handle;
7070
7071 assert(MAP(ptr) == ptr);
7072
7073 DBG(("%s(%p size=%d, read-only?=%d) - has_userptr?=%d\n", __FUNCTION__,
7074 ptr, size, read_only, kgem->has_userptr));
7075 if (!kgem->has_userptr)
7076 return NULL;
7077
7078 first_page = (uintptr_t)ptr;
7079 last_page = first_page + size + PAGE_SIZE - 1;
7080
7081 first_page &= ~(uintptr_t)(PAGE_SIZE-1);
7082 last_page &= ~(uintptr_t)(PAGE_SIZE-1);
7083 assert(last_page > first_page);
7084
7085 handle = probe_userptr(kgem,
7086 (void *)first_page, last_page-first_page,
7087 read_only);
7088 if (handle == 0 && read_only && kgem->has_wc_mmap)
7089 handle = probe_userptr(kgem,
7090 (void *)first_page, last_page-first_page,
7091 false);
7092 if (handle == 0) {
7093 DBG(("%s: import failed, errno=%d\n", __FUNCTION__, errno));
7094 return NULL;
7095 }
7096
7097 bo = __kgem_bo_alloc(handle, (last_page - first_page) / PAGE_SIZE);
7098 if (bo == NULL) {
7099 gem_close(kgem->fd, handle);
7100 return NULL;
7101 }
7102
7103 bo->unique_id = kgem_get_unique_id(kgem);
7104 bo->snoop = !kgem->has_llc;
7105 debug_alloc__bo(kgem, bo);
7106
7107 if (first_page != (uintptr_t)ptr) {
7108 struct kgem_bo *proxy;
7109
7110 proxy = kgem_create_proxy(kgem, bo,
7111 (uintptr_t)ptr - first_page, size);
7112 kgem_bo_destroy(kgem, bo);
7113 if (proxy == NULL)
7114 return NULL;
7115
7116 bo = proxy;
7117 }
7118
7119 bo->map__cpu = MAKE_USER_MAP(ptr);
7120
7121 DBG(("%s(ptr=%p, size=%d, pages=%d, read_only=%d) => handle=%d (proxy? %d)\n",
7122 __FUNCTION__, ptr, size, NUM_PAGES(size), read_only, handle, bo->proxy != NULL));
7123 return bo;
7124 }
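/* Minimal sketch (illustrative only): wrapping an existing, page-aligned
 * allocation so the GPU can access it directly, e.g.
 *
 *	void *ptr;
 *	if (posix_memalign(&ptr, PAGE_SIZE, ALIGN(len, PAGE_SIZE)) == 0) {
 *		struct kgem_bo *bo = kgem_create_map(kgem, ptr, len, true);
 *		if (bo == NULL)
 *			free(ptr);
 *	}
 *
 * Unaligned pointers are accepted too; the code above rounds out to page
 * boundaries and returns a proxy covering just [ptr, ptr + size).  The
 * client memory must outlive the bo.
 */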
7125
7126 void kgem_bo_sync__cpu(struct kgem *kgem, struct kgem_bo *bo)
7127 {
7128 DBG(("%s: handle=%d\n", __FUNCTION__, bo->handle));
7129 assert(!bo->scanout);
7130 assert_tiling(kgem, bo);
7131
7132 kgem_bo_submit(kgem, bo);
7133
7134 /* SHM pixmaps use proxies for subpage offsets */
7135 assert(!bo->purged);
7136 while (bo->proxy)
7137 bo = bo->proxy;
7138 assert(!bo->purged);
7139
7140 if (bo->domain != DOMAIN_CPU || FORCE_MMAP_SYNC & (1 << DOMAIN_CPU)) {
7141 struct drm_i915_gem_set_domain set_domain;
7142
7143 DBG(("%s: SYNC: handle=%d, needs_flush? %d, domain? %d, busy? %d\n",
7144 __FUNCTION__, bo->handle,
7145 bo->needs_flush, bo->domain,
7146 __kgem_busy(kgem, bo->handle)));
7147
7148 VG_CLEAR(set_domain);
7149 set_domain.handle = bo->handle;
7150 set_domain.read_domains = I915_GEM_DOMAIN_CPU;
7151 set_domain.write_domain = I915_GEM_DOMAIN_CPU;
7152
7153 if (do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain)) {
7154 DBG(("%s: sync: GPU hang detected\n", __FUNCTION__));
7155 kgem_throttle(kgem);
7156 }
7157 bo->needs_flush = false;
7158 kgem_bo_retire(kgem, bo);
7159 bo->domain = DOMAIN_CPU;
7160 bo->gtt_dirty = true;
7161 }
7162 }
7163
7164 void kgem_bo_sync__cpu_full(struct kgem *kgem, struct kgem_bo *bo, bool write)
7165 {
7166 DBG(("%s: handle=%d\n", __FUNCTION__, bo->handle));
7167 assert(!bo->scanout || !write);
7168 assert_tiling(kgem, bo);
7169
7170 if (write || bo->needs_flush)
7171 kgem_bo_submit(kgem, bo);
7172
7173 /* SHM pixmaps use proxies for subpage offsets */
7174 assert(!bo->purged);
7175 assert(bo->refcnt);
7176 while (bo->proxy)
7177 bo = bo->proxy;
7178 assert(bo->refcnt);
7179 assert(!bo->purged);
7180
7181 if (bo->rq == NULL && (kgem->has_llc || bo->snoop) && !write)
7182 return;
7183
7184 if (bo->domain != DOMAIN_CPU || FORCE_MMAP_SYNC & (1 << DOMAIN_CPU)) {
7185 struct drm_i915_gem_set_domain set_domain;
7186
7187 DBG(("%s: SYNC: handle=%d, needs_flush? %d, domain? %d, busy? %d\n",
7188 __FUNCTION__, bo->handle,
7189 bo->needs_flush, bo->domain,
7190 __kgem_busy(kgem, bo->handle)));
7191
7192 VG_CLEAR(set_domain);
7193 set_domain.handle = bo->handle;
7194 set_domain.read_domains = I915_GEM_DOMAIN_CPU;
7195 set_domain.write_domain = write ? I915_GEM_DOMAIN_CPU : 0;
7196
7197 if (do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain)) {
7198 DBG(("%s: sync: GPU hang detected\n", __FUNCTION__));
7199 kgem_throttle(kgem);
7200 }
7201 bo->needs_flush = false;
7202 if (write) {
7203 kgem_bo_retire(kgem, bo);
7204 bo->domain = DOMAIN_CPU;
7205 bo->gtt_dirty = true;
7206 } else {
7207 if (bo->exec == NULL)
7208 kgem_bo_maybe_retire(kgem, bo);
7209 bo->domain = DOMAIN_NONE;
7210 }
7211 }
7212 }
7213
7214 void kgem_bo_sync__gtt(struct kgem *kgem, struct kgem_bo *bo)
7215 {
7216 DBG(("%s: handle=%d\n", __FUNCTION__, bo->handle));
7217 assert(bo->refcnt);
7218 assert(bo->proxy == NULL);
7219 assert_tiling(kgem, bo);
7220 assert(!bo->snoop);
7221
7222 kgem_bo_submit(kgem, bo);
7223
7224 if (bo->domain != DOMAIN_GTT ||
7225 !kgem->has_coherent_mmap_gtt ||
7226 FORCE_MMAP_SYNC & (1 << DOMAIN_GTT)) {
7227 struct drm_i915_gem_set_domain set_domain;
7228
7229 DBG(("%s: SYNC: handle=%d, needs_flush? %d, domain? %d, busy? %d\n",
7230 __FUNCTION__, bo->handle,
7231 bo->needs_flush, bo->domain,
7232 __kgem_busy(kgem, bo->handle)));
7233
7234 VG_CLEAR(set_domain);
7235 set_domain.handle = bo->handle;
7236 set_domain.read_domains = I915_GEM_DOMAIN_GTT;
7237 set_domain.write_domain = I915_GEM_DOMAIN_GTT;
7238
7239 if (do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain)) {
7240 DBG(("%s: sync: GPU hang detected\n", __FUNCTION__));
7241 kgem_throttle(kgem);
7242 }
7243 bo->needs_flush = false;
7244 kgem_bo_retire(kgem, bo);
7245 bo->domain = DOMAIN_GTT;
7246 bo->gtt_dirty = true;
7247 }
7248 }
7249
7250 void kgem_clear_dirty(struct kgem *kgem)
7251 {
7252 struct list * const buffers = &kgem->next_request->buffers;
7253 struct kgem_bo *bo;
7254
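	/* __kgem_bo_mark_dirty() moves dirty buffers to the head of the
	 * request list, so the walk can stop at the first clean entry.
	 */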
7255 list_for_each_entry(bo, buffers, request) {
7256 if (!bo->gpu_dirty)
7257 break;
7258
7259 bo->gpu_dirty = false;
7260 }
7261 }
7262
7263 struct kgem_bo *kgem_create_proxy(struct kgem *kgem,
7264 struct kgem_bo *target,
7265 int offset, int length)
7266 {
7267 struct kgem_bo *bo;
7268
7269 DBG(("%s: target handle=%d [proxy? %d], offset=%d, length=%d, io=%d\n",
7270 __FUNCTION__, target->handle, target->proxy ? target->proxy->delta : -1,
7271 offset, length, target->io));
7272
7273 bo = __kgem_bo_alloc(target->handle, length);
7274 if (bo == NULL)
7275 return NULL;
7276
7277 bo->unique_id = kgem_get_unique_id(kgem);
7278 bo->reusable = false;
7279 bo->size.bytes = length;
7280
7281 bo->io = target->io && target->proxy == NULL;
7282 bo->gpu_dirty = target->gpu_dirty;
7283 bo->tiling = target->tiling;
7284 bo->pitch = target->pitch;
7285 bo->flush = target->flush;
7286 bo->snoop = target->snoop;
7287
7288 assert(!bo->scanout);
7289 bo->proxy = kgem_bo_reference(target);
7290 bo->delta = offset;
7291
7292 /* Proxies are only tracked for busyness on the current rq */
7293 if (target->exec && !bo->io) {
7294 assert(RQ(target->rq) == kgem->next_request);
7295 list_move_tail(&bo->request, &kgem->next_request->buffers);
7296 bo->exec = &_kgem_dummy_exec;
7297 bo->rq = target->rq;
7298 }
7299
7300 return bo;
7301 }
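/* A proxy shares its target's GEM handle and merely adds a byte offset
 * (bo->delta); SHM pixmaps use this for sub-page offsets (see
 * kgem_bo_sync__cpu above) and the upload-buffer machinery uses it to hand
 * out sub-ranges of a shared buffer.
 */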
7302
7303 static struct kgem_buffer *
7304 buffer_alloc(void)
7305 {
7306 struct kgem_buffer *bo;
7307
7308 bo = malloc(sizeof(*bo));
7309 if (bo == NULL)
7310 return NULL;
7311
7312 bo->mem = NULL;
7313 bo->need_io = false;
7314 bo->mmapped = MMAPPED_CPU;
7315
7316 return bo;
7317 }
7318
7319 static struct kgem_buffer *
7320 buffer_alloc_with_data(int num_pages)
7321 {
7322 struct kgem_buffer *bo;
7323
7324 bo = malloc(sizeof(*bo) + 2*UPLOAD_ALIGNMENT + num_pages * PAGE_SIZE);
7325 if (bo == NULL)
7326 return NULL;
7327
7328 bo->mem = (void *)ALIGN((uintptr_t)bo + sizeof(*bo), UPLOAD_ALIGNMENT);
7329 bo->mmapped = false;
7330 return bo;
7331 }
7332
7333 static inline bool
7334 use_snoopable_buffer(struct kgem *kgem, uint32_t flags)
7335 {
7336 if ((flags & KGEM_BUFFER_WRITE) == 0)
7337 return kgem->gen >= 030;
7338
7339 return true;
7340 }
7341
7342 static void
7343 init_buffer_from_bo(struct kgem_buffer *bo, struct kgem_bo *old)
7344 {
7345 DBG(("%s: reusing handle=%d for buffer\n",
7346 __FUNCTION__, old->handle));
7347
7348 assert(old->proxy == NULL);
7349 assert(list_is_empty(&old->list));
7350
7351 memcpy(&bo->base, old, sizeof(*old));
7352 if (old->rq)
7353 list_replace(&old->request, &bo->base.request);
7354 else
7355 list_init(&bo->base.request);
7356 list_replace(&old->vma, &bo->base.vma);
7357 list_init(&bo->base.list);
7358 free(old);
7359
7360 assert(bo->base.tiling == I915_TILING_NONE);
7361
7362 bo->base.refcnt = 1;
7363 }
7364
7365 static struct kgem_buffer *
7366 search_snoopable_buffer(struct kgem *kgem, unsigned alloc)
7367 {
7368 struct kgem_buffer *bo;
7369 struct kgem_bo *old;
7370
7371 old = search_snoop_cache(kgem, alloc, 0);
7372 if (old) {
7373 if (!old->io) {
7374 bo = buffer_alloc();
7375 if (bo == NULL)
7376 return NULL;
7377
7378 init_buffer_from_bo(bo, old);
7379 } else {
7380 bo = (struct kgem_buffer *)old;
7381 bo->base.refcnt = 1;
7382 }
7383
7384 DBG(("%s: created CPU handle=%d for buffer, size %d\n",
7385 __FUNCTION__, bo->base.handle, num_pages(&bo->base)));
7386
7387 assert(bo->base.snoop);
7388 assert(bo->base.tiling == I915_TILING_NONE);
7389 assert(num_pages(&bo->base) >= alloc);
7390 assert(bo->mmapped == MMAPPED_CPU);
7391 assert(bo->need_io == false);
7392
7393 bo->mem = kgem_bo_map__cpu(kgem, &bo->base);
7394 if (bo->mem == NULL) {
7395 bo->base.refcnt = 0;
7396 kgem_bo_free(kgem, &bo->base);
7397 bo = NULL;
7398 }
7399
7400 return bo;
7401 }
7402
7403 return NULL;
7404 }
7405
7406 static struct kgem_buffer *
7407 create_snoopable_buffer(struct kgem *kgem, unsigned alloc)
7408 {
7409 struct kgem_buffer *bo;
7410 uint32_t handle;
7411
7412 if (kgem->has_llc) {
7413 struct kgem_bo *old;
7414
7415 bo = buffer_alloc();
7416 if (bo == NULL)
7417 return NULL;
7418
7419 old = search_linear_cache(kgem, alloc,
7420 CREATE_INACTIVE | CREATE_CPU_MAP | CREATE_EXACT);
7421 if (old) {
7422 init_buffer_from_bo(bo, old);
7423 } else {
7424 handle = gem_create(kgem->fd, alloc);
7425 if (handle == 0) {
7426 free(bo);
7427 return NULL;
7428 }
7429
7430 __kgem_bo_init(&bo->base, handle, alloc);
7431 debug_alloc__bo(kgem, &bo->base);
7432 DBG(("%s: created CPU (LLC) handle=%d for buffer, size %d\n",
7433 __FUNCTION__, bo->base.handle, alloc));
7434 }
7435
7436 assert(bo->base.refcnt == 1);
7437 assert(bo->mmapped == MMAPPED_CPU);
7438 assert(bo->need_io == false);
7439
7440 bo->mem = kgem_bo_map__cpu(kgem, &bo->base);
7441 if (bo->mem != NULL)
7442 return bo;
7443
7444 bo->base.refcnt = 0; /* for valgrind */
7445 kgem_bo_free(kgem, &bo->base);
7446 }
7447
7448 if (kgem->has_caching) {
7449 struct kgem_bo *old;
7450
7451 bo = buffer_alloc();
7452 if (bo == NULL)
7453 return NULL;
7454
7455 old = search_linear_cache(kgem, alloc,
7456 CREATE_INACTIVE | CREATE_CPU_MAP | CREATE_EXACT);
7457 if (old) {
7458 init_buffer_from_bo(bo, old);
7459 } else {
7460 handle = gem_create(kgem->fd, alloc);
7461 if (handle == 0) {
7462 free(bo);
7463 return NULL;
7464 }
7465
7466 __kgem_bo_init(&bo->base, handle, alloc);
7467 debug_alloc__bo(kgem, &bo->base);
7468 DBG(("%s: created CPU handle=%d for buffer, size %d\n",
7469 __FUNCTION__, bo->base.handle, alloc));
7470 }
7471
7472 assert(bo->base.refcnt == 1);
7473 assert(bo->mmapped == MMAPPED_CPU);
7474 assert(bo->need_io == false);
7475 assert(!__kgem_busy(kgem, bo->base.handle));
7476
7477 if (!gem_set_caching(kgem->fd, bo->base.handle, SNOOPED))
7478 goto free_caching;
7479
7480 bo->base.snoop = true;
7481
7482 bo->mem = kgem_bo_map__cpu(kgem, &bo->base);
7483 if (bo->mem == NULL)
7484 goto free_caching;
7485
7486 return bo;
7487
7488 free_caching:
7489 bo->base.refcnt = 0; /* for valgrind */
7490 kgem_bo_free(kgem, &bo->base);
7491 }
7492
7493 if (kgem->has_userptr) {
7494 bo = buffer_alloc();
7495 if (bo == NULL)
7496 return NULL;
7497
7498 //if (posix_memalign(&ptr, 64, ALIGN(size, 64)))
7499 if (posix_memalign(&bo->mem, PAGE_SIZE, alloc * PAGE_SIZE)) {
7500 free(bo);
7501 return NULL;
7502 }
7503
7504 handle = gem_userptr(kgem->fd, bo->mem, alloc * PAGE_SIZE, false);
7505 if (handle == 0) {
7506 free(bo->mem);
7507 free(bo);
7508 return NULL;
7509 }
7510
7511 __kgem_bo_init(&bo->base, handle, alloc);
7512 debug_alloc__bo(kgem, &bo->base);
7513 DBG(("%s: created snoop handle=%d for buffer\n",
7514 __FUNCTION__, bo->base.handle));
7515
7516 assert(bo->mmapped == MMAPPED_CPU);
7517 assert(bo->need_io == false);
7518
7519 bo->base.refcnt = 1;
7520 bo->base.snoop = true;
7521 bo->base.map__cpu = MAKE_USER_MAP(bo->mem);
7522
7523 return bo;
7524 }
7525
7526 return NULL;
7527 }
7528
7529 struct kgem_bo *kgem_create_buffer(struct kgem *kgem,
7530 uint32_t size, uint32_t flags,
7531 void **ret)
7532 {
7533 struct kgem_buffer *bo;
7534 unsigned offset, alloc;
7535 struct kgem_bo *old;
7536
7537 DBG(("%s: size=%d, flags=%x [write?=%d, inplace?=%d, last?=%d]\n",
7538 __FUNCTION__, size, flags,
7539 !!(flags & KGEM_BUFFER_WRITE),
7540 !!(flags & KGEM_BUFFER_INPLACE),
7541 !!(flags & KGEM_BUFFER_LAST)));
7542 assert(size);
7543 /* we should never be asked to create anything TOO large */
7544 assert(size <= kgem->max_object_size);
7545
7546 #if !DBG_NO_UPLOAD_CACHE
7547 list_for_each_entry(bo, &kgem->batch_buffers, base.list) {
7548 assert(bo->base.io);
7549 assert(bo->base.refcnt >= 1);
7550
7551 /* We can reuse any write buffer which we can fit */
7552 if (flags == KGEM_BUFFER_LAST &&
7553 bo->write == KGEM_BUFFER_WRITE &&
7554 bo->base.refcnt == 1 &&
7555 bo->mmapped == MMAPPED_NONE &&
7556 size <= bytes(&bo->base)) {
7557 DBG(("%s: reusing write buffer for read of %d bytes? used=%d, total=%d\n",
7558 __FUNCTION__, size, bo->used, bytes(&bo->base)));
7559 gem_write__cachealigned(kgem->fd, bo->base.handle,
7560 0, bo->used, bo->mem);
7561 assert(list_is_empty(&bo->base.vma));
7562 bo->need_io = 0;
7563 bo->write = 0;
7564 offset = 0;
7565 bo->used = size;
7566 goto done;
7567 }
7568
7569 if (flags & KGEM_BUFFER_WRITE) {
7570 if ((bo->write & KGEM_BUFFER_WRITE) == 0 ||
7571 (((bo->write & ~flags) & KGEM_BUFFER_INPLACE) &&
7572 !bo->base.snoop)) {
7573 DBG(("%s: skip write %x buffer, need %x\n",
7574 __FUNCTION__, bo->write, flags));
7575 continue;
7576 }
7577 assert(bo->mmapped || bo->need_io);
7578 } else {
7579 if (bo->write & KGEM_BUFFER_WRITE) {
7580 DBG(("%s: skip write %x buffer, need %x\n",
7581 __FUNCTION__, bo->write, flags));
7582 continue;
7583 }
7584 }
7585
7586 if (bo->used + size <= bytes(&bo->base)) {
7587 DBG(("%s: reusing buffer? used=%d + size=%d, total=%d\n",
7588 __FUNCTION__, bo->used, size, bytes(&bo->base)));
7589 offset = bo->used;
7590 bo->used += size;
7591 goto done;
7592 }
7593 }
7594
7595 if (flags & KGEM_BUFFER_WRITE) {
7596 list_for_each_entry(bo, &kgem->active_buffers, base.list) {
7597 assert(bo->base.io);
7598 assert(bo->base.refcnt >= 1);
7599 assert(bo->base.exec == NULL);
7600 assert(bo->mmapped);
7601 assert(bo->mmapped == MMAPPED_GTT || kgem->has_llc || bo->base.snoop);
7602
7603 if ((bo->write & ~flags) & KGEM_BUFFER_INPLACE && !bo->base.snoop) {
7604 DBG(("%s: skip write %x buffer, need %x\n",
7605 __FUNCTION__, bo->write, flags));
7606 continue;
7607 }
7608
7609 if (bo->used + size <= bytes(&bo->base)) {
7610 DBG(("%s: reusing buffer? used=%d + size=%d, total=%d\n",
7611 __FUNCTION__, bo->used, size, bytes(&bo->base)));
7612 offset = bo->used;
7613 bo->used += size;
7614 list_move(&bo->base.list, &kgem->batch_buffers);
7615 goto done;
7616 }
7617
7618 if (bo->base.refcnt == 1 &&
7619 size <= bytes(&bo->base) &&
7620 (bo->base.rq == NULL ||
7621 !__kgem_busy(kgem, bo->base.handle))) {
7622 DBG(("%s: reusing whole buffer? size=%d, total=%d\n",
7623 __FUNCTION__, size, bytes(&bo->base)));
7624 __kgem_bo_clear_busy(&bo->base);
7625 assert(list_is_empty(&bo->base.vma));
7626
7627 switch (bo->mmapped) {
7628 case MMAPPED_CPU:
7629 kgem_bo_sync__cpu(kgem, &bo->base);
7630 break;
7631 case MMAPPED_GTT:
7632 kgem_bo_sync__gtt(kgem, &bo->base);
7633 break;
7634 }
7635
7636 offset = 0;
7637 bo->used = size;
7638 list_move(&bo->base.list, &kgem->batch_buffers);
7639 goto done;
7640 }
7641 }
7642 }
7643 #endif
7644
7645 #if !DBG_NO_MAP_UPLOAD
7646 /* Be a little more generous and hope to hold fewer mmappings */
7647 alloc = ALIGN(2*size, kgem->buffer_size);
7648 if (alloc > MAX_CACHE_SIZE)
7649 alloc = ALIGN(size, kgem->buffer_size);
7650 if (alloc > MAX_CACHE_SIZE)
7651 alloc = PAGE_ALIGN(size);
7652 assert(alloc);
7653
7654 alloc /= PAGE_SIZE;
7655 if (alloc > kgem->aperture_mappable / 4 && !kgem->has_wc_mmap)
7656 flags &= ~KGEM_BUFFER_INPLACE;
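	/* For example (illustrative numbers, assuming a 64 KiB buffer_size):
	 * a 24 KiB upload first tries ALIGN(48 KiB, 64 KiB) = 64 KiB, i.e.
	 * 16 pages after the division above, so several small uploads can
	 * share one mapping; only if even ALIGN(size, buffer_size) exceeded
	 * MAX_CACHE_SIZE would it fall back to PAGE_ALIGN(size).
	 */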
7657
7658 if (kgem->has_llc &&
7659 (flags & KGEM_BUFFER_WRITE_INPLACE) != KGEM_BUFFER_WRITE_INPLACE) {
7660 bo = buffer_alloc();
7661 if (bo == NULL)
7662 goto skip_llc;
7663
7664 old = NULL;
7665 if ((flags & KGEM_BUFFER_WRITE) == 0)
7666 old = search_linear_cache(kgem, alloc, CREATE_CPU_MAP);
7667 if (old == NULL)
7668 old = search_linear_cache(kgem, alloc, CREATE_INACTIVE | CREATE_CPU_MAP);
7669 if (old == NULL)
7670 old = search_linear_cache(kgem, NUM_PAGES(size), CREATE_INACTIVE | CREATE_CPU_MAP);
7671 if (old) {
7672 DBG(("%s: found LLC handle=%d for buffer\n",
7673 __FUNCTION__, old->handle));
7674
7675 init_buffer_from_bo(bo, old);
7676 } else {
7677 uint32_t handle = gem_create(kgem->fd, alloc);
7678 if (handle == 0) {
7679 free(bo);
7680 goto skip_llc;
7681 }
7682 __kgem_bo_init(&bo->base, handle, alloc);
7683 debug_alloc__bo(kgem, &bo->base);
7684 DBG(("%s: created LLC handle=%d for buffer\n",
7685 __FUNCTION__, bo->base.handle));
7686 }
7687
7688 assert(bo->mmapped);
7689 assert(!bo->need_io);
7690
7691 bo->mem = kgem_bo_map__cpu(kgem, &bo->base);
7692 if (bo->mem) {
7693 if (flags & KGEM_BUFFER_WRITE)
7694 kgem_bo_sync__cpu(kgem, &bo->base);
7695 flags &= ~KGEM_BUFFER_INPLACE;
7696 goto init;
7697 } else {
7698 bo->base.refcnt = 0; /* for valgrind */
7699 kgem_bo_free(kgem, &bo->base);
7700 }
7701 }
7702 skip_llc:
7703
7704 if ((flags & KGEM_BUFFER_WRITE_INPLACE) == KGEM_BUFFER_WRITE_INPLACE) {
7705 /* The issue with using a GTT upload buffer is that we may
7706 * cause eviction-stalls in order to free up some GTT space.
7707 * An is-mappable? ioctl, or some per-page magic in the
7708 * kernel, could help us detect when we are about to block.
7709 *
7710 * XXX This is especially noticeable on memory-constrained
7711 * devices like gen2, or with a relatively slow GPU like i3.
7712 */
7713 DBG(("%s: searching for an inactive GTT map for upload\n",
7714 __FUNCTION__));
7715 old = search_linear_cache(kgem, alloc,
7716 CREATE_EXACT | CREATE_INACTIVE | CREATE_GTT_MAP);
7717 #if HAVE_I915_GEM_BUFFER_INFO
7718 if (old) {
7719 struct drm_i915_gem_buffer_info info;
7720
7721 /* An example of how such a non-blocking ioctl might work */
7722
7723 VG_CLEAR(info);
7724 info.handle = old->handle;
7725 if (do_ioctl(kgem->fd,
7726 DRM_IOCTL_I915_GEM_BUFFER_INFO,
7727 &info) == 0) {
7728 old->presumed_offset = info.addr;
7729 if ((info.flags & I915_GEM_MAPPABLE) == 0) {
7730 kgem_bo_move_to_inactive(kgem, old);
7731 old = NULL;
7732 }
7733 }
7734 }
7735 #endif
7736 if (old == NULL)
7737 old = search_linear_cache(kgem, NUM_PAGES(size),
7738 CREATE_EXACT | CREATE_INACTIVE | CREATE_GTT_MAP);
7739 if (old == NULL) {
7740 old = search_linear_cache(kgem, alloc, CREATE_INACTIVE);
7741 if (old && !kgem_bo_can_map(kgem, old)) {
7742 _kgem_bo_destroy(kgem, old);
7743 old = NULL;
7744 }
7745 }
7746 if (old) {
7747 DBG(("%s: reusing handle=%d for buffer\n",
7748 __FUNCTION__, old->handle));
7749 assert(kgem_bo_can_map(kgem, old));
7750 assert(!old->snoop);
7751 assert(old->rq == NULL);
7752
7753 bo = buffer_alloc();
7754 if (bo == NULL)
7755 return NULL;
7756
7757 init_buffer_from_bo(bo, old);
7758 assert(num_pages(&bo->base) >= NUM_PAGES(size));
7759
7760 assert(bo->mmapped);
7761 assert(bo->base.refcnt == 1);
7762
7763 bo->mem = kgem_bo_map(kgem, &bo->base);
7764 if (bo->mem) {
7765 if (bo->mem == MAP(bo->base.map__cpu))
7766 flags &= ~KGEM_BUFFER_INPLACE;
7767 else
7768 bo->mmapped = MMAPPED_GTT;
7769 goto init;
7770 } else {
7771 bo->base.refcnt = 0;
7772 kgem_bo_free(kgem, &bo->base);
7773 }
7774 }
7775 }
7776 #else
7777 flags &= ~KGEM_BUFFER_INPLACE;
7778 #endif
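	/* Could not (or chose not to) mmap an upload buffer directly; fall
	 * back to snoopable buffers or an ordinary bo filled via pwrite,
	 * dropping any remaining INPLACE hint along the way.
	 */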
7779 /* Be more parsimonious with pwrite/pread/cacheable buffers */
7780 if ((flags & KGEM_BUFFER_INPLACE) == 0)
7781 alloc = NUM_PAGES(size);
7782
7783 if (use_snoopable_buffer(kgem, flags)) {
7784 bo = search_snoopable_buffer(kgem, alloc);
7785 if (bo) {
7786 if (flags & KGEM_BUFFER_WRITE)
7787 kgem_bo_sync__cpu(kgem, &bo->base);
7788 flags &= ~KGEM_BUFFER_INPLACE;
7789 goto init;
7790 }
7791
7792 if ((flags & KGEM_BUFFER_INPLACE) == 0) {
7793 bo = create_snoopable_buffer(kgem, alloc);
7794 if (bo)
7795 goto init;
7796 }
7797 }
7798
7799 flags &= ~KGEM_BUFFER_INPLACE;
7800
7801 old = NULL;
7802 if ((flags & KGEM_BUFFER_WRITE) == 0)
7803 old = search_linear_cache(kgem, alloc, 0);
7804 if (old == NULL)
7805 old = search_linear_cache(kgem, alloc, CREATE_INACTIVE);
7806 if (old) {
7807 DBG(("%s: reusing ordinary handle %d for io\n",
7808 __FUNCTION__, old->handle));
7809 bo = buffer_alloc_with_data(num_pages(old));
7810 if (bo == NULL)
7811 return NULL;
7812
7813 init_buffer_from_bo(bo, old);
7814 bo->need_io = flags & KGEM_BUFFER_WRITE;
7815 } else {
7816 unsigned hint;
7817
7818 if (use_snoopable_buffer(kgem, flags)) {
7819 bo = create_snoopable_buffer(kgem, alloc);
7820 if (bo)
7821 goto init;
7822 }
7823
7824 bo = buffer_alloc();
7825 if (bo == NULL)
7826 return NULL;
7827
7828 hint = CREATE_INACTIVE;
7829 if (flags & KGEM_BUFFER_WRITE)
7830 hint |= CREATE_CPU_MAP;
7831 old = search_linear_cache(kgem, alloc, hint);
7832 if (old) {
7833 DBG(("%s: reusing handle=%d for buffer\n",
7834 __FUNCTION__, old->handle));
7835
7836 init_buffer_from_bo(bo, old);
7837 } else {
7838 uint32_t handle = gem_create(kgem->fd, alloc);
7839 if (handle == 0) {
7840 free(bo);
7841 return NULL;
7842 }
7843
7844 DBG(("%s: created handle=%d for buffer\n",
7845 __FUNCTION__, handle));
7846
7847 __kgem_bo_init(&bo->base, handle, alloc);
7848 debug_alloc__bo(kgem, &bo->base);
7849 }
7850
7851 assert(bo->mmapped);
7852 assert(!bo->need_io);
7853 assert(bo->base.refcnt == 1);
7854
7855 if (flags & KGEM_BUFFER_WRITE) {
7856 bo->mem = kgem_bo_map__cpu(kgem, &bo->base);
7857 if (bo->mem != NULL) {
7858 kgem_bo_sync__cpu(kgem, &bo->base);
7859 goto init;
7860 }
7861 }
7862
7863 DBG(("%s: falling back to new pwrite buffer\n", __FUNCTION__));
7864 old = &bo->base;
7865 bo = buffer_alloc_with_data(num_pages(old));
7866 if (bo == NULL) {
7867 old->refcnt = 0;
7868 kgem_bo_free(kgem, old);
7869 return NULL;
7870 }
7871
7872 init_buffer_from_bo(bo, old);
7873
7874 assert(bo->mem);
7875 assert(!bo->mmapped);
7876 assert(bo->base.refcnt == 1);
7877
7878 bo->need_io = flags & KGEM_BUFFER_WRITE;
7879 }
7880 init:
7881 bo->base.io = true;
7882 assert(bo->base.refcnt == 1);
7883 assert(num_pages(&bo->base) >= NUM_PAGES(size));
7884 assert(!bo->need_io || !bo->base.needs_flush);
7885 assert(!bo->need_io || bo->base.domain != DOMAIN_GPU);
7886 assert(bo->mem);
7887 assert(bo->mmapped != MMAPPED_GTT || bo->base.map__gtt == bo->mem || bo->base.map__wc == bo->mem);
7888 assert(bo->mmapped != MMAPPED_CPU || MAP(bo->base.map__cpu) == bo->mem);
7889
7890 bo->used = size;
7891 bo->write = flags & KGEM_BUFFER_WRITE_INPLACE;
7892 offset = 0;
7893
7894 assert(list_is_empty(&bo->base.list));
7895 list_add(&bo->base.list, &kgem->batch_buffers);
7896
7897 DBG(("%s(pages=%d [%d]) new handle=%d, used=%d, write=%d\n",
7898 __FUNCTION__, num_pages(&bo->base), alloc, bo->base.handle, bo->used, bo->write));
7899
7900 done:
7901 bo->used = ALIGN(bo->used, UPLOAD_ALIGNMENT);
7902 assert(bo->used && bo->used <= bytes(&bo->base));
7903 assert(bo->mem);
7904 *ret = (char *)bo->mem + offset;
7905 return kgem_create_proxy(kgem, &bo->base, offset, size);
7906 }
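/* A minimal usage sketch, kept out of the build with #if 0: callers get a
 * proxy bo plus a CPU pointer into the shared upload buffer, write their
 * data through that pointer, and then use the proxy as a GPU source. The
 * helper name example_stage_bytes() is hypothetical; kgem_create_buffer()
 * and KGEM_BUFFER_WRITE are the real entry points used above.
 */
#if 0
static struct kgem_bo *
example_stage_bytes(struct kgem *kgem, const void *data, uint32_t len)
{
	void *ptr;
	struct kgem_bo *bo;

	bo = kgem_create_buffer(kgem, len, KGEM_BUFFER_WRITE, &ptr);
	if (bo == NULL)
		return NULL;

	memcpy(ptr, data, len);	/* ptr already points at this proxy's offset */
	return bo;		/* use as a GPU source, then kgem_bo_destroy() */
}
#endif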
7907
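/* Report whether the upload buffer backing this proxy was created for
 * in-place (mmapped) writes, i.e. no pwrite staging is required.
 */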
7908 bool kgem_buffer_is_inplace(struct kgem_bo *_bo)
7909 {
7910 struct kgem_buffer *bo = (struct kgem_buffer *)_bo->proxy;
7911 return bo->write & KGEM_BUFFER_WRITE_INPLACE;
7912 }
7913
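/* 2D convenience wrapper around kgem_create_buffer(): the stride is padded
 * for the blitter (32-byte aligned on gen8+, 4-byte otherwise) and the
 * height rounded up to an even number, with the extra row trimmed again
 * below once the proxy has been carved out.
 */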
7914 struct kgem_bo *kgem_create_buffer_2d(struct kgem *kgem,
7915 int width, int height, int bpp,
7916 uint32_t flags,
7917 void **ret)
7918 {
7919 struct kgem_bo *bo;
7920 int stride;
7921
7922 assert(width > 0 && height > 0);
7923 assert(ret != NULL);
7924 stride = ALIGN(width, 2) * bpp >> 3;
7925 stride = ALIGN(stride, kgem->gen >= 0100 ? 32 : 4);
7926
7927 DBG(("%s: %dx%d, %d bpp, stride=%d\n",
7928 __FUNCTION__, width, height, bpp, stride));
7929
7930 bo = kgem_create_buffer(kgem, stride * ALIGN(height, 2), flags, ret);
7931 if (bo == NULL) {
7932 DBG(("%s: allocation failure for upload buffer\n",
7933 __FUNCTION__));
7934 return NULL;
7935 }
7936 assert(*ret != NULL);
7937 assert(bo->proxy != NULL);
7938
7939 if (height & 1) {
7940 struct kgem_buffer *io = (struct kgem_buffer *)bo->proxy;
7941 int min;
7942
7943 assert(io->used);
7944
7945 /* Having padded this surface to ensure that accesses to
7946 * the last pair of rows are valid, remove the padding so
7947 * that it can be allocated to other pixmaps.
7948 */
7949 min = bo->delta + height * stride;
7950 min = ALIGN(min, UPLOAD_ALIGNMENT);
7951 if (io->used != min) {
7952 DBG(("%s: trimming buffer from %d to %d\n",
7953 __FUNCTION__, io->used, min));
7954 io->used = min;
7955 }
7956 bo->size.bytes -= stride;
7957 }
7958
7959 bo->map__cpu = *ret;
7960 bo->pitch = stride;
7961 bo->unique_id = kgem_get_unique_id(kgem);
7962 return bo;
7963 }
7964
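/* Canonical consumer of kgem_create_buffer_2d(): copy a box of client
 * pixels into a freshly created write-inplace upload buffer with
 * memcpy_blt(), guarded by sigtrap_get() so a fault while reading the
 * source can be recovered.
 */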
7965 struct kgem_bo *kgem_upload_source_image(struct kgem *kgem,
7966 const void *data,
7967 const BoxRec *box,
7968 int stride, int bpp)
7969 {
7970 int width = box->x2 - box->x1;
7971 int height = box->y2 - box->y1;
7972 struct kgem_bo *bo;
7973 void *dst;
7974
7975 if (!kgem_can_create_2d(kgem, width, height, bpp))
7976 return NULL;
7977
7978 DBG(("%s : (%d, %d), (%d, %d), stride=%d, bpp=%d\n",
7979 __FUNCTION__, box->x1, box->y1, box->x2, box->y2, stride, bpp));
7980
7981 assert(data);
7982 assert(width > 0);
7983 assert(height > 0);
7984 assert(stride);
7985 assert(bpp);
7986
7987 bo = kgem_create_buffer_2d(kgem,
7988 width, height, bpp,
7989 KGEM_BUFFER_WRITE_INPLACE, &dst);
7990 if (bo == NULL)
7991 return NULL;
7992
7993 if (sigtrap_get()) {
7994 kgem_bo_destroy(kgem, bo);
7995 return NULL;
7996 }
7997
7998 memcpy_blt(data, dst, bpp,
7999 stride, bo->pitch,
8000 box->x1, box->y1,
8001 0, 0,
8002 width, height);
8003
8004 sigtrap_put();
8005 return bo;
8006 }
8007
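/* Attach the proxy to the caller's cached bo pointer: map__gtt is reused
 * as a back-pointer here so that, when the backing upload buffer is
 * recycled, the cached reference at *ptr can be cleared and released.
 */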
8008 void kgem_proxy_bo_attach(struct kgem_bo *bo,
8009 struct kgem_bo **ptr)
8010 {
8011 DBG(("%s: handle=%d\n", __FUNCTION__, bo->handle));
8012 assert(bo->map__gtt == NULL);
8013 assert(bo->proxy);
8014 list_add(&bo->vma, &bo->proxy->vma);
8015 bo->map__gtt = ptr;
8016 *ptr = kgem_bo_reference(bo);
8017 }
8018
8019 void kgem_buffer_read_sync(struct kgem *kgem, struct kgem_bo *_bo)
8020 {
8021 struct kgem_buffer *bo;
8022 uint32_t offset = _bo->delta, length = _bo->size.bytes;
8023
8024 /* We expect the caller to have already submitted the batch */
8025 assert(_bo->io);
8026 assert(_bo->exec == NULL);
8027 assert(_bo->rq == NULL);
8028 assert(_bo->proxy);
8029
8030 _bo = _bo->proxy;
8031 assert(_bo->proxy == NULL);
8032 assert(_bo->exec == NULL);
8033
8034 bo = (struct kgem_buffer *)_bo;
8035
8036 DBG(("%s(offset=%d, length=%d, snooped=%d)\n", __FUNCTION__,
8037 offset, length, bo->base.snoop));
8038
8039 if (bo->mmapped) {
8040 struct drm_i915_gem_set_domain set_domain;
8041
8042 DBG(("%s: sync: needs_flush? %d, domain? %d, busy? %d\n",
8043 __FUNCTION__,
8044 bo->base.needs_flush,
8045 bo->base.domain,
8046 __kgem_busy(kgem, bo->base.handle)));
8047
8048 assert(bo->mmapped == MMAPPED_GTT || bo->base.snoop || kgem->has_llc);
8049
8050 VG_CLEAR(set_domain);
8051 set_domain.handle = bo->base.handle;
8052 set_domain.write_domain = 0;
8053 set_domain.read_domains =
8054 bo->mmapped == MMAPPED_CPU ? I915_GEM_DOMAIN_CPU : I915_GEM_DOMAIN_GTT;
8055
8056 if (do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain)) {
8057 DBG(("%s: sync: GPU hang detected\n", __FUNCTION__));
8058 kgem_throttle(kgem);
8059 }
8060 } else {
8061 if (gem_read(kgem->fd,
8062 bo->base.handle, (char *)bo->mem+offset,
8063 offset, length))
8064 return;
8065 }
8066 kgem_bo_maybe_retire(kgem, &bo->base);
8067 bo->base.domain = DOMAIN_NONE;
8068 }
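/* A sketch of the expected download flow, illustrative only: the helper
 * name example_readback() is hypothetical and the copy in the middle is
 * elided; KGEM_BUFFER_LAST is assumed to be the read-back hint used by the
 * driver's download helpers. The key points are that the batch is submitted
 * before kgem_buffer_read_sync(), and the CPU pointer is read only after it.
 */
#if 0
static bool example_readback(struct kgem *kgem, uint32_t len)
{
	void *ptr;
	struct kgem_bo *dst;

	dst = kgem_create_buffer(kgem, len, KGEM_BUFFER_LAST, &ptr);
	if (dst == NULL)
		return false;

	/* ... emit a blt/render copy into dst, then kgem_submit(kgem) ... */

	kgem_buffer_read_sync(kgem, dst);	/* make ptr coherent with the GPU */
	/* consume the bytes at ptr here */

	kgem_bo_destroy(kgem, dst);
	return true;
}
#endif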
8069
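/* The binding list caches, per bo, the offset of the surface-state entry
 * last emitted for a given format, letting the render paths skip
 * re-emitting identical surface state.
 */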
8070 uint32_t kgem_bo_get_binding(struct kgem_bo *bo, uint32_t format)
8071 {
8072 struct kgem_bo_binding *b;
8073
8074 assert(bo->refcnt);
8075
8076 for (b = &bo->binding; b && b->offset; b = b->next)
8077 if (format == b->format)
8078 return b->offset;
8079
8080 return 0;
8081 }
8082
8083 void kgem_bo_set_binding(struct kgem_bo *bo, uint32_t format, uint16_t offset)
8084 {
8085 struct kgem_bo_binding *b;
8086
8087 assert(bo->refcnt);
8088
8089 for (b = &bo->binding; b; b = b->next) {
8090 if (b->offset)
8091 continue;
8092
8093 b->offset = offset;
8094 b->format = format;
8095
8096 if (b->next)
8097 b->next->offset = 0;
8098
8099 return;
8100 }
8101
8102 b = malloc(sizeof(*b));
8103 if (b) {
8104 b->next = bo->binding.next;
8105 b->format = format;
8106 b->offset = offset;
8107 bo->binding.next = b;
8108 }
8109 }
8110
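/* Copy src into a freshly allocated linear bo with the requested pitch,
 * using the blitter. Note the two batch layouts below: XY_SRC_COPY_BLT is
 * 8 dwords with 32-bit relocations before gen8, and 10 dwords on gen8+
 * where each relocation occupies 64 bits.
 */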
8111 struct kgem_bo *
8112 kgem_replace_bo(struct kgem *kgem,
8113 struct kgem_bo *src,
8114 uint32_t width,
8115 uint32_t height,
8116 uint32_t pitch,
8117 uint32_t bpp)
8118 {
8119 struct kgem_bo *dst;
8120 uint32_t br00, br13;
8121 uint32_t handle;
8122 uint32_t size;
8123 uint32_t *b;
8124
8125 DBG(("%s: replacing bo handle=%d, size=%dx%d pitch=%d, with pitch=%d\n",
8126 __FUNCTION__, src->handle, width, height, src->pitch, pitch));
8127
8128 /* We only expect to be called to fix up small buffers, hence
8129 * we only attempt to allocate a linear bo.
8130 */
8131 assert(src->tiling == I915_TILING_NONE);
8132 assert(kgem_bo_can_blt(kgem, src));
8133
8134 size = height * pitch;
8135 size = NUM_PAGES(size);
8136
8137 dst = search_linear_cache(kgem, size, 0);
8138 if (dst == NULL)
8139 dst = search_linear_cache(kgem, size, CREATE_INACTIVE);
8140 if (dst == NULL) {
8141 handle = gem_create(kgem->fd, size);
8142 if (handle == 0)
8143 return NULL;
8144
8145 dst = __kgem_bo_alloc(handle, size);
8146 if (dst == NULL) {
8147 gem_close(kgem->fd, handle);
8148 return NULL;
8149 }
8150
8151 debug_alloc__bo(kgem, dst);
8152 }
8153 dst->pitch = pitch;
8154 dst->unique_id = kgem_get_unique_id(kgem);
8155 dst->refcnt = 1;
8156 assert(dst->tiling == I915_TILING_NONE);
8157 assert(kgem_bo_can_blt(kgem, dst));
8158
8159 kgem_set_mode(kgem, KGEM_BLT, dst);
8160 if (!kgem_check_batch(kgem, 10) ||
8161 !kgem_check_reloc(kgem, 2) ||
8162 !kgem_check_many_bo_fenced(kgem, src, dst, NULL)) {
8163 kgem_submit(kgem);
8164 if (!kgem_check_many_bo_fenced(kgem, src, dst, NULL)) {
8165 kgem_bo_destroy(kgem, dst);
8166 return NULL;
8167 }
8168 _kgem_set_mode(kgem, KGEM_BLT);
8169 }
8170 kgem_bcs_set_tiling(kgem, src, dst);
8171
8172 br00 = XY_SRC_COPY_BLT_CMD;
8173 br13 = pitch;
8174 pitch = src->pitch;
8175 if (kgem->gen >= 040 && src->tiling) {
8176 br00 |= BLT_SRC_TILED;
8177 pitch >>= 2;
8178 }
8179
8180 br13 |= 0xcc << 16;
8181 br13 |= sna_br13_color_depth(bpp);
8182 if (bpp == 32)
8183 br00 |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
8184
8185 b = kgem->batch + kgem->nbatch;
8186 if (kgem->gen >= 0100) {
8187 b[0] = br00 | 8;
8188 b[1] = br13;
8189 b[2] = 0;
8190 b[3] = height << 16 | width;
8191 *(uint64_t *)(b+4) =
8192 kgem_add_reloc64(kgem, kgem->nbatch + 4, dst,
8193 I915_GEM_DOMAIN_RENDER << 16 |
8194 I915_GEM_DOMAIN_RENDER |
8195 KGEM_RELOC_FENCED,
8196 0);
8197 b[6] = 0;
8198 b[7] = pitch;
8199 *(uint64_t *)(b+8) =
8200 kgem_add_reloc64(kgem, kgem->nbatch + 8, src,
8201 I915_GEM_DOMAIN_RENDER << 16 |
8202 KGEM_RELOC_FENCED,
8203 0);
8204 kgem->nbatch += 10;
8205 } else {
8206 b[0] = br00 | 6;
8207 b[1] = br13;
8208 b[2] = 0;
8209 b[3] = height << 16 | width;
8210 b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, dst,
8211 I915_GEM_DOMAIN_RENDER << 16 |
8212 I915_GEM_DOMAIN_RENDER |
8213 KGEM_RELOC_FENCED,
8214 0);
8215 b[5] = 0;
8216 b[6] = pitch;
8217 b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, src,
8218 I915_GEM_DOMAIN_RENDER << 16 |
8219 KGEM_RELOC_FENCED,
8220 0);
8221 kgem->nbatch += 8;
8222 }
8223
8224 return dst;
8225 }
8226
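/* Promote a CPU (snooped) bo for GPU use. On LLC hardware nothing needs
 * to change; otherwise any batch still referencing the bo is flushed and
 * the buffer is switched to UNCACHED via gem_set_caching(), clearing its
 * snoop status.
 */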
8227 bool kgem_bo_convert_to_gpu(struct kgem *kgem,
8228 struct kgem_bo *bo,
8229 unsigned flags)
8230 {
8231 DBG(("%s: converting handle=%d from CPU to GPU, flags=%x, busy?=%d\n",
8232 __FUNCTION__, bo->handle, flags, __kgem_bo_is_busy(kgem, bo)));
8233 assert(bo->tiling == I915_TILING_NONE);
8234
8235 if (flags & (__MOVE_PRIME | __MOVE_SCANOUT))
8236 return false;
8237
8238 if (kgem->has_llc)
8239 return true;
8240
8241 if (flags & MOVE_ASYNC_HINT && __kgem_bo_is_busy(kgem, bo))
8242 return false;
8243
8244 assert(bo->snoop);
8245
8246 kgem_bo_submit(kgem, bo);
8247
8248 if (!gem_set_caching(kgem->fd, bo->handle, UNCACHED))
8249 return false;
8250
8251 bo->snoop = false;
8252 return true;
8253 }
8254