1 /**************************************************************************
2  *
 * Copyright © 2007 Red Hat Inc.
 * Copyright © 2007 Intel Corporation
5  * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA
6  * All Rights Reserved.
7  *
8  * Permission is hereby granted, free of charge, to any person obtaining a
9  * copy of this software and associated documentation files (the
10  * "Software"), to deal in the Software without restriction, including
11  * without limitation the rights to use, copy, modify, merge, publish,
12  * distribute, sub license, and/or sell copies of the Software, and to
13  * permit persons to whom the Software is furnished to do so, subject to
14  * the following conditions:
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19  * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
20  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22  * USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * The above copyright notice and this permission notice (including the
25  * next paragraph) shall be included in all copies or substantial portions
26  * of the Software.
27  *
28  *
29  **************************************************************************/
30 /*
 * Authors: Thomas Hellström <thomas-at-tungstengraphics-dot-com>
32  *          Keith Whitwell <keithw-at-tungstengraphics-dot-com>
33  *	    Eric Anholt <eric@anholt.net>
34  *	    Dave Airlie <airlied@linux.ie>
35  */
36 
37 #ifdef HAVE_CONFIG_H
38 #include "config.h"
39 #endif
40 
41 #include <xf86drm.h>
42 #include <fcntl.h>
43 #include <stdio.h>
44 #include <stdlib.h>
45 #include <string.h>
46 #include <unistd.h>
47 #include <assert.h>
48 #include <pthread.h>
49 #include <sys/ioctl.h>
50 #include <sys/mman.h>
51 #include <sys/stat.h>
52 #include <sys/types.h>
53 
#include <errno.h>
#include "libdrm_lists.h"
#include "intel_bufmgr.h"
#include "intel_bufmgr_priv.h"
#include "intel_chipset.h"
60 
61 #include "i915_drm.h"
62 
63 #define DBG(...) do {					\
64    if (bufmgr_gem->bufmgr.debug)			\
65       fprintf(stderr, __VA_ARGS__);			\
66 } while (0)
67 
68 typedef struct _drm_intel_bo_gem drm_intel_bo_gem;
69 
70 struct drm_intel_gem_bo_bucket {
71    drmMMListHead head;
72 
73    /**
74     * Limit on the number of entries in this bucket.
75     *
    * 0 means that caching at this bucket size is disabled.
77     * -1 means that there is no limit to caching at this size.
78     */
79    int max_entries;
80    int num_entries;
81 };
82 
83 /* Only cache objects up to 64MB.  Bigger than that, and the rounding of the
84  * size makes many operations fail that wouldn't otherwise.
85  */
86 #define DRM_INTEL_GEM_BO_BUCKETS	14
87 typedef struct _drm_intel_bufmgr_gem {
88     drm_intel_bufmgr bufmgr;
89 
90     int fd;
91 
92     int max_relocs;
93 
94     pthread_mutex_t lock;
95 
96     struct drm_i915_gem_exec_object *exec_objects;
97     drm_intel_bo **exec_bos;
98     int exec_size;
99     int exec_count;
100 
101     /** Array of lists of cached gem objects of power-of-two sizes */
102     struct drm_intel_gem_bo_bucket cache_bucket[DRM_INTEL_GEM_BO_BUCKETS];
103 
104     uint64_t gtt_size;
105     int available_fences;
106     int pci_device;
107 } drm_intel_bufmgr_gem;
108 
109 struct _drm_intel_bo_gem {
110     drm_intel_bo bo;
111 
112     int refcount;
    /** GEM handle for this buffer object, as returned by the kernel. */
    uint32_t gem_handle;
115     const char *name;
116 
117     /**
     * Kernel-assigned global name for this object
119      */
120     unsigned int global_name;
121 
122     /**
123      * Index of the buffer within the validation list while preparing a
124      * batchbuffer execution.
125      */
126     int validate_index;
127 
128     /**
129      * Boolean whether we've started swrast
130      * Set when the buffer has been mapped
131      * Cleared when the buffer is unmapped
132      */
133     int swrast;
134 
135     /**
136      * Current tiling mode
137      */
138     uint32_t tiling_mode;
139     uint32_t swizzle_mode;
140 
141     /** Array passed to the DRM containing relocation information. */
142     struct drm_i915_gem_relocation_entry *relocs;
143     /** Array of bos corresponding to relocs[i].target_handle */
144     drm_intel_bo **reloc_target_bo;
145     /** Number of entries in relocs */
146     int reloc_count;
147     /** Mapped address for the buffer, saved across map/unmap cycles */
148     void *mem_virtual;
149     /** GTT virtual address for the buffer, saved across map/unmap cycles */
150     void *gtt_virtual;
151 
152     /** BO cache list */
153     drmMMListHead head;
154 
155     /**
156      * Boolean of whether this BO and its children have been included in
157      * the current drm_intel_bufmgr_check_aperture_space() total.
158      */
159     char included_in_check_aperture;
160 
161     /**
162      * Boolean of whether this buffer has been used as a relocation
163      * target and had its size accounted for, and thus can't have any
164      * further relocations added to it.
165      */
166      char used_as_reloc_target;
167 
168     /**
169      * Boolean of whether this buffer can be re-used
170      */
171     char reusable;
172 
173     /**
     * Size in bytes of this buffer and its relocation descendants.
175      *
176      * Used to avoid costly tree walking in drm_intel_bufmgr_check_aperture in
177      * the common case.
178      */
179     int reloc_tree_size;
180     /**
181      * Number of potential fence registers required by this buffer and its
182      * relocations.
183      */
184     int reloc_tree_fences;
185 };
186 
187 static void drm_intel_gem_bo_reference_locked(drm_intel_bo *bo);
188 
189 static unsigned int
190 drm_intel_gem_estimate_batch_space(drm_intel_bo **bo_array, int count);
191 
192 static unsigned int
193 drm_intel_gem_compute_batch_space(drm_intel_bo **bo_array, int count);
194 
195 static int
196 drm_intel_gem_bo_get_tiling(drm_intel_bo *bo, uint32_t *tiling_mode,
197 			    uint32_t *swizzle_mode);
198 
199 static int
200 drm_intel_gem_bo_set_tiling(drm_intel_bo *bo, uint32_t *tiling_mode,
201 			    uint32_t stride);
202 
203 static void
204 drm_intel_gem_bo_unreference(drm_intel_bo *bo);
205 
206 static int
logbase2(int n)
208 {
209    int i = 1;
210    int log2 = 0;
211 
212    while (n > i) {
213       i *= 2;
214       log2++;
215    }
216 
217    return log2;
218 }
219 
220 static struct drm_intel_gem_bo_bucket *
drm_intel_gem_bo_bucket_for_size(drm_intel_bufmgr_gem *bufmgr_gem,
222 				 unsigned long size)
223 {
224     int i;
225 
226     /* We only do buckets in power of two increments */
227     if ((size & (size - 1)) != 0)
228 	return NULL;
229 
230     /* We should only see sizes rounded to pages. */
231     assert((size % 4096) == 0);
232 
233     /* We always allocate in units of pages */
234     i = ffs(size / 4096) - 1;
235     if (i >= DRM_INTEL_GEM_BO_BUCKETS)
236 	return NULL;
237 
238     return &bufmgr_gem->cache_bucket[i];
239 }
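
/*
 * Illustrative sketch (not part of the build): how sizes map onto cache
 * buckets.  Bucket i holds buffers of exactly 4096 << i bytes, so only
 * power-of-two, page-aligned sizes are ever cached:
 *
 *	drm_intel_gem_bo_bucket_for_size(bufmgr_gem, 4096);	// bucket 0
 *	drm_intel_gem_bo_bucket_for_size(bufmgr_gem, 8192);	// bucket 1
 *	drm_intel_gem_bo_bucket_for_size(bufmgr_gem, 65536);	// bucket 4
 *	drm_intel_gem_bo_bucket_for_size(bufmgr_gem, 12288);	// NULL: not a
 *								// power of two
 */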
240 
241 
static void drm_intel_gem_dump_validation_list(drm_intel_bufmgr_gem *bufmgr_gem)
243 {
244     int i, j;
245 
246     for (i = 0; i < bufmgr_gem->exec_count; i++) {
247 	drm_intel_bo *bo = bufmgr_gem->exec_bos[i];
248 	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
249 
250 	if (bo_gem->relocs == NULL) {
251 	    DBG("%2d: %d (%s)\n", i, bo_gem->gem_handle, bo_gem->name);
252 	    continue;
253 	}
254 
255 	for (j = 0; j < bo_gem->reloc_count; j++) {
256 	    drm_intel_bo *target_bo = bo_gem->reloc_target_bo[j];
257 	    drm_intel_bo_gem *target_gem = (drm_intel_bo_gem *)target_bo;
258 
259 	    DBG("%2d: %d (%s)@0x%08llx -> %d (%s)@0x%08lx + 0x%08x\n",
260 		i,
261 		bo_gem->gem_handle, bo_gem->name,
262 		(unsigned long long)bo_gem->relocs[j].offset,
263 		target_gem->gem_handle, target_gem->name, target_bo->offset,
264 		bo_gem->relocs[j].delta);
265 	}
266     }
267 }
268 
269 /**
270  * Adds the given buffer to the list of buffers to be validated (moved into the
271  * appropriate memory type) with the next batch submission.
272  *
273  * If a buffer is validated multiple times in a batch submission, it ends up
274  * with the intersection of the memory type flags and the union of the
275  * access flags.
276  */
277 static void
drm_intel_add_validate_buffer(drm_intel_bo *bo)
279 {
280     drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
281     drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
282     int index;
283 
284     if (bo_gem->validate_index != -1)
285 	return;
286 
287     /* Extend the array of validation entries as necessary. */
288     if (bufmgr_gem->exec_count == bufmgr_gem->exec_size) {
289 	int new_size = bufmgr_gem->exec_size * 2;
290 
291 	if (new_size == 0)
292 	    new_size = 5;
293 
294 	bufmgr_gem->exec_objects =
295 	    realloc(bufmgr_gem->exec_objects,
296 		    sizeof(*bufmgr_gem->exec_objects) * new_size);
297 	bufmgr_gem->exec_bos =
298 	    realloc(bufmgr_gem->exec_bos,
299 		    sizeof(*bufmgr_gem->exec_bos) * new_size);
300 	bufmgr_gem->exec_size = new_size;
301     }
302 
303     index = bufmgr_gem->exec_count;
304     bo_gem->validate_index = index;
305     /* Fill in array entry */
306     bufmgr_gem->exec_objects[index].handle = bo_gem->gem_handle;
307     bufmgr_gem->exec_objects[index].relocation_count = bo_gem->reloc_count;
308     bufmgr_gem->exec_objects[index].relocs_ptr = (uintptr_t)bo_gem->relocs;
309     bufmgr_gem->exec_objects[index].alignment = 0;
310     bufmgr_gem->exec_objects[index].offset = 0;
311     bufmgr_gem->exec_bos[index] = bo;
312     drm_intel_gem_bo_reference_locked(bo);
313     bufmgr_gem->exec_count++;
314 }
315 
316 
317 #define RELOC_BUF_SIZE(x) ((I915_RELOC_HEADER + x * I915_RELOC0_STRIDE) * \
318 	sizeof(uint32_t))
319 
320 static int
drm_intel_setup_reloc_list(drm_intel_bo *bo)
322 {
323     drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
324     drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
325 
326     bo_gem->relocs = malloc(bufmgr_gem->max_relocs *
327 			    sizeof(struct drm_i915_gem_relocation_entry));
328     bo_gem->reloc_target_bo = malloc(bufmgr_gem->max_relocs *
329 				     sizeof(drm_intel_bo *));
330 
331     return 0;
332 }
333 
334 static drm_intel_bo *
drm_intel_gem_bo_alloc_internal(drm_intel_bufmgr *bufmgr, const char *name,
336 				unsigned long size, unsigned int alignment,
337 				int for_render)
338 {
339     drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
340     drm_intel_bo_gem *bo_gem;
341     unsigned int page_size = getpagesize();
342     int ret;
343     struct drm_intel_gem_bo_bucket *bucket;
344     int alloc_from_cache = 0;
345     unsigned long bo_size;
346 
347     /* Round the allocated size up to a power of two number of pages. */
348     bo_size = 1 << logbase2(size);
349     if (bo_size < page_size)
350 	bo_size = page_size;
351     bucket = drm_intel_gem_bo_bucket_for_size(bufmgr_gem, bo_size);
352 
353     /* If we don't have caching at this size, don't actually round the
354      * allocation up.
355      */
356     if (bucket == NULL || bucket->max_entries == 0) {
357 	bo_size = size;
358 	if (bo_size < page_size)
359 	    bo_size = page_size;
360     }
361 
362     pthread_mutex_lock(&bufmgr_gem->lock);
363     /* Get a buffer out of the cache if available */
364     if (bucket != NULL && bucket->num_entries > 0) {
365 	struct drm_i915_gem_busy busy;
366 
367 	if (for_render) {
368 	    /* Allocate new render-target BOs from the tail (MRU)
369 	     * of the list, as it will likely be hot in the GPU cache
370 	     * and in the aperture for us.
371 	     */
372 	    bo_gem = DRMLISTENTRY(drm_intel_bo_gem, bucket->head.prev, head);
373 	    DRMLISTDEL(&bo_gem->head);
374 	    bucket->num_entries--;
375 	    alloc_from_cache = 1;
376 	} else {
377 	    /* For non-render-target BOs (where we're probably going to map it
378 	     * first thing in order to fill it with data), check if the
379 	     * last BO in the cache is unbusy, and only reuse in that case.
380 	     * Otherwise, allocating a new buffer is probably faster than
381 	     * waiting for the GPU to finish.
382 	     */
383 	    bo_gem = DRMLISTENTRY(drm_intel_bo_gem, bucket->head.next, head);
384 
385 	    memset(&busy, 0, sizeof(busy));
386 	    busy.handle = bo_gem->gem_handle;
387 
388 	    ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_BUSY, &busy);
389 	    alloc_from_cache = (ret == 0 && busy.busy == 0);
390 
391 	    if (alloc_from_cache) {
392 		DRMLISTDEL(&bo_gem->head);
393 		bucket->num_entries--;
394 	    }
395 	}
396     }
397     pthread_mutex_unlock(&bufmgr_gem->lock);
398 
399     if (!alloc_from_cache) {
400 	struct drm_i915_gem_create create;
401 
402 	bo_gem = calloc(1, sizeof(*bo_gem));
403 	if (!bo_gem)
404 	    return NULL;
405 
406 	bo_gem->bo.size = bo_size;
407 	memset(&create, 0, sizeof(create));
408 	create.size = bo_size;
409 
410 	ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_CREATE, &create);
411 	bo_gem->gem_handle = create.handle;
412 	bo_gem->bo.handle = bo_gem->gem_handle;
413 	if (ret != 0) {
414 	    free(bo_gem);
415 	    return NULL;
416 	}
417 	bo_gem->bo.bufmgr = bufmgr;
418     }
419 
420     bo_gem->name = name;
421     bo_gem->refcount = 1;
422     bo_gem->validate_index = -1;
423     bo_gem->reloc_tree_size = bo_gem->bo.size;
424     bo_gem->reloc_tree_fences = 0;
425     bo_gem->used_as_reloc_target = 0;
426     bo_gem->tiling_mode = I915_TILING_NONE;
427     bo_gem->swizzle_mode = I915_BIT_6_SWIZZLE_NONE;
428     bo_gem->reusable = 1;
429 
430     DBG("bo_create: buf %d (%s) %ldb\n",
431 	bo_gem->gem_handle, bo_gem->name, size);
432 
433     return &bo_gem->bo;
434 }
435 
436 static drm_intel_bo *
drm_intel_gem_bo_alloc_for_render(drm_intel_bufmgr *bufmgr, const char *name,
438 				  unsigned long size, unsigned int alignment)
439 {
440     return drm_intel_gem_bo_alloc_internal(bufmgr, name, size, alignment, 1);
441 }
442 
443 static drm_intel_bo *
drm_intel_gem_bo_alloc(drm_intel_bufmgr *bufmgr, const char *name,
445 		       unsigned long size, unsigned int alignment)
446 {
447     return drm_intel_gem_bo_alloc_internal(bufmgr, name, size, alignment, 0);
448 }
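
/*
 * Usage sketch (an illustration, not part of this file): allocating and
 * releasing a buffer through the public wrappers declared in intel_bufmgr.h,
 * which dispatch to the bo_alloc/bo_unreference hooks installed below.
 * The "bufmgr" pointer is assumed to come from drm_intel_bufmgr_gem_init().
 *
 *	drm_intel_bo *bo;
 *
 *	bo = drm_intel_bo_alloc(bufmgr, "vertex data", 16384, 4096);
 *	if (bo == NULL)
 *		return -ENOMEM;
 *	... use the buffer ...
 *	drm_intel_bo_unreference(bo);
 *
 * With reuse enabled, the unreference above normally just parks the object
 * in the 16KB cache bucket instead of closing the GEM handle.
 */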
449 
450 /**
451  * Returns a drm_intel_bo wrapping the given buffer object handle.
452  *
453  * This can be used when one application needs to pass a buffer object
454  * to another.
455  */
456 drm_intel_bo *
drm_intel_bo_gem_create_from_name(drm_intel_bufmgr *bufmgr, const char *name,
458 				  unsigned int handle)
459 {
460     drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
461     drm_intel_bo_gem *bo_gem;
462     int ret;
463     struct drm_gem_open open_arg;
464     struct drm_i915_gem_get_tiling get_tiling;
465 
466     bo_gem = calloc(1, sizeof(*bo_gem));
467     if (!bo_gem)
468 	return NULL;
469 
470     memset(&open_arg, 0, sizeof(open_arg));
471     open_arg.name = handle;
472     ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_GEM_OPEN, &open_arg);
473     if (ret != 0) {
474 	fprintf(stderr, "Couldn't reference %s handle 0x%08x: %s\n",
475 	       name, handle, strerror(errno));
476 	free(bo_gem);
477 	return NULL;
478     }
479     bo_gem->bo.size = open_arg.size;
480     bo_gem->bo.offset = 0;
481     bo_gem->bo.virtual = NULL;
482     bo_gem->bo.bufmgr = bufmgr;
483     bo_gem->name = name;
484     bo_gem->refcount = 1;
485     bo_gem->validate_index = -1;
486     bo_gem->gem_handle = open_arg.handle;
487     bo_gem->global_name = handle;
488     bo_gem->reusable = 0;
489 
490     memset(&get_tiling, 0, sizeof(get_tiling));
491     get_tiling.handle = bo_gem->gem_handle;
492     ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_GET_TILING, &get_tiling);
493     if (ret != 0) {
494 	drm_intel_gem_bo_unreference(&bo_gem->bo);
495 	return NULL;
496     }
497     bo_gem->tiling_mode = get_tiling.tiling_mode;
498     bo_gem->swizzle_mode = get_tiling.swizzle_mode;
499     if (bo_gem->tiling_mode == I915_TILING_NONE)
500 	bo_gem->reloc_tree_fences = 0;
501     else
502 	bo_gem->reloc_tree_fences = 1;
503 
504     DBG("bo_create_from_handle: %d (%s)\n", handle, bo_gem->name);
505 
506     return &bo_gem->bo;
507 }
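
/*
 * Sketch of the intended cross-process flow (illustrative only): one process
 * names a buffer with drm_intel_bo_flink() and hands the 32-bit name to a
 * second process, which reopens it here.  Both buffer managers are assumed to
 * be GEM buffer managers on the same device.
 *
 *	// exporting process
 *	uint32_t name;
 *	drm_intel_bo_flink(scanout_bo, &name);
 *	... pass "name" over some IPC channel ...
 *
 *	// importing process
 *	drm_intel_bo *shared;
 *	shared = drm_intel_bo_gem_create_from_name(bufmgr, "scanout", name);
 *
 * Note that imported buffers are marked non-reusable, so they are never
 * returned to the allocation cache.
 */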
508 
509 static void
drm_intel_gem_bo_reference(drm_intel_bo *bo)
511 {
512     drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
513     drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
514 
515     assert(bo_gem->refcount > 0);
516     pthread_mutex_lock(&bufmgr_gem->lock);
517     bo_gem->refcount++;
518     pthread_mutex_unlock(&bufmgr_gem->lock);
519 }
520 
521 static void
drm_intel_gem_bo_reference_locked(drm_intel_bo *bo)
523 {
524     drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
525 
526     assert(bo_gem->refcount > 0);
527     bo_gem->refcount++;
528 }
529 
530 static void
drm_intel_gem_bo_free(drm_intel_bo *bo)
532 {
533     drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
534     drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
535     struct drm_gem_close close;
536     int ret;
537 
538     if (bo_gem->mem_virtual)
539 	munmap (bo_gem->mem_virtual, bo_gem->bo.size);
540     if (bo_gem->gtt_virtual)
541 	munmap (bo_gem->gtt_virtual, bo_gem->bo.size);
542 
543     /* Close this object */
544     memset(&close, 0, sizeof(close));
545     close.handle = bo_gem->gem_handle;
546     ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_GEM_CLOSE, &close);
547     if (ret != 0) {
548 	fprintf(stderr,
549 		"DRM_IOCTL_GEM_CLOSE %d failed (%s): %s\n",
550 		bo_gem->gem_handle, bo_gem->name, strerror(errno));
551     }
552     free(bo);
553 }
554 
555 static void
drm_intel_gem_bo_unreference_locked(drm_intel_bo *bo)
557 {
558     drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
559     drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
560 
561     assert(bo_gem->refcount > 0);
562     if (--bo_gem->refcount == 0) {
563 	struct drm_intel_gem_bo_bucket *bucket;
564 	uint32_t tiling_mode;
565 
566 	if (bo_gem->relocs != NULL) {
567 	    int i;
568 
569 	    /* Unreference all the target buffers */
570 	    for (i = 0; i < bo_gem->reloc_count; i++)
571 		 drm_intel_gem_bo_unreference_locked(bo_gem->reloc_target_bo[i]);
572 	    free(bo_gem->reloc_target_bo);
573 	    free(bo_gem->relocs);
574 	}
575 
576 	DBG("bo_unreference final: %d (%s)\n",
577 	    bo_gem->gem_handle, bo_gem->name);
578 
579 	bucket = drm_intel_gem_bo_bucket_for_size(bufmgr_gem, bo->size);
580 	/* Put the buffer into our internal cache for reuse if we can. */
581 	tiling_mode = I915_TILING_NONE;
582 	if (bo_gem->reusable &&
583 	    bucket != NULL &&
584 	    (bucket->max_entries == -1 ||
585 	     (bucket->max_entries > 0 &&
586 	      bucket->num_entries < bucket->max_entries)) &&
587 	    drm_intel_gem_bo_set_tiling(bo, &tiling_mode, 0) == 0)
588 	{
589 	    bo_gem->name = NULL;
590 	    bo_gem->validate_index = -1;
591 	    bo_gem->relocs = NULL;
592 	    bo_gem->reloc_target_bo = NULL;
593 	    bo_gem->reloc_count = 0;
594 
595 	    DRMLISTADDTAIL(&bo_gem->head, &bucket->head);
596 	    bucket->num_entries++;
597 	} else {
598 	    drm_intel_gem_bo_free(bo);
599 	}
600     }
601 }
602 
603 static void
drm_intel_gem_bo_unreference(drm_intel_bo *bo)
605 {
606     drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
607 
608     pthread_mutex_lock(&bufmgr_gem->lock);
609     drm_intel_gem_bo_unreference_locked(bo);
610     pthread_mutex_unlock(&bufmgr_gem->lock);
611 }
612 
613 static int
drm_intel_gem_bo_map(drm_intel_bo *bo, int write_enable)
615 {
616     drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
617     drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
618     struct drm_i915_gem_set_domain set_domain;
619     int ret;
620 
621     pthread_mutex_lock(&bufmgr_gem->lock);
622 
623     /* Allow recursive mapping. Mesa may recursively map buffers with
624      * nested display loops.
625      */
626     if (!bo_gem->mem_virtual) {
627 	struct drm_i915_gem_mmap mmap_arg;
628 
629 	DBG("bo_map: %d (%s)\n", bo_gem->gem_handle, bo_gem->name);
630 
631 	memset(&mmap_arg, 0, sizeof(mmap_arg));
632 	mmap_arg.handle = bo_gem->gem_handle;
633 	mmap_arg.offset = 0;
634 	mmap_arg.size = bo->size;
635 	ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_MMAP, &mmap_arg);
636 	if (ret != 0) {
637 	    fprintf(stderr, "%s:%d: Error mapping buffer %d (%s): %s .\n",
638 		    __FILE__, __LINE__,
639 		    bo_gem->gem_handle, bo_gem->name, strerror(errno));
640 	    pthread_mutex_unlock(&bufmgr_gem->lock);
641 	    return ret;
642 	}
643 	bo_gem->mem_virtual = (void *)(uintptr_t)mmap_arg.addr_ptr;
644 	bo_gem->swrast = 0;
645     }
646     DBG("bo_map: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name,
647 	bo_gem->mem_virtual);
648     bo->virtual = bo_gem->mem_virtual;
649 
650     if (bo_gem->global_name != 0 || !bo_gem->swrast) {
651 	set_domain.handle = bo_gem->gem_handle;
652 	set_domain.read_domains = I915_GEM_DOMAIN_CPU;
653 	if (write_enable)
654 	    set_domain.write_domain = I915_GEM_DOMAIN_CPU;
655 	else
656 	    set_domain.write_domain = 0;
657 	do {
658 	    ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN,
659 			&set_domain);
660 	} while (ret == -1 && errno == EINTR);
661 	if (ret != 0) {
662 	    fprintf (stderr, "%s:%d: Error setting swrast %d: %s\n",
663 		     __FILE__, __LINE__, bo_gem->gem_handle, strerror (errno));
664 	    pthread_mutex_unlock(&bufmgr_gem->lock);
665 	    return ret;
666 	}
667 	bo_gem->swrast = 1;
668     }
669 
670     pthread_mutex_unlock(&bufmgr_gem->lock);
671 
672     return 0;
673 }
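
/*
 * Usage sketch (illustrative, via the public wrappers in intel_bufmgr.h):
 * a CPU map/fill/unmap cycle.  The set-domain call above makes sure the
 * object is coherent with the CPU before the caller touches bo->virtual.
 *
 *	if (drm_intel_bo_map(bo, 1) == 0) {
 *		memset(bo->virtual, 0, bo->size);
 *		drm_intel_bo_unmap(bo);
 *	}
 *
 * Because mem_virtual is cached in the bo, repeated map/unmap cycles after
 * the first one cost only the set-domain/sw-finish ioctls, not a new mmap.
 */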
674 
675 int
drm_intel_gem_bo_map_gtt(drm_intel_bo *bo)
677 {
678     drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
679     drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
680     struct drm_i915_gem_set_domain set_domain;
681     int ret;
682 
683     pthread_mutex_lock(&bufmgr_gem->lock);
684 
685     /* Get a mapping of the buffer if we haven't before. */
686     if (bo_gem->gtt_virtual == NULL) {
687 	struct drm_i915_gem_mmap_gtt mmap_arg;
688 
689 	DBG("bo_map_gtt: %d (%s)\n", bo_gem->gem_handle, bo_gem->name);
690 
691 	memset(&mmap_arg, 0, sizeof(mmap_arg));
692 	mmap_arg.handle = bo_gem->gem_handle;
693 
694 	/* Get the fake offset back... */
695 	ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &mmap_arg);
696 	if (ret != 0) {
697 	    fprintf(stderr,
698 		    "%s:%d: Error preparing buffer map %d (%s): %s .\n",
699 		    __FILE__, __LINE__,
700 		    bo_gem->gem_handle, bo_gem->name,
701 		    strerror(errno));
702 	    pthread_mutex_unlock(&bufmgr_gem->lock);
703 	    return ret;
704 	}
705 
706 	/* and mmap it */
707 	bo_gem->gtt_virtual = mmap(0, bo->size, PROT_READ | PROT_WRITE,
708 				   MAP_SHARED, bufmgr_gem->fd,
709 				   mmap_arg.offset);
710 	if (bo_gem->gtt_virtual == MAP_FAILED) {
711 	    fprintf(stderr,
712 		    "%s:%d: Error mapping buffer %d (%s): %s .\n",
713 		    __FILE__, __LINE__,
714 		    bo_gem->gem_handle, bo_gem->name,
715 		    strerror(errno));
716 	    pthread_mutex_unlock(&bufmgr_gem->lock);
717 	    return errno;
718 	}
719     }
720 
721     bo->virtual = bo_gem->gtt_virtual;
722 
723     DBG("bo_map: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name,
724 	bo_gem->gtt_virtual);
725 
726     /* Now move it to the GTT domain so that the CPU caches are flushed */
727     set_domain.handle = bo_gem->gem_handle;
728     set_domain.read_domains = I915_GEM_DOMAIN_GTT;
729     set_domain.write_domain = I915_GEM_DOMAIN_GTT;
730     do {
731 	    ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN,
732 			&set_domain);
733     } while (ret == -1 && errno == EINTR);
734 
735     if (ret != 0) {
736 	    fprintf (stderr, "%s:%d: Error setting domain %d: %s\n",
737 		     __FILE__, __LINE__, bo_gem->gem_handle, strerror (errno));
738     }
739 
740     pthread_mutex_unlock(&bufmgr_gem->lock);
741 
742     return 0;
743 }
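
/*
 * Usage sketch (illustrative): the GTT mapping is what a 2D driver would use
 * to write a tiled pixmap, since access through the aperture goes through the
 * fence registers and so respects tiling, unlike a CPU mmap of the backing
 * pages.  "pixmap_bo", "src" and "bytes" are hypothetical.
 *
 *	if (drm_intel_gem_bo_map_gtt(pixmap_bo) == 0) {
 *		memcpy(pixmap_bo->virtual, src, bytes);
 *		drm_intel_gem_bo_unmap_gtt(pixmap_bo);
 *	}
 */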
744 
745 int
drm_intel_gem_bo_unmap_gtt(drm_intel_bo *bo)
747 {
748     drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
749     drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
750     int ret = 0;
751 
752     if (bo == NULL)
753 	return 0;
754 
755     assert(bo_gem->gtt_virtual != NULL);
756 
757     pthread_mutex_lock(&bufmgr_gem->lock);
758     bo->virtual = NULL;
759     pthread_mutex_unlock(&bufmgr_gem->lock);
760 
761     return ret;
762 }
763 
764 static int
drm_intel_gem_bo_unmap(drm_intel_bo *bo)
766 {
767     drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
768     drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
769     struct drm_i915_gem_sw_finish sw_finish;
770     int ret;
771 
772     if (bo == NULL)
773 	return 0;
774 
775     assert(bo_gem->mem_virtual != NULL);
776 
777     pthread_mutex_lock(&bufmgr_gem->lock);
778     if (bo_gem->swrast) {
779 	sw_finish.handle = bo_gem->gem_handle;
780 	do {
781 	    ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_SW_FINISH,
782 			&sw_finish);
783 	} while (ret == -1 && errno == EINTR);
784 	bo_gem->swrast = 0;
785     }
786     bo->virtual = NULL;
787     pthread_mutex_unlock(&bufmgr_gem->lock);
788     return 0;
789 }
790 
791 static int
drm_intel_gem_bo_subdata (drm_intel_bo *bo, unsigned long offset,
793 			  unsigned long size, const void *data)
794 {
795     drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
796     drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
797     struct drm_i915_gem_pwrite pwrite;
798     int ret;
799 
800     memset (&pwrite, 0, sizeof (pwrite));
801     pwrite.handle = bo_gem->gem_handle;
802     pwrite.offset = offset;
803     pwrite.size = size;
804     pwrite.data_ptr = (uint64_t) (uintptr_t) data;
805     do {
806 	ret = ioctl (bufmgr_gem->fd, DRM_IOCTL_I915_GEM_PWRITE, &pwrite);
807     } while (ret == -1 && errno == EINTR);
808     if (ret != 0) {
809 	fprintf (stderr, "%s:%d: Error writing data to buffer %d: (%d %d) %s .\n",
810 		 __FILE__, __LINE__,
811 		 bo_gem->gem_handle, (int) offset, (int) size,
812 		 strerror (errno));
813     }
814     return 0;
815 }
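
/*
 * Usage sketch (illustrative): uploading data with pwrite instead of mapping.
 * This is the cheap path for small, one-shot uploads such as constants, since
 * it avoids creating (and later tearing down) a CPU mapping of the object.
 *
 *	static const float constants[4] = { 0.0f, 0.5f, 1.0f, 2.0f };
 *
 *	drm_intel_bo_subdata(const_bo, 0, sizeof(constants), constants);
 */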
816 
817 static int
drm_intel_gem_get_pipe_from_crtc_id (drm_intel_bufmgr *bufmgr, int crtc_id)
819 {
820     drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
821     struct drm_i915_get_pipe_from_crtc_id get_pipe_from_crtc_id;
822     int ret;
823 
824     get_pipe_from_crtc_id.crtc_id = crtc_id;
825     ret = ioctl (bufmgr_gem->fd, DRM_IOCTL_I915_GET_PIPE_FROM_CRTC_ID,
826 		 &get_pipe_from_crtc_id);
827     if (ret != 0) {
828 	/* We return -1 here to signal that we don't
829 	 * know which pipe is associated with this crtc.
830 	 * This lets the caller know that this information
831 	 * isn't available; using the wrong pipe for
832 	 * vblank waiting can cause the chipset to lock up
833 	 */
834 	return -1;
835     }
836 
837     return get_pipe_from_crtc_id.pipe;
838 }
839 
840 static int
drm_intel_gem_bo_get_subdata (drm_intel_bo *bo, unsigned long offset,
842 			      unsigned long size, void *data)
843 {
844     drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
845     drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
846     struct drm_i915_gem_pread pread;
847     int ret;
848 
849     memset (&pread, 0, sizeof (pread));
850     pread.handle = bo_gem->gem_handle;
851     pread.offset = offset;
852     pread.size = size;
853     pread.data_ptr = (uint64_t) (uintptr_t) data;
854     do {
855 	ret = ioctl (bufmgr_gem->fd, DRM_IOCTL_I915_GEM_PREAD, &pread);
856     } while (ret == -1 && errno == EINTR);
857     if (ret != 0) {
858 	fprintf (stderr, "%s:%d: Error reading data from buffer %d: (%d %d) %s .\n",
859 		 __FILE__, __LINE__,
860 		 bo_gem->gem_handle, (int) offset, (int) size,
861 		 strerror (errno));
862     }
863     return 0;
864 }
865 
866 /** Waits for all GPU rendering to the object to have completed. */
867 static void
drm_intel_gem_bo_wait_rendering(drm_intel_bo *bo)
869 {
870     return drm_intel_gem_bo_start_gtt_access(bo, 0);
871 }
872 
873 /**
874  * Sets the object to the GTT read and possibly write domain, used by the X
875  * 2D driver in the absence of kernel support to do drm_intel_gem_bo_map_gtt().
876  *
877  * In combination with drm_intel_gem_bo_pin() and manual fence management, we
878  * can do tiled pixmaps this way.
879  */
880 void
drm_intel_gem_bo_start_gtt_access(drm_intel_bo *bo, int write_enable)
882 {
883     drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
884     drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
885     struct drm_i915_gem_set_domain set_domain;
886     int ret;
887 
888     set_domain.handle = bo_gem->gem_handle;
889     set_domain.read_domains = I915_GEM_DOMAIN_GTT;
890     set_domain.write_domain = write_enable ? I915_GEM_DOMAIN_GTT : 0;
891     do {
892 	ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain);
893     } while (ret == -1 && errno == EINTR);
894     if (ret != 0) {
895 	fprintf (stderr, "%s:%d: Error setting memory domains %d (%08x %08x): %s .\n",
896 		 __FILE__, __LINE__,
897 		 bo_gem->gem_handle, set_domain.read_domains, set_domain.write_domain,
898 		 strerror (errno));
899     }
900 }
901 
902 static void
drm_intel_bufmgr_gem_destroy(drm_intel_bufmgr *bufmgr)
904 {
905     drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
906     int i;
907 
908     free(bufmgr_gem->exec_objects);
909     free(bufmgr_gem->exec_bos);
910 
911     pthread_mutex_destroy(&bufmgr_gem->lock);
912 
913     /* Free any cached buffer objects we were going to reuse */
914     for (i = 0; i < DRM_INTEL_GEM_BO_BUCKETS; i++) {
915 	struct drm_intel_gem_bo_bucket *bucket = &bufmgr_gem->cache_bucket[i];
916 	drm_intel_bo_gem *bo_gem;
917 
918 	while (!DRMLISTEMPTY(&bucket->head)) {
919 	    bo_gem = DRMLISTENTRY(drm_intel_bo_gem, bucket->head.next, head);
920 	    DRMLISTDEL(&bo_gem->head);
921 	    bucket->num_entries--;
922 
923 	    drm_intel_gem_bo_free(&bo_gem->bo);
924 	}
925     }
926 
927     free(bufmgr);
928 }
929 
930 /**
931  * Adds the target buffer to the validation list and adds the relocation
932  * to the reloc_buffer's relocation list.
933  *
934  * The relocation entry at the given offset must already contain the
935  * precomputed relocation value, because the kernel will optimize out
936  * the relocation entry write when the buffer hasn't moved from the
937  * last known offset in target_bo.
938  */
939 static int
drm_intel_gem_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset,
941 			    drm_intel_bo *target_bo, uint32_t target_offset,
942 			    uint32_t read_domains, uint32_t write_domain)
943 {
944     drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
945     drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
946     drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *)target_bo;
947 
948     pthread_mutex_lock(&bufmgr_gem->lock);
949 
950     /* Create a new relocation list if needed */
951     if (bo_gem->relocs == NULL)
952 	drm_intel_setup_reloc_list(bo);
953 
954     /* Check overflow */
955     assert(bo_gem->reloc_count < bufmgr_gem->max_relocs);
956 
957     /* Check args */
958     assert (offset <= bo->size - 4);
959     assert ((write_domain & (write_domain-1)) == 0);
960 
961     /* Make sure that we're not adding a reloc to something whose size has
962      * already been accounted for.
963      */
964     assert(!bo_gem->used_as_reloc_target);
965     bo_gem->reloc_tree_size += target_bo_gem->reloc_tree_size;
966     bo_gem->reloc_tree_fences += target_bo_gem->reloc_tree_fences;
967 
968     /* Flag the target to disallow further relocations in it. */
969     target_bo_gem->used_as_reloc_target = 1;
970 
971     bo_gem->relocs[bo_gem->reloc_count].offset = offset;
972     bo_gem->relocs[bo_gem->reloc_count].delta = target_offset;
973     bo_gem->relocs[bo_gem->reloc_count].target_handle =
974 	target_bo_gem->gem_handle;
975     bo_gem->relocs[bo_gem->reloc_count].read_domains = read_domains;
976     bo_gem->relocs[bo_gem->reloc_count].write_domain = write_domain;
977     bo_gem->relocs[bo_gem->reloc_count].presumed_offset = target_bo->offset;
978 
979     bo_gem->reloc_target_bo[bo_gem->reloc_count] = target_bo;
980     drm_intel_gem_bo_reference_locked(target_bo);
981 
982     bo_gem->reloc_count++;
983 
984     pthread_mutex_unlock(&bufmgr_gem->lock);
985 
986     return 0;
987 }
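
/*
 * Sketch of how a caller is expected to use this (illustrative; it is roughly
 * what Mesa's OUT_RELOC macro does).  The presumed address of the target is
 * written into the batch, and the relocation is recorded at the same byte
 * offset so the kernel can patch it if the target has moved.
 *
 *	// "batch" points at the next free dword of batch_bo's mapping, and
 *	// "used" is the byte offset of that dword within batch_bo.
 *	*batch++ = target_bo->offset + state_offset;
 *	drm_intel_bo_emit_reloc(batch_bo, used,
 *				target_bo, state_offset,
 *				I915_GEM_DOMAIN_RENDER,
 *				I915_GEM_DOMAIN_RENDER);
 */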
988 
989 /**
990  * Walk the tree of relocations rooted at BO and accumulate the list of
991  * validations to be performed and update the relocation buffers with
992  * index values into the validation list.
993  */
994 static void
drm_intel_gem_bo_process_reloc(drm_intel_bo *bo)
996 {
997     drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
998     int i;
999 
1000     if (bo_gem->relocs == NULL)
1001 	return;
1002 
1003     for (i = 0; i < bo_gem->reloc_count; i++) {
1004 	drm_intel_bo *target_bo = bo_gem->reloc_target_bo[i];
1005 
1006 	/* Continue walking the tree depth-first. */
1007 	drm_intel_gem_bo_process_reloc(target_bo);
1008 
1009 	/* Add the target to the validate list */
1010 	drm_intel_add_validate_buffer(target_bo);
1011     }
1012 }
1013 
1014 static void
drm_intel_update_buffer_offsets (drm_intel_bufmgr_gem *bufmgr_gem)
1016 {
1017     int i;
1018 
1019     for (i = 0; i < bufmgr_gem->exec_count; i++) {
1020 	drm_intel_bo *bo = bufmgr_gem->exec_bos[i];
1021 	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
1022 
1023 	/* Update the buffer offset */
1024 	if (bufmgr_gem->exec_objects[i].offset != bo->offset) {
1025 	    DBG("BO %d (%s) migrated: 0x%08lx -> 0x%08llx\n",
1026 		bo_gem->gem_handle, bo_gem->name, bo->offset,
1027 		(unsigned long long)bufmgr_gem->exec_objects[i].offset);
1028 	    bo->offset = bufmgr_gem->exec_objects[i].offset;
1029 	}
1030     }
1031 }
1032 
1033 static int
drm_intel_gem_bo_exec(drm_intel_bo *bo, int used,
1035 		      drm_clip_rect_t *cliprects, int num_cliprects,
1036 		      int DR4)
1037 {
1038     drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
1039     struct drm_i915_gem_execbuffer execbuf;
1040     int ret, i;
1041 
1042     pthread_mutex_lock(&bufmgr_gem->lock);
1043     /* Update indices and set up the validate list. */
1044     drm_intel_gem_bo_process_reloc(bo);
1045 
1046     /* Add the batch buffer to the validation list.  There are no relocations
1047      * pointing to it.
1048      */
1049     drm_intel_add_validate_buffer(bo);
1050 
1051     execbuf.buffers_ptr = (uintptr_t)bufmgr_gem->exec_objects;
1052     execbuf.buffer_count = bufmgr_gem->exec_count;
1053     execbuf.batch_start_offset = 0;
1054     execbuf.batch_len = used;
1055     execbuf.cliprects_ptr = (uintptr_t)cliprects;
1056     execbuf.num_cliprects = num_cliprects;
1057     execbuf.DR1 = 0;
1058     execbuf.DR4 = DR4;
1059 
1060     do {
1061 	ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_EXECBUFFER, &execbuf);
1062     } while (ret != 0 && errno == EAGAIN);
1063 
1064     if (ret != 0 && errno == ENOMEM) {
1065 	fprintf(stderr, "Execbuffer fails to pin. Estimate: %u. Actual: %u. Available: %u\n",
1066 		drm_intel_gem_estimate_batch_space(bufmgr_gem->exec_bos,
1067 						   bufmgr_gem->exec_count),
1068 		drm_intel_gem_compute_batch_space(bufmgr_gem->exec_bos,
1069 						  bufmgr_gem->exec_count),
1070 		(unsigned int) bufmgr_gem->gtt_size);
1071     }
1072     drm_intel_update_buffer_offsets (bufmgr_gem);
1073 
1074     if (bufmgr_gem->bufmgr.debug)
1075 	drm_intel_gem_dump_validation_list(bufmgr_gem);
1076 
1077     for (i = 0; i < bufmgr_gem->exec_count; i++) {
1078 	drm_intel_bo *bo = bufmgr_gem->exec_bos[i];
1079 	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
1080 
1081 	/* Need to call swrast on next bo_map */
1082 	bo_gem->swrast = 0;
1083 
1084 	/* Disconnect the buffer from the validate list */
1085 	bo_gem->validate_index = -1;
1086 	drm_intel_gem_bo_unreference_locked(bo);
1087 	bufmgr_gem->exec_bos[i] = NULL;
1088     }
1089     bufmgr_gem->exec_count = 0;
1090     pthread_mutex_unlock(&bufmgr_gem->lock);
1091 
1092     return 0;
1093 }
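
/*
 * End-to-end sketch of a batch submission (illustrative only; the hardware
 * opcodes MI_BATCH_BUFFER_END/MI_NOOP and the 4096-byte batch size are
 * assumptions of the example, not definitions provided by this file):
 *
 *	uint32_t batch[2] = { MI_BATCH_BUFFER_END, MI_NOOP };
 *	drm_intel_bo *batch_bo;
 *
 *	batch_bo = drm_intel_bo_alloc(bufmgr, "batchbuffer", 4096, 4096);
 *	drm_intel_bo_subdata(batch_bo, 0, sizeof(batch), batch);
 *	drm_intel_bo_exec(batch_bo, sizeof(batch), NULL, 0, 0);
 *	drm_intel_bo_unreference(batch_bo);
 *
 * Relocations emitted against batch_bo beforehand are picked up here by
 * drm_intel_gem_bo_process_reloc()/drm_intel_add_validate_buffer().
 */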
1094 
1095 static int
drm_intel_gem_bo_pin(drm_intel_bo *bo, uint32_t alignment)
1097 {
1098     drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
1099     drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
1100     struct drm_i915_gem_pin pin;
1101     int ret;
1102 
1103     memset(&pin, 0, sizeof(pin));
1104     pin.handle = bo_gem->gem_handle;
1105     pin.alignment = alignment;
1106 
1107     do {
1108 	ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_PIN, &pin);
1109     } while (ret == -1 && errno == EINTR);
1110 
1111     if (ret != 0)
1112 	return -errno;
1113 
1114     bo->offset = pin.offset;
1115     return 0;
1116 }
1117 
1118 static int
drm_intel_gem_bo_unpin(drm_intel_bo *bo)
1120 {
1121     drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
1122     drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
1123     struct drm_i915_gem_unpin unpin;
1124     int ret;
1125 
1126     memset(&unpin, 0, sizeof(unpin));
1127     unpin.handle = bo_gem->gem_handle;
1128 
1129     ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_UNPIN, &unpin);
1130     if (ret != 0)
1131 	return -errno;
1132 
1133     return 0;
1134 }
1135 
1136 static int
drm_intel_gem_bo_set_tiling(drm_intel_bo *bo, uint32_t *tiling_mode,
1138 			    uint32_t stride)
1139 {
1140     drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
1141     drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
1142     struct drm_i915_gem_set_tiling set_tiling;
1143     int ret;
1144 
1145     if (bo_gem->global_name == 0 && *tiling_mode == bo_gem->tiling_mode)
1146 	return 0;
1147 
1148     /* If we're going from non-tiling to tiling, bump fence count */
1149     if (bo_gem->tiling_mode == I915_TILING_NONE)
1150 	bo_gem->reloc_tree_fences++;
1151 
1152     memset(&set_tiling, 0, sizeof(set_tiling));
1153     set_tiling.handle = bo_gem->gem_handle;
1154     set_tiling.tiling_mode = *tiling_mode;
1155     set_tiling.stride = stride;
1156 
1157     ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_SET_TILING, &set_tiling);
1158     if (ret != 0) {
1159 	*tiling_mode = bo_gem->tiling_mode;
1160 	return -errno;
1161     }
1162     bo_gem->tiling_mode = set_tiling.tiling_mode;
1163     bo_gem->swizzle_mode = set_tiling.swizzle_mode;
1164 
1165     /* If we're going from tiling to non-tiling, drop fence count */
1166     if (bo_gem->tiling_mode == I915_TILING_NONE)
1167 	bo_gem->reloc_tree_fences--;
1168 
1169     *tiling_mode = bo_gem->tiling_mode;
1170     return 0;
1171 }
1172 
1173 static int
drm_intel_gem_bo_get_tiling(drm_intel_bo *bo, uint32_t *tiling_mode,
1175 			    uint32_t *swizzle_mode)
1176 {
1177     drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
1178 
1179     *tiling_mode = bo_gem->tiling_mode;
1180     *swizzle_mode = bo_gem->swizzle_mode;
1181     return 0;
1182 }
1183 
1184 static int
drm_intel_gem_bo_flink(drm_intel_bo *bo, uint32_t *name)
1186 {
1187     drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
1188     drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
1189     struct drm_gem_flink flink;
1190     int ret;
1191 
1192     if (!bo_gem->global_name) {
1193 	memset(&flink, 0, sizeof(flink));
1194 	flink.handle = bo_gem->gem_handle;
1195 
1196 	ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_GEM_FLINK, &flink);
1197 	if (ret != 0)
1198 	    return -errno;
1199 	bo_gem->global_name = flink.name;
1200 	bo_gem->reusable = 0;
1201     }
1202 
1203     *name = bo_gem->global_name;
1204     return 0;
1205 }
1206 
1207 /**
1208  * Enables unlimited caching of buffer objects for reuse.
1209  *
1210  * This is potentially very memory expensive, as the cache at each bucket
1211  * size is only bounded by how many buffers of that size we've managed to have
1212  * in flight at once.
1213  */
1214 void
drm_intel_bufmgr_gem_enable_reuse(drm_intel_bufmgr *bufmgr)
1216 {
1217     drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
1218     int i;
1219 
1220     for (i = 0; i < DRM_INTEL_GEM_BO_BUCKETS; i++) {
1221 	bufmgr_gem->cache_bucket[i].max_entries = -1;
1222     }
1223 }
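
/*
 * Typical usage (illustrative): reuse is off by default, so long-running
 * clients normally enable it right after creating the buffer manager.
 *
 *	drm_intel_bufmgr *bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
 *	drm_intel_bufmgr_gem_enable_reuse(bufmgr);
 */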
1224 
1225 /**
1226  * Return the additional aperture space required by the tree of buffer objects
1227  * rooted at bo.
1228  */
1229 static int
drm_intel_gem_bo_get_aperture_space(drm_intel_bo *bo)
1231 {
1232     drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
1233     int i;
1234     int total = 0;
1235 
1236     if (bo == NULL || bo_gem->included_in_check_aperture)
1237 	return 0;
1238 
1239     total += bo->size;
1240     bo_gem->included_in_check_aperture = 1;
1241 
1242     for (i = 0; i < bo_gem->reloc_count; i++)
1243 	total += drm_intel_gem_bo_get_aperture_space(bo_gem->reloc_target_bo[i]);
1244 
1245     return total;
1246 }
1247 
1248 /**
1249  * Count the number of buffers in this list that need a fence reg
1250  *
1251  * If the count is greater than the number of available regs, we'll have
1252  * to ask the caller to resubmit a batch with fewer tiled buffers.
1253  *
1254  * This function over-counts if the same buffer is used multiple times.
1255  */
1256 static unsigned int
drm_intel_gem_total_fences(drm_intel_bo **bo_array, int count)
1258 {
1259     int i;
1260     unsigned int total = 0;
1261 
1262     for (i = 0; i < count; i++) {
1263 	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo_array[i];
1264 
1265 	if (bo_gem == NULL)
1266 	    continue;
1267 
1268 	total += bo_gem->reloc_tree_fences;
1269     }
1270     return total;
1271 }
1272 
1273 /**
1274  * Clear the flag set by drm_intel_gem_bo_get_aperture_space() so we're ready
1275  * for the next drm_intel_bufmgr_check_aperture_space() call.
1276  */
1277 static void
drm_intel_gem_bo_clear_aperture_space_flag(drm_intel_bo *bo)
1279 {
1280     drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
1281     int i;
1282 
1283     if (bo == NULL || !bo_gem->included_in_check_aperture)
1284 	return;
1285 
1286     bo_gem->included_in_check_aperture = 0;
1287 
1288     for (i = 0; i < bo_gem->reloc_count; i++)
1289 	drm_intel_gem_bo_clear_aperture_space_flag(bo_gem->reloc_target_bo[i]);
1290 }
1291 
1292 /**
1293  * Return a conservative estimate for the amount of aperture required
1294  * for a collection of buffers. This may double-count some buffers.
1295  */
1296 static unsigned int
drm_intel_gem_estimate_batch_space(drm_intel_bo **bo_array, int count)
1298 {
1299     int i;
1300     unsigned int total = 0;
1301 
1302     for (i = 0; i < count; i++) {
1303 	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo_array[i];
1304 	if (bo_gem != NULL)
1305 		total += bo_gem->reloc_tree_size;
1306     }
1307     return total;
1308 }
1309 
1310 /**
1311  * Return the amount of aperture needed for a collection of buffers.
1312  * This avoids double counting any buffers, at the cost of looking
1313  * at every buffer in the set.
1314  */
1315 static unsigned int
drm_intel_gem_compute_batch_space(drm_intel_bo **bo_array, int count)
1317 {
1318     int i;
1319     unsigned int total = 0;
1320 
1321     for (i = 0; i < count; i++) {
1322 	total += drm_intel_gem_bo_get_aperture_space(bo_array[i]);
1323 	/* For the first buffer object in the array, we get an accurate count
1324 	 * back for its reloc_tree size (since nothing had been flagged as
1325 	 * being counted yet).  We can save that value out as a more
1326 	 * conservative reloc_tree_size that avoids double-counting target
1327 	 * buffers.  Since the first buffer happens to usually be the batch
1328 	 * buffer in our callers, this can pull us back from doing the tree
1329 	 * walk on every new batch emit.
1330 	 */
1331 	if (i == 0) {
1332 	    drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo_array[i];
1333 	    bo_gem->reloc_tree_size = total;
1334 	}
1335     }
1336 
1337     for (i = 0; i < count; i++)
1338 	drm_intel_gem_bo_clear_aperture_space_flag(bo_array[i]);
1339     return total;
1340 }
1341 
1342 /**
1343  * Return -1 if the batchbuffer should be flushed before attempting to
1344  * emit rendering referencing the buffers pointed to by bo_array.
1345  *
1346  * This is required because if we try to emit a batchbuffer with relocations
1347  * to a tree of buffers that won't simultaneously fit in the aperture,
1348  * the rendering will return an error at a point where the software is not
1349  * prepared to recover from it.
1350  *
1351  * However, we also want to emit the batchbuffer significantly before we reach
1352  * the limit, as a series of batchbuffers each of which references buffers
1353  * covering almost all of the aperture means that at each emit we end up
1354  * waiting to evict a buffer from the last rendering, and we get synchronous
1355  * performance.  By emitting smaller batchbuffers, we eat some CPU overhead to
1356  * get better parallelism.
1357  */
1358 static int
drm_intel_gem_check_aperture_space(drm_intel_bo **bo_array, int count)
1360 {
1361     drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo_array[0]->bufmgr;
1362     unsigned int total = 0;
1363     unsigned int threshold = bufmgr_gem->gtt_size * 3 / 4;
1364     int total_fences;
1365 
1366     /* Check for fence reg constraints if necessary */
1367     if (bufmgr_gem->available_fences) {
1368 	total_fences = drm_intel_gem_total_fences(bo_array, count);
1369 	if (total_fences > bufmgr_gem->available_fences)
1370 	    return -1;
1371     }
1372 
1373     total = drm_intel_gem_estimate_batch_space(bo_array, count);
1374 
1375     if (total > threshold)
1376 	total = drm_intel_gem_compute_batch_space(bo_array, count);
1377 
1378     if (total > threshold) {
1379 	DBG("check_space: overflowed available aperture, %dkb vs %dkb\n",
1380 	    total / 1024, (int)bufmgr_gem->gtt_size / 1024);
1381 	return -1;
1382     } else {
1383 	DBG("drm_check_space: total %dkb vs bufgr %dkb\n", total / 1024 ,
1384 	    (int)bufmgr_gem->gtt_size / 1024);
1385 	return 0;
1386     }
1387 }
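
/*
 * Sketch of the intended calling pattern (illustrative): before adding more
 * state to the current batch, the driver asks whether everything referenced
 * so far would still fit in the aperture, and flushes first if not.
 * "flush_batch", "ctx" and the bo array are the caller's own.
 *
 *	drm_intel_bo *bos[2] = { batch_bo, new_target_bo };
 *
 *	if (drm_intel_bufmgr_check_aperture_space(bos, 2) != 0)
 *		flush_batch(ctx);	// submit what we have, then retry
 */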
1388 
1389 /*
1390  * Disable buffer reuse for objects which are shared with the kernel
1391  * as scanout buffers
1392  */
1393 static int
drm_intel_gem_bo_disable_reuse(drm_intel_bo *bo)
1395 {
1396     drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
1397 
1398     bo_gem->reusable = 0;
1399     return 0;
1400 }
1401 
1402 /**
1403  * Initializes the GEM buffer manager, which uses the kernel to allocate, map,
 * and manage buffer objects.
1405  *
1406  * \param fd File descriptor of the opened DRM device.
1407  */
1408 drm_intel_bufmgr *
drm_intel_bufmgr_gem_init(int fd, int batch_size)
1410 {
1411     drm_intel_bufmgr_gem *bufmgr_gem;
1412     struct drm_i915_gem_get_aperture aperture;
1413     drm_i915_getparam_t gp;
1414     int ret, i;
1415 
1416     bufmgr_gem = calloc(1, sizeof(*bufmgr_gem));
1417     bufmgr_gem->fd = fd;
1418 
1419     if (pthread_mutex_init(&bufmgr_gem->lock, NULL) != 0) {
1420       free(bufmgr_gem);
1421       return NULL;
1422    }
1423 
1424     ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture);
1425 
1426     if (ret == 0)
1427 	bufmgr_gem->gtt_size = aperture.aper_available_size;
1428     else {
	fprintf(stderr, "DRM_IOCTL_I915_GEM_GET_APERTURE failed: %s\n",
1430 		strerror(errno));
1431 	bufmgr_gem->gtt_size = 128 * 1024 * 1024;
1432 	fprintf(stderr, "Assuming %dkB available aperture size.\n"
1433 		"May lead to reduced performance or incorrect rendering.\n",
1434 		(int)bufmgr_gem->gtt_size / 1024);
1435     }
1436 
1437     gp.param = I915_PARAM_CHIPSET_ID;
1438     gp.value = &bufmgr_gem->pci_device;
1439     ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
1440     if (ret) {
1441 	fprintf(stderr, "get chip id failed: %d\n", ret);
1442 	fprintf(stderr, "param: %d, val: %d\n", gp.param, *gp.value);
1443     }
1444 
1445     if (!IS_I965G(bufmgr_gem)) {
1446 	gp.param = I915_PARAM_NUM_FENCES_AVAIL;
1447 	gp.value = &bufmgr_gem->available_fences;
1448 	ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
1449 	if (ret) {
1450 	    fprintf(stderr, "get fences failed: %d\n", ret);
1451 	    fprintf(stderr, "param: %d, val: %d\n", gp.param, *gp.value);
1452 	    bufmgr_gem->available_fences = 0;
1453 	}
1454     }
1455 
1456     /* Let's go with one relocation per every 2 dwords (but round down a bit
1457      * since a power of two will mean an extra page allocation for the reloc
1458      * buffer).
1459      *
1460      * Every 4 was too few for the blender benchmark.
1461      */
1462     bufmgr_gem->max_relocs = batch_size / sizeof(uint32_t) / 2 - 2;
1463 
1464     bufmgr_gem->bufmgr.bo_alloc = drm_intel_gem_bo_alloc;
1465     bufmgr_gem->bufmgr.bo_alloc_for_render = drm_intel_gem_bo_alloc_for_render;
1466     bufmgr_gem->bufmgr.bo_reference = drm_intel_gem_bo_reference;
1467     bufmgr_gem->bufmgr.bo_unreference = drm_intel_gem_bo_unreference;
1468     bufmgr_gem->bufmgr.bo_map = drm_intel_gem_bo_map;
1469     bufmgr_gem->bufmgr.bo_unmap = drm_intel_gem_bo_unmap;
1470     bufmgr_gem->bufmgr.bo_subdata = drm_intel_gem_bo_subdata;
1471     bufmgr_gem->bufmgr.bo_get_subdata = drm_intel_gem_bo_get_subdata;
1472     bufmgr_gem->bufmgr.bo_wait_rendering = drm_intel_gem_bo_wait_rendering;
1473     bufmgr_gem->bufmgr.bo_emit_reloc = drm_intel_gem_bo_emit_reloc;
1474     bufmgr_gem->bufmgr.bo_pin = drm_intel_gem_bo_pin;
1475     bufmgr_gem->bufmgr.bo_unpin = drm_intel_gem_bo_unpin;
1476     bufmgr_gem->bufmgr.bo_get_tiling = drm_intel_gem_bo_get_tiling;
1477     bufmgr_gem->bufmgr.bo_set_tiling = drm_intel_gem_bo_set_tiling;
1478     bufmgr_gem->bufmgr.bo_flink = drm_intel_gem_bo_flink;
1479     bufmgr_gem->bufmgr.bo_exec = drm_intel_gem_bo_exec;
1480     bufmgr_gem->bufmgr.destroy = drm_intel_bufmgr_gem_destroy;
1481     bufmgr_gem->bufmgr.debug = 0;
1482     bufmgr_gem->bufmgr.check_aperture_space = drm_intel_gem_check_aperture_space;
1483     bufmgr_gem->bufmgr.bo_disable_reuse = drm_intel_gem_bo_disable_reuse;
1484     bufmgr_gem->bufmgr.get_pipe_from_crtc_id = drm_intel_gem_get_pipe_from_crtc_id;
1485     /* Initialize the linked lists for BO reuse cache. */
1486     for (i = 0; i < DRM_INTEL_GEM_BO_BUCKETS; i++)
1487 	DRMINITLISTHEAD(&bufmgr_gem->cache_bucket[i].head);
1488 
1489     return &bufmgr_gem->bufmgr;
1490 }
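
/*
 * Initialization sketch (illustrative; the device node path and 4096-byte
 * batch size are assumptions of the example, not requirements of this file):
 *
 *	int fd = open("/dev/dri/card0", O_RDWR);
 *	drm_intel_bufmgr *bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
 *
 *	if (bufmgr == NULL)
 *		abort();
 *	drm_intel_bufmgr_gem_enable_reuse(bufmgr);
 *	... allocate, map and execute buffers ...
 *	drm_intel_bufmgr_destroy(bufmgr);
 *	close(fd);
 */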
1491 