1 /*
2 * Copyright 2009 Jerome Glisse.
3 * All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the
7 * "Software"), to deal in the Software without restriction, including
8 * without limitation the rights to use, copy, modify, merge, publish,
9 * distribute, sub license, and/or sell copies of the Software, and to
10 * permit persons to whom the Software is furnished to do so, subject to
11 * the following conditions:
12 *
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
16 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
17 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
18 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
19 * USE OR OTHER DEALINGS IN THE SOFTWARE.
20 *
21 * The above copyright notice and this permission notice (including the
22 * next paragraph) shall be included in all copies or substantial portions
23 * of the Software.
24 *
25 */
26 /*
27 * Authors:
28 * Jerome Glisse <glisse@freedesktop.org>
29 * Thomas Hellstrom <thomas-at-tungstengraphics-dot-com>
30 * Dave Airlie
31 */
32
33 #include <linux/io.h>
34 #include <linux/list.h>
35 #include <linux/slab.h>
36
37 #include <drm/drm_cache.h>
38 #include <drm/drm_prime.h>
39 #include <drm/radeon_drm.h>
40
41 #include "radeon.h"
42 #include "radeon_trace.h"
43 #include "radeon_ttm.h"
44
45 static void radeon_bo_clear_surface_reg(struct radeon_bo *bo);
46
47 /*
48 * To exclude mutual BO access we rely on bo_reserve exclusion, as all
49 * function are calling it.
50 */
51
/*
 * Final TTM destroy callback for a radeon BO: runs when the last reference
 * to the underlying ttm_buffer_object is dropped.  Unlinks the BO from the
 * per-device GEM object list, releases its surface register (if any), tears
 * down any prime import, releases the GEM base object and frees the memory
 * back to the object pool.
 */
static void radeon_ttm_bo_destroy(struct ttm_buffer_object *tbo)
{
	struct radeon_bo *bo;

	bo = container_of(tbo, struct radeon_bo, tbo);

	/* Remove from rdev->gem.objects under the GEM mutex. */
	mutex_lock(&bo->rdev->gem.mutex);
	list_del_init(&bo->list);
	mutex_unlock(&bo->rdev->gem.mutex);
	radeon_bo_clear_surface_reg(bo);
	/* All VM mappings must be gone before the BO dies. */
	WARN_ON_ONCE(!list_empty(&bo->va));
	if (bo->tbo.base.import_attach)
		drm_prime_gem_destroy(&bo->tbo.base, bo->tbo.sg);
	drm_gem_object_release(&bo->tbo.base);
	/* BO storage comes from the per-device pool, not kmalloc. */
	pool_put(&bo->rdev->ddev->objpl, bo);
}
68
radeon_ttm_bo_is_radeon_bo(struct ttm_buffer_object * bo)69 bool radeon_ttm_bo_is_radeon_bo(struct ttm_buffer_object *bo)
70 {
71 if (bo->destroy == &radeon_ttm_bo_destroy)
72 return true;
73 return false;
74 }
75
/*
 * Build rbo->placement (the list of acceptable TTM placements) from a
 * RADEON_GEM_DOMAIN_* mask.  Placements are appended in priority order:
 * invisible VRAM (when CPU access is not needed), visible VRAM, GTT, then
 * system memory.  An empty mask falls back to system memory.
 */
void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain)
{
	u32 c = 0, i;

	rbo->placement.placement = rbo->placements;
	rbo->placement.busy_placement = rbo->placements;
	if (domain & RADEON_GEM_DOMAIN_VRAM) {
		/* Try placing BOs which don't need CPU access outside of the
		 * CPU accessible part of VRAM
		 */
		if ((rbo->flags & RADEON_GEM_NO_CPU_ACCESS) &&
		    rbo->rdev->mc.visible_vram_size < rbo->rdev->mc.real_vram_size) {
			/* fpfn restricts this entry to the invisible tail of VRAM */
			rbo->placements[c].fpfn =
				rbo->rdev->mc.visible_vram_size >> PAGE_SHIFT;
			rbo->placements[c].mem_type = TTM_PL_VRAM;
			rbo->placements[c++].flags = 0;
		}

		rbo->placements[c].fpfn = 0;
		rbo->placements[c].mem_type = TTM_PL_VRAM;
		rbo->placements[c++].flags = 0;
	}

	if (domain & RADEON_GEM_DOMAIN_GTT) {
		rbo->placements[c].fpfn = 0;
		rbo->placements[c].mem_type = TTM_PL_TT;
		rbo->placements[c++].flags = 0;
	}

	if (domain & RADEON_GEM_DOMAIN_CPU) {
		rbo->placements[c].fpfn = 0;
		rbo->placements[c].mem_type = TTM_PL_SYSTEM;
		rbo->placements[c++].flags = 0;
	}
	/* No domain bits set: default to system memory. */
	if (!c) {
		rbo->placements[c].fpfn = 0;
		rbo->placements[c].mem_type = TTM_PL_SYSTEM;
		rbo->placements[c++].flags = 0;
	}

	rbo->placement.num_placement = c;
	rbo->placement.num_busy_placement = c;

	/* Clamp CPU-accessible VRAM placements to the visible window;
	 * lpfn == 0 means "no upper limit" for the rest. */
	for (i = 0; i < c; ++i) {
		if ((rbo->flags & RADEON_GEM_CPU_ACCESS) &&
		    (rbo->placements[i].mem_type == TTM_PL_VRAM) &&
		    !rbo->placements[i].fpfn)
			rbo->placements[i].lpfn =
				rbo->rdev->mc.visible_vram_size >> PAGE_SHIFT;
		else
			rbo->placements[i].lpfn = 0;
	}
}
129
/**
 * radeon_bo_create - allocate and initialize a radeon buffer object
 * @rdev: radeon device the BO belongs to
 * @size: requested size in bytes, rounded up to a whole page
 * @byte_align: requested byte alignment, converted to a page alignment
 * @kernel: true for a kernel-internal (uninterruptible, kmappable) BO
 * @domain: initial RADEON_GEM_DOMAIN_* placement mask
 * @flags: RADEON_GEM_* caching/access flags; WC/UC may be masked out below
 * @sg: scatter/gather table for prime-imported buffers, or NULL
 * @resv: reservation object to share, or NULL to allocate a private one
 * @bo_ptr: out parameter receiving the new BO (NULL on failure)
 *
 * Returns 0 on success or a negative errno.
 */
int radeon_bo_create(struct radeon_device *rdev,
		     unsigned long size, int byte_align, bool kernel,
		     u32 domain, u32 flags, struct sg_table *sg,
		     struct dma_resv *resv,
		     struct radeon_bo **bo_ptr)
{
	struct radeon_bo *bo;
	enum ttm_bo_type type;
	unsigned long page_align = roundup(byte_align, PAGE_SIZE) >> PAGE_SHIFT;
	int r;

	size = ALIGN(size, PAGE_SIZE);

	/* The TTM type controls kmap-ability and CPU mapping behavior. */
	if (kernel) {
		type = ttm_bo_type_kernel;
	} else if (sg) {
		type = ttm_bo_type_sg;
	} else {
		type = ttm_bo_type_device;
	}
	*bo_ptr = NULL;

	bo = pool_get(&rdev->ddev->objpl, PR_WAITOK | PR_ZERO);
	if (bo == NULL)
		return -ENOMEM;
	drm_gem_private_object_init(rdev->ddev, &bo->tbo.base, size);
	bo->rdev = rdev;
	bo->surface_reg = -1;
	INIT_LIST_HEAD(&bo->list);
	INIT_LIST_HEAD(&bo->va);
	/* Remember only the real placement domains for later revalidation. */
	bo->initial_domain = domain & (RADEON_GEM_DOMAIN_VRAM |
				       RADEON_GEM_DOMAIN_GTT |
				       RADEON_GEM_DOMAIN_CPU);

	bo->flags = flags;
	/* PCI GART is always snooped */
	if (!(rdev->flags & RADEON_IS_PCIE))
		bo->flags &= ~(RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC);

	/* Write-combined CPU mappings of GTT cause GPU hangs with RV6xx
	 * See https://bugs.freedesktop.org/show_bug.cgi?id=91268
	 */
	if (rdev->family >= CHIP_RV610 && rdev->family <= CHIP_RV635)
		bo->flags &= ~(RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC);

#ifdef CONFIG_X86_32
	/* XXX: Write-combined CPU mappings of GTT seem broken on 32-bit
	 * See https://bugs.freedesktop.org/show_bug.cgi?id=84627
	 */
	bo->flags &= ~(RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC);
#elif defined(CONFIG_X86) && !defined(CONFIG_X86_PAT)
	/* Don't try to enable write-combining when it can't work, or things
	 * may be slow
	 * See https://bugs.freedesktop.org/show_bug.cgi?id=88758
	 */
#ifndef CONFIG_COMPILE_TEST
#warning Please enable CONFIG_MTRR and CONFIG_X86_PAT for better performance \
	 thanks to write-combining
#endif

	if (bo->flags & RADEON_GEM_GTT_WC)
		DRM_INFO_ONCE("Please enable CONFIG_MTRR and CONFIG_X86_PAT for "
			      "better performance thanks to write-combining\n");
	bo->flags &= ~(RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC);
#else
	/* For architectures that don't support WC memory,
	 * mask out the WC flag from the BO
	 */
	if (!drm_arch_can_wc_memory())
		bo->flags &= ~RADEON_GEM_GTT_WC;
#endif

	radeon_ttm_placement_from_domain(bo, domain);
	/* Kernel allocation are uninterruptible */
	down_read(&rdev->pm.mclk_lock);
	r = ttm_bo_init_validate(&rdev->mman.bdev, &bo->tbo, type,
				 &bo->placement, page_align, !kernel, sg, resv,
				 &radeon_ttm_bo_destroy);
	up_read(&rdev->pm.mclk_lock);
	if (unlikely(r != 0)) {
		/* NOTE(review): no explicit free here — presumably
		 * ttm_bo_init_validate() invokes radeon_ttm_bo_destroy on
		 * failure, which returns bo to the pool; confirm, otherwise
		 * this path leaks. */
		return r;
	}
	*bo_ptr = bo;

	trace_radeon_bo_create(bo);

	return 0;
}
218
radeon_bo_kmap(struct radeon_bo * bo,void ** ptr)219 int radeon_bo_kmap(struct radeon_bo *bo, void **ptr)
220 {
221 bool is_iomem;
222 long r;
223
224 r = dma_resv_wait_timeout(bo->tbo.base.resv, DMA_RESV_USAGE_KERNEL,
225 false, MAX_SCHEDULE_TIMEOUT);
226 if (r < 0)
227 return r;
228
229 if (bo->kptr) {
230 if (ptr) {
231 *ptr = bo->kptr;
232 }
233 return 0;
234 }
235 r = ttm_bo_kmap(&bo->tbo, 0, PFN_UP(bo->tbo.base.size), &bo->kmap);
236 if (r) {
237 return r;
238 }
239 bo->kptr = ttm_kmap_obj_virtual(&bo->kmap, &is_iomem);
240 if (ptr) {
241 *ptr = bo->kptr;
242 }
243 radeon_bo_check_tiling(bo, 0, 0);
244 return 0;
245 }
246
radeon_bo_kunmap(struct radeon_bo * bo)247 void radeon_bo_kunmap(struct radeon_bo *bo)
248 {
249 if (bo->kptr == NULL)
250 return;
251 bo->kptr = NULL;
252 radeon_bo_check_tiling(bo, 0, 0);
253 ttm_bo_kunmap(&bo->kmap);
254 }
255
radeon_bo_ref(struct radeon_bo * bo)256 struct radeon_bo *radeon_bo_ref(struct radeon_bo *bo)
257 {
258 if (bo == NULL)
259 return NULL;
260
261 ttm_bo_get(&bo->tbo);
262 return bo;
263 }
264
radeon_bo_unref(struct radeon_bo ** bo)265 void radeon_bo_unref(struct radeon_bo **bo)
266 {
267 struct ttm_buffer_object *tbo;
268
269 if ((*bo) == NULL)
270 return;
271 tbo = &((*bo)->tbo);
272 ttm_bo_put(tbo);
273 *bo = NULL;
274 }
275
/**
 * radeon_bo_pin_restricted - pin a BO into @domain below an offset limit
 * @bo: buffer object to pin
 * @domain: RADEON_GEM_DOMAIN_* placement to pin into
 * @max_offset: highest acceptable GPU offset, or 0 for no limit
 * @gpu_addr: optional out parameter for the resulting GPU address
 *
 * Userptr BOs cannot be pinned (-EPERM); already-pinned BOs just gain a
 * pin count.  On a first pin the BO is validated into place and the
 * per-domain pin-size accounting is updated.  Returns 0 or negative errno.
 */
int radeon_bo_pin_restricted(struct radeon_bo *bo, u32 domain, u64 max_offset,
			     u64 *gpu_addr)
{
	struct ttm_operation_ctx ctx = { false, false };
	int r, i;

	if (radeon_ttm_tt_has_userptr(bo->rdev, bo->tbo.ttm))
		return -EPERM;

	/* Already pinned: just bump the pin count and sanity-check the
	 * existing placement against max_offset. */
	if (bo->tbo.pin_count) {
		ttm_bo_pin(&bo->tbo);
		if (gpu_addr)
			*gpu_addr = radeon_bo_gpu_offset(bo);

		if (max_offset != 0) {
			u64 domain_start;

			if (domain == RADEON_GEM_DOMAIN_VRAM)
				domain_start = bo->rdev->mc.vram_start;
			else
				domain_start = bo->rdev->mc.gtt_start;
			WARN_ON_ONCE(max_offset <
				     (radeon_bo_gpu_offset(bo) - domain_start));
		}

		return 0;
	}
	if (bo->prime_shared_count && domain == RADEON_GEM_DOMAIN_VRAM) {
		/* A BO shared as a dma-buf cannot be sensibly migrated to VRAM */
		return -EINVAL;
	}

	radeon_ttm_placement_from_domain(bo, domain);
	for (i = 0; i < bo->placement.num_placement; i++) {
		/* force to pin into visible video ram */
		if ((bo->placements[i].mem_type == TTM_PL_VRAM) &&
		    !(bo->flags & RADEON_GEM_NO_CPU_ACCESS) &&
		    (!max_offset || max_offset > bo->rdev->mc.visible_vram_size))
			bo->placements[i].lpfn =
				bo->rdev->mc.visible_vram_size >> PAGE_SHIFT;
		else
			/* lpfn of 0 (max_offset == 0) means no upper bound */
			bo->placements[i].lpfn = max_offset >> PAGE_SHIFT;
	}

	r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
	if (likely(r == 0)) {
		ttm_bo_pin(&bo->tbo);
		if (gpu_addr != NULL)
			*gpu_addr = radeon_bo_gpu_offset(bo);
		/* Track pinned bytes per domain for debugfs/limits. */
		if (domain == RADEON_GEM_DOMAIN_VRAM)
			bo->rdev->vram_pin_size += radeon_bo_size(bo);
		else
			bo->rdev->gart_pin_size += radeon_bo_size(bo);
	} else {
		dev_err(bo->rdev->dev, "%p pin failed\n", bo);
	}
	return r;
}
334
/* Pin @bo into @domain with no upper offset restriction. */
int radeon_bo_pin(struct radeon_bo *bo, u32 domain, u64 *gpu_addr)
{
	return radeon_bo_pin_restricted(bo, domain, 0, gpu_addr);
}
339
radeon_bo_unpin(struct radeon_bo * bo)340 void radeon_bo_unpin(struct radeon_bo *bo)
341 {
342 ttm_bo_unpin(&bo->tbo);
343 if (!bo->tbo.pin_count) {
344 if (bo->tbo.resource->mem_type == TTM_PL_VRAM)
345 bo->rdev->vram_pin_size -= radeon_bo_size(bo);
346 else
347 bo->rdev->gart_pin_size -= radeon_bo_size(bo);
348 }
349 }
350
/*
 * Evict all buffer objects from VRAM (used around suspend/hibernate).
 * Returns 0 on success (including when there is nothing to do) or the
 * error from ttm_resource_manager_evict_all().
 */
int radeon_bo_evict_vram(struct radeon_device *rdev)
{
	struct ttm_device *bdev = &rdev->mman.bdev;
	struct ttm_resource_manager *man;

	/* late 2.6.33 fix IGP hibernate - we need pm ops to do this correct */
#ifndef CONFIG_HIBERNATION
	if (rdev->flags & RADEON_IS_IGP) {
		if (rdev->mc.igp_sideport_enabled == false)
			/* Useless to evict on IGP chips */
			return 0;
	}
#endif
	man = ttm_manager_type(bdev, TTM_PL_VRAM);
	/* No VRAM manager means no VRAM to evict from. */
	if (!man)
		return 0;
	return ttm_resource_manager_evict_all(bdev, man);
}
369
/*
 * Forcibly release every GEM object still on the device list at teardown.
 * Reaching this with a non-empty list means userspace leaked objects; each
 * one is logged, unlinked under the GEM mutex, and its GEM reference is
 * dropped (which should free the backing TTM BO).
 */
void radeon_bo_force_delete(struct radeon_device *rdev)
{
	struct radeon_bo *bo, *n;

	if (list_empty(&rdev->gem.objects)) {
		return;
	}
	dev_err(rdev->dev, "Userspace still has active objects !\n");
	list_for_each_entry_safe(bo, n, &rdev->gem.objects, list) {
		dev_err(rdev->dev, "%p %p %lu %lu force free\n",
			&bo->tbo.base, bo, (unsigned long)bo->tbo.base.size,
			*((unsigned long *)&bo->tbo.base.refcount));
		mutex_lock(&bo->rdev->gem.mutex);
		list_del_init(&bo->list);
		mutex_unlock(&bo->rdev->gem.mutex);
		/* this should unref the ttm bo */
		drm_gem_object_put(&bo->tbo.base);
	}
}
389
/*
 * One-time BO subsystem init: set up write-combined mappings for the VRAM
 * aperture, register the aperture with the VM (OpenBSD UVM path), log the
 * detected VRAM configuration, and initialize TTM.  The __linux__ branches
 * preserve the upstream Linux code; the #else branches are the OpenBSD port.
 */
int radeon_bo_init(struct radeon_device *rdev)
{
	paddr_t start, end;

#ifdef __linux__
	/* reserve PAT memory space to WC for VRAM */
	arch_io_reserve_memtype_wc(rdev->mc.aper_base,
				   rdev->mc.aper_size);
#endif

	/* Add an MTRR for the VRAM */
	if (!rdev->fastfb_working) {
#ifdef __linux__
		rdev->mc.vram_mtrr = arch_phys_wc_add(rdev->mc.aper_base,
						      rdev->mc.aper_size);
#else
		drm_mtrr_add(rdev->mc.aper_base, rdev->mc.aper_size, DRM_MTRR_WC);
		/* fake a 'cookie', seems to be unused? */
		rdev->mc.vram_mtrr = 1;
#endif
	}

	/* Register the aperture's physical pages with UVM so they can be
	 * mmap'd as device memory (OpenBSD-specific).
	 * NOTE(review): assumes bus_space_mmap() returns the aperture's
	 * physical address for offset 0 — confirm against the bus layer. */
	start = atop(bus_space_mmap(rdev->memt, rdev->mc.aper_base, 0, 0, 0));
	end = start + atop(rdev->mc.aper_size);
	uvm_page_physload(start, end, start, end, PHYSLOAD_DEVICE);

	DRM_INFO("Detected VRAM RAM=%lluM, BAR=%lluM\n",
		 rdev->mc.mc_vram_size >> 20,
		 (unsigned long long)rdev->mc.aper_size >> 20);
	DRM_INFO("RAM width %dbits %cDR\n",
		 rdev->mc.vram_width, rdev->mc.vram_is_ddr ? 'D' : 'S');
	return radeon_ttm_init(rdev);
}
423
/*
 * BO subsystem teardown: shut down TTM and undo the write-combining setup
 * performed in radeon_bo_init() (Linux and OpenBSD variants respectively).
 */
void radeon_bo_fini(struct radeon_device *rdev)
{
	radeon_ttm_fini(rdev);
#ifdef __linux__
	arch_phys_wc_del(rdev->mc.vram_mtrr);
	arch_io_free_memtype_wc(rdev->mc.aper_base, rdev->mc.aper_size);
#else
	drm_mtrr_del(0, rdev->mc.aper_base, rdev->mc.aper_size, DRM_MTRR_WC);
#endif
}
434
435 /* Returns how many bytes TTM can move per IB.
436 */
radeon_bo_get_threshold_for_moves(struct radeon_device * rdev)437 static u64 radeon_bo_get_threshold_for_moves(struct radeon_device *rdev)
438 {
439 u64 real_vram_size = rdev->mc.real_vram_size;
440 struct ttm_resource_manager *man =
441 ttm_manager_type(&rdev->mman.bdev, TTM_PL_VRAM);
442 u64 vram_usage = ttm_resource_manager_usage(man);
443
444 /* This function is based on the current VRAM usage.
445 *
446 * - If all of VRAM is free, allow relocating the number of bytes that
447 * is equal to 1/4 of the size of VRAM for this IB.
448
449 * - If more than one half of VRAM is occupied, only allow relocating
450 * 1 MB of data for this IB.
451 *
452 * - From 0 to one half of used VRAM, the threshold decreases
453 * linearly.
454 * __________________
455 * 1/4 of -|\ |
456 * VRAM | \ |
457 * | \ |
458 * | \ |
459 * | \ |
460 * | \ |
461 * | \ |
462 * | \________|1 MB
463 * |----------------|
464 * VRAM 0 % 100 %
465 * used used
466 *
467 * Note: It's a threshold, not a limit. The threshold must be crossed
468 * for buffer relocations to stop, so any buffer of an arbitrary size
469 * can be moved as long as the threshold isn't crossed before
470 * the relocation takes place. We don't want to disable buffer
471 * relocations completely.
472 *
473 * The idea is that buffers should be placed in VRAM at creation time
474 * and TTM should only do a minimum number of relocations during
475 * command submission. In practice, you need to submit at least
476 * a dozen IBs to move all buffers to VRAM if they are in GTT.
477 *
478 * Also, things can get pretty crazy under memory pressure and actual
479 * VRAM usage can change a lot, so playing safe even at 50% does
480 * consistently increase performance.
481 */
482
483 u64 half_vram = real_vram_size >> 1;
484 u64 half_free_vram = vram_usage >= half_vram ? 0 : half_vram - vram_usage;
485 u64 bytes_moved_threshold = half_free_vram >> 1;
486 return max(bytes_moved_threshold, 1024*1024ull);
487 }
488
/**
 * radeon_bo_list_validate - reserve and validate a CS buffer list
 * @rdev: radeon device
 * @ticket: ww_mutex acquire context used for deadlock-free reservation
 * @head: list of radeon_bo_list entries to validate
 * @ring: ring index (UVD gets special segment placement)
 *
 * Reserves every BO on @head, validates each unpinned BO into its
 * preferred domain — keeping it where it is once the per-IB byte-move
 * threshold has been crossed — and records the resulting GPU offset and
 * tiling flags for CS relocation.  On validation failure the allowed
 * domains are retried once before backing off the whole reservation.
 * Returns 0 or a negative errno.
 */
int radeon_bo_list_validate(struct radeon_device *rdev,
			    struct ww_acquire_ctx *ticket,
			    struct list_head *head, int ring)
{
	struct ttm_operation_ctx ctx = { true, false };
	struct radeon_bo_list *lobj;
	struct list_head duplicates;
	int r;
	u64 bytes_moved = 0, initial_bytes_moved;
	u64 bytes_moved_threshold = radeon_bo_get_threshold_for_moves(rdev);

	INIT_LIST_HEAD(&duplicates);
	r = ttm_eu_reserve_buffers(ticket, head, true, &duplicates);
	if (unlikely(r != 0)) {
		return r;
	}

	list_for_each_entry(lobj, head, tv.head) {
		struct radeon_bo *bo = lobj->robj;
		/* Pinned BOs cannot move; only record their state below. */
		if (!bo->tbo.pin_count) {
			u32 domain = lobj->preferred_domains;
			u32 allowed = lobj->allowed_domains;
			u32 current_domain =
				radeon_mem_type_to_domain(bo->tbo.resource->mem_type);

			/* Check if this buffer will be moved and don't move it
			 * if we have moved too many buffers for this IB already.
			 *
			 * Note that this allows moving at least one buffer of
			 * any size, because it doesn't take the current "bo"
			 * into account. We don't want to disallow buffer moves
			 * completely.
			 */
			if ((allowed & current_domain) != 0 &&
			    (domain & current_domain) == 0 && /* will be moved */
			    bytes_moved > bytes_moved_threshold) {
				/* don't move it */
				domain = current_domain;
			}

		retry:
			radeon_ttm_placement_from_domain(bo, domain);
			if (ring == R600_RING_TYPE_UVD_INDEX)
				radeon_uvd_force_into_uvd_segment(bo, allowed);

			/* Account the bytes this validation actually moved. */
			initial_bytes_moved = atomic64_read(&rdev->num_bytes_moved);
			r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
			bytes_moved += atomic64_read(&rdev->num_bytes_moved) -
				       initial_bytes_moved;

			if (unlikely(r)) {
				/* Retry once with the wider allowed-domain
				 * set, except on signal interruption. */
				if (r != -ERESTARTSYS &&
				    domain != lobj->allowed_domains) {
					domain = lobj->allowed_domains;
					goto retry;
				}
				ttm_eu_backoff_reservation(ticket, head);
				return r;
			}
		}
		lobj->gpu_offset = radeon_bo_gpu_offset(bo);
		lobj->tiling_flags = bo->tiling_flags;
	}

	/* Duplicates were reserved alongside; just record their state. */
	list_for_each_entry(lobj, &duplicates, tv.head) {
		lobj->gpu_offset = radeon_bo_gpu_offset(lobj->robj);
		lobj->tiling_flags = lobj->robj->tiling_flags;
	}

	return 0;
}
560
/**
 * radeon_bo_get_surface_reg - assign a hardware surface register to @bo
 * @bo: tiled buffer object needing a surface register (reservation held)
 *
 * Reuses the BO's existing register if it has one; otherwise takes a free
 * slot or steals one from an unpinned BO (unmapping that BO's CPU
 * mappings so it faults and revalidates).  Programs the register with the
 * BO's tiling setup.  Returns 0, or -ENOMEM when all registers are held
 * by pinned BOs.
 */
int radeon_bo_get_surface_reg(struct radeon_bo *bo)
{
	struct radeon_device *rdev = bo->rdev;
	struct radeon_surface_reg *reg;
	struct radeon_bo *old_object;
	int steal;
	int i;

	dma_resv_assert_held(bo->tbo.base.resv);

	/* Untiled BOs never need a surface register. */
	if (!bo->tiling_flags)
		return 0;

	/* Already assigned: just reprogram it below. */
	if (bo->surface_reg >= 0) {
		i = bo->surface_reg;
		goto out;
	}

	steal = -1;
	for (i = 0; i < RADEON_GEM_MAX_SURFACES; i++) {

		reg = &rdev->surface_regs[i];
		if (!reg->bo)
			break;

		/* Remember an unpinned victim in case no slot is free. */
		old_object = reg->bo;
		if (old_object->tbo.pin_count == 0)
			steal = i;
	}

	/* if we are all out */
	if (i == RADEON_GEM_MAX_SURFACES) {
		if (steal == -1)
			return -ENOMEM;
		/* find someone with a surface reg and nuke their BO */
		reg = &rdev->surface_regs[steal];
		old_object = reg->bo;
		/* blow away the mapping */
		DRM_DEBUG("stealing surface reg %d from %p\n", steal, old_object);
		ttm_bo_unmap_virtual(&old_object->tbo);
		old_object->surface_reg = -1;
		i = steal;
	}

	bo->surface_reg = i;
	reg->bo = bo;

out:
	radeon_set_surface_reg(rdev, i, bo->tiling_flags, bo->pitch,
			       bo->tbo.resource->start << PAGE_SHIFT,
			       bo->tbo.base.size);
	return 0;
}
614
radeon_bo_clear_surface_reg(struct radeon_bo * bo)615 static void radeon_bo_clear_surface_reg(struct radeon_bo *bo)
616 {
617 struct radeon_device *rdev = bo->rdev;
618 struct radeon_surface_reg *reg;
619
620 if (bo->surface_reg == -1)
621 return;
622
623 reg = &rdev->surface_regs[bo->surface_reg];
624 radeon_clear_surface_reg(rdev, bo->surface_reg);
625
626 reg->bo = NULL;
627 bo->surface_reg = -1;
628 }
629
/* Evergreen bank width/height and macro-tile aspect accept only 0, 1, 2,
 * 4 or 8 (a power of two up to 8, or 0 for the default). */
static bool radeon_bo_eg_tiling_param_valid(unsigned int val)
{
	switch (val) {
	case 0:
	case 1:
	case 2:
	case 4:
	case 8:
		return true;
	default:
		return false;
	}
}

/**
 * radeon_bo_set_tiling_flags - validate and store tiling state for @bo
 * @bo: buffer object to update
 * @tiling_flags: packed RADEON_TILING_* flags and Evergreen tile parameters
 * @pitch: surface pitch
 *
 * On Evergreen and newer the packed bank-width, bank-height, macro-tile
 * aspect and (stencil) tile-split fields are range-checked before anything
 * is stored.  The BO is reserved while the fields are written.  Returns 0,
 * -EINVAL on bad tiling parameters, or the radeon_bo_reserve() error.
 */
int radeon_bo_set_tiling_flags(struct radeon_bo *bo,
			       uint32_t tiling_flags, uint32_t pitch)
{
	struct radeon_device *rdev = bo->rdev;
	int r;

	if (rdev->family >= CHIP_CEDAR) {
		unsigned bankw, bankh, mtaspect, tilesplit, stilesplit;

		bankw = (tiling_flags >> RADEON_TILING_EG_BANKW_SHIFT) & RADEON_TILING_EG_BANKW_MASK;
		bankh = (tiling_flags >> RADEON_TILING_EG_BANKH_SHIFT) & RADEON_TILING_EG_BANKH_MASK;
		mtaspect = (tiling_flags >> RADEON_TILING_EG_MACRO_TILE_ASPECT_SHIFT) & RADEON_TILING_EG_MACRO_TILE_ASPECT_MASK;
		tilesplit = (tiling_flags >> RADEON_TILING_EG_TILE_SPLIT_SHIFT) & RADEON_TILING_EG_TILE_SPLIT_MASK;
		stilesplit = (tiling_flags >> RADEON_TILING_EG_STENCIL_TILE_SPLIT_SHIFT) & RADEON_TILING_EG_STENCIL_TILE_SPLIT_MASK;

		if (!radeon_bo_eg_tiling_param_valid(bankw) ||
		    !radeon_bo_eg_tiling_param_valid(bankh) ||
		    !radeon_bo_eg_tiling_param_valid(mtaspect))
			return -EINVAL;
		if (tilesplit > 6 || stilesplit > 6)
			return -EINVAL;
	}
	r = radeon_bo_reserve(bo, false);
	if (unlikely(r != 0))
		return r;
	bo->tiling_flags = tiling_flags;
	bo->pitch = pitch;
	radeon_bo_unreserve(bo);
	return 0;
}
689
/* Read back @bo's tiling flags and pitch; either out pointer may be NULL.
 * The caller must hold the BO's reservation for a consistent snapshot. */
void radeon_bo_get_tiling_flags(struct radeon_bo *bo,
				uint32_t *tiling_flags,
				uint32_t *pitch)
{
	dma_resv_assert_held(bo->tbo.base.resv);

	if (tiling_flags != NULL)
		*tiling_flags = bo->tiling_flags;
	if (pitch != NULL)
		*pitch = bo->pitch;
}
701
/*
 * Keep @bo's surface-register assignment in sync with its placement.
 * @has_moved: the BO just changed memory type
 * @force_drop: unconditionally release the register (no reservation needed)
 *
 * Only BOs with RADEON_TILING_SURFACE set ever hold a register; a register
 * is useful only while the BO lives in VRAM.  Returns 0, or the result of
 * radeon_bo_get_surface_reg() when a register must be (re)acquired.
 */
int radeon_bo_check_tiling(struct radeon_bo *bo, bool has_moved,
			   bool force_drop)
{
	if (!force_drop)
		dma_resv_assert_held(bo->tbo.base.resv);

	if (!(bo->tiling_flags & RADEON_TILING_SURFACE))
		return 0;

	if (force_drop) {
		radeon_bo_clear_surface_reg(bo);
		return 0;
	}

	if (bo->tbo.resource->mem_type != TTM_PL_VRAM) {
		/* Outside VRAM the register is dead weight; release it
		 * only when the BO actually just moved out. */
		if (has_moved && bo->surface_reg >= 0)
			radeon_bo_clear_surface_reg(bo);
		return 0;
	}

	/* In VRAM, already registered, and not freshly moved: nothing to do. */
	if (bo->surface_reg >= 0 && !has_moved)
		return 0;

	return radeon_bo_get_surface_reg(bo);
}
730
radeon_bo_move_notify(struct ttm_buffer_object * bo)731 void radeon_bo_move_notify(struct ttm_buffer_object *bo)
732 {
733 struct radeon_bo *rbo;
734
735 if (!radeon_ttm_bo_is_radeon_bo(bo))
736 return;
737
738 rbo = container_of(bo, struct radeon_bo, tbo);
739 radeon_bo_check_tiling(rbo, 0, 1);
740 radeon_vm_bo_invalidate(rbo->rdev, rbo);
741 }
742
/*
 * CPU fault handler hook: called with the BO reserved when userspace
 * faults on a mapping.  A BO entirely inside CPU-visible VRAM (or outside
 * VRAM) needs no work.  Otherwise the BO is revalidated into the visible
 * window, falling back to GTT on -ENOMEM.  Returns 0, VM_FAULT_NOPAGE to
 * retry the fault, or VM_FAULT_SIGBUS on unrecoverable failure.
 */
vm_fault_t radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
{
	struct ttm_operation_ctx ctx = { false, false };
	struct radeon_device *rdev;
	struct radeon_bo *rbo;
	unsigned long offset, size, lpfn;
	int i, r;

	if (!radeon_ttm_bo_is_radeon_bo(bo))
		return 0;
	rbo = container_of(bo, struct radeon_bo, tbo);
	radeon_bo_check_tiling(rbo, 0, 0);
	rdev = rbo->rdev;
	if (bo->resource->mem_type != TTM_PL_VRAM)
		return 0;

	size = bo->resource->size;
	offset = bo->resource->start << PAGE_SHIFT;
	/* Fully CPU-visible already: the fault can be satisfied as-is. */
	if ((offset + size) <= rdev->mc.visible_vram_size)
		return 0;

	/* Can't move a pinned BO to visible VRAM */
	if (rbo->tbo.pin_count > 0)
		return VM_FAULT_SIGBUS;

	/* hurrah the memory is not visible ! */
	radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_VRAM);
	lpfn =	rdev->mc.visible_vram_size >> PAGE_SHIFT;
	for (i = 0; i < rbo->placement.num_placement; i++) {
		/* Force into visible VRAM */
		if ((rbo->placements[i].mem_type == TTM_PL_VRAM) &&
		    (!rbo->placements[i].lpfn || rbo->placements[i].lpfn > lpfn))
			rbo->placements[i].lpfn = lpfn;
	}
	r = ttm_bo_validate(bo, &rbo->placement, &ctx);
	if (unlikely(r == -ENOMEM)) {
		/* Visible VRAM is full: fall back to GTT. */
		radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_GTT);
		r = ttm_bo_validate(bo, &rbo->placement, &ctx);
	} else if (likely(!r)) {
		offset = bo->resource->start << PAGE_SHIFT;
		/* this should never happen */
		if ((offset + size) > rdev->mc.visible_vram_size)
			return VM_FAULT_SIGBUS;
	}

	if (unlikely(r == -EBUSY || r == -ERESTARTSYS))
		return VM_FAULT_NOPAGE;
	else if (unlikely(r))
		return VM_FAULT_SIGBUS;

	ttm_bo_move_to_lru_tail_unlocked(bo);
	return 0;
}
796
797 /**
798 * radeon_bo_fence - add fence to buffer object
799 *
800 * @bo: buffer object in question
801 * @fence: fence to add
802 * @shared: true if fence should be added shared
803 *
804 */
radeon_bo_fence(struct radeon_bo * bo,struct radeon_fence * fence,bool shared)805 void radeon_bo_fence(struct radeon_bo *bo, struct radeon_fence *fence,
806 bool shared)
807 {
808 struct dma_resv *resv = bo->tbo.base.resv;
809 int r;
810
811 r = dma_resv_reserve_fences(resv, 1);
812 if (r) {
813 /* As last resort on OOM we block for the fence */
814 dma_fence_wait(&fence->base, false);
815 return;
816 }
817
818 dma_resv_add_fence(resv, &fence->base, shared ?
819 DMA_RESV_USAGE_READ : DMA_RESV_USAGE_WRITE);
820 }
821