xref: /linux/drivers/gpu/drm/xe/xe_bo.c (revision 1e525507)
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2021 Intel Corporation
4  */
5 
6 #include "xe_bo.h"
7 
8 #include <linux/dma-buf.h>
9 
10 #include <drm/drm_drv.h>
11 #include <drm/drm_gem_ttm_helper.h>
12 #include <drm/drm_managed.h>
13 #include <drm/ttm/ttm_device.h>
14 #include <drm/ttm/ttm_placement.h>
15 #include <drm/ttm/ttm_tt.h>
16 #include <drm/xe_drm.h>
17 
18 #include "xe_device.h"
19 #include "xe_dma_buf.h"
20 #include "xe_drm_client.h"
21 #include "xe_ggtt.h"
22 #include "xe_gt.h"
23 #include "xe_map.h"
24 #include "xe_migrate.h"
25 #include "xe_preempt_fence.h"
26 #include "xe_res_cursor.h"
27 #include "xe_trace.h"
28 #include "xe_ttm_stolen_mgr.h"
29 #include "xe_vm.h"
30 
31 const char *const xe_mem_type_to_name[TTM_NUM_MEM_TYPES]  = {
32 	[XE_PL_SYSTEM] = "system",
33 	[XE_PL_TT] = "gtt",
34 	[XE_PL_VRAM0] = "vram0",
35 	[XE_PL_VRAM1] = "vram1",
36 	[XE_PL_STOLEN] = "stolen"
37 };
38 
39 static const struct ttm_place sys_placement_flags = {
40 	.fpfn = 0,
41 	.lpfn = 0,
42 	.mem_type = XE_PL_SYSTEM,
43 	.flags = 0,
44 };
45 
46 static struct ttm_placement sys_placement = {
47 	.num_placement = 1,
48 	.placement = &sys_placement_flags,
49 };
50 
51 static const struct ttm_place tt_placement_flags[] = {
52 	{
53 		.fpfn = 0,
54 		.lpfn = 0,
55 		.mem_type = XE_PL_TT,
56 		.flags = TTM_PL_FLAG_DESIRED,
57 	},
58 	{
59 		.fpfn = 0,
60 		.lpfn = 0,
61 		.mem_type = XE_PL_SYSTEM,
62 		.flags = TTM_PL_FLAG_FALLBACK,
63 	}
64 };
65 
66 static struct ttm_placement tt_placement = {
67 	.num_placement = 2,
68 	.placement = tt_placement_flags,
69 };
70 
71 bool mem_type_is_vram(u32 mem_type)
72 {
73 	return mem_type >= XE_PL_VRAM0 && mem_type != XE_PL_STOLEN;
74 }
75 
76 static bool resource_is_stolen_vram(struct xe_device *xe, struct ttm_resource *res)
77 {
78 	return res->mem_type == XE_PL_STOLEN && IS_DGFX(xe);
79 }
80 
81 static bool resource_is_vram(struct ttm_resource *res)
82 {
83 	return mem_type_is_vram(res->mem_type);
84 }
85 
86 bool xe_bo_is_vram(struct xe_bo *bo)
87 {
88 	return resource_is_vram(bo->ttm.resource) ||
89 		resource_is_stolen_vram(xe_bo_device(bo), bo->ttm.resource);
90 }
91 
92 bool xe_bo_is_stolen(struct xe_bo *bo)
93 {
94 	return bo->ttm.resource->mem_type == XE_PL_STOLEN;
95 }
96 
97 /**
98  * xe_bo_is_stolen_devmem - check if BO is of stolen type accessed via PCI BAR
99  * @bo: The BO
100  *
101  * The stolen memory is accessed through the PCI BAR for both DGFX and some
102  * integrated platforms that have a dedicated bit in the PTE for devmem (DM).
103  *
104  * Returns: true if it's stolen memory accessed via PCI BAR, false otherwise.
105  */
106 bool xe_bo_is_stolen_devmem(struct xe_bo *bo)
107 {
108 	return xe_bo_is_stolen(bo) &&
109 		GRAPHICS_VERx100(xe_bo_device(bo)) >= 1270;
110 }
111 
112 static bool xe_bo_is_user(struct xe_bo *bo)
113 {
114 	return bo->flags & XE_BO_CREATE_USER_BIT;
115 }
116 
117 static struct xe_migrate *
118 mem_type_to_migrate(struct xe_device *xe, u32 mem_type)
119 {
120 	struct xe_tile *tile;
121 
122 	xe_assert(xe, mem_type == XE_PL_STOLEN || mem_type_is_vram(mem_type));
123 	tile = &xe->tiles[mem_type == XE_PL_STOLEN ? 0 : (mem_type - XE_PL_VRAM0)];
124 	return tile->migrate;
125 }
126 
127 static struct xe_mem_region *res_to_mem_region(struct ttm_resource *res)
128 {
129 	struct xe_device *xe = ttm_to_xe_device(res->bo->bdev);
130 	struct ttm_resource_manager *mgr;
131 
132 	xe_assert(xe, resource_is_vram(res));
133 	mgr = ttm_manager_type(&xe->ttm, res->mem_type);
134 	return to_xe_ttm_vram_mgr(mgr)->vram;
135 }
136 
137 static void try_add_system(struct xe_device *xe, struct xe_bo *bo,
138 			   u32 bo_flags, u32 *c)
139 {
140 	if (bo_flags & XE_BO_CREATE_SYSTEM_BIT) {
141 		xe_assert(xe, *c < ARRAY_SIZE(bo->placements));
142 
143 		bo->placements[*c] = (struct ttm_place) {
144 			.mem_type = XE_PL_TT,
145 		};
146 		*c += 1;
147 	}
148 }
149 
150 static void add_vram(struct xe_device *xe, struct xe_bo *bo,
151 		     struct ttm_place *places, u32 bo_flags, u32 mem_type, u32 *c)
152 {
153 	struct ttm_place place = { .mem_type = mem_type };
154 	struct xe_mem_region *vram;
155 	u64 io_size;
156 
157 	xe_assert(xe, *c < ARRAY_SIZE(bo->placements));
158 
159 	vram = to_xe_ttm_vram_mgr(ttm_manager_type(&xe->ttm, mem_type))->vram;
160 	xe_assert(xe, vram && vram->usable_size);
161 	io_size = vram->io_size;
162 
163 	/*
164 	 * For eviction / restore on suspend / resume objects
165 	 * pinned in VRAM must be contiguous
166 	 */
167 	if (bo_flags & (XE_BO_CREATE_PINNED_BIT |
168 			XE_BO_CREATE_GGTT_BIT))
169 		place.flags |= TTM_PL_FLAG_CONTIGUOUS;
170 
171 	if (io_size < vram->usable_size) {
172 		if (bo_flags & XE_BO_NEEDS_CPU_ACCESS) {
173 			place.fpfn = 0;
174 			place.lpfn = io_size >> PAGE_SHIFT;
175 		} else {
176 			place.flags |= TTM_PL_FLAG_TOPDOWN;
177 		}
178 	}
179 	places[*c] = place;
180 	*c += 1;
181 }
182 
183 static void try_add_vram(struct xe_device *xe, struct xe_bo *bo,
184 			 u32 bo_flags, u32 *c)
185 {
186 	if (bo_flags & XE_BO_CREATE_VRAM0_BIT)
187 		add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM0, c);
188 	if (bo_flags & XE_BO_CREATE_VRAM1_BIT)
189 		add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM1, c);
190 }
191 
192 static void try_add_stolen(struct xe_device *xe, struct xe_bo *bo,
193 			   u32 bo_flags, u32 *c)
194 {
195 	if (bo_flags & XE_BO_CREATE_STOLEN_BIT) {
196 		xe_assert(xe, *c < ARRAY_SIZE(bo->placements));
197 
198 		bo->placements[*c] = (struct ttm_place) {
199 			.mem_type = XE_PL_STOLEN,
200 			.flags = bo_flags & (XE_BO_CREATE_PINNED_BIT |
201 					     XE_BO_CREATE_GGTT_BIT) ?
202 				TTM_PL_FLAG_CONTIGUOUS : 0,
203 		};
204 		*c += 1;
205 	}
206 }
207 
208 static int __xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo,
209 				       u32 bo_flags)
210 {
211 	u32 c = 0;
212 
213 	try_add_vram(xe, bo, bo_flags, &c);
214 	try_add_system(xe, bo, bo_flags, &c);
215 	try_add_stolen(xe, bo, bo_flags, &c);
216 
217 	if (!c)
218 		return -EINVAL;
219 
220 	bo->placement = (struct ttm_placement) {
221 		.num_placement = c,
222 		.placement = bo->placements,
223 	};
224 
225 	return 0;
226 }
227 
228 int xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo,
229 			      u32 bo_flags)
230 {
231 	xe_bo_assert_held(bo);
232 	return __xe_bo_placement_for_flags(xe, bo, bo_flags);
233 }
234 
235 static void xe_evict_flags(struct ttm_buffer_object *tbo,
236 			   struct ttm_placement *placement)
237 {
238 	if (!xe_bo_is_xe_bo(tbo)) {
239 		/* Don't handle scatter gather BOs */
240 		if (tbo->type == ttm_bo_type_sg) {
241 			placement->num_placement = 0;
242 			return;
243 		}
244 
245 		*placement = sys_placement;
246 		return;
247 	}
248 
249 	/*
250 	 * For xe, sg bos that are evicted to system just triggers a
251 	 * rebind of the sg list upon subsequent validation to XE_PL_TT.
252 	 */
253 	switch (tbo->resource->mem_type) {
254 	case XE_PL_VRAM0:
255 	case XE_PL_VRAM1:
256 	case XE_PL_STOLEN:
257 		*placement = tt_placement;
258 		break;
259 	case XE_PL_TT:
260 	default:
261 		*placement = sys_placement;
262 		break;
263 	}
264 }
265 
266 struct xe_ttm_tt {
267 	struct ttm_tt ttm;
268 	struct device *dev;
269 	struct sg_table sgt;
270 	struct sg_table *sg;
271 };
272 
273 static int xe_tt_map_sg(struct ttm_tt *tt)
274 {
275 	struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
276 	unsigned long num_pages = tt->num_pages;
277 	int ret;
278 
279 	XE_WARN_ON(tt->page_flags & TTM_TT_FLAG_EXTERNAL);
280 
281 	if (xe_tt->sg)
282 		return 0;
283 
284 	ret = sg_alloc_table_from_pages_segment(&xe_tt->sgt, tt->pages,
285 						num_pages, 0,
286 						(u64)num_pages << PAGE_SHIFT,
287 						xe_sg_segment_size(xe_tt->dev),
288 						GFP_KERNEL);
289 	if (ret)
290 		return ret;
291 
292 	xe_tt->sg = &xe_tt->sgt;
293 	ret = dma_map_sgtable(xe_tt->dev, xe_tt->sg, DMA_BIDIRECTIONAL,
294 			      DMA_ATTR_SKIP_CPU_SYNC);
295 	if (ret) {
296 		sg_free_table(xe_tt->sg);
297 		xe_tt->sg = NULL;
298 		return ret;
299 	}
300 
301 	return 0;
302 }
303 
304 struct sg_table *xe_bo_sg(struct xe_bo *bo)
305 {
306 	struct ttm_tt *tt = bo->ttm.ttm;
307 	struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
308 
309 	return xe_tt->sg;
310 }
311 
312 static struct ttm_tt *xe_ttm_tt_create(struct ttm_buffer_object *ttm_bo,
313 				       u32 page_flags)
314 {
315 	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
316 	struct xe_device *xe = xe_bo_device(bo);
317 	struct xe_ttm_tt *tt;
318 	unsigned long extra_pages;
319 	enum ttm_caching caching;
320 	int err;
321 
322 	tt = kzalloc(sizeof(*tt), GFP_KERNEL);
323 	if (!tt)
324 		return NULL;
325 
326 	tt->dev = xe->drm.dev;
327 
328 	extra_pages = 0;
329 	if (xe_bo_needs_ccs_pages(bo))
330 		extra_pages = DIV_ROUND_UP(xe_device_ccs_bytes(xe, bo->size),
331 					   PAGE_SIZE);
332 
333 	switch (bo->cpu_caching) {
334 	case DRM_XE_GEM_CPU_CACHING_WC:
335 		caching = ttm_write_combined;
336 		break;
337 	default:
338 		caching = ttm_cached;
339 		break;
340 	}
341 
342 	WARN_ON((bo->flags & XE_BO_CREATE_USER_BIT) && !bo->cpu_caching);
343 
344 	/*
345 	 * Display scanout is always non-coherent with the CPU cache.
346 	 *
347 	 * For Xe_LPG and beyond, PPGTT PTE lookups are also non-coherent and
348 	 * require a CPU:WC mapping.
349 	 */
350 	if ((!bo->cpu_caching && bo->flags & XE_BO_SCANOUT_BIT) ||
351 	    (xe->info.graphics_verx100 >= 1270 && bo->flags & XE_BO_PAGETABLE))
352 		caching = ttm_write_combined;
353 
354 	err = ttm_tt_init(&tt->ttm, &bo->ttm, page_flags, caching, extra_pages);
355 	if (err) {
356 		kfree(tt);
357 		return NULL;
358 	}
359 
360 	return &tt->ttm;
361 }
362 
363 static int xe_ttm_tt_populate(struct ttm_device *ttm_dev, struct ttm_tt *tt,
364 			      struct ttm_operation_ctx *ctx)
365 {
366 	int err;
367 
368 	/*
369 	 * dma-bufs are not populated with pages, and the dma-
370 	 * addresses are set up when moved to XE_PL_TT.
371 	 */
372 	if (tt->page_flags & TTM_TT_FLAG_EXTERNAL)
373 		return 0;
374 
375 	err = ttm_pool_alloc(&ttm_dev->pool, tt, ctx);
376 	if (err)
377 		return err;
378 
379 	/* A follow up may move this xe_bo_move when BO is moved to XE_PL_TT */
380 	err = xe_tt_map_sg(tt);
381 	if (err)
382 		ttm_pool_free(&ttm_dev->pool, tt);
383 
384 	return err;
385 }
386 
387 static void xe_ttm_tt_unpopulate(struct ttm_device *ttm_dev, struct ttm_tt *tt)
388 {
389 	struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
390 
391 	if (tt->page_flags & TTM_TT_FLAG_EXTERNAL)
392 		return;
393 
394 	if (xe_tt->sg) {
395 		dma_unmap_sgtable(xe_tt->dev, xe_tt->sg,
396 				  DMA_BIDIRECTIONAL, 0);
397 		sg_free_table(xe_tt->sg);
398 		xe_tt->sg = NULL;
399 	}
400 
401 	return ttm_pool_free(&ttm_dev->pool, tt);
402 }
403 
404 static void xe_ttm_tt_destroy(struct ttm_device *ttm_dev, struct ttm_tt *tt)
405 {
406 	ttm_tt_fini(tt);
407 	kfree(tt);
408 }
409 
410 static int xe_ttm_io_mem_reserve(struct ttm_device *bdev,
411 				 struct ttm_resource *mem)
412 {
413 	struct xe_device *xe = ttm_to_xe_device(bdev);
414 
415 	switch (mem->mem_type) {
416 	case XE_PL_SYSTEM:
417 	case XE_PL_TT:
418 		return 0;
419 	case XE_PL_VRAM0:
420 	case XE_PL_VRAM1: {
421 		struct xe_ttm_vram_mgr_resource *vres =
422 			to_xe_ttm_vram_mgr_resource(mem);
423 		struct xe_mem_region *vram = res_to_mem_region(mem);
424 
425 		if (vres->used_visible_size < mem->size)
426 			return -EINVAL;
427 
428 		mem->bus.offset = mem->start << PAGE_SHIFT;
429 
430 		if (vram->mapping &&
431 		    mem->placement & TTM_PL_FLAG_CONTIGUOUS)
432 			mem->bus.addr = (u8 __force *)vram->mapping +
433 				mem->bus.offset;
434 
435 		mem->bus.offset += vram->io_start;
436 		mem->bus.is_iomem = true;
437 
438 #if  !defined(CONFIG_X86)
439 		mem->bus.caching = ttm_write_combined;
440 #endif
441 		return 0;
442 	} case XE_PL_STOLEN:
443 		return xe_ttm_stolen_io_mem_reserve(xe, mem);
444 	default:
445 		return -EINVAL;
446 	}
447 }
448 
449 static int xe_bo_trigger_rebind(struct xe_device *xe, struct xe_bo *bo,
450 				const struct ttm_operation_ctx *ctx)
451 {
452 	struct dma_resv_iter cursor;
453 	struct dma_fence *fence;
454 	struct drm_gem_object *obj = &bo->ttm.base;
455 	struct drm_gpuvm_bo *vm_bo;
456 	bool idle = false;
457 	int ret = 0;
458 
459 	dma_resv_assert_held(bo->ttm.base.resv);
460 
461 	if (!list_empty(&bo->ttm.base.gpuva.list)) {
462 		dma_resv_iter_begin(&cursor, bo->ttm.base.resv,
463 				    DMA_RESV_USAGE_BOOKKEEP);
464 		dma_resv_for_each_fence_unlocked(&cursor, fence)
465 			dma_fence_enable_sw_signaling(fence);
466 		dma_resv_iter_end(&cursor);
467 	}
468 
469 	drm_gem_for_each_gpuvm_bo(vm_bo, obj) {
470 		struct xe_vm *vm = gpuvm_to_vm(vm_bo->vm);
471 		struct drm_gpuva *gpuva;
472 
473 		if (!xe_vm_in_fault_mode(vm)) {
474 			drm_gpuvm_bo_evict(vm_bo, true);
475 			continue;
476 		}
477 
478 		if (!idle) {
479 			long timeout;
480 
481 			if (ctx->no_wait_gpu &&
482 			    !dma_resv_test_signaled(bo->ttm.base.resv,
483 						    DMA_RESV_USAGE_BOOKKEEP))
484 				return -EBUSY;
485 
486 			timeout = dma_resv_wait_timeout(bo->ttm.base.resv,
487 							DMA_RESV_USAGE_BOOKKEEP,
488 							ctx->interruptible,
489 							MAX_SCHEDULE_TIMEOUT);
490 			if (!timeout)
491 				return -ETIME;
492 			if (timeout < 0)
493 				return timeout;
494 
495 			idle = true;
496 		}
497 
498 		drm_gpuvm_bo_for_each_va(gpuva, vm_bo) {
499 			struct xe_vma *vma = gpuva_to_vma(gpuva);
500 
501 			trace_xe_vma_evict(vma);
502 			ret = xe_vm_invalidate_vma(vma);
503 			if (XE_WARN_ON(ret))
504 				return ret;
505 		}
506 	}
507 
508 	return ret;
509 }
510 
511 /*
512  * The dma-buf map_attachment() / unmap_attachment() is hooked up here.
513  * Note that unmapping the attachment is deferred to the next
514  * map_attachment time, or to bo destroy (after idling) whichever comes first.
515  * This is to avoid syncing before unmap_attachment(), assuming that the
516  * caller relies on idling the reservation object before moving the
517  * backing store out. Should that assumption not hold, then we will be able
518  * to unconditionally call unmap_attachment() when moving out to system.
519  */
520 static int xe_bo_move_dmabuf(struct ttm_buffer_object *ttm_bo,
521 			     struct ttm_resource *new_res)
522 {
523 	struct dma_buf_attachment *attach = ttm_bo->base.import_attach;
524 	struct xe_ttm_tt *xe_tt = container_of(ttm_bo->ttm, struct xe_ttm_tt,
525 					       ttm);
526 	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
527 	struct sg_table *sg;
528 
529 	xe_assert(xe, attach);
530 	xe_assert(xe, ttm_bo->ttm);
531 
532 	if (new_res->mem_type == XE_PL_SYSTEM)
533 		goto out;
534 
535 	if (ttm_bo->sg) {
536 		dma_buf_unmap_attachment(attach, ttm_bo->sg, DMA_BIDIRECTIONAL);
537 		ttm_bo->sg = NULL;
538 	}
539 
540 	sg = dma_buf_map_attachment(attach, DMA_BIDIRECTIONAL);
541 	if (IS_ERR(sg))
542 		return PTR_ERR(sg);
543 
544 	ttm_bo->sg = sg;
545 	xe_tt->sg = sg;
546 
547 out:
548 	ttm_bo_move_null(ttm_bo, new_res);
549 
550 	return 0;
551 }
552 
553 /**
554  * xe_bo_move_notify - Notify subsystems of a pending move
555  * @bo: The buffer object
556  * @ctx: The struct ttm_operation_ctx controlling locking and waits.
557  *
558  * This function notifies subsystems of an upcoming buffer move.
559  * Upon receiving such a notification, subsystems should schedule
560  * halting access to the underlying pages and optionally add a fence
561  * to the buffer object's dma_resv object, that signals when access is
562  * stopped. The caller will wait on all dma_resv fences before
563  * starting the move.
564  *
565  * A subsystem may commence access to the object after obtaining
566  * bindings to the new backing memory under the object lock.
567  *
568  * Return: 0 on success, -EINTR or -ERESTARTSYS if interrupted in fault mode,
569  * negative error code on error.
570  */
571 static int xe_bo_move_notify(struct xe_bo *bo,
572 			     const struct ttm_operation_ctx *ctx)
573 {
574 	struct ttm_buffer_object *ttm_bo = &bo->ttm;
575 	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
576 	struct ttm_resource *old_mem = ttm_bo->resource;
577 	u32 old_mem_type = old_mem ? old_mem->mem_type : XE_PL_SYSTEM;
578 	int ret;
579 
580 	/*
581 	 * If this starts to call into many components, consider
582 	 * using a notification chain here.
583 	 */
584 
585 	if (xe_bo_is_pinned(bo))
586 		return -EINVAL;
587 
588 	xe_bo_vunmap(bo);
589 	ret = xe_bo_trigger_rebind(xe, bo, ctx);
590 	if (ret)
591 		return ret;
592 
593 	/* Don't call move_notify() for imported dma-bufs. */
594 	if (ttm_bo->base.dma_buf && !ttm_bo->base.import_attach)
595 		dma_buf_move_notify(ttm_bo->base.dma_buf);
596 
597 	/*
598 	 * TTM has already nuked the mmap for us (see ttm_bo_unmap_virtual),
599 	 * so if we moved from VRAM make sure to unlink this from the userfault
600 	 * tracking.
601 	 */
602 	if (mem_type_is_vram(old_mem_type)) {
603 		mutex_lock(&xe->mem_access.vram_userfault.lock);
604 		if (!list_empty(&bo->vram_userfault_link))
605 			list_del_init(&bo->vram_userfault_link);
606 		mutex_unlock(&xe->mem_access.vram_userfault.lock);
607 	}
608 
609 	return 0;
610 }
611 
612 static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
613 		      struct ttm_operation_ctx *ctx,
614 		      struct ttm_resource *new_mem,
615 		      struct ttm_place *hop)
616 {
617 	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
618 	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
619 	struct ttm_resource *old_mem = ttm_bo->resource;
620 	u32 old_mem_type = old_mem ? old_mem->mem_type : XE_PL_SYSTEM;
621 	struct ttm_tt *ttm = ttm_bo->ttm;
622 	struct xe_migrate *migrate = NULL;
623 	struct dma_fence *fence;
624 	bool move_lacks_source;
625 	bool tt_has_data;
626 	bool needs_clear;
627 	bool handle_system_ccs = (!IS_DGFX(xe) && xe_bo_needs_ccs_pages(bo) &&
628 				  ttm && ttm_tt_is_populated(ttm)) ? true : false;
629 	int ret = 0;
630 	/* Bo creation path, moving to system or TT. */
631 	if ((!old_mem && ttm) && !handle_system_ccs) {
632 		ttm_bo_move_null(ttm_bo, new_mem);
633 		return 0;
634 	}
635 
636 	if (ttm_bo->type == ttm_bo_type_sg) {
637 		ret = xe_bo_move_notify(bo, ctx);
638 		if (!ret)
639 			ret = xe_bo_move_dmabuf(ttm_bo, new_mem);
640 		goto out;
641 	}
642 
643 	tt_has_data = ttm && (ttm_tt_is_populated(ttm) ||
644 			      (ttm->page_flags & TTM_TT_FLAG_SWAPPED));
645 
646 	move_lacks_source = handle_system_ccs ? (!bo->ccs_cleared)  :
647 						(!mem_type_is_vram(old_mem_type) && !tt_has_data);
648 
649 	needs_clear = (ttm && ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC) ||
650 		(!ttm && ttm_bo->type == ttm_bo_type_device);
651 
652 	if ((move_lacks_source && !needs_clear)) {
653 		ttm_bo_move_null(ttm_bo, new_mem);
654 		goto out;
655 	}
656 
657 	if (old_mem_type == XE_PL_SYSTEM && new_mem->mem_type == XE_PL_TT && !handle_system_ccs) {
658 		ttm_bo_move_null(ttm_bo, new_mem);
659 		goto out;
660 	}
661 
662 	/*
663 	 * Failed multi-hop where the old_mem is still marked as
664 	 * TTM_PL_FLAG_TEMPORARY, should just be a dummy move.
665 	 */
666 	if (old_mem_type == XE_PL_TT &&
667 	    new_mem->mem_type == XE_PL_TT) {
668 		ttm_bo_move_null(ttm_bo, new_mem);
669 		goto out;
670 	}
671 
672 	if (!move_lacks_source && !xe_bo_is_pinned(bo)) {
673 		ret = xe_bo_move_notify(bo, ctx);
674 		if (ret)
675 			goto out;
676 	}
677 
678 	if (old_mem_type == XE_PL_TT &&
679 	    new_mem->mem_type == XE_PL_SYSTEM) {
680 		long timeout = dma_resv_wait_timeout(ttm_bo->base.resv,
681 						     DMA_RESV_USAGE_BOOKKEEP,
682 						     true,
683 						     MAX_SCHEDULE_TIMEOUT);
684 		if (timeout < 0) {
685 			ret = timeout;
686 			goto out;
687 		}
688 
689 		if (!handle_system_ccs) {
690 			ttm_bo_move_null(ttm_bo, new_mem);
691 			goto out;
692 		}
693 	}
694 
695 	if (!move_lacks_source &&
696 	    ((old_mem_type == XE_PL_SYSTEM && resource_is_vram(new_mem)) ||
697 	     (mem_type_is_vram(old_mem_type) &&
698 	      new_mem->mem_type == XE_PL_SYSTEM))) {
699 		hop->fpfn = 0;
700 		hop->lpfn = 0;
701 		hop->mem_type = XE_PL_TT;
702 		hop->flags = TTM_PL_FLAG_TEMPORARY;
703 		ret = -EMULTIHOP;
704 		goto out;
705 	}
706 
707 	if (bo->tile)
708 		migrate = bo->tile->migrate;
709 	else if (resource_is_vram(new_mem))
710 		migrate = mem_type_to_migrate(xe, new_mem->mem_type);
711 	else if (mem_type_is_vram(old_mem_type))
712 		migrate = mem_type_to_migrate(xe, old_mem_type);
713 	else
714 		migrate = xe->tiles[0].migrate;
715 
716 	xe_assert(xe, migrate);
717 	trace_xe_bo_move(bo, new_mem->mem_type, old_mem_type, move_lacks_source);
718 	xe_device_mem_access_get(xe);
719 
720 	if (xe_bo_is_pinned(bo) && !xe_bo_is_user(bo)) {
721 		/*
722 		 * Kernel memory that is pinned should only be moved on suspend
723 		 * / resume, some of the pinned memory is required for the
724 		 * device to resume / use the GPU to move other evicted memory
725 		 * (user memory) around. This likely could be optimized a bit
726 		 * futher where we find the minimum set of pinned memory
727 		 * required for resume but for simplity doing a memcpy for all
728 		 * pinned memory.
729 		 */
730 		ret = xe_bo_vmap(bo);
731 		if (!ret) {
732 			ret = ttm_bo_move_memcpy(ttm_bo, ctx, new_mem);
733 
734 			/* Create a new VMAP once kernel BO back in VRAM */
735 			if (!ret && resource_is_vram(new_mem)) {
736 				struct xe_mem_region *vram = res_to_mem_region(new_mem);
737 				void __iomem *new_addr = vram->mapping +
738 					(new_mem->start << PAGE_SHIFT);
739 
740 				if (XE_WARN_ON(new_mem->start == XE_BO_INVALID_OFFSET)) {
741 					ret = -EINVAL;
742 					xe_device_mem_access_put(xe);
743 					goto out;
744 				}
745 
746 				xe_assert(xe, new_mem->start ==
747 					  bo->placements->fpfn);
748 
749 				iosys_map_set_vaddr_iomem(&bo->vmap, new_addr);
750 			}
751 		}
752 	} else {
753 		if (move_lacks_source)
754 			fence = xe_migrate_clear(migrate, bo, new_mem);
755 		else
756 			fence = xe_migrate_copy(migrate, bo, bo, old_mem,
757 						new_mem, handle_system_ccs);
758 		if (IS_ERR(fence)) {
759 			ret = PTR_ERR(fence);
760 			xe_device_mem_access_put(xe);
761 			goto out;
762 		}
763 		if (!move_lacks_source) {
764 			ret = ttm_bo_move_accel_cleanup(ttm_bo, fence, evict,
765 							true, new_mem);
766 			if (ret) {
767 				dma_fence_wait(fence, false);
768 				ttm_bo_move_null(ttm_bo, new_mem);
769 				ret = 0;
770 			}
771 		} else {
772 			/*
773 			 * ttm_bo_move_accel_cleanup() may blow up if
774 			 * bo->resource == NULL, so just attach the
775 			 * fence and set the new resource.
776 			 */
777 			dma_resv_add_fence(ttm_bo->base.resv, fence,
778 					   DMA_RESV_USAGE_KERNEL);
779 			ttm_bo_move_null(ttm_bo, new_mem);
780 		}
781 
782 		dma_fence_put(fence);
783 	}
784 
785 	xe_device_mem_access_put(xe);
786 
787 out:
788 	return ret;
789 
790 }
791 
792 /**
793  * xe_bo_evict_pinned() - Evict a pinned VRAM object to system memory
794  * @bo: The buffer object to move.
795  *
796  * On successful completion, the object memory will be moved to sytem memory.
797  * This function blocks until the object has been fully moved.
798  *
799  * This is needed to for special handling of pinned VRAM object during
800  * suspend-resume.
801  *
802  * Return: 0 on success. Negative error code on failure.
803  */
804 int xe_bo_evict_pinned(struct xe_bo *bo)
805 {
806 	struct ttm_place place = {
807 		.mem_type = XE_PL_TT,
808 	};
809 	struct ttm_placement placement = {
810 		.placement = &place,
811 		.num_placement = 1,
812 	};
813 	struct ttm_operation_ctx ctx = {
814 		.interruptible = false,
815 	};
816 	struct ttm_resource *new_mem;
817 	int ret;
818 
819 	xe_bo_assert_held(bo);
820 
821 	if (WARN_ON(!bo->ttm.resource))
822 		return -EINVAL;
823 
824 	if (WARN_ON(!xe_bo_is_pinned(bo)))
825 		return -EINVAL;
826 
827 	if (WARN_ON(!xe_bo_is_vram(bo)))
828 		return -EINVAL;
829 
830 	ret = ttm_bo_mem_space(&bo->ttm, &placement, &new_mem, &ctx);
831 	if (ret)
832 		return ret;
833 
834 	if (!bo->ttm.ttm) {
835 		bo->ttm.ttm = xe_ttm_tt_create(&bo->ttm, 0);
836 		if (!bo->ttm.ttm) {
837 			ret = -ENOMEM;
838 			goto err_res_free;
839 		}
840 	}
841 
842 	ret = ttm_tt_populate(bo->ttm.bdev, bo->ttm.ttm, &ctx);
843 	if (ret)
844 		goto err_res_free;
845 
846 	ret = dma_resv_reserve_fences(bo->ttm.base.resv, 1);
847 	if (ret)
848 		goto err_res_free;
849 
850 	ret = xe_bo_move(&bo->ttm, false, &ctx, new_mem, NULL);
851 	if (ret)
852 		goto err_res_free;
853 
854 	dma_resv_wait_timeout(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL,
855 			      false, MAX_SCHEDULE_TIMEOUT);
856 
857 	return 0;
858 
859 err_res_free:
860 	ttm_resource_free(&bo->ttm, &new_mem);
861 	return ret;
862 }
863 
864 /**
865  * xe_bo_restore_pinned() - Restore a pinned VRAM object
866  * @bo: The buffer object to move.
867  *
868  * On successful completion, the object memory will be moved back to VRAM.
869  * This function blocks until the object has been fully moved.
870  *
871  * This is needed to for special handling of pinned VRAM object during
872  * suspend-resume.
873  *
874  * Return: 0 on success. Negative error code on failure.
875  */
876 int xe_bo_restore_pinned(struct xe_bo *bo)
877 {
878 	struct ttm_operation_ctx ctx = {
879 		.interruptible = false,
880 	};
881 	struct ttm_resource *new_mem;
882 	int ret;
883 
884 	xe_bo_assert_held(bo);
885 
886 	if (WARN_ON(!bo->ttm.resource))
887 		return -EINVAL;
888 
889 	if (WARN_ON(!xe_bo_is_pinned(bo)))
890 		return -EINVAL;
891 
892 	if (WARN_ON(xe_bo_is_vram(bo) || !bo->ttm.ttm))
893 		return -EINVAL;
894 
895 	ret = ttm_bo_mem_space(&bo->ttm, &bo->placement, &new_mem, &ctx);
896 	if (ret)
897 		return ret;
898 
899 	ret = ttm_tt_populate(bo->ttm.bdev, bo->ttm.ttm, &ctx);
900 	if (ret)
901 		goto err_res_free;
902 
903 	ret = dma_resv_reserve_fences(bo->ttm.base.resv, 1);
904 	if (ret)
905 		goto err_res_free;
906 
907 	ret = xe_bo_move(&bo->ttm, false, &ctx, new_mem, NULL);
908 	if (ret)
909 		goto err_res_free;
910 
911 	dma_resv_wait_timeout(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL,
912 			      false, MAX_SCHEDULE_TIMEOUT);
913 
914 	return 0;
915 
916 err_res_free:
917 	ttm_resource_free(&bo->ttm, &new_mem);
918 	return ret;
919 }
920 
921 static unsigned long xe_ttm_io_mem_pfn(struct ttm_buffer_object *ttm_bo,
922 				       unsigned long page_offset)
923 {
924 	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
925 	struct xe_res_cursor cursor;
926 	struct xe_mem_region *vram;
927 
928 	if (ttm_bo->resource->mem_type == XE_PL_STOLEN)
929 		return xe_ttm_stolen_io_offset(bo, page_offset << PAGE_SHIFT) >> PAGE_SHIFT;
930 
931 	vram = res_to_mem_region(ttm_bo->resource);
932 	xe_res_first(ttm_bo->resource, (u64)page_offset << PAGE_SHIFT, 0, &cursor);
933 	return (vram->io_start + cursor.start) >> PAGE_SHIFT;
934 }
935 
936 static void __xe_bo_vunmap(struct xe_bo *bo);
937 
938 /*
939  * TODO: Move this function to TTM so we don't rely on how TTM does its
940  * locking, thereby abusing TTM internals.
941  */
942 static bool xe_ttm_bo_lock_in_destructor(struct ttm_buffer_object *ttm_bo)
943 {
944 	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
945 	bool locked;
946 
947 	xe_assert(xe, !kref_read(&ttm_bo->kref));
948 
949 	/*
950 	 * We can typically only race with TTM trylocking under the
951 	 * lru_lock, which will immediately be unlocked again since
952 	 * the ttm_bo refcount is zero at this point. So trylocking *should*
953 	 * always succeed here, as long as we hold the lru lock.
954 	 */
955 	spin_lock(&ttm_bo->bdev->lru_lock);
956 	locked = dma_resv_trylock(ttm_bo->base.resv);
957 	spin_unlock(&ttm_bo->bdev->lru_lock);
958 	xe_assert(xe, locked);
959 
960 	return locked;
961 }
962 
963 static void xe_ttm_bo_release_notify(struct ttm_buffer_object *ttm_bo)
964 {
965 	struct dma_resv_iter cursor;
966 	struct dma_fence *fence;
967 	struct dma_fence *replacement = NULL;
968 	struct xe_bo *bo;
969 
970 	if (!xe_bo_is_xe_bo(ttm_bo))
971 		return;
972 
973 	bo = ttm_to_xe_bo(ttm_bo);
974 	xe_assert(xe_bo_device(bo), !(bo->created && kref_read(&ttm_bo->base.refcount)));
975 
976 	/*
977 	 * Corner case where TTM fails to allocate memory and this BOs resv
978 	 * still points the VMs resv
979 	 */
980 	if (ttm_bo->base.resv != &ttm_bo->base._resv)
981 		return;
982 
983 	if (!xe_ttm_bo_lock_in_destructor(ttm_bo))
984 		return;
985 
986 	/*
987 	 * Scrub the preempt fences if any. The unbind fence is already
988 	 * attached to the resv.
989 	 * TODO: Don't do this for external bos once we scrub them after
990 	 * unbind.
991 	 */
992 	dma_resv_for_each_fence(&cursor, ttm_bo->base.resv,
993 				DMA_RESV_USAGE_BOOKKEEP, fence) {
994 		if (xe_fence_is_xe_preempt(fence) &&
995 		    !dma_fence_is_signaled(fence)) {
996 			if (!replacement)
997 				replacement = dma_fence_get_stub();
998 
999 			dma_resv_replace_fences(ttm_bo->base.resv,
1000 						fence->context,
1001 						replacement,
1002 						DMA_RESV_USAGE_BOOKKEEP);
1003 		}
1004 	}
1005 	dma_fence_put(replacement);
1006 
1007 	dma_resv_unlock(ttm_bo->base.resv);
1008 }
1009 
1010 static void xe_ttm_bo_delete_mem_notify(struct ttm_buffer_object *ttm_bo)
1011 {
1012 	if (!xe_bo_is_xe_bo(ttm_bo))
1013 		return;
1014 
1015 	/*
1016 	 * Object is idle and about to be destroyed. Release the
1017 	 * dma-buf attachment.
1018 	 */
1019 	if (ttm_bo->type == ttm_bo_type_sg && ttm_bo->sg) {
1020 		struct xe_ttm_tt *xe_tt = container_of(ttm_bo->ttm,
1021 						       struct xe_ttm_tt, ttm);
1022 
1023 		dma_buf_unmap_attachment(ttm_bo->base.import_attach, ttm_bo->sg,
1024 					 DMA_BIDIRECTIONAL);
1025 		ttm_bo->sg = NULL;
1026 		xe_tt->sg = NULL;
1027 	}
1028 }
1029 
1030 const struct ttm_device_funcs xe_ttm_funcs = {
1031 	.ttm_tt_create = xe_ttm_tt_create,
1032 	.ttm_tt_populate = xe_ttm_tt_populate,
1033 	.ttm_tt_unpopulate = xe_ttm_tt_unpopulate,
1034 	.ttm_tt_destroy = xe_ttm_tt_destroy,
1035 	.evict_flags = xe_evict_flags,
1036 	.move = xe_bo_move,
1037 	.io_mem_reserve = xe_ttm_io_mem_reserve,
1038 	.io_mem_pfn = xe_ttm_io_mem_pfn,
1039 	.release_notify = xe_ttm_bo_release_notify,
1040 	.eviction_valuable = ttm_bo_eviction_valuable,
1041 	.delete_mem_notify = xe_ttm_bo_delete_mem_notify,
1042 };
1043 
1044 static void xe_ttm_bo_destroy(struct ttm_buffer_object *ttm_bo)
1045 {
1046 	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
1047 	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
1048 
1049 	if (bo->ttm.base.import_attach)
1050 		drm_prime_gem_destroy(&bo->ttm.base, NULL);
1051 	drm_gem_object_release(&bo->ttm.base);
1052 
1053 	xe_assert(xe, list_empty(&ttm_bo->base.gpuva.list));
1054 
1055 	if (bo->ggtt_node.size)
1056 		xe_ggtt_remove_bo(bo->tile->mem.ggtt, bo);
1057 
1058 #ifdef CONFIG_PROC_FS
1059 	if (bo->client)
1060 		xe_drm_client_remove_bo(bo);
1061 #endif
1062 
1063 	if (bo->vm && xe_bo_is_user(bo))
1064 		xe_vm_put(bo->vm);
1065 
1066 	mutex_lock(&xe->mem_access.vram_userfault.lock);
1067 	if (!list_empty(&bo->vram_userfault_link))
1068 		list_del(&bo->vram_userfault_link);
1069 	mutex_unlock(&xe->mem_access.vram_userfault.lock);
1070 
1071 	kfree(bo);
1072 }
1073 
1074 static void xe_gem_object_free(struct drm_gem_object *obj)
1075 {
1076 	/* Our BO reference counting scheme works as follows:
1077 	 *
1078 	 * The gem object kref is typically used throughout the driver,
1079 	 * and the gem object holds a ttm_buffer_object refcount, so
1080 	 * that when the last gem object reference is put, which is when
1081 	 * we end up in this function, we put also that ttm_buffer_object
1082 	 * refcount. Anything using gem interfaces is then no longer
1083 	 * allowed to access the object in a way that requires a gem
1084 	 * refcount, including locking the object.
1085 	 *
1086 	 * driver ttm callbacks is allowed to use the ttm_buffer_object
1087 	 * refcount directly if needed.
1088 	 */
1089 	__xe_bo_vunmap(gem_to_xe_bo(obj));
1090 	ttm_bo_put(container_of(obj, struct ttm_buffer_object, base));
1091 }
1092 
1093 static void xe_gem_object_close(struct drm_gem_object *obj,
1094 				struct drm_file *file_priv)
1095 {
1096 	struct xe_bo *bo = gem_to_xe_bo(obj);
1097 
1098 	if (bo->vm && !xe_vm_in_fault_mode(bo->vm)) {
1099 		xe_assert(xe_bo_device(bo), xe_bo_is_user(bo));
1100 
1101 		xe_bo_lock(bo, false);
1102 		ttm_bo_set_bulk_move(&bo->ttm, NULL);
1103 		xe_bo_unlock(bo);
1104 	}
1105 }
1106 
1107 static vm_fault_t xe_gem_fault(struct vm_fault *vmf)
1108 {
1109 	struct ttm_buffer_object *tbo = vmf->vma->vm_private_data;
1110 	struct drm_device *ddev = tbo->base.dev;
1111 	struct xe_device *xe = to_xe_device(ddev);
1112 	struct xe_bo *bo = ttm_to_xe_bo(tbo);
1113 	bool needs_rpm = bo->flags & XE_BO_CREATE_VRAM_MASK;
1114 	vm_fault_t ret;
1115 	int idx;
1116 
1117 	if (needs_rpm)
1118 		xe_device_mem_access_get(xe);
1119 
1120 	ret = ttm_bo_vm_reserve(tbo, vmf);
1121 	if (ret)
1122 		goto out;
1123 
1124 	if (drm_dev_enter(ddev, &idx)) {
1125 		trace_xe_bo_cpu_fault(bo);
1126 
1127 		ret = ttm_bo_vm_fault_reserved(vmf, vmf->vma->vm_page_prot,
1128 					       TTM_BO_VM_NUM_PREFAULT);
1129 		drm_dev_exit(idx);
1130 	} else {
1131 		ret = ttm_bo_vm_dummy_page(vmf, vmf->vma->vm_page_prot);
1132 	}
1133 
1134 	if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
1135 		goto out;
1136 	/*
1137 	 * ttm_bo_vm_reserve() already has dma_resv_lock.
1138 	 */
1139 	if (ret == VM_FAULT_NOPAGE && mem_type_is_vram(tbo->resource->mem_type)) {
1140 		mutex_lock(&xe->mem_access.vram_userfault.lock);
1141 		if (list_empty(&bo->vram_userfault_link))
1142 			list_add(&bo->vram_userfault_link, &xe->mem_access.vram_userfault.list);
1143 		mutex_unlock(&xe->mem_access.vram_userfault.lock);
1144 	}
1145 
1146 	dma_resv_unlock(tbo->base.resv);
1147 out:
1148 	if (needs_rpm)
1149 		xe_device_mem_access_put(xe);
1150 
1151 	return ret;
1152 }
1153 
1154 static const struct vm_operations_struct xe_gem_vm_ops = {
1155 	.fault = xe_gem_fault,
1156 	.open = ttm_bo_vm_open,
1157 	.close = ttm_bo_vm_close,
1158 	.access = ttm_bo_vm_access
1159 };
1160 
1161 static const struct drm_gem_object_funcs xe_gem_object_funcs = {
1162 	.free = xe_gem_object_free,
1163 	.close = xe_gem_object_close,
1164 	.mmap = drm_gem_ttm_mmap,
1165 	.export = xe_gem_prime_export,
1166 	.vm_ops = &xe_gem_vm_ops,
1167 };
1168 
1169 /**
1170  * xe_bo_alloc - Allocate storage for a struct xe_bo
1171  *
1172  * This funcition is intended to allocate storage to be used for input
1173  * to __xe_bo_create_locked(), in the case a pointer to the bo to be
1174  * created is needed before the call to __xe_bo_create_locked().
1175  * If __xe_bo_create_locked ends up never to be called, then the
1176  * storage allocated with this function needs to be freed using
1177  * xe_bo_free().
1178  *
1179  * Return: A pointer to an uninitialized struct xe_bo on success,
1180  * ERR_PTR(-ENOMEM) on error.
1181  */
1182 struct xe_bo *xe_bo_alloc(void)
1183 {
1184 	struct xe_bo *bo = kzalloc(sizeof(*bo), GFP_KERNEL);
1185 
1186 	if (!bo)
1187 		return ERR_PTR(-ENOMEM);
1188 
1189 	return bo;
1190 }
1191 
1192 /**
1193  * xe_bo_free - Free storage allocated using xe_bo_alloc()
1194  * @bo: The buffer object storage.
1195  *
1196  * Refer to xe_bo_alloc() documentation for valid use-cases.
1197  */
1198 void xe_bo_free(struct xe_bo *bo)
1199 {
1200 	kfree(bo);
1201 }
1202 
1203 struct xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
1204 				     struct xe_tile *tile, struct dma_resv *resv,
1205 				     struct ttm_lru_bulk_move *bulk, size_t size,
1206 				     u16 cpu_caching, enum ttm_bo_type type,
1207 				     u32 flags)
1208 {
1209 	struct ttm_operation_ctx ctx = {
1210 		.interruptible = true,
1211 		.no_wait_gpu = false,
1212 	};
1213 	struct ttm_placement *placement;
1214 	uint32_t alignment;
1215 	size_t aligned_size;
1216 	int err;
1217 
1218 	/* Only kernel objects should set GT */
1219 	xe_assert(xe, !tile || type == ttm_bo_type_kernel);
1220 
1221 	if (XE_WARN_ON(!size)) {
1222 		xe_bo_free(bo);
1223 		return ERR_PTR(-EINVAL);
1224 	}
1225 
1226 	if (flags & (XE_BO_CREATE_VRAM_MASK | XE_BO_CREATE_STOLEN_BIT) &&
1227 	    !(flags & XE_BO_CREATE_IGNORE_MIN_PAGE_SIZE_BIT) &&
1228 	    xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) {
1229 		aligned_size = ALIGN(size, SZ_64K);
1230 		if (type != ttm_bo_type_device)
1231 			size = ALIGN(size, SZ_64K);
1232 		flags |= XE_BO_INTERNAL_64K;
1233 		alignment = SZ_64K >> PAGE_SHIFT;
1234 
1235 	} else {
1236 		aligned_size = ALIGN(size, SZ_4K);
1237 		flags &= ~XE_BO_INTERNAL_64K;
1238 		alignment = SZ_4K >> PAGE_SHIFT;
1239 	}
1240 
1241 	if (type == ttm_bo_type_device && aligned_size != size)
1242 		return ERR_PTR(-EINVAL);
1243 
1244 	if (!bo) {
1245 		bo = xe_bo_alloc();
1246 		if (IS_ERR(bo))
1247 			return bo;
1248 	}
1249 
1250 	bo->ccs_cleared = false;
1251 	bo->tile = tile;
1252 	bo->size = size;
1253 	bo->flags = flags;
1254 	bo->cpu_caching = cpu_caching;
1255 	bo->ttm.base.funcs = &xe_gem_object_funcs;
1256 	bo->ttm.priority = XE_BO_PRIORITY_NORMAL;
1257 	INIT_LIST_HEAD(&bo->pinned_link);
1258 #ifdef CONFIG_PROC_FS
1259 	INIT_LIST_HEAD(&bo->client_link);
1260 #endif
1261 	INIT_LIST_HEAD(&bo->vram_userfault_link);
1262 
1263 	drm_gem_private_object_init(&xe->drm, &bo->ttm.base, size);
1264 
1265 	if (resv) {
1266 		ctx.allow_res_evict = !(flags & XE_BO_CREATE_NO_RESV_EVICT);
1267 		ctx.resv = resv;
1268 	}
1269 
1270 	if (!(flags & XE_BO_FIXED_PLACEMENT_BIT)) {
1271 		err = __xe_bo_placement_for_flags(xe, bo, bo->flags);
1272 		if (WARN_ON(err)) {
1273 			xe_ttm_bo_destroy(&bo->ttm);
1274 			return ERR_PTR(err);
1275 		}
1276 	}
1277 
1278 	/* Defer populating type_sg bos */
1279 	placement = (type == ttm_bo_type_sg ||
1280 		     bo->flags & XE_BO_DEFER_BACKING) ? &sys_placement :
1281 		&bo->placement;
1282 	err = ttm_bo_init_reserved(&xe->ttm, &bo->ttm, type,
1283 				   placement, alignment,
1284 				   &ctx, NULL, resv, xe_ttm_bo_destroy);
1285 	if (err)
1286 		return ERR_PTR(err);
1287 
1288 	/*
1289 	 * The VRAM pages underneath are potentially still being accessed by the
1290 	 * GPU, as per async GPU clearing and async evictions. However TTM makes
1291 	 * sure to add any corresponding move/clear fences into the objects
1292 	 * dma-resv using the DMA_RESV_USAGE_KERNEL slot.
1293 	 *
1294 	 * For KMD internal buffers we don't care about GPU clearing, however we
1295 	 * still need to handle async evictions, where the VRAM is still being
1296 	 * accessed by the GPU. Most internal callers are not expecting this,
1297 	 * since they are missing the required synchronisation before accessing
1298 	 * the memory. To keep things simple just sync wait any kernel fences
1299 	 * here, if the buffer is designated KMD internal.
1300 	 *
1301 	 * For normal userspace objects we should already have the required
1302 	 * pipelining or sync waiting elsewhere, since we already have to deal
1303 	 * with things like async GPU clearing.
1304 	 */
1305 	if (type == ttm_bo_type_kernel) {
1306 		long timeout = dma_resv_wait_timeout(bo->ttm.base.resv,
1307 						     DMA_RESV_USAGE_KERNEL,
1308 						     ctx.interruptible,
1309 						     MAX_SCHEDULE_TIMEOUT);
1310 
1311 		if (timeout < 0) {
1312 			if (!resv)
1313 				dma_resv_unlock(bo->ttm.base.resv);
1314 			xe_bo_put(bo);
1315 			return ERR_PTR(timeout);
1316 		}
1317 	}
1318 
1319 	bo->created = true;
1320 	if (bulk)
1321 		ttm_bo_set_bulk_move(&bo->ttm, bulk);
1322 	else
1323 		ttm_bo_move_to_lru_tail_unlocked(&bo->ttm);
1324 
1325 	return bo;
1326 }
1327 
1328 static int __xe_bo_fixed_placement(struct xe_device *xe,
1329 				   struct xe_bo *bo,
1330 				   u32 flags,
1331 				   u64 start, u64 end, u64 size)
1332 {
1333 	struct ttm_place *place = bo->placements;
1334 
1335 	if (flags & (XE_BO_CREATE_USER_BIT|XE_BO_CREATE_SYSTEM_BIT))
1336 		return -EINVAL;
1337 
1338 	place->flags = TTM_PL_FLAG_CONTIGUOUS;
1339 	place->fpfn = start >> PAGE_SHIFT;
1340 	place->lpfn = end >> PAGE_SHIFT;
1341 
1342 	switch (flags & (XE_BO_CREATE_STOLEN_BIT | XE_BO_CREATE_VRAM_MASK)) {
1343 	case XE_BO_CREATE_VRAM0_BIT:
1344 		place->mem_type = XE_PL_VRAM0;
1345 		break;
1346 	case XE_BO_CREATE_VRAM1_BIT:
1347 		place->mem_type = XE_PL_VRAM1;
1348 		break;
1349 	case XE_BO_CREATE_STOLEN_BIT:
1350 		place->mem_type = XE_PL_STOLEN;
1351 		break;
1352 
1353 	default:
1354 		/* 0 or multiple of the above set */
1355 		return -EINVAL;
1356 	}
1357 
1358 	bo->placement = (struct ttm_placement) {
1359 		.num_placement = 1,
1360 		.placement = place,
1361 	};
1362 
1363 	return 0;
1364 }
1365 
1366 static struct xe_bo *
1367 __xe_bo_create_locked(struct xe_device *xe,
1368 		      struct xe_tile *tile, struct xe_vm *vm,
1369 		      size_t size, u64 start, u64 end,
1370 		      u16 cpu_caching, enum ttm_bo_type type, u32 flags)
1371 {
1372 	struct xe_bo *bo = NULL;
1373 	int err;
1374 
1375 	if (vm)
1376 		xe_vm_assert_held(vm);
1377 
1378 	if (start || end != ~0ULL) {
1379 		bo = xe_bo_alloc();
1380 		if (IS_ERR(bo))
1381 			return bo;
1382 
1383 		flags |= XE_BO_FIXED_PLACEMENT_BIT;
1384 		err = __xe_bo_fixed_placement(xe, bo, flags, start, end, size);
1385 		if (err) {
1386 			xe_bo_free(bo);
1387 			return ERR_PTR(err);
1388 		}
1389 	}
1390 
1391 	bo = ___xe_bo_create_locked(xe, bo, tile, vm ? xe_vm_resv(vm) : NULL,
1392 				    vm && !xe_vm_in_fault_mode(vm) &&
1393 				    flags & XE_BO_CREATE_USER_BIT ?
1394 				    &vm->lru_bulk_move : NULL, size,
1395 				    cpu_caching, type, flags);
1396 	if (IS_ERR(bo))
1397 		return bo;
1398 
1399 	/*
1400 	 * Note that instead of taking a reference no the drm_gpuvm_resv_bo(),
1401 	 * to ensure the shared resv doesn't disappear under the bo, the bo
1402 	 * will keep a reference to the vm, and avoid circular references
1403 	 * by having all the vm's bo refereferences released at vm close
1404 	 * time.
1405 	 */
1406 	if (vm && xe_bo_is_user(bo))
1407 		xe_vm_get(vm);
1408 	bo->vm = vm;
1409 
1410 	if (bo->flags & XE_BO_CREATE_GGTT_BIT) {
1411 		if (!tile && flags & XE_BO_CREATE_STOLEN_BIT)
1412 			tile = xe_device_get_root_tile(xe);
1413 
1414 		xe_assert(xe, tile);
1415 
1416 		if (flags & XE_BO_FIXED_PLACEMENT_BIT) {
1417 			err = xe_ggtt_insert_bo_at(tile->mem.ggtt, bo,
1418 						   start + bo->size, U64_MAX);
1419 		} else {
1420 			err = xe_ggtt_insert_bo(tile->mem.ggtt, bo);
1421 		}
1422 		if (err)
1423 			goto err_unlock_put_bo;
1424 	}
1425 
1426 	return bo;
1427 
1428 err_unlock_put_bo:
1429 	__xe_bo_unset_bulk_move(bo);
1430 	xe_bo_unlock_vm_held(bo);
1431 	xe_bo_put(bo);
1432 	return ERR_PTR(err);
1433 }
1434 
1435 struct xe_bo *
1436 xe_bo_create_locked_range(struct xe_device *xe,
1437 			  struct xe_tile *tile, struct xe_vm *vm,
1438 			  size_t size, u64 start, u64 end,
1439 			  enum ttm_bo_type type, u32 flags)
1440 {
1441 	return __xe_bo_create_locked(xe, tile, vm, size, start, end, 0, type, flags);
1442 }
1443 
1444 struct xe_bo *xe_bo_create_locked(struct xe_device *xe, struct xe_tile *tile,
1445 				  struct xe_vm *vm, size_t size,
1446 				  enum ttm_bo_type type, u32 flags)
1447 {
1448 	return __xe_bo_create_locked(xe, tile, vm, size, 0, ~0ULL, 0, type, flags);
1449 }
1450 
1451 struct xe_bo *xe_bo_create_user(struct xe_device *xe, struct xe_tile *tile,
1452 				struct xe_vm *vm, size_t size,
1453 				u16 cpu_caching,
1454 				enum ttm_bo_type type,
1455 				u32 flags)
1456 {
1457 	struct xe_bo *bo = __xe_bo_create_locked(xe, tile, vm, size, 0, ~0ULL,
1458 						 cpu_caching, type,
1459 						 flags | XE_BO_CREATE_USER_BIT);
1460 	if (!IS_ERR(bo))
1461 		xe_bo_unlock_vm_held(bo);
1462 
1463 	return bo;
1464 }
1465 
1466 struct xe_bo *xe_bo_create(struct xe_device *xe, struct xe_tile *tile,
1467 			   struct xe_vm *vm, size_t size,
1468 			   enum ttm_bo_type type, u32 flags)
1469 {
1470 	struct xe_bo *bo = xe_bo_create_locked(xe, tile, vm, size, type, flags);
1471 
1472 	if (!IS_ERR(bo))
1473 		xe_bo_unlock_vm_held(bo);
1474 
1475 	return bo;
1476 }
1477 
1478 struct xe_bo *xe_bo_create_pin_map_at(struct xe_device *xe, struct xe_tile *tile,
1479 				      struct xe_vm *vm,
1480 				      size_t size, u64 offset,
1481 				      enum ttm_bo_type type, u32 flags)
1482 {
1483 	struct xe_bo *bo;
1484 	int err;
1485 	u64 start = offset == ~0ull ? 0 : offset;
1486 	u64 end = offset == ~0ull ? offset : start + size;
1487 
1488 	if (flags & XE_BO_CREATE_STOLEN_BIT &&
1489 	    xe_ttm_stolen_cpu_access_needs_ggtt(xe))
1490 		flags |= XE_BO_CREATE_GGTT_BIT;
1491 
1492 	bo = xe_bo_create_locked_range(xe, tile, vm, size, start, end, type,
1493 				       flags | XE_BO_NEEDS_CPU_ACCESS);
1494 	if (IS_ERR(bo))
1495 		return bo;
1496 
1497 	err = xe_bo_pin(bo);
1498 	if (err)
1499 		goto err_put;
1500 
1501 	err = xe_bo_vmap(bo);
1502 	if (err)
1503 		goto err_unpin;
1504 
1505 	xe_bo_unlock_vm_held(bo);
1506 
1507 	return bo;
1508 
1509 err_unpin:
1510 	xe_bo_unpin(bo);
1511 err_put:
1512 	xe_bo_unlock_vm_held(bo);
1513 	xe_bo_put(bo);
1514 	return ERR_PTR(err);
1515 }
1516 
1517 struct xe_bo *xe_bo_create_pin_map(struct xe_device *xe, struct xe_tile *tile,
1518 				   struct xe_vm *vm, size_t size,
1519 				   enum ttm_bo_type type, u32 flags)
1520 {
1521 	return xe_bo_create_pin_map_at(xe, tile, vm, size, ~0ull, type, flags);
1522 }
1523 
1524 struct xe_bo *xe_bo_create_from_data(struct xe_device *xe, struct xe_tile *tile,
1525 				     const void *data, size_t size,
1526 				     enum ttm_bo_type type, u32 flags)
1527 {
1528 	struct xe_bo *bo = xe_bo_create_pin_map(xe, tile, NULL,
1529 						ALIGN(size, PAGE_SIZE),
1530 						type, flags);
1531 	if (IS_ERR(bo))
1532 		return bo;
1533 
1534 	xe_map_memcpy_to(xe, &bo->vmap, 0, data, size);
1535 
1536 	return bo;
1537 }
1538 
1539 static void __xe_bo_unpin_map_no_vm(struct drm_device *drm, void *arg)
1540 {
1541 	xe_bo_unpin_map_no_vm(arg);
1542 }
1543 
1544 struct xe_bo *xe_managed_bo_create_pin_map(struct xe_device *xe, struct xe_tile *tile,
1545 					   size_t size, u32 flags)
1546 {
1547 	struct xe_bo *bo;
1548 	int ret;
1549 
1550 	bo = xe_bo_create_pin_map(xe, tile, NULL, size, ttm_bo_type_kernel, flags);
1551 	if (IS_ERR(bo))
1552 		return bo;
1553 
1554 	ret = drmm_add_action_or_reset(&xe->drm, __xe_bo_unpin_map_no_vm, bo);
1555 	if (ret)
1556 		return ERR_PTR(ret);
1557 
1558 	return bo;
1559 }
1560 
1561 struct xe_bo *xe_managed_bo_create_from_data(struct xe_device *xe, struct xe_tile *tile,
1562 					     const void *data, size_t size, u32 flags)
1563 {
1564 	struct xe_bo *bo = xe_managed_bo_create_pin_map(xe, tile, ALIGN(size, PAGE_SIZE), flags);
1565 
1566 	if (IS_ERR(bo))
1567 		return bo;
1568 
1569 	xe_map_memcpy_to(xe, &bo->vmap, 0, data, size);
1570 
1571 	return bo;
1572 }
1573 
1574 /**
1575  * xe_managed_bo_reinit_in_vram
1576  * @xe: xe device
1577  * @tile: Tile where the new buffer will be created
1578  * @src: Managed buffer object allocated in system memory
1579  *
1580  * Replace a managed src buffer object allocated in system memory with a new
1581  * one allocated in vram, copying the data between them.
1582  * Buffer object in VRAM is not going to have the same GGTT address, the caller
1583  * is responsible for making sure that any old references to it are updated.
1584  *
1585  * Returns 0 for success, negative error code otherwise.
1586  */
1587 int xe_managed_bo_reinit_in_vram(struct xe_device *xe, struct xe_tile *tile, struct xe_bo **src)
1588 {
1589 	struct xe_bo *bo;
1590 
1591 	xe_assert(xe, IS_DGFX(xe));
1592 	xe_assert(xe, !(*src)->vmap.is_iomem);
1593 
1594 	bo = xe_managed_bo_create_from_data(xe, tile, (*src)->vmap.vaddr, (*src)->size,
1595 					    XE_BO_CREATE_VRAM_IF_DGFX(tile) |
1596 					    XE_BO_CREATE_GGTT_BIT);
1597 	if (IS_ERR(bo))
1598 		return PTR_ERR(bo);
1599 
1600 	drmm_release_action(&xe->drm, __xe_bo_unpin_map_no_vm, *src);
1601 	*src = bo;
1602 
1603 	return 0;
1604 }
1605 
1606 /*
1607  * XXX: This is in the VM bind data path, likely should calculate this once and
1608  * store, with a recalculation if the BO is moved.
1609  */
1610 uint64_t vram_region_gpu_offset(struct ttm_resource *res)
1611 {
1612 	struct xe_device *xe = ttm_to_xe_device(res->bo->bdev);
1613 
1614 	if (res->mem_type == XE_PL_STOLEN)
1615 		return xe_ttm_stolen_gpu_offset(xe);
1616 
1617 	return res_to_mem_region(res)->dpa_base;
1618 }
1619 
1620 /**
1621  * xe_bo_pin_external - pin an external BO
1622  * @bo: buffer object to be pinned
1623  *
1624  * Pin an external (not tied to a VM, can be exported via dma-buf / prime FD)
1625  * BO. Unique call compared to xe_bo_pin as this function has it own set of
1626  * asserts and code to ensure evict / restore on suspend / resume.
1627  *
1628  * Returns 0 for success, negative error code otherwise.
1629  */
1630 int xe_bo_pin_external(struct xe_bo *bo)
1631 {
1632 	struct xe_device *xe = xe_bo_device(bo);
1633 	int err;
1634 
1635 	xe_assert(xe, !bo->vm);
1636 	xe_assert(xe, xe_bo_is_user(bo));
1637 
1638 	if (!xe_bo_is_pinned(bo)) {
1639 		err = xe_bo_validate(bo, NULL, false);
1640 		if (err)
1641 			return err;
1642 
1643 		if (xe_bo_is_vram(bo)) {
1644 			spin_lock(&xe->pinned.lock);
1645 			list_add_tail(&bo->pinned_link,
1646 				      &xe->pinned.external_vram);
1647 			spin_unlock(&xe->pinned.lock);
1648 		}
1649 	}
1650 
1651 	ttm_bo_pin(&bo->ttm);
1652 
1653 	/*
1654 	 * FIXME: If we always use the reserve / unreserve functions for locking
1655 	 * we do not need this.
1656 	 */
1657 	ttm_bo_move_to_lru_tail_unlocked(&bo->ttm);
1658 
1659 	return 0;
1660 }
1661 
1662 int xe_bo_pin(struct xe_bo *bo)
1663 {
1664 	struct xe_device *xe = xe_bo_device(bo);
1665 	int err;
1666 
1667 	/* We currently don't expect user BO to be pinned */
1668 	xe_assert(xe, !xe_bo_is_user(bo));
1669 
1670 	/* Pinned object must be in GGTT or have pinned flag */
1671 	xe_assert(xe, bo->flags & (XE_BO_CREATE_PINNED_BIT |
1672 				   XE_BO_CREATE_GGTT_BIT));
1673 
1674 	/*
1675 	 * No reason we can't support pinning imported dma-bufs we just don't
1676 	 * expect to pin an imported dma-buf.
1677 	 */
1678 	xe_assert(xe, !bo->ttm.base.import_attach);
1679 
1680 	/* We only expect at most 1 pin */
1681 	xe_assert(xe, !xe_bo_is_pinned(bo));
1682 
1683 	err = xe_bo_validate(bo, NULL, false);
1684 	if (err)
1685 		return err;
1686 
1687 	/*
1688 	 * For pinned objects in on DGFX, which are also in vram, we expect
1689 	 * these to be in contiguous VRAM memory. Required eviction / restore
1690 	 * during suspend / resume (force restore to same physical address).
1691 	 */
1692 	if (IS_DGFX(xe) && !(IS_ENABLED(CONFIG_DRM_XE_DEBUG) &&
1693 	    bo->flags & XE_BO_INTERNAL_TEST)) {
1694 		struct ttm_place *place = &(bo->placements[0]);
1695 
1696 		if (mem_type_is_vram(place->mem_type)) {
1697 			xe_assert(xe, place->flags & TTM_PL_FLAG_CONTIGUOUS);
1698 
1699 			place->fpfn = (xe_bo_addr(bo, 0, PAGE_SIZE) -
1700 				       vram_region_gpu_offset(bo->ttm.resource)) >> PAGE_SHIFT;
1701 			place->lpfn = place->fpfn + (bo->size >> PAGE_SHIFT);
1702 
1703 			spin_lock(&xe->pinned.lock);
1704 			list_add_tail(&bo->pinned_link, &xe->pinned.kernel_bo_present);
1705 			spin_unlock(&xe->pinned.lock);
1706 		}
1707 	}
1708 
1709 	ttm_bo_pin(&bo->ttm);
1710 
1711 	/*
1712 	 * FIXME: If we always use the reserve / unreserve functions for locking
1713 	 * we do not need this.
1714 	 */
1715 	ttm_bo_move_to_lru_tail_unlocked(&bo->ttm);
1716 
1717 	return 0;
1718 }
1719 
1720 /**
1721  * xe_bo_unpin_external - unpin an external BO
1722  * @bo: buffer object to be unpinned
1723  *
1724  * Unpin an external (not tied to a VM, can be exported via dma-buf / prime FD)
1725  * BO. Unique call compared to xe_bo_unpin as this function has it own set of
1726  * asserts and code to ensure evict / restore on suspend / resume.
1727  *
1728  * Returns 0 for success, negative error code otherwise.
1729  */
1730 void xe_bo_unpin_external(struct xe_bo *bo)
1731 {
1732 	struct xe_device *xe = xe_bo_device(bo);
1733 
1734 	xe_assert(xe, !bo->vm);
1735 	xe_assert(xe, xe_bo_is_pinned(bo));
1736 	xe_assert(xe, xe_bo_is_user(bo));
1737 
1738 	if (bo->ttm.pin_count == 1 && !list_empty(&bo->pinned_link)) {
1739 		spin_lock(&xe->pinned.lock);
1740 		list_del_init(&bo->pinned_link);
1741 		spin_unlock(&xe->pinned.lock);
1742 	}
1743 
1744 	ttm_bo_unpin(&bo->ttm);
1745 
1746 	/*
1747 	 * FIXME: If we always use the reserve / unreserve functions for locking
1748 	 * we do not need this.
1749 	 */
1750 	ttm_bo_move_to_lru_tail_unlocked(&bo->ttm);
1751 }
1752 
1753 void xe_bo_unpin(struct xe_bo *bo)
1754 {
1755 	struct xe_device *xe = xe_bo_device(bo);
1756 
1757 	xe_assert(xe, !bo->ttm.base.import_attach);
1758 	xe_assert(xe, xe_bo_is_pinned(bo));
1759 
1760 	if (IS_DGFX(xe) && !(IS_ENABLED(CONFIG_DRM_XE_DEBUG) &&
1761 	    bo->flags & XE_BO_INTERNAL_TEST)) {
1762 		struct ttm_place *place = &(bo->placements[0]);
1763 
1764 		if (mem_type_is_vram(place->mem_type)) {
1765 			xe_assert(xe, !list_empty(&bo->pinned_link));
1766 
1767 			spin_lock(&xe->pinned.lock);
1768 			list_del_init(&bo->pinned_link);
1769 			spin_unlock(&xe->pinned.lock);
1770 		}
1771 	}
1772 
1773 	ttm_bo_unpin(&bo->ttm);
1774 }
1775 
1776 /**
1777  * xe_bo_validate() - Make sure the bo is in an allowed placement
1778  * @bo: The bo,
1779  * @vm: Pointer to a the vm the bo shares a locked dma_resv object with, or
1780  *      NULL. Used together with @allow_res_evict.
1781  * @allow_res_evict: Whether it's allowed to evict bos sharing @vm's
1782  *                   reservation object.
1783  *
1784  * Make sure the bo is in allowed placement, migrating it if necessary. If
1785  * needed, other bos will be evicted. If bos selected for eviction shares
1786  * the @vm's reservation object, they can be evicted iff @allow_res_evict is
1787  * set to true, otherwise they will be bypassed.
1788  *
1789  * Return: 0 on success, negative error code on failure. May return
1790  * -EINTR or -ERESTARTSYS if internal waits are interrupted by a signal.
1791  */
1792 int xe_bo_validate(struct xe_bo *bo, struct xe_vm *vm, bool allow_res_evict)
1793 {
1794 	struct ttm_operation_ctx ctx = {
1795 		.interruptible = true,
1796 		.no_wait_gpu = false,
1797 	};
1798 
1799 	if (vm) {
1800 		lockdep_assert_held(&vm->lock);
1801 		xe_vm_assert_held(vm);
1802 
1803 		ctx.allow_res_evict = allow_res_evict;
1804 		ctx.resv = xe_vm_resv(vm);
1805 	}
1806 
1807 	return ttm_bo_validate(&bo->ttm, &bo->placement, &ctx);
1808 }
1809 
1810 bool xe_bo_is_xe_bo(struct ttm_buffer_object *bo)
1811 {
1812 	if (bo->destroy == &xe_ttm_bo_destroy)
1813 		return true;
1814 
1815 	return false;
1816 }
1817 
1818 /*
1819  * Resolve a BO address. There is no assert to check if the proper lock is held
1820  * so it should only be used in cases where it is not fatal to get the wrong
1821  * address, such as printing debug information, but not in cases where memory is
1822  * written based on this result.
1823  */
1824 dma_addr_t __xe_bo_addr(struct xe_bo *bo, u64 offset, size_t page_size)
1825 {
1826 	struct xe_device *xe = xe_bo_device(bo);
1827 	struct xe_res_cursor cur;
1828 	u64 page;
1829 
1830 	xe_assert(xe, page_size <= PAGE_SIZE);
1831 	page = offset >> PAGE_SHIFT;
1832 	offset &= (PAGE_SIZE - 1);
1833 
1834 	if (!xe_bo_is_vram(bo) && !xe_bo_is_stolen(bo)) {
1835 		xe_assert(xe, bo->ttm.ttm);
1836 
1837 		xe_res_first_sg(xe_bo_sg(bo), page << PAGE_SHIFT,
1838 				page_size, &cur);
1839 		return xe_res_dma(&cur) + offset;
1840 	} else {
1841 		struct xe_res_cursor cur;
1842 
1843 		xe_res_first(bo->ttm.resource, page << PAGE_SHIFT,
1844 			     page_size, &cur);
1845 		return cur.start + offset + vram_region_gpu_offset(bo->ttm.resource);
1846 	}
1847 }
1848 
1849 dma_addr_t xe_bo_addr(struct xe_bo *bo, u64 offset, size_t page_size)
1850 {
1851 	if (!READ_ONCE(bo->ttm.pin_count))
1852 		xe_bo_assert_held(bo);
1853 	return __xe_bo_addr(bo, offset, page_size);
1854 }
1855 
1856 int xe_bo_vmap(struct xe_bo *bo)
1857 {
1858 	void *virtual;
1859 	bool is_iomem;
1860 	int ret;
1861 
1862 	xe_bo_assert_held(bo);
1863 
1864 	if (!(bo->flags & XE_BO_NEEDS_CPU_ACCESS))
1865 		return -EINVAL;
1866 
1867 	if (!iosys_map_is_null(&bo->vmap))
1868 		return 0;
1869 
1870 	/*
1871 	 * We use this more or less deprecated interface for now since
1872 	 * ttm_bo_vmap() doesn't offer the optimization of kmapping
1873 	 * single page bos, which is done here.
1874 	 * TODO: Fix up ttm_bo_vmap to do that, or fix up ttm_bo_kmap
1875 	 * to use struct iosys_map.
1876 	 */
1877 	ret = ttm_bo_kmap(&bo->ttm, 0, bo->size >> PAGE_SHIFT, &bo->kmap);
1878 	if (ret)
1879 		return ret;
1880 
1881 	virtual = ttm_kmap_obj_virtual(&bo->kmap, &is_iomem);
1882 	if (is_iomem)
1883 		iosys_map_set_vaddr_iomem(&bo->vmap, (void __iomem *)virtual);
1884 	else
1885 		iosys_map_set_vaddr(&bo->vmap, virtual);
1886 
1887 	return 0;
1888 }
1889 
1890 static void __xe_bo_vunmap(struct xe_bo *bo)
1891 {
1892 	if (!iosys_map_is_null(&bo->vmap)) {
1893 		iosys_map_clear(&bo->vmap);
1894 		ttm_bo_kunmap(&bo->kmap);
1895 	}
1896 }
1897 
1898 void xe_bo_vunmap(struct xe_bo *bo)
1899 {
1900 	xe_bo_assert_held(bo);
1901 	__xe_bo_vunmap(bo);
1902 }
1903 
1904 int xe_gem_create_ioctl(struct drm_device *dev, void *data,
1905 			struct drm_file *file)
1906 {
1907 	struct xe_device *xe = to_xe_device(dev);
1908 	struct xe_file *xef = to_xe_file(file);
1909 	struct drm_xe_gem_create *args = data;
1910 	struct xe_vm *vm = NULL;
1911 	struct xe_bo *bo;
1912 	unsigned int bo_flags;
1913 	u32 handle;
1914 	int err;
1915 
1916 	if (XE_IOCTL_DBG(xe, args->extensions) ||
1917 	    XE_IOCTL_DBG(xe, args->pad[0] || args->pad[1] || args->pad[2]) ||
1918 	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
1919 		return -EINVAL;
1920 
1921 	/* at least one valid memory placement must be specified */
1922 	if (XE_IOCTL_DBG(xe, (args->placement & ~xe->info.mem_region_mask) ||
1923 			 !args->placement))
1924 		return -EINVAL;
1925 
1926 	if (XE_IOCTL_DBG(xe, args->flags &
1927 			 ~(DRM_XE_GEM_CREATE_FLAG_DEFER_BACKING |
1928 			   DRM_XE_GEM_CREATE_FLAG_SCANOUT |
1929 			   DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM)))
1930 		return -EINVAL;
1931 
1932 	if (XE_IOCTL_DBG(xe, args->handle))
1933 		return -EINVAL;
1934 
1935 	if (XE_IOCTL_DBG(xe, !args->size))
1936 		return -EINVAL;
1937 
1938 	if (XE_IOCTL_DBG(xe, args->size > SIZE_MAX))
1939 		return -EINVAL;
1940 
1941 	if (XE_IOCTL_DBG(xe, args->size & ~PAGE_MASK))
1942 		return -EINVAL;
1943 
1944 	bo_flags = 0;
1945 	if (args->flags & DRM_XE_GEM_CREATE_FLAG_DEFER_BACKING)
1946 		bo_flags |= XE_BO_DEFER_BACKING;
1947 
1948 	if (args->flags & DRM_XE_GEM_CREATE_FLAG_SCANOUT)
1949 		bo_flags |= XE_BO_SCANOUT_BIT;
1950 
1951 	bo_flags |= args->placement << (ffs(XE_BO_CREATE_SYSTEM_BIT) - 1);
1952 
1953 	if (args->flags & DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM) {
1954 		if (XE_IOCTL_DBG(xe, !(bo_flags & XE_BO_CREATE_VRAM_MASK)))
1955 			return -EINVAL;
1956 
1957 		bo_flags |= XE_BO_NEEDS_CPU_ACCESS;
1958 	}
1959 
1960 	if (XE_IOCTL_DBG(xe, !args->cpu_caching ||
1961 			 args->cpu_caching > DRM_XE_GEM_CPU_CACHING_WC))
1962 		return -EINVAL;
1963 
1964 	if (XE_IOCTL_DBG(xe, bo_flags & XE_BO_CREATE_VRAM_MASK &&
1965 			 args->cpu_caching != DRM_XE_GEM_CPU_CACHING_WC))
1966 		return -EINVAL;
1967 
1968 	if (XE_IOCTL_DBG(xe, bo_flags & XE_BO_SCANOUT_BIT &&
1969 			 args->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB))
1970 		return -EINVAL;
1971 
1972 	if (args->vm_id) {
1973 		vm = xe_vm_lookup(xef, args->vm_id);
1974 		if (XE_IOCTL_DBG(xe, !vm))
1975 			return -ENOENT;
1976 		err = xe_vm_lock(vm, true);
1977 		if (err)
1978 			goto out_vm;
1979 	}
1980 
1981 	bo = xe_bo_create_user(xe, NULL, vm, args->size, args->cpu_caching,
1982 			       ttm_bo_type_device, bo_flags);
1983 
1984 	if (vm)
1985 		xe_vm_unlock(vm);
1986 
1987 	if (IS_ERR(bo)) {
1988 		err = PTR_ERR(bo);
1989 		goto out_vm;
1990 	}
1991 
1992 	err = drm_gem_handle_create(file, &bo->ttm.base, &handle);
1993 	if (err)
1994 		goto out_bulk;
1995 
1996 	args->handle = handle;
1997 	goto out_put;
1998 
1999 out_bulk:
2000 	if (vm && !xe_vm_in_fault_mode(vm)) {
2001 		xe_vm_lock(vm, false);
2002 		__xe_bo_unset_bulk_move(bo);
2003 		xe_vm_unlock(vm);
2004 	}
2005 out_put:
2006 	xe_bo_put(bo);
2007 out_vm:
2008 	if (vm)
2009 		xe_vm_put(vm);
2010 
2011 	return err;
2012 }
2013 
2014 int xe_gem_mmap_offset_ioctl(struct drm_device *dev, void *data,
2015 			     struct drm_file *file)
2016 {
2017 	struct xe_device *xe = to_xe_device(dev);
2018 	struct drm_xe_gem_mmap_offset *args = data;
2019 	struct drm_gem_object *gem_obj;
2020 
2021 	if (XE_IOCTL_DBG(xe, args->extensions) ||
2022 	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
2023 		return -EINVAL;
2024 
2025 	if (XE_IOCTL_DBG(xe, args->flags))
2026 		return -EINVAL;
2027 
2028 	gem_obj = drm_gem_object_lookup(file, args->handle);
2029 	if (XE_IOCTL_DBG(xe, !gem_obj))
2030 		return -ENOENT;
2031 
2032 	/* The mmap offset was set up at BO allocation time. */
2033 	args->offset = drm_vma_node_offset_addr(&gem_obj->vma_node);
2034 
2035 	xe_bo_put(gem_to_xe_bo(gem_obj));
2036 	return 0;
2037 }
2038 
2039 /**
2040  * xe_bo_lock() - Lock the buffer object's dma_resv object
2041  * @bo: The struct xe_bo whose lock is to be taken
2042  * @intr: Whether to perform any wait interruptible
2043  *
2044  * Locks the buffer object's dma_resv object. If the buffer object is
2045  * pointing to a shared dma_resv object, that shared lock is locked.
2046  *
2047  * Return: 0 on success, -EINTR if @intr is true and the wait for a
2048  * contended lock was interrupted. If @intr is set to false, the
2049  * function always returns 0.
2050  */
2051 int xe_bo_lock(struct xe_bo *bo, bool intr)
2052 {
2053 	if (intr)
2054 		return dma_resv_lock_interruptible(bo->ttm.base.resv, NULL);
2055 
2056 	dma_resv_lock(bo->ttm.base.resv, NULL);
2057 
2058 	return 0;
2059 }
2060 
2061 /**
2062  * xe_bo_unlock() - Unlock the buffer object's dma_resv object
2063  * @bo: The struct xe_bo whose lock is to be released.
2064  *
2065  * Unlock a buffer object lock that was locked by xe_bo_lock().
2066  */
2067 void xe_bo_unlock(struct xe_bo *bo)
2068 {
2069 	dma_resv_unlock(bo->ttm.base.resv);
2070 }
2071 
2072 /**
2073  * xe_bo_can_migrate - Whether a buffer object likely can be migrated
2074  * @bo: The buffer object to migrate
2075  * @mem_type: The TTM memory type intended to migrate to
2076  *
2077  * Check whether the buffer object supports migration to the
2078  * given memory type. Note that pinning may affect the ability to migrate as
2079  * returned by this function.
2080  *
2081  * This function is primarily intended as a helper for checking the
2082  * possibility to migrate buffer objects and can be called without
2083  * the object lock held.
2084  *
2085  * Return: true if migration is possible, false otherwise.
2086  */
2087 bool xe_bo_can_migrate(struct xe_bo *bo, u32 mem_type)
2088 {
2089 	unsigned int cur_place;
2090 
2091 	if (bo->ttm.type == ttm_bo_type_kernel)
2092 		return true;
2093 
2094 	if (bo->ttm.type == ttm_bo_type_sg)
2095 		return false;
2096 
2097 	for (cur_place = 0; cur_place < bo->placement.num_placement;
2098 	     cur_place++) {
2099 		if (bo->placements[cur_place].mem_type == mem_type)
2100 			return true;
2101 	}
2102 
2103 	return false;
2104 }
2105 
2106 static void xe_place_from_ttm_type(u32 mem_type, struct ttm_place *place)
2107 {
2108 	memset(place, 0, sizeof(*place));
2109 	place->mem_type = mem_type;
2110 }
2111 
2112 /**
2113  * xe_bo_migrate - Migrate an object to the desired region id
2114  * @bo: The buffer object to migrate.
2115  * @mem_type: The TTM region type to migrate to.
2116  *
2117  * Attempt to migrate the buffer object to the desired memory region. The
2118  * buffer object may not be pinned, and must be locked.
2119  * On successful completion, the object memory type will be updated,
2120  * but an async migration task may not have completed yet, and to
2121  * accomplish that, the object's kernel fences must be signaled with
2122  * the object lock held.
2123  *
2124  * Return: 0 on success. Negative error code on failure. In particular may
2125  * return -EINTR or -ERESTARTSYS if signal pending.
2126  */
2127 int xe_bo_migrate(struct xe_bo *bo, u32 mem_type)
2128 {
2129 	struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev);
2130 	struct ttm_operation_ctx ctx = {
2131 		.interruptible = true,
2132 		.no_wait_gpu = false,
2133 	};
2134 	struct ttm_placement placement;
2135 	struct ttm_place requested;
2136 
2137 	xe_bo_assert_held(bo);
2138 
2139 	if (bo->ttm.resource->mem_type == mem_type)
2140 		return 0;
2141 
2142 	if (xe_bo_is_pinned(bo))
2143 		return -EBUSY;
2144 
2145 	if (!xe_bo_can_migrate(bo, mem_type))
2146 		return -EINVAL;
2147 
2148 	xe_place_from_ttm_type(mem_type, &requested);
2149 	placement.num_placement = 1;
2150 	placement.placement = &requested;
2151 
2152 	/*
2153 	 * Stolen needs to be handled like below VRAM handling if we ever need
2154 	 * to support it.
2155 	 */
2156 	drm_WARN_ON(&xe->drm, mem_type == XE_PL_STOLEN);
2157 
2158 	if (mem_type_is_vram(mem_type)) {
2159 		u32 c = 0;
2160 
2161 		add_vram(xe, bo, &requested, bo->flags, mem_type, &c);
2162 	}
2163 
2164 	return ttm_bo_validate(&bo->ttm, &placement, &ctx);
2165 }
2166 
2167 /**
2168  * xe_bo_evict - Evict an object to evict placement
2169  * @bo: The buffer object to migrate.
2170  * @force_alloc: Set force_alloc in ttm_operation_ctx
2171  *
2172  * On successful completion, the object memory will be moved to evict
2173  * placement. Ths function blocks until the object has been fully moved.
2174  *
2175  * Return: 0 on success. Negative error code on failure.
2176  */
2177 int xe_bo_evict(struct xe_bo *bo, bool force_alloc)
2178 {
2179 	struct ttm_operation_ctx ctx = {
2180 		.interruptible = false,
2181 		.no_wait_gpu = false,
2182 		.force_alloc = force_alloc,
2183 	};
2184 	struct ttm_placement placement;
2185 	int ret;
2186 
2187 	xe_evict_flags(&bo->ttm, &placement);
2188 	ret = ttm_bo_validate(&bo->ttm, &placement, &ctx);
2189 	if (ret)
2190 		return ret;
2191 
2192 	dma_resv_wait_timeout(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL,
2193 			      false, MAX_SCHEDULE_TIMEOUT);
2194 
2195 	return 0;
2196 }
2197 
2198 /**
2199  * xe_bo_needs_ccs_pages - Whether a bo needs to back up CCS pages when
2200  * placed in system memory.
2201  * @bo: The xe_bo
2202  *
2203  * Return: true if extra pages need to be allocated, false otherwise.
2204  */
2205 bool xe_bo_needs_ccs_pages(struct xe_bo *bo)
2206 {
2207 	struct xe_device *xe = xe_bo_device(bo);
2208 
2209 	if (!xe_device_has_flat_ccs(xe) || bo->ttm.type != ttm_bo_type_device)
2210 		return false;
2211 
2212 	/* On discrete GPUs, if the GPU can access this buffer from
2213 	 * system memory (i.e., it allows XE_PL_TT placement), FlatCCS
2214 	 * can't be used since there's no CCS storage associated with
2215 	 * non-VRAM addresses.
2216 	 */
2217 	if (IS_DGFX(xe) && (bo->flags & XE_BO_CREATE_SYSTEM_BIT))
2218 		return false;
2219 
2220 	return true;
2221 }
2222 
2223 /**
2224  * __xe_bo_release_dummy() - Dummy kref release function
2225  * @kref: The embedded struct kref.
2226  *
2227  * Dummy release function for xe_bo_put_deferred(). Keep off.
2228  */
2229 void __xe_bo_release_dummy(struct kref *kref)
2230 {
2231 }
2232 
2233 /**
2234  * xe_bo_put_commit() - Put bos whose put was deferred by xe_bo_put_deferred().
2235  * @deferred: The lockless list used for the call to xe_bo_put_deferred().
2236  *
2237  * Puts all bos whose put was deferred by xe_bo_put_deferred().
2238  * The @deferred list can be either an onstack local list or a global
2239  * shared list used by a workqueue.
2240  */
2241 void xe_bo_put_commit(struct llist_head *deferred)
2242 {
2243 	struct llist_node *freed;
2244 	struct xe_bo *bo, *next;
2245 
2246 	if (!deferred)
2247 		return;
2248 
2249 	freed = llist_del_all(deferred);
2250 	if (!freed)
2251 		return;
2252 
2253 	llist_for_each_entry_safe(bo, next, freed, freed)
2254 		drm_gem_object_free(&bo->ttm.base.refcount);
2255 }
2256 
2257 /**
2258  * xe_bo_dumb_create - Create a dumb bo as backing for a fb
2259  * @file_priv: ...
2260  * @dev: ...
2261  * @args: ...
2262  *
2263  * See dumb_create() hook in include/drm/drm_drv.h
2264  *
2265  * Return: ...
2266  */
2267 int xe_bo_dumb_create(struct drm_file *file_priv,
2268 		      struct drm_device *dev,
2269 		      struct drm_mode_create_dumb *args)
2270 {
2271 	struct xe_device *xe = to_xe_device(dev);
2272 	struct xe_bo *bo;
2273 	uint32_t handle;
2274 	int cpp = DIV_ROUND_UP(args->bpp, 8);
2275 	int err;
2276 	u32 page_size = max_t(u32, PAGE_SIZE,
2277 		xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K ? SZ_64K : SZ_4K);
2278 
2279 	args->pitch = ALIGN(args->width * cpp, 64);
2280 	args->size = ALIGN(mul_u32_u32(args->pitch, args->height),
2281 			   page_size);
2282 
2283 	bo = xe_bo_create_user(xe, NULL, NULL, args->size,
2284 			       DRM_XE_GEM_CPU_CACHING_WC,
2285 			       ttm_bo_type_device,
2286 			       XE_BO_CREATE_VRAM_IF_DGFX(xe_device_get_root_tile(xe)) |
2287 			       XE_BO_CREATE_USER_BIT | XE_BO_SCANOUT_BIT |
2288 			       XE_BO_NEEDS_CPU_ACCESS);
2289 	if (IS_ERR(bo))
2290 		return PTR_ERR(bo);
2291 
2292 	err = drm_gem_handle_create(file_priv, &bo->ttm.base, &handle);
2293 	/* drop reference from allocate - handle holds it now */
2294 	drm_gem_object_put(&bo->ttm.base);
2295 	if (!err)
2296 		args->handle = handle;
2297 	return err;
2298 }
2299 
2300 void xe_bo_runtime_pm_release_mmap_offset(struct xe_bo *bo)
2301 {
2302 	struct ttm_buffer_object *tbo = &bo->ttm;
2303 	struct ttm_device *bdev = tbo->bdev;
2304 
2305 	drm_vma_node_unmap(&tbo->base.vma_node, bdev->dev_mapping);
2306 
2307 	list_del_init(&bo->vram_userfault_link);
2308 }
2309 
2310 #if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
2311 #include "tests/xe_bo.c"
2312 #endif
2313