1fb4d8502Sjsg /*
2fb4d8502Sjsg  * Copyright 2016-2018 Advanced Micro Devices, Inc.
3fb4d8502Sjsg  *
4fb4d8502Sjsg  * Permission is hereby granted, free of charge, to any person obtaining a
5fb4d8502Sjsg  * copy of this software and associated documentation files (the "Software"),
6fb4d8502Sjsg  * to deal in the Software without restriction, including without limitation
7fb4d8502Sjsg  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8fb4d8502Sjsg  * and/or sell copies of the Software, and to permit persons to whom the
9fb4d8502Sjsg  * Software is furnished to do so, subject to the following conditions:
10fb4d8502Sjsg  *
11fb4d8502Sjsg  * The above copyright notice and this permission notice shall be included in
12fb4d8502Sjsg  * all copies or substantial portions of the Software.
13fb4d8502Sjsg  *
14fb4d8502Sjsg  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15fb4d8502Sjsg  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16fb4d8502Sjsg  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17fb4d8502Sjsg  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18fb4d8502Sjsg  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19fb4d8502Sjsg  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20fb4d8502Sjsg  * OTHER DEALINGS IN THE SOFTWARE.
21fb4d8502Sjsg  */
22fb4d8502Sjsg 
23fb4d8502Sjsg #include <linux/dma-fence.h>
24fb4d8502Sjsg #include <linux/spinlock.h>
25fb4d8502Sjsg #include <linux/atomic.h>
26fb4d8502Sjsg #include <linux/stacktrace.h>
27fb4d8502Sjsg #include <linux/sched.h>
28fb4d8502Sjsg #include <linux/slab.h>
29fb4d8502Sjsg #include <linux/sched/mm.h>
30fb4d8502Sjsg #include "amdgpu_amdkfd.h"
315ca02815Sjsg #include "kfd_svm.h"
32fb4d8502Sjsg 
33fb4d8502Sjsg static const struct dma_fence_ops amdkfd_fence_ops;
34fb4d8502Sjsg static atomic_t fence_seq = ATOMIC_INIT(0);
35fb4d8502Sjsg 
/* Eviction Fence
 * Fence helper functions to deal with KFD memory eviction.
 * Big Idea - Since KFD submissions are done by user queues, a BO cannot be
 *  evicted unless all the user queues for that process are evicted.
 *
 * All the BOs in a process share an eviction fence. When process X wants
 * to map VRAM memory but TTM can't find enough space, TTM will attempt to
 * evict BOs from its LRU list. TTM checks if the BO is valuable to evict
 * by calling ttm_device_funcs->eviction_valuable().
 *
 * ttm_device_funcs->eviction_valuable() - will return false if the BO belongs
 *  to process X. Otherwise, it will return true to indicate BO can be
 *  evicted by TTM.
 *
 * If ttm_device_funcs->eviction_valuable returns true, then TTM will continue
 * the eviction process for that BO by calling ttm_bo_evict --> amdgpu_bo_move
 * --> amdgpu_copy_buffer(). This sets up a job in the GPU scheduler.
 *
 * GPU Scheduler (amd_sched_main) - sets up a cb (fence_add_callback) to
 *  notify when the BO is free to move. fence_add_callback --> enable_signaling
 *  --> amdgpu_amdkfd_fence.enable_signaling
 *
 * amdgpu_amdkfd_fence.enable_signaling - Start a work item that will quiesce
 * user queues and signal fence. The work item will also start another delayed
 * work item to restore BOs
 */
62fb4d8502Sjsg 
/**
 * amdgpu_amdkfd_fence_create - allocate and initialise a KFD eviction fence
 *
 * @context: fence context handed through to dma_fence_init()
 * @mm: mm_struct recorded in the fence; a reference is taken with mmgrab()
 * @svm_bo: SVM range BO recorded in the fence (stored as given)
 *
 * The timeline name is filled in from the comm of the current task and the
 * sequence number comes from the file-wide fence_seq counter.
 *
 * Return: the new fence, or NULL if the allocation failed.
 */
struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context,
				struct mm_struct *mm,
				struct svm_range_bo *svm_bo)
{
	struct amdgpu_amdkfd_fence *new_fence;

	new_fence = kzalloc(sizeof(*new_fence), GFP_KERNEL);
	if (!new_fence)
		return NULL;

	/* The mm reference taken here is dropped in amdkfd_fence_release(). */
	mmgrab(mm);
	new_fence->mm = mm;
	get_task_comm(new_fence->timeline_name, current);
	mtx_init(&new_fence->lock, IPL_TTY);
	new_fence->svm_bo = svm_bo;

	/* Publish the fence last, once its lock and payload are set up. */
	dma_fence_init(&new_fence->base, &amdkfd_fence_ops, &new_fence->lock,
		       context, atomic_inc_return(&fence_seq));

	return new_fence;
}
84fb4d8502Sjsg 
to_amdgpu_amdkfd_fence(struct dma_fence * f)85fb4d8502Sjsg struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f)
86fb4d8502Sjsg {
87fb4d8502Sjsg 	struct amdgpu_amdkfd_fence *fence;
88fb4d8502Sjsg 
89fb4d8502Sjsg 	if (!f)
90fb4d8502Sjsg 		return NULL;
91fb4d8502Sjsg 
92fb4d8502Sjsg 	fence = container_of(f, struct amdgpu_amdkfd_fence, base);
93*e82a142aSjsg 	if (f->ops == &amdkfd_fence_ops)
94fb4d8502Sjsg 		return fence;
95fb4d8502Sjsg 
96fb4d8502Sjsg 	return NULL;
97fb4d8502Sjsg }
98fb4d8502Sjsg 
/* get_driver_name callback: every KFD eviction fence reports the same name. */
static const char *amdkfd_fence_get_driver_name(struct dma_fence *f)
{
	return "amdgpu_amdkfd_fence";
}
103fb4d8502Sjsg 
amdkfd_fence_get_timeline_name(struct dma_fence * f)104fb4d8502Sjsg static const char *amdkfd_fence_get_timeline_name(struct dma_fence *f)
105fb4d8502Sjsg {
106fb4d8502Sjsg 	struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f);
107fb4d8502Sjsg 
108fb4d8502Sjsg 	return fence->timeline_name;
109fb4d8502Sjsg }
110fb4d8502Sjsg 
111fb4d8502Sjsg /**
112fb4d8502Sjsg  * amdkfd_fence_enable_signaling - This gets called when TTM wants to evict
113fb4d8502Sjsg  *  a KFD BO and schedules a job to move the BO.
114fb4d8502Sjsg  *  If fence is already signaled return true.
115fb4d8502Sjsg  *  If fence is not signaled schedule a evict KFD process work item.
1165ca02815Sjsg  *
1175ca02815Sjsg  *  @f: dma_fence
118fb4d8502Sjsg  */
amdkfd_fence_enable_signaling(struct dma_fence * f)119fb4d8502Sjsg static bool amdkfd_fence_enable_signaling(struct dma_fence *f)
120fb4d8502Sjsg {
121fb4d8502Sjsg 	struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f);
122fb4d8502Sjsg 
123fb4d8502Sjsg 	if (!fence)
124fb4d8502Sjsg 		return false;
125fb4d8502Sjsg 
126fb4d8502Sjsg 	if (dma_fence_is_signaled(f))
127fb4d8502Sjsg 		return true;
128fb4d8502Sjsg 
1295ca02815Sjsg 	if (!fence->svm_bo) {
130c349dbc7Sjsg 		if (!kgd2kfd_schedule_evict_and_restore_process(fence->mm, f))
131fb4d8502Sjsg 			return true;
1325ca02815Sjsg 	} else {
1335ca02815Sjsg 		if (!svm_range_schedule_evict_svm_bo(fence))
1345ca02815Sjsg 			return true;
1355ca02815Sjsg 	}
136fb4d8502Sjsg 	return false;
137fb4d8502Sjsg }
138fb4d8502Sjsg 
139fb4d8502Sjsg /**
140fb4d8502Sjsg  * amdkfd_fence_release - callback that fence can be freed
141fb4d8502Sjsg  *
1425ca02815Sjsg  * @f: dma_fence
143fb4d8502Sjsg  *
144fb4d8502Sjsg  * This function is called when the reference count becomes zero.
145fb4d8502Sjsg  * Drops the mm_struct reference and RCU schedules freeing up the fence.
146fb4d8502Sjsg  */
amdkfd_fence_release(struct dma_fence * f)147fb4d8502Sjsg static void amdkfd_fence_release(struct dma_fence *f)
148fb4d8502Sjsg {
149fb4d8502Sjsg 	struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f);
150fb4d8502Sjsg 
151fb4d8502Sjsg 	/* Unconditionally signal the fence. The process is getting
152fb4d8502Sjsg 	 * terminated.
153fb4d8502Sjsg 	 */
154fb4d8502Sjsg 	if (WARN_ON(!fence))
155fb4d8502Sjsg 		return; /* Not an amdgpu_amdkfd_fence */
156fb4d8502Sjsg 
157fb4d8502Sjsg 	mmdrop(fence->mm);
158fb4d8502Sjsg 	kfree_rcu(f, rcu);
159fb4d8502Sjsg }
160fb4d8502Sjsg 
161fb4d8502Sjsg /**
1621bb76ff1Sjsg  * amdkfd_fence_check_mm - Check whether to prevent eviction of @f by @mm
163fb4d8502Sjsg  *
164fb4d8502Sjsg  * @f: [IN] fence
165fb4d8502Sjsg  * @mm: [IN] mm that needs to be verified
1661bb76ff1Sjsg  *
1671bb76ff1Sjsg  * Check if @mm is same as that of the fence @f, if same return TRUE else
1681bb76ff1Sjsg  * return FALSE.
1691bb76ff1Sjsg  * For svm bo, which support vram overcommitment, always return FALSE.
170fb4d8502Sjsg  */
amdkfd_fence_check_mm(struct dma_fence * f,struct mm_struct * mm)171fb4d8502Sjsg bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm)
172fb4d8502Sjsg {
173fb4d8502Sjsg 	struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f);
174fb4d8502Sjsg 
175fb4d8502Sjsg 	if (!fence)
176fb4d8502Sjsg 		return false;
1771bb76ff1Sjsg 	else if (fence->mm == mm  && !fence->svm_bo)
178fb4d8502Sjsg 		return true;
179fb4d8502Sjsg 
180fb4d8502Sjsg 	return false;
181fb4d8502Sjsg }
182fb4d8502Sjsg 
183fb4d8502Sjsg static const struct dma_fence_ops amdkfd_fence_ops = {
184fb4d8502Sjsg 	.get_driver_name = amdkfd_fence_get_driver_name,
185fb4d8502Sjsg 	.get_timeline_name = amdkfd_fence_get_timeline_name,
186fb4d8502Sjsg 	.enable_signaling = amdkfd_fence_enable_signaling,
187fb4d8502Sjsg 	.release = amdkfd_fence_release,
188fb4d8502Sjsg };
189