/*	$NetBSD: amdgpu_sync.c,v 1.3 2021/12/18 23:44:58 riastradh Exp $	*/

/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */
/*
 * Authors:
 *    Christian König <christian.koenig@amd.com>
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: amdgpu_sync.c,v 1.3 2021/12/18 23:44:58 riastradh Exp $");

#include "amdgpu.h"
#include "amdgpu_trace.h"
#include "amdgpu_amdkfd.h"

struct amdgpu_sync_entry {
	struct hlist_node	node;
	struct dma_fence	*fence;
	bool	explicit;
};
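
/*
 * The sync->fences hash is keyed by dma_fence->context: at most one fence
 * is remembered per context, and amdgpu_sync_add_later() replaces an
 * existing entry with the later fence from the same context instead of
 * adding a duplicate.
 */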

static struct kmem_cache *amdgpu_sync_slab;

/**
 * amdgpu_sync_create - zero init sync object
 *
 * @sync: sync object to initialize
 *
 * Just clear the sync object for now.
 */
void amdgpu_sync_create(struct amdgpu_sync *sync)
{
	hash_init(sync->fences);
	sync->last_vm_update = NULL;
}

/**
 * amdgpu_sync_same_dev - test if fence belongs to us
 *
 * @adev: amdgpu device to use for the test
 * @f: fence to test
 *
 * Test if the fence was issued by us.
 */
static bool amdgpu_sync_same_dev(struct amdgpu_device *adev,
				 struct dma_fence *f)
{
	struct drm_sched_fence *s_fence = to_drm_sched_fence(f);

	if (s_fence) {
		struct amdgpu_ring *ring;

		ring = container_of(s_fence->sched, struct amdgpu_ring, sched);
		return ring->adev == adev;
	}

	return false;
}

/**
 * amdgpu_sync_get_owner - extract the owner of a fence
 *
 * @f: fence to get the owner from
 *
 * Extract who originally created the fence.
 */
static void *amdgpu_sync_get_owner(struct dma_fence *f)
{
	struct drm_sched_fence *s_fence;
	struct amdgpu_amdkfd_fence *kfd_fence;

	if (!f)
		return AMDGPU_FENCE_OWNER_UNDEFINED;

	s_fence = to_drm_sched_fence(f);
	if (s_fence)
		return s_fence->owner;

	kfd_fence = to_amdgpu_amdkfd_fence(f);
	if (kfd_fence)
		return AMDGPU_FENCE_OWNER_KFD;

	return AMDGPU_FENCE_OWNER_UNDEFINED;
}

/**
 * amdgpu_sync_keep_later - Keep the later fence
 *
 * @keep: existing fence to test
 * @fence: new fence
 *
 * Either keep the existing fence or the new one, depending on which one
 * is later.
 */
static void amdgpu_sync_keep_later(struct dma_fence **keep,
				   struct dma_fence *fence)
{
	if (*keep && dma_fence_is_later(*keep, fence))
		return;

	dma_fence_put(*keep);
	*keep = dma_fence_get(fence);
}

/**
 * amdgpu_sync_add_later - add the fence to the hash
 *
 * @sync: sync object to add the fence to
 * @f: fence to add
 * @explicit: whether this is an explicit dependency
 *
 * Tries to add the fence to an existing hash entry. Returns true when an entry
 * was found, false otherwise.
 */
static bool amdgpu_sync_add_later(struct amdgpu_sync *sync, struct dma_fence *f,
				  bool explicit)
{
	struct amdgpu_sync_entry *e;

	hash_for_each_possible(sync->fences, e, node, f->context) {
		if (unlikely(e->fence->context != f->context))
			continue;

		amdgpu_sync_keep_later(&e->fence, f);

		/* Preserve the explicit flag so we don't lose the pipeline sync */
		e->explicit |= explicit;

		return true;
	}
	return false;
}

/**
 * amdgpu_sync_fence - remember to sync to this fence
 *
 * @sync: sync object to add fence to
 * @f: fence to sync to
 * @explicit: if this is an explicit dependency
 *
 * Add the fence to the sync object.
 */
int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f,
		      bool explicit)
{
	struct amdgpu_sync_entry *e;

	if (!f)
		return 0;

	if (amdgpu_sync_add_later(sync, f, explicit))
		return 0;

	e = kmem_cache_alloc(amdgpu_sync_slab, GFP_KERNEL);
	if (!e)
		return -ENOMEM;

	e->explicit = explicit;

	hash_add(sync->fences, &e->node, f->context);
	e->fence = dma_fence_get(f);
	return 0;
}
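
/*
 * Illustrative sketch only (not called anywhere in the driver): the usual
 * lifecycle of a sync object, assuming "fence" is some dependency the
 * caller wants to wait for before proceeding.
 *
 *	struct amdgpu_sync sync;
 *	int r;
 *
 *	amdgpu_sync_create(&sync);
 *	r = amdgpu_sync_fence(&sync, fence, false);
 *	if (!r)
 *		r = amdgpu_sync_wait(&sync, true);
 *	amdgpu_sync_free(&sync);
 */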

/**
 * amdgpu_sync_vm_fence - remember to sync to this VM fence
 *
 * @sync: sync object to add fence to
 * @fence: the VM fence to add
 *
 * Add the fence to the sync object and remember it as VM update.
 */
int amdgpu_sync_vm_fence(struct amdgpu_sync *sync, struct dma_fence *fence)
{
	if (!fence)
		return 0;

	amdgpu_sync_keep_later(&sync->last_vm_update, fence);
	return amdgpu_sync_fence(sync, fence, false);
}

/**
 * amdgpu_sync_resv - sync to a reservation object
 *
 * @adev: amdgpu device
 * @sync: sync object to add fences from reservation object to
 * @resv: reservation object with embedded fence
 * @owner: owner of the work being synced, used to decide which fences to skip
 * @explicit_sync: true if we should only sync to the exclusive fence
 *
 * Sync to the fences of the reservation object.
 */
int amdgpu_sync_resv(struct amdgpu_device *adev,
		     struct amdgpu_sync *sync,
		     struct dma_resv *resv,
		     void *owner, bool explicit_sync)
{
	struct dma_resv_list *flist;
	struct dma_fence *f;
	void *fence_owner;
	unsigned i;
	int r = 0;

	if (resv == NULL)
		return -EINVAL;

	/* always sync to the exclusive fence */
	f = dma_resv_get_excl(resv);
	r = amdgpu_sync_fence(sync, f, false);

	flist = dma_resv_get_list(resv);
	if (!flist || r)
		return r;

	for (i = 0; i < flist->shared_count; ++i) {
		f = rcu_dereference_protected(flist->shared[i],
					      dma_resv_held(resv));
		/* We only want to trigger KFD eviction fences on
		 * evict or move jobs. Skip KFD fences otherwise.
		 */
		fence_owner = amdgpu_sync_get_owner(f);
		if (fence_owner == AMDGPU_FENCE_OWNER_KFD &&
		    owner != AMDGPU_FENCE_OWNER_UNDEFINED)
			continue;

		if (amdgpu_sync_same_dev(adev, f)) {
			/* VM updates only sync with moves but not with user
			 * command submissions or KFD eviction fences
			 */
			if (owner == AMDGPU_FENCE_OWNER_VM &&
			    fence_owner != AMDGPU_FENCE_OWNER_UNDEFINED)
				continue;

			/* Ignore fences from the same owner, and explicitly
			 * synced ones, as long as the owner isn't undefined.
			 */
			if (owner != AMDGPU_FENCE_OWNER_UNDEFINED &&
			    (fence_owner == owner || explicit_sync))
				continue;
		}

		r = amdgpu_sync_fence(sync, f, false);
		if (r)
			break;
	}
	return r;
}
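
/*
 * Illustrative sketch only (not part of the driver): a caller that already
 * holds the reservation lock on "resv" (the shared fences above are
 * dereferenced under dma_resv_held()) might collect its dependencies
 * roughly like this; "adev", "resv" and the owner value are placeholders.
 *
 *	struct amdgpu_sync sync;
 *	int r;
 *
 *	amdgpu_sync_create(&sync);
 *	r = amdgpu_sync_resv(adev, &sync, resv,
 *			     AMDGPU_FENCE_OWNER_UNDEFINED, false);
 *	if (!r)
 *		r = amdgpu_sync_wait(&sync, false);
 *	amdgpu_sync_free(&sync);
 */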

/**
 * amdgpu_sync_peek_fence - get the next fence not signaled yet
 *
 * @sync: the sync object
 * @ring: optional ring to use for test
 *
 * Returns the next fence not signaled yet without removing it from the sync
 * object.
 */
struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync,
					 struct amdgpu_ring *ring)
{
	struct amdgpu_sync_entry *e;
	struct hlist_node *tmp;
	int i;

	hash_for_each_safe(sync->fences, i, tmp, e, node) {
		struct dma_fence *f = e->fence;
		struct drm_sched_fence *s_fence = to_drm_sched_fence(f);

		if (dma_fence_is_signaled(f)) {
			hash_del(&e->node);
			dma_fence_put(f);
			kmem_cache_free(amdgpu_sync_slab, e);
			continue;
		}
		if (ring && s_fence) {
			/* For fences from the same ring it is sufficient
			 * when they are scheduled.
			 */
			if (s_fence->sched == &ring->sched) {
				if (dma_fence_is_signaled(&s_fence->scheduled))
					continue;

				return &s_fence->scheduled;
			}
		}

		return f;
	}

	return NULL;
}
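
/*
 * Note: when @ring is given, work already scheduled on that same ring only
 * needs its &drm_sched_fence.scheduled fence rather than the finished
 * fence, because the ring executes jobs in submission order anyway. That
 * is why the loop above hands back &s_fence->scheduled in that case.
 */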

/**
 * amdgpu_sync_get_fence - get the next fence from the sync object
 *
 * @sync: sync object to use
 * @explicit: if non-NULL, set to the explicit flag of the returned fence
 *
 * Gets and removes the next fence from the sync object that is not signaled
 * yet. The caller takes over the reference to the returned fence.
 */
struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync, bool *explicit)
{
	struct amdgpu_sync_entry *e;
	struct hlist_node *tmp;
	struct dma_fence *f;
	int i;

	hash_for_each_safe(sync->fences, i, tmp, e, node) {
		f = e->fence;
		if (explicit)
			*explicit = e->explicit;

		hash_del(&e->node);
		kmem_cache_free(amdgpu_sync_slab, e);

		if (!dma_fence_is_signaled(f))
			return f;

		dma_fence_put(f);
	}
	return NULL;
}

/**
 * amdgpu_sync_clone - clone a sync object
 *
 * @source: sync object to clone
 * @clone: pointer to destination sync object
 *
 * Adds references to all unsignaled fences in @source to @clone. Also
 * removes signaled fences from @source while at it.
 */
int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone)
{
	struct amdgpu_sync_entry *e;
	struct hlist_node *tmp;
	struct dma_fence *f;
	int i, r;

	hash_for_each_safe(source->fences, i, tmp, e, node) {
		f = e->fence;
		if (!dma_fence_is_signaled(f)) {
			r = amdgpu_sync_fence(clone, f, e->explicit);
			if (r)
				return r;
		} else {
			hash_del(&e->node);
			dma_fence_put(f);
			kmem_cache_free(amdgpu_sync_slab, e);
		}
	}

	dma_fence_put(clone->last_vm_update);
	clone->last_vm_update = dma_fence_get(source->last_vm_update);

	return 0;
}

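/**
 * amdgpu_sync_wait - wait for all fences in the sync object
 *
 * @sync: sync object to wait on
 * @intr: if true, the wait can be interrupted by a signal
 *
 * Waits for every remembered fence to signal and drops each entry once it
 * has completed. Returns 0 on success or a negative error code if a wait
 * fails or is interrupted.
 */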
int amdgpu_sync_wait(struct amdgpu_sync *sync, bool intr)
{
	struct amdgpu_sync_entry *e;
	struct hlist_node *tmp;
	int i, r;

	hash_for_each_safe(sync->fences, i, tmp, e, node) {
		r = dma_fence_wait(e->fence, intr);
		if (r)
			return r;

		hash_del(&e->node);
		dma_fence_put(e->fence);
		kmem_cache_free(amdgpu_sync_slab, e);
	}

	return 0;
}

/**
 * amdgpu_sync_free - free the sync object
 *
 * @sync: sync object to use
 *
 * Free the sync object.
 */
void amdgpu_sync_free(struct amdgpu_sync *sync)
{
	struct amdgpu_sync_entry *e;
	struct hlist_node *tmp;
	unsigned i;

	hash_for_each_safe(sync->fences, i, tmp, e, node) {
		hash_del(&e->node);
		dma_fence_put(e->fence);
		kmem_cache_free(amdgpu_sync_slab, e);
	}

	dma_fence_put(sync->last_vm_update);
}

/**
 * amdgpu_sync_init - init sync object subsystem
 *
 * Allocate the slab allocator.
 */
int amdgpu_sync_init(void)
{
	amdgpu_sync_slab = kmem_cache_create(
		"amdgpu_sync", sizeof(struct amdgpu_sync_entry), 0,
		SLAB_HWCACHE_ALIGN, NULL);
	if (!amdgpu_sync_slab)
		return -ENOMEM;

	return 0;
}

/**
 * amdgpu_sync_fini - fini sync object subsystem
 *
 * Free the slab allocator.
 */
void amdgpu_sync_fini(void)
{
	kmem_cache_destroy(amdgpu_sync_slab);
}