/*	$NetBSD: amdgpu_sync.c,v 1.3 2021/12/18 23:44:58 riastradh Exp $	*/

/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */
/*
 * Authors:
 *    Christian König <christian.koenig@amd.com>
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: amdgpu_sync.c,v 1.3 2021/12/18 23:44:58 riastradh Exp $");

#include "amdgpu.h"
#include "amdgpu_trace.h"
#include "amdgpu_amdkfd.h"

struct amdgpu_sync_entry {
	struct hlist_node	node;
	struct dma_fence	*fence;
	bool	explicit;
};

static struct kmem_cache *amdgpu_sync_slab;

/**
 * amdgpu_sync_create - zero init sync object
 *
 * @sync: sync object to initialize
 *
 * Just clear the sync object for now.
 */
void amdgpu_sync_create(struct amdgpu_sync *sync)
{
	hash_init(sync->fences);
	sync->last_vm_update = NULL;
}
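
/*
 * Illustrative lifecycle (a sketch, not taken from an actual caller in
 * this tree): a sync object is zero-initialized, fences are collected
 * into it, the caller waits for them, and the object is freed again.
 * The "fence" variable below is a stand-in for whatever the caller
 * wants to synchronize with.
 *
 *	struct amdgpu_sync sync;
 *	int r;
 *
 *	amdgpu_sync_create(&sync);
 *	r = amdgpu_sync_fence(&sync, fence, false);
 *	if (!r)
 *		r = amdgpu_sync_wait(&sync, true);
 *	amdgpu_sync_free(&sync);
 */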

/**
 * amdgpu_sync_same_dev - test if a fence belongs to us
 *
 * @adev: amdgpu device to use for the test
 * @f: fence to test
 *
 * Test if the fence was issued by us.
 */
static bool amdgpu_sync_same_dev(struct amdgpu_device *adev,
				 struct dma_fence *f)
{
	struct drm_sched_fence *s_fence = to_drm_sched_fence(f);

	if (s_fence) {
		struct amdgpu_ring *ring;

		ring = container_of(s_fence->sched, struct amdgpu_ring, sched);
		return ring->adev == adev;
	}

	return false;
}

/**
 * amdgpu_sync_get_owner - extract the owner of a fence
 *
 * @f: fence to get the owner from
 *
 * Extract who originally created the fence.
 */
static void *amdgpu_sync_get_owner(struct dma_fence *f)
{
	struct drm_sched_fence *s_fence;
	struct amdgpu_amdkfd_fence *kfd_fence;

	if (!f)
		return AMDGPU_FENCE_OWNER_UNDEFINED;

	s_fence = to_drm_sched_fence(f);
	if (s_fence)
		return s_fence->owner;

	kfd_fence = to_amdgpu_amdkfd_fence(f);
	if (kfd_fence)
		return AMDGPU_FENCE_OWNER_KFD;

	return AMDGPU_FENCE_OWNER_UNDEFINED;
}

/**
 * amdgpu_sync_keep_later - keep the later fence
 *
 * @keep: existing fence to test
 * @fence: new fence
 *
 * Either keep the existing fence or the new one, depending on which one is
 * later.
 */
static void amdgpu_sync_keep_later(struct dma_fence **keep,
				   struct dma_fence *fence)
{
	if (*keep && dma_fence_is_later(*keep, fence))
		return;

	dma_fence_put(*keep);
	*keep = dma_fence_get(fence);
}

/**
 * amdgpu_sync_add_later - add the fence to the hash
 *
 * @sync: sync object to add the fence to
 * @f: fence to add
 * @explicit: whether this is an explicit dependency
 *
 * Tries to add the fence to an existing hash entry. Returns true when an
 * entry was found, false otherwise.
 */
static bool amdgpu_sync_add_later(struct amdgpu_sync *sync, struct dma_fence *f,
				  bool explicit)
{
	struct amdgpu_sync_entry *e;

	hash_for_each_possible(sync->fences, e, node, f->context) {
		if (unlikely(e->fence->context != f->context))
			continue;

		amdgpu_sync_keep_later(&e->fence, f);

		/* Preserve the explicit flag to not lose the pipeline sync */
		e->explicit |= explicit;

		return true;
	}
	return false;
}

/**
 * amdgpu_sync_fence - remember to sync to this fence
 *
 * @sync: sync object to add fence to
 * @f: fence to sync to
 * @explicit: true if this is an explicit dependency
 *
 * Add the fence to the sync object.
 */
int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f,
		      bool explicit)
{
	struct amdgpu_sync_entry *e;

	if (!f)
		return 0;

	if (amdgpu_sync_add_later(sync, f, explicit))
		return 0;

	e = kmem_cache_alloc(amdgpu_sync_slab, GFP_KERNEL);
	if (!e)
		return -ENOMEM;

	e->explicit = explicit;

	hash_add(sync->fences, &e->node, f->context);
	e->fence = dma_fence_get(f);
	return 0;
}

/**
 * amdgpu_sync_vm_fence - remember to sync to this VM fence
 *
 * @sync: sync object to add fence to
 * @fence: the VM fence to add
 *
 * Add the fence to the sync object and remember it as VM update.
 */
int amdgpu_sync_vm_fence(struct amdgpu_sync *sync, struct dma_fence *fence)
{
	if (!fence)
		return 0;

	amdgpu_sync_keep_later(&sync->last_vm_update, fence);
	return amdgpu_sync_fence(sync, fence, false);
}

/**
 * amdgpu_sync_resv - sync to a reservation object
 *
 * @adev: amdgpu device
 * @sync: sync object to add fences from reservation object to
 * @resv: reservation object with embedded fences
 * @owner: owner doing the synchronization, used to decide which fences to skip
 * @explicit_sync: true if we should only sync to the exclusive fence
 *
 * Sync to all relevant fences in the reservation object.
 */
int amdgpu_sync_resv(struct amdgpu_device *adev,
		     struct amdgpu_sync *sync,
		     struct dma_resv *resv,
		     void *owner, bool explicit_sync)
{
	struct dma_resv_list *flist;
	struct dma_fence *f;
	void *fence_owner;
	unsigned i;
	int r = 0;

	if (resv == NULL)
		return -EINVAL;

	/* always sync to the exclusive fence */
	f = dma_resv_get_excl(resv);
	r = amdgpu_sync_fence(sync, f, false);

	flist = dma_resv_get_list(resv);
	if (!flist || r)
		return r;

	for (i = 0; i < flist->shared_count; ++i) {
		f = rcu_dereference_protected(flist->shared[i],
					      dma_resv_held(resv));
		/* We only want to trigger KFD eviction fences on
		 * evict or move jobs. Skip KFD fences otherwise.
		 */
		fence_owner = amdgpu_sync_get_owner(f);
		if (fence_owner == AMDGPU_FENCE_OWNER_KFD &&
		    owner != AMDGPU_FENCE_OWNER_UNDEFINED)
			continue;

		if (amdgpu_sync_same_dev(adev, f)) {
			/* VM updates only sync with moves but not with user
			 * command submissions or KFD eviction fences
			 */
			if (owner == AMDGPU_FENCE_OWNER_VM &&
			    fence_owner != AMDGPU_FENCE_OWNER_UNDEFINED)
				continue;

			/* Ignore fences from the same owner and explicit ones
			 * as long as the owner isn't undefined.
			 */
			if (owner != AMDGPU_FENCE_OWNER_UNDEFINED &&
			    (fence_owner == owner || explicit_sync))
				continue;
		}

		r = amdgpu_sync_fence(sync, f, false);
		if (r)
			break;
	}
	return r;
}
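
/*
 * Illustrative use (a sketch, not taken from an actual caller in this
 * tree): gather the relevant fences of a reservation object into a sync
 * object before touching the buffer it protects. The adev and resv
 * pointers are assumed to be supplied by the caller, with resv locked as
 * the dma_resv_held() check above requires.
 *
 *	struct amdgpu_sync sync;
 *	int r;
 *
 *	amdgpu_sync_create(&sync);
 *	r = amdgpu_sync_resv(adev, &sync, resv,
 *			     AMDGPU_FENCE_OWNER_UNDEFINED, false);
 *	if (!r)
 *		r = amdgpu_sync_wait(&sync, false);
 *	amdgpu_sync_free(&sync);
 */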

/**
 * amdgpu_sync_peek_fence - get the next fence not signaled yet
 *
 * @sync: the sync object
 * @ring: optional ring to use for the test
 *
 * Returns the next fence not signaled yet without removing it from the sync
 * object.
 */
struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync,
					 struct amdgpu_ring *ring)
{
	struct amdgpu_sync_entry *e;
	struct hlist_node *tmp;
	int i;

	hash_for_each_safe(sync->fences, i, tmp, e, node) {
		struct dma_fence *f = e->fence;
		struct drm_sched_fence *s_fence = to_drm_sched_fence(f);

		if (dma_fence_is_signaled(f)) {
			hash_del(&e->node);
			dma_fence_put(f);
			kmem_cache_free(amdgpu_sync_slab, e);
			continue;
		}
		if (ring && s_fence) {
			/* For fences from the same ring it is sufficient
			 * when they are scheduled.
			 */
			if (s_fence->sched == &ring->sched) {
				if (dma_fence_is_signaled(&s_fence->scheduled))
					continue;

				return &s_fence->scheduled;
			}
		}

		return f;
	}

	return NULL;
}

/**
 * amdgpu_sync_get_fence - get the next fence from the sync object
 *
 * @sync: sync object to use
 * @explicit: set to true if the returned fence is an explicit dependency
 *
 * Gets and removes the next fence from the sync object that is not signaled
 * yet.
 */
struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync, bool *explicit)
{
	struct amdgpu_sync_entry *e;
	struct hlist_node *tmp;
	struct dma_fence *f;
	int i;

	hash_for_each_safe(sync->fences, i, tmp, e, node) {
		f = e->fence;
		if (explicit)
			*explicit = e->explicit;

		hash_del(&e->node);
		kmem_cache_free(amdgpu_sync_slab, e);

		if (!dma_fence_is_signaled(f))
			return f;

		dma_fence_put(f);
	}
	return NULL;
}
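
/*
 * Illustrative drain loop (a sketch, not taken from an actual caller in
 * this tree): pull the remaining unsignaled fences out one by one, for
 * example to hand them to a job as dependencies. The reference on each
 * returned fence is owned by the caller and must eventually be dropped.
 *
 *	struct dma_fence *fence;
 *	bool explicit;
 *
 *	while ((fence = amdgpu_sync_get_fence(&sync, &explicit)) != NULL) {
 *		// ... consume fence, honouring the explicit flag ...
 *		dma_fence_put(fence);
 *	}
 */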

/**
 * amdgpu_sync_clone - clone a sync object
 *
 * @source: sync object to clone
 * @clone: pointer to destination sync object
 *
 * Adds references to all unsignaled fences in @source to @clone. Also
 * removes signaled fences from @source while at it.
 */
int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone)
{
	struct amdgpu_sync_entry *e;
	struct hlist_node *tmp;
	struct dma_fence *f;
	int i, r;

	hash_for_each_safe(source->fences, i, tmp, e, node) {
		f = e->fence;
		if (!dma_fence_is_signaled(f)) {
			r = amdgpu_sync_fence(clone, f, e->explicit);
			if (r)
				return r;
		} else {
			hash_del(&e->node);
			dma_fence_put(f);
			kmem_cache_free(amdgpu_sync_slab, e);
		}
	}

	dma_fence_put(clone->last_vm_update);
	clone->last_vm_update = dma_fence_get(source->last_vm_update);

	return 0;
}

/**
 * amdgpu_sync_wait - wait for all fences in the sync object
 *
 * @sync: sync object to wait on
 * @intr: true if the wait should be interruptible
 *
 * Wait for every fence in the sync object to signal, dropping each entry
 * once its fence has signaled.
 */
int amdgpu_sync_wait(struct amdgpu_sync *sync, bool intr)
{
	struct amdgpu_sync_entry *e;
	struct hlist_node *tmp;
	int i, r;

	hash_for_each_safe(sync->fences, i, tmp, e, node) {
		r = dma_fence_wait(e->fence, intr);
		if (r)
			return r;

		hash_del(&e->node);
		dma_fence_put(e->fence);
		kmem_cache_free(amdgpu_sync_slab, e);
	}

	return 0;
}

/**
 * amdgpu_sync_free - free the sync object
 *
 * @sync: sync object to use
 *
 * Free the sync object.
 */
void amdgpu_sync_free(struct amdgpu_sync *sync)
{
	struct amdgpu_sync_entry *e;
	struct hlist_node *tmp;
	unsigned i;

	hash_for_each_safe(sync->fences, i, tmp, e, node) {
		hash_del(&e->node);
		dma_fence_put(e->fence);
		kmem_cache_free(amdgpu_sync_slab, e);
	}

	dma_fence_put(sync->last_vm_update);
}

/**
 * amdgpu_sync_init - init sync object subsystem
 *
 * Allocate the slab allocator.
 */
int amdgpu_sync_init(void)
{
	amdgpu_sync_slab = kmem_cache_create(
		"amdgpu_sync", sizeof(struct amdgpu_sync_entry), 0,
		SLAB_HWCACHE_ALIGN, NULL);
	if (!amdgpu_sync_slab)
		return -ENOMEM;

	return 0;
}

/**
 * amdgpu_sync_fini - fini sync object subsystem
 *
 * Free the slab allocator.
 */
void amdgpu_sync_fini(void)
{
	kmem_cache_destroy(amdgpu_sync_slab);
}