xref: /openbsd/sys/dev/pci/drm/i915/gt/intel_context.c (revision a35b0b99)
// SPDX-License-Identifier: MIT
/*
 * Copyright © 2019 Intel Corporation
 */

#include "gem/i915_gem_context.h"
#include "gem/i915_gem_pm.h"

#include "i915_drv.h"
#include "i915_trace.h"

#include "intel_context.h"
#include "intel_engine.h"
#include "intel_engine_pm.h"
#include "intel_ring.h"

static struct pool slab_ce;

static struct intel_context *intel_context_alloc(void)
{
#ifdef __linux__
	return kmem_cache_zalloc(slab_ce, GFP_KERNEL);
#else
	return pool_get(&slab_ce, PR_WAITOK | PR_ZERO);
#endif
}

static void rcu_context_free(struct rcu_head *rcu)
{
	struct intel_context *ce = container_of(rcu, typeof(*ce), rcu);

	trace_intel_context_free(ce);
#ifdef __linux__
	kmem_cache_free(slab_ce, ce);
#else
	pool_put(&slab_ce, ce);
#endif
}

void intel_context_free(struct intel_context *ce)
{
	call_rcu(&ce->rcu, rcu_context_free);
}

struct intel_context *
intel_context_create(struct intel_engine_cs *engine)
{
	struct intel_context *ce;

	ce = intel_context_alloc();
	if (!ce)
		return ERR_PTR(-ENOMEM);

	intel_context_init(ce, engine);
	trace_intel_context_create(ce);
	return ce;
}

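/*
 * Lazily allocate the backend state for @ce under ce->pin_mutex. Allocation
 * is skipped if CONTEXT_ALLOC_BIT is already set; a banned context fails
 * with -EIO, and an interrupted lock acquisition returns -EINTR.
 */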
int intel_context_alloc_state(struct intel_context *ce)
{
	int err = 0;

	if (mutex_lock_interruptible(&ce->pin_mutex))
		return -EINTR;

	if (!test_bit(CONTEXT_ALLOC_BIT, &ce->flags)) {
		if (intel_context_is_banned(ce)) {
			err = -EIO;
			goto unlock;
		}

		err = ce->ops->alloc(ce);
		if (unlikely(err))
			goto unlock;

		set_bit(CONTEXT_ALLOC_BIT, &ce->flags);
	}

unlock:
	mutex_unlock(&ce->pin_mutex);
	return err;
}

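/*
 * Take an active reference on @ce and, unless the context is a barrier,
 * its engine uses GuC submission, or it is part of a parallel set,
 * preallocate the barrier tracking nodes needed when that reference is
 * finally retired.
 */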
static int intel_context_active_acquire(struct intel_context *ce)
{
	int err;

	__i915_active_acquire(&ce->active);

	if (intel_context_is_barrier(ce) || intel_engine_uses_guc(ce->engine) ||
	    intel_context_is_parallel(ce))
		return 0;

	/* Preallocate tracking nodes */
	err = i915_active_acquire_preallocate_barrier(&ce->active,
						      ce->engine);
	if (err)
		i915_active_release(&ce->active);

	return err;
}

static void intel_context_active_release(struct intel_context *ce)
{
	/* Nodes preallocated in intel_context_active() */
	i915_active_acquire_barrier(&ce->active);
	i915_active_release(&ce->active);
}

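/*
 * Pin the context state VMA high in the GGTT, above the engine's pin bias,
 * take its active reference and mark the backing object unshrinkable and
 * dirty so the shrinker leaves it alone until the state is unpinned.
 */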
static int __context_pin_state(struct i915_vma *vma, struct i915_gem_ww_ctx *ww)
{
	unsigned int bias = i915_ggtt_pin_bias(vma) | PIN_OFFSET_BIAS;
	int err;

	err = i915_ggtt_pin(vma, ww, 0, bias | PIN_HIGH);
	if (err)
		return err;

	err = i915_active_acquire(&vma->active);
	if (err)
		goto err_unpin;

	/*
	 * And mark it as a globally pinned object to let the shrinker know
	 * it cannot reclaim the object until we release it.
	 */
	i915_vma_make_unshrinkable(vma);
	vma->obj->mm.dirty = true;

	return 0;

err_unpin:
	i915_vma_unpin(vma);
	return err;
}

static void __context_unpin_state(struct i915_vma *vma)
{
	i915_vma_make_shrinkable(vma);
	i915_active_release(&vma->active);
	__i915_vma_unpin(vma);
}

static int __ring_active(struct intel_ring *ring,
			 struct i915_gem_ww_ctx *ww)
{
	int err;

	err = intel_ring_pin(ring, ww);
	if (err)
		return err;

	err = i915_active_acquire(&ring->vma->active);
	if (err)
		goto err_pin;

	return 0;

err_pin:
	intel_ring_unpin(ring);
	return err;
}

static void __ring_retire(struct intel_ring *ring)
{
	i915_active_release(&ring->vma->active);
	intel_ring_unpin(ring);
}

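/*
 * Acquire the backing resources for a context ahead of pinning it: the
 * ring, the timeline and (if present) the context state object. On any
 * failure the resources already acquired are released in reverse order.
 */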
static int intel_context_pre_pin(struct intel_context *ce,
				 struct i915_gem_ww_ctx *ww)
{
	int err;

	CE_TRACE(ce, "active\n");

	err = __ring_active(ce->ring, ww);
	if (err)
		return err;

	err = intel_timeline_pin(ce->timeline, ww);
	if (err)
		goto err_ring;

	if (!ce->state)
		return 0;

	err = __context_pin_state(ce->state, ww);
	if (err)
		goto err_timeline;

	return 0;

err_timeline:
	intel_timeline_unpin(ce->timeline);
err_ring:
	__ring_retire(ce->ring);
	return err;
}

static void intel_context_post_unpin(struct intel_context *ce)
{
	if (ce->state)
		__context_unpin_state(ce->state);

	intel_timeline_unpin(ce->timeline);
	__ring_retire(ce->ring);
}

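/*
 * Pin @ce for use by a request, under an outer ww acquire context.
 *
 * The objects backing the timeline hwsp, the ring and the context state
 * are locked and pre-pinned first, so that __intel_context_active() never
 * has to take dma_resv locks under ce->pin_mutex. Only the first pinner
 * (pin_count 0 -> 1) invokes the backend ops->pin(); later callers simply
 * bump the pin count. Returns -ENOENT if the context has been closed, and
 * -EDEADLK if the ww transaction must be backed off and restarted.
 */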
int __intel_context_do_pin_ww(struct intel_context *ce,
			      struct i915_gem_ww_ctx *ww)
{
	bool handoff = false;
	void *vaddr;
	int err = 0;

	if (unlikely(!test_bit(CONTEXT_ALLOC_BIT, &ce->flags))) {
		err = intel_context_alloc_state(ce);
		if (err)
			return err;
	}

	/*
	 * We always pin the context/ring/timeline here, to ensure a pin
	 * refcount for __intel_context_active(), which prevents a lock
	 * inversion of ce->pin_mutex vs dma_resv_lock().
	 */

	err = i915_gem_object_lock(ce->timeline->hwsp_ggtt->obj, ww);
	if (!err)
		err = i915_gem_object_lock(ce->ring->vma->obj, ww);
	if (!err && ce->state)
		err = i915_gem_object_lock(ce->state->obj, ww);
	if (!err)
		err = intel_context_pre_pin(ce, ww);
	if (err)
		return err;

	err = ce->ops->pre_pin(ce, ww, &vaddr);
	if (err)
		goto err_ctx_unpin;

	err = i915_active_acquire(&ce->active);
	if (err)
		goto err_post_unpin;

	err = mutex_lock_interruptible(&ce->pin_mutex);
	if (err)
		goto err_release;

	intel_engine_pm_might_get(ce->engine);

	if (unlikely(intel_context_is_closed(ce))) {
		err = -ENOENT;
		goto err_unlock;
	}

	if (likely(!atomic_add_unless(&ce->pin_count, 1, 0))) {
		err = intel_context_active_acquire(ce);
		if (unlikely(err))
			goto err_unlock;

		err = ce->ops->pin(ce, vaddr);
		if (err) {
			intel_context_active_release(ce);
			goto err_unlock;
		}

		CE_TRACE(ce, "pin ring:{start:%08x, head:%04x, tail:%04x}\n",
			 i915_ggtt_offset(ce->ring->vma),
			 ce->ring->head, ce->ring->tail);

		handoff = true;
		smp_mb__before_atomic(); /* flush pin before it is visible */
		atomic_inc(&ce->pin_count);
	}

	GEM_BUG_ON(!intel_context_is_pinned(ce)); /* no overflow! */

	trace_intel_context_do_pin(ce);

err_unlock:
	mutex_unlock(&ce->pin_mutex);
err_release:
	i915_active_release(&ce->active);
err_post_unpin:
	if (!handoff)
		ce->ops->post_unpin(ce);
err_ctx_unpin:
	intel_context_post_unpin(ce);

	/*
	 * Unlock the hwsp_ggtt object since it's shared.
	 * In principle we can unlock all the global state locked above
	 * since it's pinned and doesn't need fencing, and will
	 * thus remain resident until it is explicitly unpinned.
	 */
	i915_gem_ww_unlock_single(ce->timeline->hwsp_ggtt->obj);

	return err;
}

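/*
 * Convenience wrapper that supplies its own ww context and retries on
 * -EDEADLK. A typical caller pins around request construction, e.g.
 * (sketch, using the intel_context_pin()/intel_context_unpin() helpers):
 *
 *	err = intel_context_pin(ce);
 *	if (err)
 *		return err;
 *	... build and submit requests on ce ...
 *	intel_context_unpin(ce);
 */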
int __intel_context_do_pin(struct intel_context *ce)
{
	struct i915_gem_ww_ctx ww;
	int err;

	i915_gem_ww_ctx_init(&ww, true);
retry:
	err = __intel_context_do_pin_ww(ce, &ww);
	if (err == -EDEADLK) {
		err = i915_gem_ww_ctx_backoff(&ww);
		if (!err)
			goto retry;
	}
	i915_gem_ww_ctx_fini(&ww);
	return err;
}

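/*
 * Drop @sub pin references from @ce; the final unpin releases the backend
 * mapping and the context's active tracking (which may asynchronously free
 * the context, hence the temporary extra reference below).
 */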
void __intel_context_do_unpin(struct intel_context *ce, int sub)
{
	if (!atomic_sub_and_test(sub, &ce->pin_count))
		return;

	CE_TRACE(ce, "unpin\n");
	ce->ops->unpin(ce);
	ce->ops->post_unpin(ce);

	/*
	 * Once released, we may asynchronously drop the active reference.
	 * As that may be the only reference keeping the context alive,
	 * take an extra now so that it is not freed before we finish
	 * dereferencing it.
	 */
	intel_context_get(ce);
	intel_context_active_release(ce);
	trace_intel_context_do_unpin(ce);
	intel_context_put(ce);
}

static void __intel_context_retire(struct i915_active *active)
{
	struct intel_context *ce = container_of(active, typeof(*ce), active);

	CE_TRACE(ce, "retire runtime: { total:%lluns, avg:%lluns }\n",
		 intel_context_get_total_runtime_ns(ce),
		 intel_context_get_avg_runtime_ns(ce));

	set_bit(CONTEXT_VALID_BIT, &ce->flags);
	intel_context_post_unpin(ce);
	intel_context_put(ce);
}

static int __intel_context_active(struct i915_active *active)
{
	struct intel_context *ce = container_of(active, typeof(*ce), active);

	intel_context_get(ce);

	/* everything should already be activated by intel_context_pre_pin() */
	GEM_WARN_ON(!i915_active_acquire_if_busy(&ce->ring->vma->active));
	__intel_ring_pin(ce->ring);

	__intel_timeline_pin(ce->timeline);

	if (ce->state) {
		GEM_WARN_ON(!i915_active_acquire_if_busy(&ce->state->active));
		__i915_vma_pin(ce->state);
		i915_vma_make_unshrinkable(ce->state);
	}

	return 0;
}

static int
sw_fence_dummy_notify(struct i915_sw_fence *sf,
		      enum i915_sw_fence_notify state)
{
	return NOTIFY_DONE;
}

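/*
 * Initialize a freshly allocated context for @engine: take a reference on
 * the engine's GT address space, set up the locks, lists and GuC
 * bookkeeping, and register the active/retire callbacks. The ring and
 * context state are allocated later by the backend's ->alloc().
 */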
void
intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine)
{
	GEM_BUG_ON(!engine->cops);
	GEM_BUG_ON(!engine->gt->vm);

	kref_init(&ce->ref);

	ce->engine = engine;
	ce->ops = engine->cops;
	ce->sseu = engine->sseu;
	ce->ring = NULL;
	ce->ring_size = SZ_4K;

	ewma_runtime_init(&ce->stats.runtime.avg);

	ce->vm = i915_vm_get(engine->gt->vm);

	/* NB ce->signal_link/lock is used under RCU */
	mtx_init(&ce->signal_lock, IPL_NONE);
	INIT_LIST_HEAD(&ce->signals);

	rw_init(&ce->pin_mutex, "cepin");

	mtx_init(&ce->guc_state.lock, IPL_TTY);
	INIT_LIST_HEAD(&ce->guc_state.fences);
	INIT_LIST_HEAD(&ce->guc_state.requests);

	ce->guc_id.id = GUC_INVALID_CONTEXT_ID;
	INIT_LIST_HEAD(&ce->guc_id.link);

	INIT_LIST_HEAD(&ce->destroyed_link);

	INIT_LIST_HEAD(&ce->parallel.child_list);

	/*
	 * Initialize the fence in the completed state, as it is expected to
	 * be complete unless there is a pending schedule disable outstanding.
	 */
	i915_sw_fence_init(&ce->guc_state.blocked,
			   sw_fence_dummy_notify);
	i915_sw_fence_commit(&ce->guc_state.blocked);

	i915_active_init(&ce->active,
			 __intel_context_active, __intel_context_retire, 0);
}

void intel_context_fini(struct intel_context *ce)
{
	struct intel_context *child, *next;

	if (ce->timeline)
		intel_timeline_put(ce->timeline);
	i915_vm_put(ce->vm);

	/* Need to put the creation ref for the children */
	if (intel_context_is_parent(ce))
		for_each_child_safe(ce, child, next)
			intel_context_put(child);

	mutex_destroy(&ce->pin_mutex);
	i915_active_fini(&ce->active);
	i915_sw_fence_fini(&ce->guc_state.blocked);
}

void i915_context_module_exit(void)
{
#ifdef __linux__
	kmem_cache_destroy(slab_ce);
#else
	pool_destroy(&slab_ce);
#endif
}

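/*
 * Create the allocator backing intel_context objects: a slab cache on
 * Linux, a cache-line aligned pool ("ictx") on OpenBSD.
 */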
int __init i915_context_module_init(void)
{
#ifdef __linux__
	slab_ce = KMEM_CACHE(intel_context, SLAB_HWCACHE_ALIGN);
	if (!slab_ce)
		return -ENOMEM;
#else
	pool_init(&slab_ce, sizeof(struct intel_context),
	    CACHELINESIZE, IPL_TTY, 0, "ictx", NULL);
#endif

	return 0;
}

void intel_context_enter_engine(struct intel_context *ce)
{
	intel_engine_pm_get(ce->engine);
	intel_timeline_enter(ce->timeline);
}

void intel_context_exit_engine(struct intel_context *ce)
{
	intel_timeline_exit(ce->timeline);
	intel_engine_pm_put(ce->engine);
}

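/*
 * Prepare @rq (built on another context) to modify @ce: order it after any
 * in-flight activity on ce's timeline, then add it to ce's active tracker
 * so the context image stays pinned until the request retires.
 */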
int intel_context_prepare_remote_request(struct intel_context *ce,
					 struct i915_request *rq)
{
	struct intel_timeline *tl = ce->timeline;
	int err;

	/* Only suitable for use in remotely modifying this context */
	GEM_BUG_ON(rq->context == ce);

	if (rcu_access_pointer(rq->timeline) != tl) { /* timeline sharing! */
		/* Queue this switch after current activity by this context. */
		err = i915_active_fence_set(&tl->last_request, rq);
		if (err)
			return err;
	}

	/*
	 * Guarantee that the context image and the timeline remain pinned
	 * until the modifying request is retired, by setting the ce activity
	 * tracker.
	 *
	 * We only need to take one pin on account of it; in other words, we
	 * transfer the pinned ce object to the tracked active request.
	 */
	GEM_BUG_ON(i915_active_is_idle(&ce->active));
	return i915_active_add_request(&ce->active, rq);
}

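/*
 * Pin @ce (retrying on ww deadlock), create a request on it and drop the
 * pin again. The lockdep juggling at the end makes timeline->mutex appear
 * as the outermost lock so selftests using this helper do not trip
 * lock-nesting checks.
 */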
struct i915_request *intel_context_create_request(struct intel_context *ce)
{
	struct i915_gem_ww_ctx ww;
	struct i915_request *rq;
	int err;

	i915_gem_ww_ctx_init(&ww, true);
retry:
	err = intel_context_pin_ww(ce, &ww);
	if (!err) {
		rq = i915_request_create(ce);
		intel_context_unpin(ce);
	} else if (err == -EDEADLK) {
		err = i915_gem_ww_ctx_backoff(&ww);
		if (!err)
			goto retry;
		rq = ERR_PTR(err);
	} else {
		rq = ERR_PTR(err);
	}

	i915_gem_ww_ctx_fini(&ww);

	if (IS_ERR(rq))
		return rq;

	/*
	 * timeline->mutex should be the inner lock, but is used as outer lock.
	 * Hack around this to shut up lockdep in selftests..
	 */
	lockdep_unpin_lock(&ce->timeline->mutex, rq->cookie);
	mutex_release(&ce->timeline->mutex.dep_map, _RET_IP_);
	mutex_acquire(&ce->timeline->mutex.dep_map, SINGLE_DEPTH_NESTING, 0, _RET_IP_);
	rq->cookie = lockdep_pin_lock(&ce->timeline->mutex);

	return rq;
}

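/*
 * Find the earliest request submitted on @ce that has not yet completed,
 * scanning the parent context's GuC request list (GuC submission only),
 * and return a reference to it, or NULL if everything has completed.
 */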
struct i915_request *intel_context_get_active_request(struct intel_context *ce)
{
	struct intel_context *parent = intel_context_to_parent(ce);
	struct i915_request *rq, *active = NULL;
	unsigned long flags;

	GEM_BUG_ON(!intel_engine_uses_guc(ce->engine));

	/*
	 * We search the parent list to find an active request on the submitted
	 * context. The parent list contains the requests for all the contexts
	 * in the relationship so we have to do a compare of each request's
	 * context.
	 */
	spin_lock_irqsave(&parent->guc_state.lock, flags);
	list_for_each_entry_reverse(rq, &parent->guc_state.requests,
				    sched.link) {
		if (rq->context != ce)
			continue;
		if (i915_request_completed(rq))
			break;

		active = rq;
	}
	if (active)
		active = i915_request_get_rcu(active);
	spin_unlock_irqrestore(&parent->guc_state.lock, flags);

	return active;
}

void intel_context_bind_parent_child(struct intel_context *parent,
				     struct intel_context *child)
{
	/*
	 * It is the caller's responsibility to validate that this function is
	 * used correctly, but we use GEM_BUG_ON here to ensure that they do.
	 */
	GEM_BUG_ON(intel_context_is_pinned(parent));
	GEM_BUG_ON(intel_context_is_child(parent));
	GEM_BUG_ON(intel_context_is_pinned(child));
	GEM_BUG_ON(intel_context_is_child(child));
	GEM_BUG_ON(intel_context_is_parent(child));

	parent->parallel.child_index = parent->parallel.number_children++;
	list_add_tail(&child->parallel.child_link,
		      &parent->parallel.child_list);
	child->parallel.parent = parent;
}

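/*
 * Total time the context has spent on the GPU, in nanoseconds: the
 * accumulated runtime (converted from GT clock cycles where the backend
 * reports cycles) plus the time since the current activation, if any.
 */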
u64 intel_context_get_total_runtime_ns(struct intel_context *ce)
{
	u64 total, active;

	if (ce->ops->update_stats)
		ce->ops->update_stats(ce);

	total = ce->stats.runtime.total;
	if (ce->ops->flags & COPS_RUNTIME_CYCLES)
		total *= ce->engine->gt->clock_period_ns;

	active = READ_ONCE(ce->stats.active);
	if (active)
		active = intel_context_clock() - active;

	return total + active;
}

u64 intel_context_get_avg_runtime_ns(struct intel_context *ce)
{
	u64 avg = ewma_runtime_read(&ce->stats.runtime.avg);

	if (ce->ops->flags & COPS_RUNTIME_CYCLES)
		avg *= ce->engine->gt->clock_period_ns;

	return avg;
}

bool intel_context_ban(struct intel_context *ce, struct i915_request *rq)
{
	bool ret = intel_context_set_banned(ce);

	trace_intel_context_ban(ce);

	if (ce->ops->revoke)
		ce->ops->revoke(ce, rq,
				INTEL_CONTEXT_BANNED_PREEMPT_TIMEOUT_MS);

	return ret;
}

bool intel_context_revoke(struct intel_context *ce)
{
	bool ret = intel_context_set_exiting(ce);

	if (ce->ops->revoke)
		ce->ops->revoke(ce, NULL, ce->engine->props.preempt_timeout_ms);

	return ret;
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftest_context.c"
#endif