// SPDX-License-Identifier: MIT
/*
 * Copyright © 2019 Intel Corporation
 */

#include "gem/i915_gem_context.h"
#include "gem/i915_gem_pm.h"

#include "i915_drv.h"
#include "i915_trace.h"

#include "intel_context.h"
#include "intel_engine.h"
#include "intel_engine_pm.h"
#include "intel_ring.h"

#ifdef __linux__
static struct kmem_cache *slab_ce;
#else
static struct pool slab_ce;
#endif

static struct intel_context *intel_context_alloc(void)
{
#ifdef __linux__
	return kmem_cache_zalloc(slab_ce, GFP_KERNEL);
#else
	return pool_get(&slab_ce, PR_WAITOK | PR_ZERO);
#endif
}

static void rcu_context_free(struct rcu_head *rcu)
{
	struct intel_context *ce = container_of(rcu, typeof(*ce), rcu);

	trace_intel_context_free(ce);
#ifdef __linux__
	kmem_cache_free(slab_ce, ce);
#else
	pool_put(&slab_ce, ce);
#endif
}

void intel_context_free(struct intel_context *ce)
{
	call_rcu(&ce->rcu, rcu_context_free);
}

struct intel_context *
intel_context_create(struct intel_engine_cs *engine)
{
	struct intel_context *ce;

	ce = intel_context_alloc();
	if (!ce)
		return ERR_PTR(-ENOMEM);

	intel_context_init(ce, engine);
	trace_intel_context_create(ce);
	return ce;
}

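/*
 * Hedged sketch of the usual intel_context lifecycle from a caller's point of
 * view, using the pin/unpin/put wrappers declared in intel_context.h (error
 * handling elided; not a verbatim copy of any in-tree user):
 *
 *	ce = intel_context_create(engine);
 *	if (IS_ERR(ce))
 *		return PTR_ERR(ce);
 *
 *	err = intel_context_pin(ce);
 *	... build and submit requests against ce ...
 *	intel_context_unpin(ce);
 *	intel_context_put(ce);
 */
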
int intel_context_alloc_state(struct intel_context *ce)
{
	int err = 0;

	if (mutex_lock_interruptible(&ce->pin_mutex))
		return -EINTR;

	if (!test_bit(CONTEXT_ALLOC_BIT, &ce->flags)) {
		if (intel_context_is_banned(ce)) {
			err = -EIO;
			goto unlock;
		}

		err = ce->ops->alloc(ce);
		if (unlikely(err))
			goto unlock;

		set_bit(CONTEXT_ALLOC_BIT, &ce->flags);
	}

unlock:
	mutex_unlock(&ce->pin_mutex);
	return err;
}

static int intel_context_active_acquire(struct intel_context *ce)
{
	int err;

	__i915_active_acquire(&ce->active);

	if (intel_context_is_barrier(ce) || intel_engine_uses_guc(ce->engine) ||
	    intel_context_is_parallel(ce))
		return 0;

	/* Preallocate tracking nodes */
	err = i915_active_acquire_preallocate_barrier(&ce->active,
						      ce->engine);
	if (err)
		i915_active_release(&ce->active);

	return err;
}

static void intel_context_active_release(struct intel_context *ce)
{
	/* Nodes preallocated in intel_context_active() */
	i915_active_acquire_barrier(&ce->active);
	i915_active_release(&ce->active);
}

static int __context_pin_state(struct i915_vma *vma, struct i915_gem_ww_ctx *ww)
{
	unsigned int bias = i915_ggtt_pin_bias(vma) | PIN_OFFSET_BIAS;
	int err;

	err = i915_ggtt_pin(vma, ww, 0, bias | PIN_HIGH);
	if (err)
		return err;

	err = i915_active_acquire(&vma->active);
	if (err)
		goto err_unpin;

	/*
	 * And mark it as a globally pinned object to let the shrinker know
	 * it cannot reclaim the object until we release it.
	 */
	i915_vma_make_unshrinkable(vma);
	vma->obj->mm.dirty = true;

	return 0;

err_unpin:
	i915_vma_unpin(vma);
	return err;
}

static void __context_unpin_state(struct i915_vma *vma)
{
	i915_vma_make_shrinkable(vma);
	i915_active_release(&vma->active);
	__i915_vma_unpin(vma);
}

static int __ring_active(struct intel_ring *ring,
			 struct i915_gem_ww_ctx *ww)
{
	int err;

	err = intel_ring_pin(ring, ww);
	if (err)
		return err;

	err = i915_active_acquire(&ring->vma->active);
	if (err)
		goto err_pin;

	return 0;

err_pin:
	intel_ring_unpin(ring);
	return err;
}

static void __ring_retire(struct intel_ring *ring)
{
	i915_active_release(&ring->vma->active);
	intel_ring_unpin(ring);
}

static int intel_context_pre_pin(struct intel_context *ce,
				 struct i915_gem_ww_ctx *ww)
{
	int err;

	CE_TRACE(ce, "active\n");

	err = __ring_active(ce->ring, ww);
	if (err)
		return err;

	err = intel_timeline_pin(ce->timeline, ww);
	if (err)
		goto err_ring;

	if (!ce->state)
		return 0;

	err = __context_pin_state(ce->state, ww);
	if (err)
		goto err_timeline;

	return 0;

err_timeline:
	intel_timeline_unpin(ce->timeline);
err_ring:
	__ring_retire(ce->ring);
	return err;
}

static void intel_context_post_unpin(struct intel_context *ce)
{
	if (ce->state)
		__context_unpin_state(ce->state);

	intel_timeline_unpin(ce->timeline);
	__ring_retire(ce->ring);
}

int __intel_context_do_pin_ww(struct intel_context *ce,
			      struct i915_gem_ww_ctx *ww)
{
	bool handoff = false;
	void *vaddr;
	int err = 0;

	if (unlikely(!test_bit(CONTEXT_ALLOC_BIT, &ce->flags))) {
		err = intel_context_alloc_state(ce);
		if (err)
			return err;
	}

	/*
	 * We always pin the context/ring/timeline here, to ensure a pin
	 * refcount for __intel_context_active(), which prevents a lock
	 * inversion of ce->pin_mutex vs dma_resv_lock().
	 */

	err = i915_gem_object_lock(ce->timeline->hwsp_ggtt->obj, ww);
	if (!err)
		err = i915_gem_object_lock(ce->ring->vma->obj, ww);
	if (!err && ce->state)
		err = i915_gem_object_lock(ce->state->obj, ww);
	if (!err)
		err = intel_context_pre_pin(ce, ww);
	if (err)
		return err;

	err = ce->ops->pre_pin(ce, ww, &vaddr);
	if (err)
		goto err_ctx_unpin;

	err = i915_active_acquire(&ce->active);
	if (err)
		goto err_post_unpin;

	err = mutex_lock_interruptible(&ce->pin_mutex);
	if (err)
		goto err_release;

	intel_engine_pm_might_get(ce->engine);

	if (unlikely(intel_context_is_closed(ce))) {
		err = -ENOENT;
		goto err_unlock;
	}

	if (likely(!atomic_add_unless(&ce->pin_count, 1, 0))) {
		err = intel_context_active_acquire(ce);
		if (unlikely(err))
			goto err_unlock;

		err = ce->ops->pin(ce, vaddr);
		if (err) {
			intel_context_active_release(ce);
			goto err_unlock;
		}

		CE_TRACE(ce, "pin ring:{start:%08x, head:%04x, tail:%04x}\n",
			 i915_ggtt_offset(ce->ring->vma),
			 ce->ring->head, ce->ring->tail);

		handoff = true;
		smp_mb__before_atomic(); /* flush pin before it is visible */
		atomic_inc(&ce->pin_count);
	}

	GEM_BUG_ON(!intel_context_is_pinned(ce)); /* no overflow! */

	trace_intel_context_do_pin(ce);

err_unlock:
	mutex_unlock(&ce->pin_mutex);
err_release:
	i915_active_release(&ce->active);
err_post_unpin:
	if (!handoff)
		ce->ops->post_unpin(ce);
err_ctx_unpin:
	intel_context_post_unpin(ce);

	/*
	 * Unlock the hwsp_ggtt object since it's shared.
	 * In principle we can unlock all the global state locked above
	 * since it's pinned and doesn't need fencing, and will
	 * thus remain resident until it is explicitly unpinned.
	 */
	i915_gem_ww_unlock_single(ce->timeline->hwsp_ggtt->obj);

	return err;
}

int __intel_context_do_pin(struct intel_context *ce)
{
	struct i915_gem_ww_ctx ww;
	int err;

	i915_gem_ww_ctx_init(&ww, true);
retry:
	err = __intel_context_do_pin_ww(ce, &ww);
	if (err == -EDEADLK) {
		err = i915_gem_ww_ctx_backoff(&ww);
		if (!err)
			goto retry;
	}
	i915_gem_ww_ctx_fini(&ww);
	return err;
}

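/*
 * Callers that must lock additional GEM objects in the same ww transaction
 * use intel_context_pin_ww() from within their own backoff loop instead of
 * the plain pin above; intel_context_create_request() below shows that
 * pattern.
 */
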
void __intel_context_do_unpin(struct intel_context *ce, int sub)
{
	if (!atomic_sub_and_test(sub, &ce->pin_count))
		return;

	CE_TRACE(ce, "unpin\n");
	ce->ops->unpin(ce);
	ce->ops->post_unpin(ce);

	/*
	 * Once released, we may asynchronously drop the active reference.
	 * As that may be the only reference keeping the context alive,
	 * take an extra reference now so that the context is not freed
	 * before we finish dereferencing it.
	 */
	intel_context_get(ce);
	intel_context_active_release(ce);
	trace_intel_context_do_unpin(ce);
	intel_context_put(ce);
}

static void __intel_context_retire(struct i915_active *active)
{
	struct intel_context *ce = container_of(active, typeof(*ce), active);

	CE_TRACE(ce, "retire runtime: { total:%lluns, avg:%lluns }\n",
		 intel_context_get_total_runtime_ns(ce),
		 intel_context_get_avg_runtime_ns(ce));

	set_bit(CONTEXT_VALID_BIT, &ce->flags);
	intel_context_post_unpin(ce);
	intel_context_put(ce);
}

static int __intel_context_active(struct i915_active *active)
{
	struct intel_context *ce = container_of(active, typeof(*ce), active);

	intel_context_get(ce);

	/* everything should already be activated by intel_context_pre_pin() */
	GEM_WARN_ON(!i915_active_acquire_if_busy(&ce->ring->vma->active));
	__intel_ring_pin(ce->ring);

	__intel_timeline_pin(ce->timeline);

	if (ce->state) {
		GEM_WARN_ON(!i915_active_acquire_if_busy(&ce->state->active));
		__i915_vma_pin(ce->state);
		i915_vma_make_unshrinkable(ce->state);
	}

	return 0;
}

static int
sw_fence_dummy_notify(struct i915_sw_fence *sf,
		      enum i915_sw_fence_notify state)
{
	return NOTIFY_DONE;
}

void
intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine)
{
	GEM_BUG_ON(!engine->cops);
	GEM_BUG_ON(!engine->gt->vm);

	kref_init(&ce->ref);

	ce->engine = engine;
	ce->ops = engine->cops;
	ce->sseu = engine->sseu;
	ce->ring = NULL;
	ce->ring_size = SZ_4K;

	ewma_runtime_init(&ce->stats.runtime.avg);

	ce->vm = i915_vm_get(engine->gt->vm);

	/* NB ce->signal_link/lock is used under RCU */
	mtx_init(&ce->signal_lock, IPL_NONE);
	INIT_LIST_HEAD(&ce->signals);

	rw_init(&ce->pin_mutex, "cepin");

	mtx_init(&ce->guc_state.lock, IPL_TTY);
	INIT_LIST_HEAD(&ce->guc_state.fences);
	INIT_LIST_HEAD(&ce->guc_state.requests);

	ce->guc_id.id = GUC_INVALID_CONTEXT_ID;
	INIT_LIST_HEAD(&ce->guc_id.link);

	INIT_LIST_HEAD(&ce->destroyed_link);

	INIT_LIST_HEAD(&ce->parallel.child_list);

	/*
	 * Initialize the fence to be complete, as it is expected to be
	 * complete unless there is a pending schedule disable outstanding.
	 */
	i915_sw_fence_init(&ce->guc_state.blocked,
			   sw_fence_dummy_notify);
	i915_sw_fence_commit(&ce->guc_state.blocked);

	i915_active_init(&ce->active,
			 __intel_context_active, __intel_context_retire, 0);
}

void intel_context_fini(struct intel_context *ce)
{
	struct intel_context *child, *next;

	if (ce->timeline)
		intel_timeline_put(ce->timeline);
	i915_vm_put(ce->vm);

	/* Need to put the creation ref for the children */
	if (intel_context_is_parent(ce))
		for_each_child_safe(ce, child, next)
			intel_context_put(child);

	mutex_destroy(&ce->pin_mutex);
	i915_active_fini(&ce->active);
	i915_sw_fence_fini(&ce->guc_state.blocked);
}

void i915_context_module_exit(void)
{
#ifdef __linux__
	kmem_cache_destroy(slab_ce);
#else
	pool_destroy(&slab_ce);
#endif
}

int __init i915_context_module_init(void)
{
#ifdef __linux__
	slab_ce = KMEM_CACHE(intel_context, SLAB_HWCACHE_ALIGN);
	if (!slab_ce)
		return -ENOMEM;
#else
	pool_init(&slab_ce, sizeof(struct intel_context),
	    CACHELINESIZE, IPL_TTY, 0, "ictx", NULL);
#endif

	return 0;
}

void intel_context_enter_engine(struct intel_context *ce)
{
	intel_engine_pm_get(ce->engine);
	intel_timeline_enter(ce->timeline);
}

void intel_context_exit_engine(struct intel_context *ce)
{
	intel_timeline_exit(ce->timeline);
	intel_engine_pm_put(ce->engine);
}

int intel_context_prepare_remote_request(struct intel_context *ce,
					 struct i915_request *rq)
{
	struct intel_timeline *tl = ce->timeline;
	int err;

	/* Only suitable for use in remotely modifying this context */
	GEM_BUG_ON(rq->context == ce);

	if (rcu_access_pointer(rq->timeline) != tl) { /* timeline sharing! */
		/* Queue this switch after current activity by this context. */
		err = i915_active_fence_set(&tl->last_request, rq);
		if (err)
			return err;
	}

	/*
	 * Guarantee that the context image and the timeline remain pinned
	 * until the modifying request is retired, by adding it to the ce
	 * activity tracker. We only need to take one pin on their account;
	 * in other words, transfer the pinned ce object to the tracked
	 * active request.
	 */
	GEM_BUG_ON(i915_active_is_idle(&ce->active));
	return i915_active_add_request(&ce->active, rq);
}

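/*
 * Hedged sketch of the remote-modification pattern this helper supports
 * (loosely modelled on the SSEU reconfiguration path; the emission step is a
 * placeholder, not a real function, and error handling is elided):
 *
 *	rq = intel_engine_create_kernel_request(ce->engine);
 *	if (IS_ERR(rq))
 *		return PTR_ERR(rq);
 *
 *	err = intel_context_prepare_remote_request(ce, rq);
 *	if (!err)
 *		err = emit_commands_touching_ce(rq);
 *
 *	i915_request_add(rq);
 */
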
struct i915_request *intel_context_create_request(struct intel_context *ce)
{
	struct i915_gem_ww_ctx ww;
	struct i915_request *rq;
	int err;

	i915_gem_ww_ctx_init(&ww, true);
retry:
	err = intel_context_pin_ww(ce, &ww);
	if (!err) {
		rq = i915_request_create(ce);
		intel_context_unpin(ce);
	} else if (err == -EDEADLK) {
		err = i915_gem_ww_ctx_backoff(&ww);
		if (!err)
			goto retry;
		rq = ERR_PTR(err);
	} else {
		rq = ERR_PTR(err);
	}

	i915_gem_ww_ctx_fini(&ww);

	if (IS_ERR(rq))
		return rq;

	/*
	 * timeline->mutex should be the inner lock, but is used as the outer
	 * lock here. Hack around this to shut up lockdep in selftests.
	 */
	lockdep_unpin_lock(&ce->timeline->mutex, rq->cookie);
	mutex_release(&ce->timeline->mutex.dep_map, _RET_IP_);
	mutex_acquire(&ce->timeline->mutex.dep_map, SINGLE_DEPTH_NESTING, 0, _RET_IP_);
	rq->cookie = lockdep_pin_lock(&ce->timeline->mutex);

	return rq;
}

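/*
 * Hedged usage sketch (error handling elided): the returned request is
 * already allocated on ce's timeline, so the caller only has to emit its
 * payload and submit it.
 *
 *	rq = intel_context_create_request(ce);
 *	if (IS_ERR(rq))
 *		return PTR_ERR(rq);
 *
 *	... emit commands into rq ...
 *
 *	i915_request_add(rq);
 */
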
struct i915_request *intel_context_get_active_request(struct intel_context *ce)
{
	struct intel_context *parent = intel_context_to_parent(ce);
	struct i915_request *rq, *active = NULL;
	unsigned long flags;

	GEM_BUG_ON(!intel_engine_uses_guc(ce->engine));

	/*
	 * We search the parent list to find an active request on the
	 * submitted context. The parent list contains the requests for all
	 * the contexts in the relationship, so we have to compare each
	 * request's context against ours.
	 */
	spin_lock_irqsave(&parent->guc_state.lock, flags);
	list_for_each_entry_reverse(rq, &parent->guc_state.requests,
				    sched.link) {
		if (rq->context != ce)
			continue;
		if (i915_request_completed(rq))
			break;

		active = rq;
	}
	if (active)
		active = i915_request_get_rcu(active);
	spin_unlock_irqrestore(&parent->guc_state.lock, flags);

	return active;
}

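/*
 * Note for callers (derived from the i915_request_get_rcu() above): a
 * non-NULL return carries a request reference, so it must be balanced with
 * i915_request_put() once the caller is done inspecting it, e.g.:
 *
 *	rq = intel_context_get_active_request(ce);
 *	if (rq) {
 *		... inspect rq ...
 *		i915_request_put(rq);
 *	}
 */
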
void intel_context_bind_parent_child(struct intel_context *parent,
				     struct intel_context *child)
{
	/*
	 * It is the caller's responsibility to validate that this function is
	 * used correctly, but we use GEM_BUG_ON here to ensure that they do.
	 */
	GEM_BUG_ON(intel_context_is_pinned(parent));
	GEM_BUG_ON(intel_context_is_child(parent));
	GEM_BUG_ON(intel_context_is_pinned(child));
	GEM_BUG_ON(intel_context_is_child(child));
	GEM_BUG_ON(intel_context_is_parent(child));

	parent->parallel.child_index = parent->parallel.number_children++;
	list_add_tail(&child->parallel.child_link,
		      &parent->parallel.child_list);
	child->parallel.parent = parent;
}

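/*
 * Hedged sketch of assembling a parallel context group with this helper
 * (assumes the caller has already created one unpinned context per engine;
 * "engines" and "count" are illustrative names, not a real API):
 *
 *	parent = intel_context_create(engines[0]);
 *	for (i = 1; i < count; ++i) {
 *		child = intel_context_create(engines[i]);
 *		intel_context_bind_parent_child(parent, child);
 *	}
 */
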
u64 intel_context_get_total_runtime_ns(struct intel_context *ce)
{
	u64 total, active;

	if (ce->ops->update_stats)
		ce->ops->update_stats(ce);

	total = ce->stats.runtime.total;
	if (ce->ops->flags & COPS_RUNTIME_CYCLES)
		total *= ce->engine->gt->clock_period_ns;

	active = READ_ONCE(ce->stats.active);
	if (active)
		active = intel_context_clock() - active;

	return total + active;
}

u64 intel_context_get_avg_runtime_ns(struct intel_context *ce)
{
	u64 avg = ewma_runtime_read(&ce->stats.runtime.avg);

	if (ce->ops->flags & COPS_RUNTIME_CYCLES)
		avg *= ce->engine->gt->clock_period_ns;

	return avg;
}

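/*
 * Hedged sketch of consuming the runtime helpers above for a debug printout;
 * div_u64() and NSEC_PER_MSEC are the standard kernel helpers (the drm_printf
 * destination "p" is an assumed parameter of the caller):
 *
 *	total = intel_context_get_total_runtime_ns(ce);
 *	avg = intel_context_get_avg_runtime_ns(ce);
 *	drm_printf(p, "runtime: total %llums, avg %llums\n",
 *		   div_u64(total, NSEC_PER_MSEC),
 *		   div_u64(avg, NSEC_PER_MSEC));
 */
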
bool intel_context_ban(struct intel_context *ce, struct i915_request *rq)
{
	bool ret = intel_context_set_banned(ce);

	trace_intel_context_ban(ce);

	if (ce->ops->revoke)
		ce->ops->revoke(ce, rq,
				INTEL_CONTEXT_BANNED_PREEMPT_TIMEOUT_MS);

	return ret;
}

bool intel_context_revoke(struct intel_context *ce)
{
	bool ret = intel_context_set_exiting(ce);

	if (ce->ops->revoke)
		ce->ops->revoke(ce, NULL, ce->engine->props.preempt_timeout_ms);

	return ret;
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftest_context.c"
#endif