/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2019 Intel Corporation
 */

#include <linux/debugobjects.h>

#include "gt/intel_context.h"
#include "gt/intel_engine_heartbeat.h"
#include "gt/intel_engine_pm.h"
#include "gt/intel_ring.h"

#include "i915_drv.h"
#include "i915_active.h"
#include "i915_globals.h"

/*
 * Active refs memory management
 *
 * To be more economical with memory, we reap all the i915_active trees as
 * they idle (when we know the active requests are inactive) and allocate the
 * nodes from a local slab cache to hopefully reduce the fragmentation.
 */
static struct i915_global_active {
	struct i915_global base;
#ifdef __linux__
	struct kmem_cache *slab_cache;
#else
	struct pool slab_cache;
#endif
} global;

struct active_node {
	struct i915_active_fence base;
	struct i915_active *ref;
	struct rb_node node;
	u64 timeline;
};

static inline struct active_node *
node_from_active(struct i915_active_fence *active)
{
	return container_of(active, struct active_node, base);
}

#define take_preallocated_barriers(x) llist_del_all(&(x)->preallocated_barriers)

static inline bool is_barrier(const struct i915_active_fence *active)
{
	return IS_ERR(rcu_access_pointer(active->fence));
}

static inline struct llist_node *barrier_to_ll(struct active_node *node)
{
	GEM_BUG_ON(!is_barrier(&node->base));
	return (struct llist_node *)&node->base.cb.node;
}

static inline struct intel_engine_cs *
__barrier_to_engine(struct active_node *node)
{
	return (struct intel_engine_cs *)READ_ONCE(node->base.cb.node.prev);
}

static inline struct intel_engine_cs *
barrier_to_engine(struct active_node *node)
{
	GEM_BUG_ON(!is_barrier(&node->base));
	return __barrier_to_engine(node);
}

static inline struct active_node *barrier_from_ll(struct llist_node *x)
{
	return container_of((struct list_head *)x,
			    struct active_node, base.cb.node);
}

#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) && IS_ENABLED(CONFIG_DEBUG_OBJECTS)

static void *active_debug_hint(void *addr)
{
	struct i915_active *ref = addr;

	return (void *)ref->active ?: (void *)ref->retire ?: (void *)ref;
}

static struct debug_obj_descr active_debug_desc = {
	.name = "i915_active",
	.debug_hint = active_debug_hint,
};

static void debug_active_init(struct i915_active *ref)
{
	debug_object_init(ref, &active_debug_desc);
}

static void debug_active_activate(struct i915_active *ref)
{
	lockdep_assert_held(&ref->tree_lock);
	if (!atomic_read(&ref->count)) /* before the first inc */
		debug_object_activate(ref, &active_debug_desc);
}

static void debug_active_deactivate(struct i915_active *ref)
{
	lockdep_assert_held(&ref->tree_lock);
	if (!atomic_read(&ref->count)) /* after the last dec */
		debug_object_deactivate(ref, &active_debug_desc);
}

static void debug_active_fini(struct i915_active *ref)
{
	debug_object_free(ref, &active_debug_desc);
}

static void debug_active_assert(struct i915_active *ref)
{
	debug_object_assert_init(ref, &active_debug_desc);
}

#else

static inline void debug_active_init(struct i915_active *ref) { }
static inline void debug_active_activate(struct i915_active *ref) { }
static inline void debug_active_deactivate(struct i915_active *ref) { }
static inline void debug_active_fini(struct i915_active *ref) { }
static inline void debug_active_assert(struct i915_active *ref) { }

#endif

static void
__active_retire(struct i915_active *ref)
{
	struct active_node *it, *n;
	struct rb_root root;
	unsigned long flags;

	GEM_BUG_ON(i915_active_is_idle(ref));

	/* return the unused nodes to our slabcache -- flushing the allocator */
	if (!atomic_dec_and_lock_irqsave(&ref->count, &ref->tree_lock, flags))
		return;

	GEM_BUG_ON(rcu_access_pointer(ref->excl.fence));
	debug_active_deactivate(ref);

	root = ref->tree;
	ref->tree = RB_ROOT;
	ref->cache = NULL;

	spin_unlock_irqrestore(&ref->tree_lock, flags);

	/* After the final retire, the entire struct may be freed */
	if (ref->retire)
		ref->retire(ref);

	/* ... except if you wait on it, you must manage your own references! */
	wake_up_var(ref);

	rbtree_postorder_for_each_entry_safe(it, n, &root, node) {
		GEM_BUG_ON(i915_active_fence_isset(&it->base));
#ifdef __linux__
		kmem_cache_free(global.slab_cache, it);
#else
		pool_put(&global.slab_cache, it);
#endif
	}
}

static void
active_work(struct work_struct *wrk)
{
	struct i915_active *ref = container_of(wrk, typeof(*ref), work);

	GEM_BUG_ON(!atomic_read(&ref->count));
	if (atomic_add_unless(&ref->count, -1, 1))
		return;

	__active_retire(ref);
}

static void
active_retire(struct i915_active *ref)
{
	GEM_BUG_ON(!atomic_read(&ref->count));
	if (atomic_add_unless(&ref->count, -1, 1))
		return;

	if (ref->flags & I915_ACTIVE_RETIRE_SLEEPS) {
		queue_work(system_unbound_wq, &ref->work);
		return;
	}

	__active_retire(ref);
}

static inline struct dma_fence **
__active_fence_slot(struct i915_active_fence *active)
{
	return (struct dma_fence ** __force)&active->fence;
}

static inline bool
active_fence_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
{
	struct i915_active_fence *active =
		container_of(cb, typeof(*active), cb);

	return cmpxchg(__active_fence_slot(active), fence, NULL) == fence;
}

static void
node_retire(struct dma_fence *fence, struct dma_fence_cb *cb)
{
	if (active_fence_cb(fence, cb))
		active_retire(container_of(cb, struct active_node, base.cb)->ref);
}

static void
excl_retire(struct dma_fence *fence, struct dma_fence_cb *cb)
{
	if (active_fence_cb(fence, cb))
		active_retire(container_of(cb, struct i915_active, excl.cb));
}
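
/*
 * Retirement flow (descriptive summary added by the editor): node_retire()
 * and excl_retire() run as dma_fence callbacks once their fence signals, and
 * each drops one tracker reference via active_retire(). The final reference
 * drop funnels into __active_retire(), either directly or via active_work()
 * on system_unbound_wq when I915_ACTIVE_RETIRE_SLEEPS is set.
 */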

static struct i915_active_fence *
active_instance(struct i915_active *ref, struct intel_timeline *tl)
{
	struct active_node *node, *prealloc;
	struct rb_node **p, *parent;
	u64 idx = tl->fence_context;

	/*
	 * We track the most recently used timeline to skip an rbtree search
	 * for the common case; under typical loads we never need the rbtree
	 * at all. We can reuse the last slot if it is empty (that is, after
	 * the previous activity has been retired) or if it matches the
	 * current timeline.
	 */
	node = READ_ONCE(ref->cache);
	if (node && node->timeline == idx)
		return &node->base;

	/* Preallocate a replacement, just in case */
#ifdef __linux__
	prealloc = kmem_cache_alloc(global.slab_cache, GFP_KERNEL);
#else
	prealloc = pool_get(&global.slab_cache, PR_WAITOK);
#endif
	if (!prealloc)
		return NULL;

	spin_lock_irq(&ref->tree_lock);
	GEM_BUG_ON(i915_active_is_idle(ref));

	parent = NULL;
	p = &ref->tree.rb_node;
	while (*p) {
		parent = *p;

		node = rb_entry(parent, struct active_node, node);
		if (node->timeline == idx) {
#ifdef __linux__
			kmem_cache_free(global.slab_cache, prealloc);
#else
			pool_put(&global.slab_cache, prealloc);
#endif
			goto out;
		}

		if (node->timeline < idx)
			p = &parent->rb_right;
		else
			p = &parent->rb_left;
	}

	node = prealloc;
	__i915_active_fence_init(&node->base, NULL, node_retire);
	node->ref = ref;
	node->timeline = idx;

	rb_link_node(&node->node, parent, p);
	rb_insert_color(&node->node, &ref->tree);

out:
	ref->cache = node;
	spin_unlock_irq(&ref->tree_lock);

	BUILD_BUG_ON(offsetof(typeof(*node), base));
	return &node->base;
}

void __i915_active_init(struct i915_active *ref,
			int (*active)(struct i915_active *ref),
			void (*retire)(struct i915_active *ref),
			struct lock_class_key *mkey,
			struct lock_class_key *wkey)
{
	unsigned long bits;

	debug_active_init(ref);

	ref->flags = 0;
	ref->active = active;
	ref->retire = ptr_unpack_bits(retire, &bits, 2);
	if (bits & I915_ACTIVE_MAY_SLEEP)
		ref->flags |= I915_ACTIVE_RETIRE_SLEEPS;

	mtx_init(&ref->tree_lock, IPL_TTY);
	ref->tree = RB_ROOT;
	ref->cache = NULL;

	init_llist_head(&ref->preallocated_barriers);
	atomic_set(&ref->count, 0);
#ifdef __linux__
	__mutex_init(&ref->mutex, "i915_active", mkey);
#else
	rw_init(&ref->mutex, "i915_active");
#endif
	__i915_active_fence_init(&ref->excl, NULL, excl_retire);
	INIT_WORK(&ref->work, active_work);
#if IS_ENABLED(CONFIG_LOCKDEP)
	lockdep_init_map(&ref->work.lockdep_map, "i915_active.work", wkey, 0);
#endif
}
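
/*
 * Typical usage (a minimal sketch, not taken from this file; it assumes the
 * i915_active_init() convenience macro from i915_active.h, which supplies
 * the lock classes for __i915_active_init(); obj, obj_active and obj_retire
 * are hypothetical names for the caller's container and its optional
 * activate/retire callbacks):
 *
 *	i915_active_init(&obj->active, obj_active, obj_retire);
 *	...
 *	err = i915_active_ref(&obj->active, tl, &rq->fence);
 *	...
 *	err = i915_active_wait(&obj->active);
 *	i915_active_fini(&obj->active);
 */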

static bool ____active_del_barrier(struct i915_active *ref,
				   struct active_node *node,
				   struct intel_engine_cs *engine)
{
	struct llist_node *head = NULL, *tail = NULL;
	struct llist_node *pos, *next;

	GEM_BUG_ON(node->timeline != engine->kernel_context->timeline->fence_context);

	/*
	 * Rebuild the llist excluding our node. We may perform this
	 * outside of the kernel_context timeline mutex and so someone
	 * else may be manipulating the engine->barrier_tasks, in
	 * which case either we or they will be upset :)
	 *
	 * A second __active_del_barrier() will report failure to claim
	 * the active_node and the caller will just shrug and know not to
	 * claim ownership of its node.
	 *
	 * A concurrent i915_request_add_active_barriers() will miss adding
	 * any of the tasks, but we will try again on the next -- and since
	 * we are actively using the barrier, we know that there will be
	 * at least another opportunity when we idle.
	 */
	llist_for_each_safe(pos, next, llist_del_all(&engine->barrier_tasks)) {
		if (node == barrier_from_ll(pos)) {
			node = NULL;
			continue;
		}

		pos->next = head;
		head = pos;
		if (!tail)
			tail = pos;
	}
	if (head)
		llist_add_batch(head, tail, &engine->barrier_tasks);

	return !node;
}

static bool
__active_del_barrier(struct i915_active *ref, struct active_node *node)
{
	return ____active_del_barrier(ref, node, barrier_to_engine(node));
}

int i915_active_ref(struct i915_active *ref,
		    struct intel_timeline *tl,
		    struct dma_fence *fence)
{
	struct i915_active_fence *active;
	int err;

	lockdep_assert_held(&tl->mutex);

	/* Prevent reaping in case we malloc/wait while building the tree */
	err = i915_active_acquire(ref);
	if (err)
		return err;

	active = active_instance(ref, tl);
	if (!active) {
		err = -ENOMEM;
		goto out;
	}

	if (is_barrier(active)) { /* proto-node used by our idle barrier */
		/*
		 * This request is on the kernel_context timeline, and so
		 * we can use it to substitute for the pending idle-barrier
		 * request that we want to emit on the kernel_context.
		 */
		__active_del_barrier(ref, node_from_active(active));
		RCU_INIT_POINTER(active->fence, NULL);
		atomic_dec(&ref->count);
	}
	if (!__i915_active_fence_set(active, fence))
		atomic_inc(&ref->count);

out:
	i915_active_release(ref);
	return err;
}

struct dma_fence *
i915_active_set_exclusive(struct i915_active *ref, struct dma_fence *f)
{
	struct dma_fence *prev;

	/* We expect the caller to manage the exclusive timeline ordering */
	GEM_BUG_ON(i915_active_is_idle(ref));

	rcu_read_lock();
	prev = __i915_active_fence_set(&ref->excl, f);
	if (prev)
		prev = dma_fence_get_rcu(prev);
	else
		atomic_inc(&ref->count);
	rcu_read_unlock();

	return prev;
}
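
/*
 * Note for callers of i915_active_set_exclusive(): a non-NULL return carries
 * a reference to the previously tracked exclusive fence; the caller is
 * expected to order its work after that fence and to dma_fence_put() it.
 */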

bool i915_active_acquire_if_busy(struct i915_active *ref)
{
	debug_active_assert(ref);
	return atomic_add_unless(&ref->count, 1, 0);
}

int i915_active_acquire(struct i915_active *ref)
{
	int err;

	if (i915_active_acquire_if_busy(ref))
		return 0;

	err = mutex_lock_interruptible(&ref->mutex);
	if (err)
		return err;

	if (likely(!i915_active_acquire_if_busy(ref))) {
		if (ref->active)
			err = ref->active(ref);
		if (!err) {
			spin_lock_irq(&ref->tree_lock); /* __active_retire() */
			debug_active_activate(ref);
			atomic_inc(&ref->count);
			spin_unlock_irq(&ref->tree_lock);
		}
	}

	mutex_unlock(&ref->mutex);

	return err;
}

void i915_active_release(struct i915_active *ref)
{
	debug_active_assert(ref);
	active_retire(ref);
}

static void enable_signaling(struct i915_active_fence *active)
{
	struct dma_fence *fence;

	if (unlikely(is_barrier(active)))
		return;

	fence = i915_active_fence_get(active);
	if (!fence)
		return;

	dma_fence_enable_sw_signaling(fence);
	dma_fence_put(fence);
}

static int flush_barrier(struct active_node *it)
{
	struct intel_engine_cs *engine;

	if (likely(!is_barrier(&it->base)))
		return 0;

	engine = __barrier_to_engine(it);
	smp_rmb(); /* serialise with add_active_barriers */
	if (!is_barrier(&it->base))
		return 0;

	return intel_engine_flush_barriers(engine);
}

static int flush_lazy_signals(struct i915_active *ref)
{
	struct active_node *it, *n;
	int err = 0;

	enable_signaling(&ref->excl);
	rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
		err = flush_barrier(it); /* unconnected idle barrier? */
		if (err)
			break;

		enable_signaling(&it->base);
	}

	return err;
}

int i915_active_wait(struct i915_active *ref)
{
	int err;

	might_sleep();

	if (!i915_active_acquire_if_busy(ref))
		return 0;

	/* Any fence added after the wait begins will not be auto-signaled */
	err = flush_lazy_signals(ref);
	i915_active_release(ref);
	if (err)
		return err;

	if (wait_var_event_interruptible(ref, i915_active_is_idle(ref)))
		return -EINTR;

	flush_work(&ref->work);
	return 0;
}

static int __await_active(struct i915_active_fence *active,
			  int (*fn)(void *arg, struct dma_fence *fence),
			  void *arg)
{
	struct dma_fence *fence;

	if (is_barrier(active)) /* XXX flush the barrier? */
		return 0;

	fence = i915_active_fence_get(active);
	if (fence) {
		int err;

		err = fn(arg, fence);
		dma_fence_put(fence);
		if (err < 0)
			return err;
	}

	return 0;
}

static int await_active(struct i915_active *ref,
			unsigned int flags,
			int (*fn)(void *arg, struct dma_fence *fence),
			void *arg)
{
	int err = 0;

	/* We must always wait for the exclusive fence! */
	if (rcu_access_pointer(ref->excl.fence)) {
		err = __await_active(&ref->excl, fn, arg);
		if (err)
			return err;
	}

	if (flags & I915_ACTIVE_AWAIT_ALL && i915_active_acquire_if_busy(ref)) {
		struct active_node *it, *n;

		rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
			err = __await_active(&it->base, fn, arg);
			if (err)
				break;
		}
		i915_active_release(ref);
		if (err)
			return err;
	}

	return 0;
}

static int rq_await_fence(void *arg, struct dma_fence *fence)
{
	return i915_request_await_dma_fence(arg, fence);
}

int i915_request_await_active(struct i915_request *rq,
			      struct i915_active *ref,
			      unsigned int flags)
{
	return await_active(ref, flags, rq_await_fence, rq);
}

static int sw_await_fence(void *arg, struct dma_fence *fence)
{
	return i915_sw_fence_await_dma_fence(arg, fence, 0,
					     GFP_NOWAIT | __GFP_NOWARN);
}

int i915_sw_fence_await_active(struct i915_sw_fence *fence,
			       struct i915_active *ref,
			       unsigned int flags)
{
	return await_active(ref, flags, sw_await_fence, fence);
}
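
/*
 * Example (sketch): a request that must not execute before everything
 * tracked here has completed, i.e. the per-timeline fences as well as the
 * exclusive fence, would use
 *
 *	err = i915_request_await_active(rq, ref, I915_ACTIVE_AWAIT_ALL);
 *
 * whereas flags == 0 awaits only the exclusive fence.
 */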

#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
void i915_active_fini(struct i915_active *ref)
{
	debug_active_fini(ref);
	GEM_BUG_ON(atomic_read(&ref->count));
	GEM_BUG_ON(work_pending(&ref->work));
	GEM_BUG_ON(!RB_EMPTY_ROOT(&ref->tree));
	mutex_destroy(&ref->mutex);
}
#endif

static inline bool is_idle_barrier(struct active_node *node, u64 idx)
{
	return node->timeline == idx && !i915_active_fence_isset(&node->base);
}

static struct active_node *reuse_idle_barrier(struct i915_active *ref, u64 idx)
{
	struct rb_node *prev, *p;

	if (RB_EMPTY_ROOT(&ref->tree))
		return NULL;

	spin_lock_irq(&ref->tree_lock);
	GEM_BUG_ON(i915_active_is_idle(ref));

	/*
	 * Try to reuse any existing barrier nodes already allocated for this
	 * i915_active; due to overlapping active phases there is likely a
	 * node kept alive (as we reuse before parking). We prefer to reuse
	 * completely idle barriers (less hassle in manipulating the llists),
	 * but otherwise any will do.
	 */
	if (ref->cache && is_idle_barrier(ref->cache, idx)) {
		p = &ref->cache->node;
		goto match;
	}

	prev = NULL;
	p = ref->tree.rb_node;
	while (p) {
		struct active_node *node =
			rb_entry(p, struct active_node, node);

		if (is_idle_barrier(node, idx))
			goto match;

		prev = p;
		if (node->timeline < idx)
			p = p->rb_right;
		else
			p = p->rb_left;
	}

	/*
	 * No quick match, but we did find the leftmost rb_node for the
	 * kernel_context. Walk the rb_tree in-order to see if there were
	 * any idle-barriers on this timeline that we missed, or just use
	 * the first pending barrier.
	 */
	for (p = prev; p; p = rb_next(p)) {
		struct active_node *node =
			rb_entry(p, struct active_node, node);
		struct intel_engine_cs *engine;

		if (node->timeline > idx)
			break;

		if (node->timeline < idx)
			continue;

		if (is_idle_barrier(node, idx))
			goto match;

		/*
		 * The list of pending barriers is protected by the
		 * kernel_context timeline, which notably we do not hold
		 * here. i915_request_add_active_barriers() may consume
		 * the barrier before we claim it, so we have to check
		 * for success.
		 */
		engine = __barrier_to_engine(node);
		smp_rmb(); /* serialise with add_active_barriers */
		if (is_barrier(&node->base) &&
		    ____active_del_barrier(ref, node, engine))
			goto match;
	}

	spin_unlock_irq(&ref->tree_lock);

	return NULL;

match:
	rb_erase(p, &ref->tree); /* Hide from waits and sibling allocations */
	if (p == &ref->cache->node)
		ref->cache = NULL;
	spin_unlock_irq(&ref->tree_lock);

	return rb_entry(p, struct active_node, node);
}
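
/*
 * Barrier lifecycle (descriptive summary added by the editor):
 * i915_active_acquire_preallocate_barrier() allocates (or reuses) one
 * proto-node per physical engine, marking each with fence == ERR_PTR(-EAGAIN)
 * and stashing the engine pointer in cb.node.prev.
 * i915_active_acquire_barrier() then moves those proto-nodes into the rbtree
 * and onto engine->barrier_tasks, and i915_request_add_active_barriers()
 * finally attaches them to the next request on the engine's kernel_context
 * timeline so that they are released when that request retires.
 */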

int i915_active_acquire_preallocate_barrier(struct i915_active *ref,
					    struct intel_engine_cs *engine)
{
	intel_engine_mask_t tmp, mask = engine->mask;
	struct llist_node *first = NULL, *last = NULL;
	struct intel_gt *gt = engine->gt;
	int err;

	GEM_BUG_ON(i915_active_is_idle(ref));

	/* Wait until the previous preallocation is completed */
	while (!llist_empty(&ref->preallocated_barriers))
		cond_resched();

	/*
	 * Preallocate a node for each physical engine supporting the target
	 * engine (remember virtual engines have more than one sibling).
	 * We can then use the preallocated nodes in
	 * i915_active_acquire_barrier().
	 */
	GEM_BUG_ON(!mask);
	for_each_engine_masked(engine, gt, mask, tmp) {
		u64 idx = engine->kernel_context->timeline->fence_context;
		struct llist_node *prev = first;
		struct active_node *node;

		node = reuse_idle_barrier(ref, idx);
		if (!node) {
#ifdef __linux__
			node = kmem_cache_alloc(global.slab_cache, GFP_KERNEL);
#else
			node = pool_get(&global.slab_cache, PR_WAITOK);
#endif
			if (!node) {
				err = -ENOMEM;
				goto unwind;
			}

			RCU_INIT_POINTER(node->base.fence, NULL);
			node->base.cb.func = node_retire;
			node->timeline = idx;
			node->ref = ref;
		}

		if (!i915_active_fence_isset(&node->base)) {
			/*
			 * Mark this as being *our* unconnected proto-node.
			 *
			 * Since this node is not in any list, and we have
			 * decoupled it from the rbtree, we can reuse the
			 * request to indicate this is an idle-barrier node
			 * and then we can use the rb_node and list pointers
			 * for our tracking of the pending barrier.
			 */
			RCU_INIT_POINTER(node->base.fence, ERR_PTR(-EAGAIN));
			node->base.cb.node.prev = (void *)engine;
			atomic_inc(&ref->count);
		}
		GEM_BUG_ON(rcu_access_pointer(node->base.fence) != ERR_PTR(-EAGAIN));

		GEM_BUG_ON(barrier_to_engine(node) != engine);
		first = barrier_to_ll(node);
		first->next = prev;
		if (!last)
			last = first;
		intel_engine_pm_get(engine);
	}

	GEM_BUG_ON(!llist_empty(&ref->preallocated_barriers));
	llist_add_batch(first, last, &ref->preallocated_barriers);

	return 0;

unwind:
	while (first) {
		struct active_node *node = barrier_from_ll(first);

		first = first->next;

		atomic_dec(&ref->count);
		intel_engine_pm_put(barrier_to_engine(node));

#ifdef __linux__
		kmem_cache_free(global.slab_cache, node);
#else
		pool_put(&global.slab_cache, node);
#endif
	}
	return err;
}

void i915_active_acquire_barrier(struct i915_active *ref)
{
	struct llist_node *pos, *next;
	unsigned long flags;

	GEM_BUG_ON(i915_active_is_idle(ref));

	/*
	 * Transfer the list of preallocated barriers into the
	 * i915_active rbtree, but only as proto-nodes. They will be
	 * populated by i915_request_add_active_barriers() to point to the
	 * request that will eventually release them.
	 */
	llist_for_each_safe(pos, next, take_preallocated_barriers(ref)) {
		struct active_node *node = barrier_from_ll(pos);
		struct intel_engine_cs *engine = barrier_to_engine(node);
		struct rb_node **p, *parent;

		spin_lock_irqsave_nested(&ref->tree_lock, flags,
					 SINGLE_DEPTH_NESTING);
		parent = NULL;
		p = &ref->tree.rb_node;
		while (*p) {
			struct active_node *it;

			parent = *p;

			it = rb_entry(parent, struct active_node, node);
			if (it->timeline < node->timeline)
				p = &parent->rb_right;
			else
				p = &parent->rb_left;
		}
		rb_link_node(&node->node, parent, p);
		rb_insert_color(&node->node, &ref->tree);
		spin_unlock_irqrestore(&ref->tree_lock, flags);

		GEM_BUG_ON(!intel_engine_pm_is_awake(engine));
		llist_add(barrier_to_ll(node), &engine->barrier_tasks);
		intel_engine_pm_put(engine);
	}
}

static struct dma_fence **ll_to_fence_slot(struct llist_node *node)
{
	return __active_fence_slot(&barrier_from_ll(node)->base);
}

void i915_request_add_active_barriers(struct i915_request *rq)
{
	struct intel_engine_cs *engine = rq->engine;
	struct llist_node *node, *next;
	unsigned long flags;

	GEM_BUG_ON(!intel_context_is_barrier(rq->context));
	GEM_BUG_ON(intel_engine_is_virtual(engine));
	GEM_BUG_ON(i915_request_timeline(rq) != engine->kernel_context->timeline);

	node = llist_del_all(&engine->barrier_tasks);
	if (!node)
		return;
	/*
	 * Attach the list of proto-fences to the in-flight request such
	 * that the parent i915_active will be released when this request
	 * is retired.
	 */
	spin_lock_irqsave(&rq->lock, flags);
	llist_for_each_safe(node, next, node) {
		/* serialise with reuse_idle_barrier */
		smp_store_mb(*ll_to_fence_slot(node), &rq->fence);
		list_add_tail((struct list_head *)node, &rq->fence.cb_list);
	}
	spin_unlock_irqrestore(&rq->lock, flags);
}

/*
 * __i915_active_fence_set: Update the last active fence along its timeline
 * @active: the active tracker
 * @fence: the new fence (under construction)
 *
 * Records the new @fence as the last active fence along its timeline in
 * this active tracker, moving the tracking callbacks from the previous
 * fence onto this one. Returns the previous fence (if not already completed),
 * which the caller must ensure is executed before the new fence. To ensure
 * that the order of fences within the timeline of the i915_active_fence is
 * understood, it should be locked by the caller.
 */
struct dma_fence *
__i915_active_fence_set(struct i915_active_fence *active,
			struct dma_fence *fence)
{
	struct dma_fence *prev;
	unsigned long flags;

	if (fence == rcu_access_pointer(active->fence))
		return fence;

	GEM_BUG_ON(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags));

	/*
	 * Consider that we have two threads arriving (A and B), with
	 * C already resident as the active->fence.
	 *
	 * A does the xchg first, and so it sees C or NULL depending
	 * on the timing of the interrupt handler. If it is NULL, the
	 * previous fence must have been signaled and we know that
	 * we are first on the timeline. If it is still present,
	 * we acquire the lock on that fence and serialise with the interrupt
	 * handler, in the process removing it from any future interrupt
	 * callback. A will then wait on C before executing (if present).
	 *
	 * As B is second, it sees A as the previous fence and so waits for
	 * it to complete its transition and takes over the occupancy for
	 * itself -- remembering that it needs to wait on A before executing.
	 *
	 * Note the strong ordering of the timeline also provides consistent
	 * nesting rules for the fence->lock; the inner lock is always the
	 * older lock.
	 */
	spin_lock_irqsave(fence->lock, flags);
	prev = xchg(__active_fence_slot(active), fence);
	if (prev) {
		GEM_BUG_ON(prev == fence);
		spin_lock_nested(prev->lock, SINGLE_DEPTH_NESTING);
		__list_del_entry(&active->cb.node);
		spin_unlock(prev->lock); /* serialise with prev->cb_list */
	}
	list_add_tail(&active->cb.node, &fence->cb_list);
	spin_unlock_irqrestore(fence->lock, flags);

	return prev;
}

int i915_active_fence_set(struct i915_active_fence *active,
			  struct i915_request *rq)
{
	struct dma_fence *fence;
	int err = 0;

	/* Must maintain timeline ordering wrt previous active requests */
	rcu_read_lock();
	fence = __i915_active_fence_set(active, &rq->fence);
	if (fence) /* but the previous fence may not belong to that timeline! */
		fence = dma_fence_get_rcu(fence);
	rcu_read_unlock();
	if (fence) {
		err = i915_request_await_dma_fence(rq, fence);
		dma_fence_put(fence);
	}

	return err;
}

void i915_active_noop(struct dma_fence *fence, struct dma_fence_cb *cb)
{
	active_fence_cb(fence, cb);
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/i915_active.c"
#endif

static void i915_global_active_shrink(void)
{
#ifdef notyet
	kmem_cache_shrink(global.slab_cache);
#endif
}

static void i915_global_active_exit(void)
{
#ifdef __linux__
	kmem_cache_destroy(global.slab_cache);
#else
	pool_destroy(&global.slab_cache);
#endif
}

static struct i915_global_active global = { {
	.shrink = i915_global_active_shrink,
	.exit = i915_global_active_exit,
} };

int __init i915_global_active_init(void)
{
#ifdef __linux__
	global.slab_cache = KMEM_CACHE(active_node, SLAB_HWCACHE_ALIGN);
	if (!global.slab_cache)
		return -ENOMEM;
#else
	pool_init(&global.slab_cache, sizeof(struct active_node),
	    CACHELINESIZE, IPL_TTY, 0, "drmsc", NULL);
#endif

	i915_global_register(&global.base);
	return 0;
}