/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include <linux/kthread.h>
#include <uapi/linux/sched/types.h>

#include "i915_drv.h"

static unsigned int __intel_breadcrumbs_wakeup(struct intel_breadcrumbs *b)
{
	struct intel_wait *wait;
	unsigned int result = 0;

	lockdep_assert_held(&b->irq_lock);

	wait = b->irq_wait;
	if (wait) {
		result = ENGINE_WAKEUP_WAITER;
		if (wake_up_process(wait->tsk))
			result |= ENGINE_WAKEUP_ASLEEP;
	}

	return result;
}

unsigned int intel_engine_wakeup(struct intel_engine_cs *engine)
{
	struct intel_breadcrumbs *b = &engine->breadcrumbs;
	unsigned long flags;
	unsigned int result;

	spin_lock_irqsave(&b->irq_lock, flags);
	result = __intel_breadcrumbs_wakeup(b);
	spin_unlock_irqrestore(&b->irq_lock, flags);

	return result;
}
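/* The two wakeup flags are consumed independently: ENGINE_WAKEUP_WAITER
 * reports that a bottom-half exists at all, while ENGINE_WAKEUP_ASLEEP
 * (wake_up_process() returned true) tells us that waiter was actually
 * asleep. The hangcheck timer below keys off the latter: a sleeping
 * waiter cannot simply have been too busy to service the interrupt, so
 * its slumber implies the interrupt never arrived.
 */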
static unsigned long wait_timeout(void)
{
	return round_jiffies_up(jiffies + DRM_I915_HANGCHECK_JIFFIES);
}

static noinline void missed_breadcrumb(struct intel_engine_cs *engine)
{
	DRM_DEBUG_DRIVER("%s missed breadcrumb at %pF, irq posted? %s, current seqno=%x, last=%x\n",
			 engine->name, __builtin_return_address(0),
			 yesno(test_bit(ENGINE_IRQ_BREADCRUMB,
					&engine->irq_posted)),
			 intel_engine_get_seqno(engine),
			 intel_engine_last_submit(engine));

	set_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings);
}

static void intel_breadcrumbs_hangcheck(struct timer_list *t)
{
	struct intel_engine_cs *engine = from_timer(engine, t,
						    breadcrumbs.hangcheck);
	struct intel_breadcrumbs *b = &engine->breadcrumbs;

	if (!b->irq_armed)
		return;

	if (b->hangcheck_interrupts != atomic_read(&engine->irq_count)) {
		b->hangcheck_interrupts = atomic_read(&engine->irq_count);
		mod_timer(&b->hangcheck, wait_timeout());
		return;
	}

	/* We keep the hangcheck timer alive until we disarm the irq, even
	 * if there are no waiters at present.
	 *
	 * If the waiter was currently running, assume it hasn't had a chance
	 * to process the pending interrupt (e.g., a low priority task on a
	 * loaded system) and wait until it sleeps before declaring a missed
	 * interrupt.
	 *
	 * If the waiter was asleep (and not even pending a wakeup), then we
	 * must have missed an interrupt as the GPU has stopped advancing
	 * but we still have a waiter. Assuming all batches complete within
	 * DRM_I915_HANGCHECK_JIFFIES [1.5s]!
	 */
	if (intel_engine_wakeup(engine) & ENGINE_WAKEUP_ASLEEP) {
		missed_breadcrumb(engine);
		mod_timer(&engine->breadcrumbs.fake_irq, jiffies + 1);
	} else {
		mod_timer(&b->hangcheck, wait_timeout());
	}
}
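/* Once hangcheck observes a sleeping waiter with no fresh interrupts, it
 * escalates to the fake-irq timer below, which polls every jiffy.
 * DRM_I915_HANGCHECK_JIFFIES corresponds to the 1.5s window mentioned
 * above (nominally msecs_to_jiffies(1500), per i915_drv.h of this
 * vintage).
 */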
static void intel_breadcrumbs_fake_irq(struct timer_list *t)
{
	struct intel_engine_cs *engine = from_timer(engine, t,
						    breadcrumbs.fake_irq);
	struct intel_breadcrumbs *b = &engine->breadcrumbs;

	/* The timer persists in case we cannot enable interrupts,
	 * or if we have previously seen seqno/interrupt incoherency
	 * ("missed interrupt" syndrome, better known as a "missed breadcrumb").
	 * Here the worker will wake up every jiffie in order to kick the
	 * oldest waiter to do the coherent seqno check.
	 */

	spin_lock_irq(&b->irq_lock);
	if (!__intel_breadcrumbs_wakeup(b))
		__intel_engine_disarm_breadcrumbs(engine);
	spin_unlock_irq(&b->irq_lock);
	if (!b->irq_armed)
		return;

	mod_timer(&b->fake_irq, jiffies + 1);

	/* Ensure that even if the GPU hangs, we get woken up.
	 *
	 * However, note that if no one is waiting, we never notice
	 * a gpu hang. Eventually, we will have to wait for a resource
	 * held by the GPU and so trigger a hangcheck. In the most
	 * pathological case, this will be upon memory starvation! To
	 * prevent this, we also queue the hangcheck from the retire
	 * worker.
	 */
	i915_queue_hangcheck(engine->i915);
}

static void irq_enable(struct intel_engine_cs *engine)
{
	/* Enabling the IRQ may miss the generation of the interrupt, but
	 * we still need to force the barrier before reading the seqno,
	 * just in case.
	 */
	set_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted);

	/* Caller disables interrupts */
	lockmgr(&engine->i915->irq_lock, LK_EXCLUSIVE);
	engine->irq_enable(engine);
	lockmgr(&engine->i915->irq_lock, LK_RELEASE);
}

static void irq_disable(struct intel_engine_cs *engine)
{
	/* Caller disables interrupts */
	lockmgr(&engine->i915->irq_lock, LK_EXCLUSIVE);
	engine->irq_disable(engine);
	lockmgr(&engine->i915->irq_lock, LK_RELEASE);
}

#pragma GCC diagnostic ignored "-Wdiscarded-qualifiers"

void __intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine)
{
	struct intel_breadcrumbs *b = &engine->breadcrumbs;

	lockdep_assert_held(&b->irq_lock);
	GEM_BUG_ON(b->irq_wait);

	if (b->irq_enabled) {
		irq_disable(engine);
		b->irq_enabled = false;
	}

	b->irq_armed = false;
}

void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine)
{
	struct intel_breadcrumbs *b = &engine->breadcrumbs;
	struct intel_wait *wait, *n, *first;

	if (!b->irq_armed)
		goto wakeup_signaler;

	/* We only disarm the irq when we are idle (all requests completed),
	 * so if the bottom-half remains asleep, it missed the request
	 * completion.
	 */

	spin_lock_irq(&b->rb_lock);

	lockmgr(&b->irq_lock, LK_EXCLUSIVE);
	first = fetch_and_zero(&b->irq_wait);
	__intel_engine_disarm_breadcrumbs(engine);
	lockmgr(&b->irq_lock, LK_RELEASE);

	rbtree_postorder_for_each_entry_safe(wait, n, &b->waiters, node) {
		RB_CLEAR_NODE(&wait->node);
		if (wake_up_process(wait->tsk) && wait == first)
			missed_breadcrumb(engine);
	}
	b->waiters = LINUX_RB_ROOT;

	spin_unlock_irq(&b->rb_lock);

	/*
	 * The signaling thread may be asleep holding a reference to a request
	 * that had its signaling cancelled prior to being preempted. We need
	 * to kick the signaler, just in case, to release any such reference.
	 */
wakeup_signaler:
	wake_up_process(b->signaler);
}

static bool use_fake_irq(const struct intel_breadcrumbs *b)
{
	const struct intel_engine_cs *engine =
		container_of(b, struct intel_engine_cs, breadcrumbs);

	if (!test_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings))
		return false;

	/* Only start with the heavy weight fake irq timer if we have not
	 * seen any interrupts since enabling it the first time. If the
	 * interrupts are still arriving, it means we made a mistake in our
	 * engine->seqno_barrier(), a timing error that should be transient
	 * and unlikely to reoccur.
	 */
	return atomic_read(&engine->irq_count) == b->hangcheck_interrupts;
}

static void enable_fake_irq(struct intel_breadcrumbs *b)
{
	/* Ensure we never sleep indefinitely */
	if (!b->irq_enabled || use_fake_irq(b))
		mod_timer(&b->fake_irq, jiffies + 1);
	else
		mod_timer(&b->hangcheck, wait_timeout());
}
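/* Together, use_fake_irq() and enable_fake_irq() implement this policy:
 *
 *	irq enabled, no recorded misses          -> hangcheck timer (~1.5s)
 *	irq disabled or unavailable              -> fake irq, every jiffy
 *	misses recorded, no interrupts seen
 *	since the first enabling                 -> fake irq, every jiffy
 */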
static bool __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b)
{
	struct intel_engine_cs *engine =
		container_of(b, struct intel_engine_cs, breadcrumbs);
	struct drm_i915_private *i915 = engine->i915;

	lockdep_assert_held(&b->irq_lock);
	if (b->irq_armed)
		return false;

	/* The breadcrumb irq will be disarmed on the interrupt after the
	 * waiters are signaled. This gives us a single interrupt window in
	 * which we can add a new waiter and avoid the cost of re-enabling
	 * the irq.
	 */
	b->irq_armed = true;
	GEM_BUG_ON(b->irq_enabled);

	if (I915_SELFTEST_ONLY(b->mock)) {
		/* For our mock objects we want to avoid interaction
		 * with the real hardware (which is not set up). So
		 * we simply pretend we have enabled the powerwell
		 * and the irq, and leave it up to the mock
		 * implementation to call intel_engine_wakeup()
		 * itself when it wants to simulate a user interrupt.
		 */
		return true;
	}

	/* Since we are waiting on a request, the GPU should be busy
	 * and should have its own rpm reference. This is tracked by
	 * i915->gt.awake; we can forgo holding our own wakeref for the
	 * interrupt as, before i915->gt.awake is released (when the
	 * driver is idle), we disarm the breadcrumbs.
	 */

	/* No interrupts? Kick the waiter every jiffie! */
	if (intel_irqs_enabled(i915)) {
		if (!test_bit(engine->id, &i915->gpu_error.test_irq_rings))
			irq_enable(engine);
		b->irq_enabled = true;
	}

	enable_fake_irq(b);
	return true;
}

static inline struct intel_wait *to_wait(struct rb_node *node)
{
	return rb_entry(node, struct intel_wait, node);
}
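/* All seqno comparisons in the tree walks below go through
 * i915_seqno_passed(), which at this point in the driver's history is a
 * wrapping u32 comparison, effectively (s32)(a - b) >= 0. For example,
 * seqno 3 "passes" seqno 0xfffffffe across the wrap even though it is
 * numerically smaller, so the rbtree is ordered by retirement, not by
 * raw integer value.
 */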
static inline void __intel_breadcrumbs_finish(struct intel_breadcrumbs *b,
					      struct intel_wait *wait)
{
	lockdep_assert_held(&b->rb_lock);
	GEM_BUG_ON(b->irq_wait == wait);

	/* This request is completed, so remove it from the tree, mark it as
	 * complete, and *then* wake up the associated task. N.B. when the
	 * task wakes up, it will find the empty rb_node, discern that it
	 * has already been removed from the tree and skip the serialisation
	 * of the b->rb_lock and b->irq_lock. This means that the destruction
	 * of the intel_wait is not serialised with the interrupt handler
	 * by the waiter - it must instead be serialised by the caller.
	 */
	rb_erase(&wait->node, &b->waiters);
	RB_CLEAR_NODE(&wait->node);

	wake_up_process(wait->tsk); /* implicit smp_wmb() */
}

static inline void __intel_breadcrumbs_next(struct intel_engine_cs *engine,
					    struct rb_node *next)
{
	struct intel_breadcrumbs *b = &engine->breadcrumbs;

	lockmgr(&b->irq_lock, LK_EXCLUSIVE);
	GEM_BUG_ON(!b->irq_armed);
	GEM_BUG_ON(!b->irq_wait);
	b->irq_wait = to_wait(next);
	lockmgr(&b->irq_lock, LK_RELEASE);

	/* We always wake up the next waiter that takes over as the bottom-half
	 * as we may delegate not only the irq-seqno barrier to the next waiter
	 * but also the task of waking up concurrent waiters.
	 */
	if (next)
		wake_up_process(to_wait(next)->tsk);
}

static bool __intel_engine_add_wait(struct intel_engine_cs *engine,
				    struct intel_wait *wait)
{
	struct intel_breadcrumbs *b = &engine->breadcrumbs;
	struct rb_node **p, *parent, *completed;
	bool first, armed;
	u32 seqno;

	/* Insert the request into the retirement ordered list
	 * of waiters by walking the rbtree. If we are the oldest
	 * seqno in the tree (the first to be retired), then
	 * set ourselves as the bottom-half.
	 *
	 * As we descend the tree, prune completed branches: since we hold
	 * the spinlock, we know that the first_waiter must be delayed, so
	 * we can reduce some of the sequential wake-up latency by taking
	 * action ourselves and waking up the completed tasks in parallel.
	 * Also, by removing stale elements in the tree, we may be able to
	 * reduce the ping-pong between the old bottom-half and ourselves
	 * as first-waiter.
	 */
	armed = false;
	first = true;
	parent = NULL;
	completed = NULL;
	seqno = intel_engine_get_seqno(engine);

	/* If the request completed before we managed to grab the spinlock,
	 * return now before adding ourselves to the rbtree. We let the
	 * current bottom-half handle any pending wakeups and instead
	 * try and get out of the way quickly.
	 */
	if (i915_seqno_passed(seqno, wait->seqno)) {
		RB_CLEAR_NODE(&wait->node);
		return first;
	}

	p = &b->waiters.rb_node;
	while (*p) {
		parent = *p;
		if (wait->seqno == to_wait(parent)->seqno) {
			/* We have multiple waiters on the same seqno, select
			 * the highest priority task (that with the smallest
			 * task->prio) to serve as the bottom-half for this
			 * group.
			 */
			if (wait->tsk->prio > to_wait(parent)->tsk->prio) {
				p = &parent->rb_right;
				first = false;
			} else {
				p = &parent->rb_left;
			}
		} else if (i915_seqno_passed(wait->seqno,
					     to_wait(parent)->seqno)) {
			p = &parent->rb_right;
			if (i915_seqno_passed(seqno, to_wait(parent)->seqno))
				completed = parent;
			else
				first = false;
		} else {
			p = &parent->rb_left;
		}
	}
	rb_link_node(&wait->node, parent, p);
	rb_insert_color(&wait->node, &b->waiters);

	if (first) {
		lockmgr(&b->irq_lock, LK_EXCLUSIVE);
		b->irq_wait = wait;
		/* After assigning ourselves as the new bottom-half, we must
		 * perform a cursory check to prevent a missed interrupt.
		 * Either we miss the interrupt whilst programming the hardware,
		 * or if there was a previous waiter (for a later seqno) they
		 * may be woken instead of us (due to the inherent race
		 * in the unlocked read of b->irq_seqno_bh in the irq handler)
		 * and so we miss the wake up.
		 */
		armed = __intel_breadcrumbs_enable_irq(b);
		lockmgr(&b->irq_lock, LK_RELEASE);
	}

	if (completed) {
		/* Advance the bottom-half (b->irq_wait) before we wake up
		 * the waiters who may scribble over their intel_wait
		 * just as the interrupt handler is dereferencing it via
		 * b->irq_wait.
		 */
		if (!first) {
			struct rb_node *next = rb_next(completed);

			GEM_BUG_ON(next == &wait->node);
			__intel_breadcrumbs_next(engine, next);
		}

		do {
			struct intel_wait *crumb = to_wait(completed);

			completed = rb_prev(completed);
			__intel_breadcrumbs_finish(b, crumb);
		} while (completed);
	}

	GEM_BUG_ON(!b->irq_wait);
	GEM_BUG_ON(!b->irq_armed);
	GEM_BUG_ON(rb_first(&b->waiters) != &b->irq_wait->node);

	return armed;
}

bool intel_engine_add_wait(struct intel_engine_cs *engine,
			   struct intel_wait *wait)
{
	struct intel_breadcrumbs *b = &engine->breadcrumbs;
	bool armed;

	spin_lock_irq(&b->rb_lock);
	armed = __intel_engine_add_wait(engine, wait);
	spin_unlock_irq(&b->rb_lock);
	if (armed)
		return armed;

	/* Make the caller recheck if its request has already started. */
	return i915_seqno_passed(intel_engine_get_seqno(engine),
				 wait->seqno - 1);
}
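/* A minimal sketch of the waiter side, loosely modelled on the
 * i915_wait_request() loop (illustrative only - interrupt handling,
 * timeouts and the irq-seqno barrier are omitted):
 *
 *	struct intel_wait wait = { .tsk = current, .seqno = seqno };
 *
 *	intel_engine_add_wait(engine, &wait);
 *	for (;;) {
 *		set_current_state(TASK_UNINTERRUPTIBLE);
 *		if (i915_seqno_passed(intel_engine_get_seqno(engine),
 *				      wait.seqno))
 *			break;
 *		schedule();
 *	}
 *	__set_current_state(TASK_RUNNING);
 *	intel_engine_remove_wait(engine, &wait);
 */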
static inline bool chain_wakeup(struct rb_node *rb, int priority)
{
	return rb && to_wait(rb)->tsk->prio <= priority;
}

static inline int wakeup_priority(struct intel_breadcrumbs *b,
				  struct task_struct *tsk)
{
	if (tsk == b->signaler)
		return INT_MIN;
	else
		return tsk->prio;
}

static void __intel_engine_remove_wait(struct intel_engine_cs *engine,
				       struct intel_wait *wait)
{
	struct intel_breadcrumbs *b = &engine->breadcrumbs;

	lockdep_assert_held(&b->rb_lock);

	if (RB_EMPTY_NODE(&wait->node))
		goto out;

	if (b->irq_wait == wait) {
		const int priority = wakeup_priority(b, wait->tsk);
		struct rb_node *next;

		/* We are the current bottom-half. Find the next candidate,
		 * the first waiter in the queue on the remaining oldest
		 * request. As multiple seqnos may complete in the time it
		 * takes us to wake up and find the next waiter, we have to
		 * wake up that waiter for it to perform its own coherent
		 * completion check.
		 */
		next = rb_next(&wait->node);
		if (chain_wakeup(next, priority)) {
			/* If the next waiter is already complete,
			 * wake it up and continue onto the next waiter. So
			 * if we have a small herd, they will wake up in
			 * parallel rather than sequentially, which should
			 * reduce the overall latency in waking all the
			 * completed clients.
			 *
			 * However, waking up a chain adds extra latency to
			 * the first_waiter. This is undesirable if that
			 * waiter is a high priority task.
			 */
			u32 seqno = intel_engine_get_seqno(engine);

			while (i915_seqno_passed(seqno, to_wait(next)->seqno)) {
				struct rb_node *n = rb_next(next);

				__intel_breadcrumbs_finish(b, to_wait(next));
				next = n;
				if (!chain_wakeup(next, priority))
					break;
			}
		}

		__intel_breadcrumbs_next(engine, next);
	} else {
		GEM_BUG_ON(rb_first(&b->waiters) == &wait->node);
	}

	GEM_BUG_ON(RB_EMPTY_NODE(&wait->node));
	rb_erase(&wait->node, &b->waiters);
	RB_CLEAR_NODE(&wait->node);

out:
	GEM_BUG_ON(b->irq_wait == wait);
	GEM_BUG_ON(rb_first(&b->waiters) !=
		   (b->irq_wait ? &b->irq_wait->node : NULL));
}

void intel_engine_remove_wait(struct intel_engine_cs *engine,
			      struct intel_wait *wait)
{
	struct intel_breadcrumbs *b = &engine->breadcrumbs;

	/* Quick check to see if this waiter was already decoupled from
	 * the tree by the bottom-half to avoid contention on the spinlock
	 * by the herd.
	 */
	if (RB_EMPTY_NODE(&wait->node)) {
		GEM_BUG_ON(READ_ONCE(b->irq_wait) == wait);
		return;
	}

	spin_lock_irq(&b->rb_lock);
	__intel_engine_remove_wait(engine, wait);
	spin_unlock_irq(&b->rb_lock);
}

static bool signal_complete(const struct drm_i915_gem_request *request)
{
	if (!request)
		return false;

	/*
	 * Carefully check if the request is complete, giving time for the
	 * seqno to be visible or if the GPU hung.
	 */
	return __i915_request_irq_complete(request);
}

static struct drm_i915_gem_request *to_signaler(struct rb_node *rb)
{
	return rb_entry(rb, struct drm_i915_gem_request, signaling.node);
}

static void signaler_set_rtpriority(void)
{
	struct sched_param param = { .sched_priority = 1 };

	sched_setscheduler_nocheck(current, SCHED_FIFO, &param);
}
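/* SCHED_FIFO priority 1 is the lowest realtime priority: high enough to
 * preempt ordinary CFS tasks so that fences are signaled promptly, but
 * deferential to any other realtime work on the system.
 */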
static int intel_breadcrumbs_signaler(void *arg)
{
	struct intel_engine_cs *engine = arg;
	struct intel_breadcrumbs *b = &engine->breadcrumbs;
	struct drm_i915_gem_request *request;

	/* Install ourselves with high priority to reduce signalling latency */
	signaler_set_rtpriority();

	do {
		bool do_schedule = true;

		set_current_state(TASK_INTERRUPTIBLE);

		/* We are either woken up by the interrupt bottom-half,
		 * or by a client adding a new signaller. In both cases,
		 * the GPU seqno may have advanced beyond our oldest signal.
		 * If it has, propagate the signal, remove the waiter and
		 * check again with the next oldest signal. Otherwise we
		 * need to wait for a new interrupt from the GPU or for
		 * a new client.
		 */
		rcu_read_lock();
		request = rcu_dereference(b->first_signal);
		if (request)
			request = i915_gem_request_get_rcu(request);
		rcu_read_unlock();
		if (signal_complete(request)) {
			if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
				      &request->fence.flags)) {
				local_bh_disable();
				dma_fence_signal(&request->fence);
				GEM_BUG_ON(!i915_gem_request_completed(request));
				local_bh_enable(); /* kick start the tasklets */
			}

			spin_lock_irq(&b->rb_lock);

			/* Wake up all other completed waiters and select the
			 * next bottom-half for the next user interrupt.
			 */
			__intel_engine_remove_wait(engine,
						   &request->signaling.wait);

			/* Find the next oldest signal. Note that as we have
			 * not been holding the lock, another client may
			 * have installed an even older signal than the one
			 * we just completed - so double check we are still
			 * the oldest before picking the next one.
			 */
			if (request == rcu_access_pointer(b->first_signal)) {
				struct rb_node *rb =
					rb_next(&request->signaling.node);

				rcu_assign_pointer(b->first_signal,
						   rb ? to_signaler(rb) : NULL);
			}
			rb_erase(&request->signaling.node, &b->signals);
			RB_CLEAR_NODE(&request->signaling.node);

			spin_unlock_irq(&b->rb_lock);

			/* Drop the signal tree's reference, taken in
			 * intel_engine_enable_signaling(); we removed the
			 * request from b->signals above.
			 */
			i915_gem_request_put(request);

			/* If the engine is saturated we may be continually
			 * processing completed requests. This angers the
			 * NMI watchdog if we never let anything else
			 * have access to the CPU. Let's pretend to be nice
			 * and relinquish the CPU if we burn through the
			 * entire RT timeslice!
			 */
			do_schedule = need_resched();
		}

		if (unlikely(do_schedule)) {
			if (kthread_should_park())
				kthread_parkme();

			if (unlikely(kthread_should_stop())) {
				i915_gem_request_put(request);
				break;
			}

			schedule();
		}
		/* Drop the reference taken under rcu_read_lock() above */
		i915_gem_request_put(request);
	} while (1);
	__set_current_state(TASK_RUNNING);

	return 0;
}
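/* intel_engine_enable_signaling() is reached via the dma-fence machinery
 * (e.g. dma_fence_enable_sw_signaling() or the first
 * dma_fence_add_callback() on the request's fence), with the irqsafe
 * fence->lock - which aliases request->lock - already held; hence the
 * lockdep assertion below.
 */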
void intel_engine_enable_signaling(struct drm_i915_gem_request *request,
				   bool wakeup)
{
	struct intel_engine_cs *engine = request->engine;
	struct intel_breadcrumbs *b = &engine->breadcrumbs;
	u32 seqno;

	/* Note that we may be called from an interrupt handler on another
	 * device (e.g. nouveau signaling a fence completion causing us
	 * to submit a request, and so enable signaling). As such,
	 * we need to make sure that all other users of b->rb_lock protect
	 * against interrupts, i.e. use spin_lock_irqsave.
	 */

	/* locked by dma_fence_enable_sw_signaling() (irqsafe fence->lock) */
	// GEM_BUG_ON(!irqs_disabled());
	lockdep_assert_held(&request->lock);

	seqno = i915_gem_request_global_seqno(request);
	if (!seqno)
		return;

	request->signaling.wait.tsk = b->signaler;
	request->signaling.wait.request = request;
	request->signaling.wait.seqno = seqno;
	i915_gem_request_get(request);

	lockmgr(&b->rb_lock, LK_EXCLUSIVE);

	/* First add ourselves into the list of waiters, but register our
	 * bottom-half as the signaller thread. As per usual, only the oldest
	 * waiter (not just signaller) is tasked as the bottom-half waking
	 * up all completed waiters after the user interrupt.
	 *
	 * If we are the oldest waiter, enable the irq (after which we
	 * must double check that the seqno did not complete).
	 */
	wakeup &= __intel_engine_add_wait(engine, &request->signaling.wait);

	if (!__i915_gem_request_completed(request, seqno)) {
		struct rb_node *parent, **p;
		bool first;

		/* Now insert ourselves into the retirement ordered list of
		 * signals on this engine. We track the oldest seqno as that
		 * will be the first signal to complete.
		 */
		parent = NULL;
		first = true;
		p = &b->signals.rb_node;
		while (*p) {
			parent = *p;
			if (i915_seqno_passed(seqno,
					      to_signaler(parent)->signaling.wait.seqno)) {
				p = &parent->rb_right;
				first = false;
			} else {
				p = &parent->rb_left;
			}
		}
		rb_link_node(&request->signaling.node, parent, p);
		rb_insert_color(&request->signaling.node, &b->signals);
		if (first)
			rcu_assign_pointer(b->first_signal, request);
	} else {
		__intel_engine_remove_wait(engine, &request->signaling.wait);
		i915_gem_request_put(request);
		wakeup = false;
	}

	lockmgr(&b->rb_lock, LK_RELEASE);

	if (wakeup)
		wake_up_process(b->signaler);
}

void intel_engine_cancel_signaling(struct drm_i915_gem_request *request)
{
	struct intel_engine_cs *engine = request->engine;
	struct intel_breadcrumbs *b = &engine->breadcrumbs;

	GEM_BUG_ON(!irqs_disabled());
	lockdep_assert_held(&request->lock);
	GEM_BUG_ON(!request->signaling.wait.seqno);

	lockmgr(&b->rb_lock, LK_EXCLUSIVE);

	if (!RB_EMPTY_NODE(&request->signaling.node)) {
		if (request == rcu_access_pointer(b->first_signal)) {
			struct rb_node *rb =
				rb_next(&request->signaling.node);

			rcu_assign_pointer(b->first_signal,
					   rb ? to_signaler(rb) : NULL);
		}
		rb_erase(&request->signaling.node, &b->signals);
		RB_CLEAR_NODE(&request->signaling.node);
		i915_gem_request_put(request);
	}

	__intel_engine_remove_wait(engine, &request->signaling.wait);

	lockmgr(&b->rb_lock, LK_RELEASE);

	request->signaling.wait.seqno = 0;
}
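/* Breadcrumbs lifecycle: intel_engine_init_breadcrumbs() spawns the
 * signaler thread and timers during engine setup,
 * intel_engine_reset_breadcrumbs() resynchronises irq state after a GPU
 * reset, and intel_engine_fini_breadcrumbs() tears it all down again.
 */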
int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine)
{
	struct intel_breadcrumbs *b = &engine->breadcrumbs;
	struct task_struct *tsk;

	lockinit(&b->rb_lock, "i9brbl", 0, 0);
	lockinit(&b->irq_lock, "i91bil", 0, 0);

	timer_setup(&b->fake_irq, intel_breadcrumbs_fake_irq, 0);
	timer_setup(&b->hangcheck, intel_breadcrumbs_hangcheck, 0);

	/* Spawn a thread to provide a common bottom-half for all signals.
	 * As this is an asynchronous interface we cannot steal the current
	 * task for handling the bottom-half to the user interrupt, therefore
	 * we create a thread to do the coherent seqno dance after the
	 * interrupt and then signal the waitqueue (via the dma-buf/fence).
	 */
	tsk = kthread_run(intel_breadcrumbs_signaler, engine,
			  "i915/signal:%d", engine->id);
	if (IS_ERR(tsk))
		return PTR_ERR(tsk);

	b->signaler = tsk;

	return 0;
}

static void cancel_fake_irq(struct intel_engine_cs *engine)
{
	struct intel_breadcrumbs *b = &engine->breadcrumbs;

	del_timer_sync(&b->hangcheck);
	del_timer_sync(&b->fake_irq);
	clear_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings);
}

void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine)
{
	struct intel_breadcrumbs *b = &engine->breadcrumbs;

	cancel_fake_irq(engine);
	spin_lock_irq(&b->irq_lock);

	if (b->irq_enabled)
		irq_enable(engine);
	else
		irq_disable(engine);

	/* We set the IRQ_BREADCRUMB bit when we enable the irq presuming the
	 * GPU is active and may have already executed the MI_USER_INTERRUPT
	 * before the CPU is ready to receive. However, the engine is
	 * currently idle (we haven't started it yet), so there is no
	 * possibility of a missed interrupt as we enabled the irq and so
	 * we can clear the immediate wakeup (until a real interrupt arrives
	 * for the waiter).
	 */
	clear_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted);

	if (b->irq_armed)
		enable_fake_irq(b);

	spin_unlock_irq(&b->irq_lock);
}

void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine)
{
	struct intel_breadcrumbs *b = &engine->breadcrumbs;

	/* The engines should be idle and all requests accounted for! */
	WARN_ON(READ_ONCE(b->irq_wait));
	WARN_ON(!RB_EMPTY_ROOT(&b->waiters));
	WARN_ON(rcu_access_pointer(b->first_signal));
	WARN_ON(!RB_EMPTY_ROOT(&b->signals));

	if (!IS_ERR_OR_NULL(b->signaler))
		kthread_stop(b->signaler);

	cancel_fake_irq(engine);
}

bool intel_breadcrumbs_busy(struct intel_engine_cs *engine)
{
	struct intel_breadcrumbs *b = &engine->breadcrumbs;
	bool busy = false;

	spin_lock_irq(&b->rb_lock);

	if (b->irq_wait) {
		wake_up_process(b->irq_wait->tsk);
		busy = true;
	}

	if (rcu_access_pointer(b->first_signal)) {
		wake_up_process(b->signaler);
		busy = true;
	}

	spin_unlock_irq(&b->rb_lock);

	return busy;
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/intel_breadcrumbs.c"
#endif