/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include <linux/kthread.h>

#include "i915_drv.h"

static void intel_breadcrumbs_hangcheck(unsigned long data)
{
	struct intel_engine_cs *engine = (struct intel_engine_cs *)data;
	struct intel_breadcrumbs *b = &engine->breadcrumbs;

	if (!b->irq_enabled)
		return;

	if (time_before(jiffies, b->timeout)) {
		mod_timer(&b->hangcheck, b->timeout);
		return;
	}

	DRM_DEBUG("Hangcheck timer elapsed... %s idle\n", engine->name);
	set_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings);
	mod_timer(&engine->breadcrumbs.fake_irq, jiffies + 1);

	/* Ensure that even if the GPU hangs, we get woken up.
	 *
	 * However, note that if no one is waiting, we never notice
	 * a gpu hang. Eventually, we will have to wait for a resource
	 * held by the GPU and so trigger a hangcheck. In the most
	 * pathological case, this will be upon memory starvation! To
	 * prevent this, we also queue the hangcheck from the retire
	 * worker.
	 */
	i915_queue_hangcheck(engine->i915);
}

static unsigned long wait_timeout(void)
{
	return round_jiffies_up(jiffies + DRM_I915_HANGCHECK_JIFFIES);
}

static void intel_breadcrumbs_fake_irq(unsigned long data)
{
	struct intel_engine_cs *engine = (struct intel_engine_cs *)data;

	/*
	 * The timer persists in case we cannot enable interrupts,
	 * or if we have previously seen seqno/interrupt incoherency
	 * ("missed interrupt" syndrome). Here the worker will wake up
	 * every jiffie in order to kick the oldest waiter to do the
	 * coherent seqno check.
	 */
	if (intel_engine_wakeup(engine))
		mod_timer(&engine->breadcrumbs.fake_irq, jiffies + 1);
}
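
/* Low-level helpers for the engine's user interrupt. Both expect the
 * caller to have disabled interrupts; the device irq_lock only serializes
 * the update of the hardware interrupt mask. b->irq_posted is set before
 * enabling (and cleared after disabling) so that waiters never miss the
 * coherent seqno check that follows an interrupt.
 */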
83 */ 84 engine->breadcrumbs.irq_posted = true; 85 86 /* Caller disables interrupts */ 87 lockmgr(&engine->i915->irq_lock, LK_EXCLUSIVE); 88 engine->irq_enable(engine); 89 lockmgr(&engine->i915->irq_lock, LK_RELEASE); 90 } 91 92 static void irq_disable(struct intel_engine_cs *engine) 93 { 94 /* Caller disables interrupts */ 95 lockmgr(&engine->i915->irq_lock, LK_EXCLUSIVE); 96 engine->irq_disable(engine); 97 lockmgr(&engine->i915->irq_lock, LK_RELEASE); 98 99 engine->breadcrumbs.irq_posted = false; 100 } 101 102 static void __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b) 103 { 104 struct intel_engine_cs *engine = 105 container_of(b, struct intel_engine_cs, breadcrumbs); 106 struct drm_i915_private *i915 = engine->i915; 107 108 assert_spin_locked(&b->lock); 109 if (b->rpm_wakelock) 110 return; 111 112 /* Since we are waiting on a request, the GPU should be busy 113 * and should have its own rpm reference. For completeness, 114 * record an rpm reference for ourselves to cover the 115 * interrupt we unmask. 116 */ 117 intel_runtime_pm_get_noresume(i915); 118 b->rpm_wakelock = true; 119 120 /* No interrupts? Kick the waiter every jiffie! */ 121 if (intel_irqs_enabled(i915)) { 122 if (!test_bit(engine->id, &i915->gpu_error.test_irq_rings)) 123 irq_enable(engine); 124 b->irq_enabled = true; 125 } 126 127 if (!b->irq_enabled || 128 test_bit(engine->id, &i915->gpu_error.missed_irq_rings)) { 129 mod_timer(&b->fake_irq, jiffies + 1); 130 } else { 131 /* Ensure we never sleep indefinitely */ 132 GEM_BUG_ON(!time_after(b->timeout, jiffies)); 133 mod_timer(&b->hangcheck, b->timeout); 134 } 135 } 136 137 static void __intel_breadcrumbs_disable_irq(struct intel_breadcrumbs *b) 138 { 139 struct intel_engine_cs *engine = 140 container_of(b, struct intel_engine_cs, breadcrumbs); 141 142 assert_spin_locked(&b->lock); 143 if (!b->rpm_wakelock) 144 return; 145 146 if (b->irq_enabled) { 147 irq_disable(engine); 148 b->irq_enabled = false; 149 } 150 151 intel_runtime_pm_put(engine->i915); 152 b->rpm_wakelock = false; 153 } 154 155 static inline struct intel_wait *to_wait(struct rb_node *node) 156 { 157 return container_of(node, struct intel_wait, node); 158 } 159 160 static inline void __intel_breadcrumbs_finish(struct intel_breadcrumbs *b, 161 struct intel_wait *wait) 162 { 163 assert_spin_locked(&b->lock); 164 165 /* This request is completed, so remove it from the tree, mark it as 166 * complete, and *then* wake up the associated task. 167 */ 168 rb_erase(&wait->node, &b->waiters); 169 RB_CLEAR_NODE(&wait->node); 170 171 wake_up_process(wait->tsk); /* implicit smp_wmb() */ 172 } 173 174 static bool __intel_engine_add_wait(struct intel_engine_cs *engine, 175 struct intel_wait *wait) 176 { 177 struct intel_breadcrumbs *b = &engine->breadcrumbs; 178 struct rb_node **p, *parent, *completed; 179 bool first; 180 u32 seqno; 181 182 /* Insert the request into the retirement ordered list 183 * of waiters by walking the rbtree. If we are the oldest 184 * seqno in the tree (the first to be retired), then 185 * set ourselves as the bottom-half. 186 * 187 * As we descend the tree, prune completed branches since we hold the 188 * spinlock we know that the first_waiter must be delayed and can 189 * reduce some of the sequential wake up latency if we take action 190 * ourselves and wake up the completed tasks in parallel. Also, by 191 * removing stale elements in the tree, we may be able to reduce the 192 * ping-pong between the old bottom-half and ourselves as first-waiter. 
193 */ 194 first = true; 195 parent = NULL; 196 completed = NULL; 197 seqno = intel_engine_get_seqno(engine); 198 199 /* If the request completed before we managed to grab the spinlock, 200 * return now before adding ourselves to the rbtree. We let the 201 * current bottom-half handle any pending wakeups and instead 202 * try and get out of the way quickly. 203 */ 204 if (i915_seqno_passed(seqno, wait->seqno)) { 205 RB_CLEAR_NODE(&wait->node); 206 return first; 207 } 208 209 p = &b->waiters.rb_node; 210 while (*p) { 211 parent = *p; 212 if (wait->seqno == to_wait(parent)->seqno) { 213 /* We have multiple waiters on the same seqno, select 214 * the highest priority task (that with the smallest 215 * task->prio) to serve as the bottom-half for this 216 * group. 217 */ 218 if (wait->tsk->prio > to_wait(parent)->tsk->prio) { 219 p = &parent->rb_right; 220 first = false; 221 } else { 222 p = &parent->rb_left; 223 } 224 } else if (i915_seqno_passed(wait->seqno, 225 to_wait(parent)->seqno)) { 226 p = &parent->rb_right; 227 if (i915_seqno_passed(seqno, to_wait(parent)->seqno)) 228 completed = parent; 229 else 230 first = false; 231 } else { 232 p = &parent->rb_left; 233 } 234 } 235 rb_link_node(&wait->node, parent, p); 236 rb_insert_color(&wait->node, &b->waiters); 237 GEM_BUG_ON(!first && !rcu_access_pointer(b->irq_seqno_bh)); 238 239 if (completed) { 240 struct rb_node *next = rb_next(completed); 241 242 GEM_BUG_ON(!next && !first); 243 if (next && next != &wait->node) { 244 GEM_BUG_ON(first); 245 b->timeout = wait_timeout(); 246 b->first_wait = to_wait(next); 247 rcu_assign_pointer(b->irq_seqno_bh, b->first_wait->tsk); 248 /* As there is a delay between reading the current 249 * seqno, processing the completed tasks and selecting 250 * the next waiter, we may have missed the interrupt 251 * and so need for the next bottom-half to wakeup. 252 * 253 * Also as we enable the IRQ, we may miss the 254 * interrupt for that seqno, so we have to wake up 255 * the next bottom-half in order to do a coherent check 256 * in case the seqno passed. 257 */ 258 __intel_breadcrumbs_enable_irq(b); 259 if (READ_ONCE(b->irq_posted)) 260 wake_up_process(to_wait(next)->tsk); 261 } 262 263 do { 264 struct intel_wait *crumb = to_wait(completed); 265 completed = rb_prev(completed); 266 __intel_breadcrumbs_finish(b, crumb); 267 } while (completed); 268 } 269 270 if (first) { 271 GEM_BUG_ON(rb_first(&b->waiters) != &wait->node); 272 b->timeout = wait_timeout(); 273 b->first_wait = wait; 274 rcu_assign_pointer(b->irq_seqno_bh, wait->tsk); 275 /* After assigning ourselves as the new bottom-half, we must 276 * perform a cursory check to prevent a missed interrupt. 277 * Either we miss the interrupt whilst programming the hardware, 278 * or if there was a previous waiter (for a later seqno) they 279 * may be woken instead of us (due to the inherent race 280 * in the unlocked read of b->irq_seqno_bh in the irq handler) 281 * and so we miss the wake up. 
282 */ 283 __intel_breadcrumbs_enable_irq(b); 284 } 285 GEM_BUG_ON(!rcu_access_pointer(b->irq_seqno_bh)); 286 GEM_BUG_ON(!b->first_wait); 287 GEM_BUG_ON(rb_first(&b->waiters) != &b->first_wait->node); 288 289 return first; 290 } 291 292 bool intel_engine_add_wait(struct intel_engine_cs *engine, 293 struct intel_wait *wait) 294 { 295 struct intel_breadcrumbs *b = &engine->breadcrumbs; 296 bool first; 297 298 spin_lock_irq(&b->lock); 299 first = __intel_engine_add_wait(engine, wait); 300 spin_unlock_irq(&b->lock); 301 302 return first; 303 } 304 305 static inline bool chain_wakeup(struct rb_node *rb, int priority) 306 { 307 return rb && to_wait(rb)->tsk->prio <= priority; 308 } 309 310 static inline int wakeup_priority(struct intel_breadcrumbs *b, 311 struct task_struct *tsk) 312 { 313 if (tsk == b->signaler) 314 return INT_MIN; 315 else 316 return tsk->prio; 317 } 318 319 void intel_engine_remove_wait(struct intel_engine_cs *engine, 320 struct intel_wait *wait) 321 { 322 struct intel_breadcrumbs *b = &engine->breadcrumbs; 323 324 /* Quick check to see if this waiter was already decoupled from 325 * the tree by the bottom-half to avoid contention on the spinlock 326 * by the herd. 327 */ 328 if (RB_EMPTY_NODE(&wait->node)) 329 return; 330 331 spin_lock_irq(&b->lock); 332 333 if (RB_EMPTY_NODE(&wait->node)) 334 goto out_unlock; 335 336 if (b->first_wait == wait) { 337 const int priority = wakeup_priority(b, wait->tsk); 338 struct rb_node *next; 339 340 GEM_BUG_ON(rcu_access_pointer(b->irq_seqno_bh) != wait->tsk); 341 342 /* We are the current bottom-half. Find the next candidate, 343 * the first waiter in the queue on the remaining oldest 344 * request. As multiple seqnos may complete in the time it 345 * takes us to wake up and find the next waiter, we have to 346 * wake up that waiter for it to perform its own coherent 347 * completion check. 348 */ 349 next = rb_next(&wait->node); 350 if (chain_wakeup(next, priority)) { 351 /* If the next waiter is already complete, 352 * wake it up and continue onto the next waiter. So 353 * if have a small herd, they will wake up in parallel 354 * rather than sequentially, which should reduce 355 * the overall latency in waking all the completed 356 * clients. 357 * 358 * However, waking up a chain adds extra latency to 359 * the first_waiter. This is undesirable if that 360 * waiter is a high priority task. 361 */ 362 u32 seqno = intel_engine_get_seqno(engine); 363 364 while (i915_seqno_passed(seqno, to_wait(next)->seqno)) { 365 struct rb_node *n = rb_next(next); 366 367 __intel_breadcrumbs_finish(b, to_wait(next)); 368 next = n; 369 if (!chain_wakeup(next, priority)) 370 break; 371 } 372 } 373 374 if (next) { 375 /* In our haste, we may have completed the first waiter 376 * before we enabled the interrupt. Do so now as we 377 * have a second waiter for a future seqno. Afterwards, 378 * we have to wake up that waiter in case we missed 379 * the interrupt, or if we have to handle an 380 * exception rather than a seqno completion. 
381 */ 382 b->timeout = wait_timeout(); 383 b->first_wait = to_wait(next); 384 rcu_assign_pointer(b->irq_seqno_bh, b->first_wait->tsk); 385 if (b->first_wait->seqno != wait->seqno) 386 __intel_breadcrumbs_enable_irq(b); 387 wake_up_process(b->first_wait->tsk); 388 } else { 389 b->first_wait = NULL; 390 rcu_assign_pointer(b->irq_seqno_bh, NULL); 391 __intel_breadcrumbs_disable_irq(b); 392 } 393 } else { 394 GEM_BUG_ON(rb_first(&b->waiters) == &wait->node); 395 } 396 397 GEM_BUG_ON(RB_EMPTY_NODE(&wait->node)); 398 rb_erase(&wait->node, &b->waiters); 399 400 out_unlock: 401 GEM_BUG_ON(b->first_wait == wait); 402 GEM_BUG_ON(rb_first(&b->waiters) != 403 (b->first_wait ? &b->first_wait->node : NULL)); 404 GEM_BUG_ON(!rcu_access_pointer(b->irq_seqno_bh) ^ RB_EMPTY_ROOT(&b->waiters)); 405 spin_unlock_irq(&b->lock); 406 } 407 408 static bool signal_complete(struct drm_i915_gem_request *request) 409 { 410 if (!request) 411 return false; 412 413 /* If another process served as the bottom-half it may have already 414 * signalled that this wait is already completed. 415 */ 416 if (intel_wait_complete(&request->signaling.wait)) 417 return true; 418 419 /* Carefully check if the request is complete, giving time for the 420 * seqno to be visible or if the GPU hung. 421 */ 422 if (__i915_request_irq_complete(request)) 423 return true; 424 425 return false; 426 } 427 428 static struct drm_i915_gem_request *to_signaler(struct rb_node *rb) 429 { 430 return container_of(rb, struct drm_i915_gem_request, signaling.node); 431 } 432 433 static void signaler_set_rtpriority(void) 434 { 435 struct sched_param param = { .sched_priority = 1 }; 436 437 sched_setscheduler_nocheck(current, SCHED_FIFO, ¶m); 438 } 439 440 static int intel_breadcrumbs_signaler(void *arg) 441 { 442 struct intel_engine_cs *engine = arg; 443 struct intel_breadcrumbs *b = &engine->breadcrumbs; 444 struct drm_i915_gem_request *request; 445 446 /* Install ourselves with high priority to reduce signalling latency */ 447 signaler_set_rtpriority(); 448 449 do { 450 set_current_state(TASK_INTERRUPTIBLE); 451 452 /* We are either woken up by the interrupt bottom-half, 453 * or by a client adding a new signaller. In both cases, 454 * the GPU seqno may have advanced beyond our oldest signal. 455 * If it has, propagate the signal, remove the waiter and 456 * check again with the next oldest signal. Otherwise we 457 * need to wait for a new interrupt from the GPU or for 458 * a new client. 459 */ 460 request = READ_ONCE(b->first_signal); 461 if (signal_complete(request)) { 462 /* Wake up all other completed waiters and select the 463 * next bottom-half for the next user interrupt. 464 */ 465 intel_engine_remove_wait(engine, 466 &request->signaling.wait); 467 468 local_bh_disable(); 469 dma_fence_signal(&request->fence); 470 local_bh_enable(); /* kick start the tasklets */ 471 472 /* Find the next oldest signal. Note that as we have 473 * not been holding the lock, another client may 474 * have installed an even older signal than the one 475 * we just completed - so double check we are still 476 * the oldest before picking the next one. 477 */ 478 spin_lock_irq(&b->lock); 479 if (request == b->first_signal) { 480 struct rb_node *rb = 481 rb_next(&request->signaling.node); 482 b->first_signal = rb ? 
void intel_engine_enable_signaling(struct drm_i915_gem_request *request)
{
	struct intel_engine_cs *engine = request->engine;
	struct intel_breadcrumbs *b = &engine->breadcrumbs;
	struct rb_node *parent, **p;
	bool first, wakeup;

	/* Note that we may be called from an interrupt handler on another
	 * device (e.g. nouveau signaling a fence completion causing us
	 * to submit a request, and so enable signaling). As such,
	 * we need to make sure that all other users of b->lock protect
	 * against interrupts, i.e. use spin_lock_irqsave.
	 */

	/* locked by dma_fence_enable_sw_signaling() (irqsafe fence->lock) */
	assert_spin_locked(&request->lock);
	if (!request->global_seqno)
		return;

	request->signaling.wait.tsk = b->signaler;
	request->signaling.wait.seqno = request->global_seqno;
	i915_gem_request_get(request);

	lockmgr(&b->lock, LK_EXCLUSIVE);

	/* First add ourselves into the list of waiters, but register our
	 * bottom-half as the signaller thread. As per usual, only the oldest
	 * waiter (not just signaller) is tasked as the bottom-half waking
	 * up all completed waiters after the user interrupt.
	 *
	 * If we are the oldest waiter, enable the irq (after which we
	 * must double check that the seqno did not complete).
	 */
	wakeup = __intel_engine_add_wait(engine, &request->signaling.wait);

	/* Now insert ourselves into the retirement ordered list of signals
	 * on this engine. We track the oldest seqno as that will be the
	 * first signal to complete.
	 */
	parent = NULL;
	first = true;
	p = &b->signals.rb_node;
	while (*p) {
		parent = *p;
		if (i915_seqno_passed(request->global_seqno,
				      to_signaler(parent)->global_seqno)) {
			p = &parent->rb_right;
			first = false;
		} else {
			p = &parent->rb_left;
		}
	}
	rb_link_node(&request->signaling.node, parent, p);
	rb_insert_color(&request->signaling.node, &b->signals);
	if (first)
		smp_store_mb(b->first_signal, request);

	lockmgr(&b->lock, LK_RELEASE);

	if (wakeup)
		wake_up_process(b->signaler);
}

int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine)
{
	struct intel_breadcrumbs *b = &engine->breadcrumbs;
	struct task_struct *tsk;

	lockinit(&b->lock, "i915_bl", 0, 0);
	setup_timer(&b->fake_irq,
		    intel_breadcrumbs_fake_irq,
		    (unsigned long)engine);
	setup_timer(&b->hangcheck,
		    intel_breadcrumbs_hangcheck,
		    (unsigned long)engine);

	/* Spawn a thread to provide a common bottom-half for all signals.
	 * As this is an asynchronous interface we cannot steal the current
	 * task for handling the bottom-half to the user interrupt, therefore
	 * we create a thread to do the coherent seqno dance after the
	 * interrupt and then signal the waitqueue (via the dma-buf/fence).
	 */
	tsk = kthread_run(intel_breadcrumbs_signaler, engine,
			  "i915/signal:%d", engine->id);
	if (IS_ERR(tsk))
		return PTR_ERR(tsk);

	b->signaler = tsk;

	return 0;
}
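
/* Teardown helpers. cancel_fake_irq() quiesces both timers and clears the
 * missed-interrupt flag so that a later reset or reuse of the engine
 * starts from a clean slate.
 */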
581 */ 582 tsk = kthread_run(intel_breadcrumbs_signaler, engine, 583 "i915/signal:%d", engine->id); 584 if (IS_ERR(tsk)) 585 return PTR_ERR(tsk); 586 587 b->signaler = tsk; 588 589 return 0; 590 } 591 592 static void cancel_fake_irq(struct intel_engine_cs *engine) 593 { 594 struct intel_breadcrumbs *b = &engine->breadcrumbs; 595 596 del_timer_sync(&b->hangcheck); 597 del_timer_sync(&b->fake_irq); 598 clear_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings); 599 } 600 601 void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine) 602 { 603 struct intel_breadcrumbs *b = &engine->breadcrumbs; 604 605 cancel_fake_irq(engine); 606 spin_lock_irq(&b->lock); 607 608 __intel_breadcrumbs_disable_irq(b); 609 if (intel_engine_has_waiter(engine)) { 610 b->timeout = wait_timeout(); 611 __intel_breadcrumbs_enable_irq(b); 612 if (READ_ONCE(b->irq_posted)) 613 wake_up_process(b->first_wait->tsk); 614 } else { 615 /* sanitize the IMR and unmask any auxiliary interrupts */ 616 irq_disable(engine); 617 } 618 619 spin_unlock_irq(&b->lock); 620 } 621 622 void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine) 623 { 624 struct intel_breadcrumbs *b = &engine->breadcrumbs; 625 626 if (!IS_ERR_OR_NULL(b->signaler)) 627 kthread_stop(b->signaler); 628 629 cancel_fake_irq(engine); 630 } 631 632 unsigned int intel_breadcrumbs_busy(struct drm_i915_private *i915) 633 { 634 struct intel_engine_cs *engine; 635 enum intel_engine_id id; 636 unsigned int mask = 0; 637 638 for_each_engine(engine, i915, id) { 639 struct intel_breadcrumbs *b = &engine->breadcrumbs; 640 641 spin_lock_irq(&b->lock); 642 643 if (b->first_wait) { 644 wake_up_process(b->first_wait->tsk); 645 mask |= intel_engine_flag(engine); 646 } 647 648 if (b->first_signal) { 649 wake_up_process(b->signaler); 650 mask |= intel_engine_flag(engine); 651 } 652 653 spin_unlock_irq(&b->lock); 654 } 655 656 return mask; 657 } 658