/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include <linux/kthread.h>

#include "i915_drv.h"

static void intel_breadcrumbs_fake_irq(unsigned long data)
{
	struct intel_engine_cs *engine = (struct intel_engine_cs *)data;

	/*
	 * The timer persists in case we cannot enable interrupts,
	 * or if we have previously seen seqno/interrupt incoherency
	 * ("missed interrupt" syndrome). Here the worker will wake up
	 * every jiffie in order to kick the oldest waiter to do the
	 * coherent seqno check.
	 */
	rcu_read_lock();
	if (intel_engine_wakeup(engine))
		mod_timer(&engine->breadcrumbs.fake_irq, jiffies + 1);
	rcu_read_unlock();
}

static void irq_enable(struct intel_engine_cs *engine)
{
	/* Enabling the IRQ may miss the generation of the interrupt, but
	 * we still need to force the barrier before reading the seqno,
	 * just in case.
	 */
	engine->breadcrumbs.irq_posted = true;

	spin_lock_irq(&engine->i915->irq_lock);
	engine->irq_enable(engine);
	spin_unlock_irq(&engine->i915->irq_lock);
}

static void irq_disable(struct intel_engine_cs *engine)
{
	spin_lock_irq(&engine->i915->irq_lock);
	engine->irq_disable(engine);
	spin_unlock_irq(&engine->i915->irq_lock);

	engine->breadcrumbs.irq_posted = false;
}

static void __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b)
{
	struct intel_engine_cs *engine =
		container_of(b, struct intel_engine_cs, breadcrumbs);
	struct drm_i915_private *i915 = engine->i915;

	assert_spin_locked(&b->lock);
	if (b->rpm_wakelock)
		return;

	/* Since we are waiting on a request, the GPU should be busy
	 * and should have its own rpm reference. For completeness,
	 * record an rpm reference for ourselves to cover the
	 * interrupt we unmask.
	 */
	intel_runtime_pm_get_noresume(i915);
	b->rpm_wakelock = true;

	/* No interrupts? Kick the waiter every jiffie! */
	if (intel_irqs_enabled(i915)) {
		if (!test_bit(engine->id, &i915->gpu_error.test_irq_rings))
			irq_enable(engine);
		b->irq_enabled = true;
	}

	if (!b->irq_enabled ||
	    test_bit(engine->id, &i915->gpu_error.missed_irq_rings))
		mod_timer(&b->fake_irq, jiffies + 1);

	/* Ensure that even if the GPU hangs, we get woken up.
	 *
	 * However, note that if no one is waiting, we never notice
	 * a gpu hang. Eventually, we will have to wait for a resource
	 * held by the GPU and so trigger a hangcheck. In the most
	 * pathological case, this will be upon memory starvation!
	 */
	i915_queue_hangcheck(i915);
}
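
/* Neither __intel_breadcrumbs_enable_irq() above nor its counterpart
 * __intel_breadcrumbs_disable_irq() below is reference counted:
 * b->rpm_wakelock and b->irq_enabled record the current state under
 * b->lock, so repeated calls are harmless and the rpm reference and the
 * user interrupt are only taken or released on an actual state change.
 */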

static void __intel_breadcrumbs_disable_irq(struct intel_breadcrumbs *b)
{
	struct intel_engine_cs *engine =
		container_of(b, struct intel_engine_cs, breadcrumbs);

	assert_spin_locked(&b->lock);
	if (!b->rpm_wakelock)
		return;

	if (b->irq_enabled) {
		irq_disable(engine);
		b->irq_enabled = false;
	}

	intel_runtime_pm_put(engine->i915);
	b->rpm_wakelock = false;
}

static inline struct intel_wait *to_wait(struct rb_node *node)
{
	return container_of(node, struct intel_wait, node);
}

static inline void __intel_breadcrumbs_finish(struct intel_breadcrumbs *b,
					      struct intel_wait *wait)
{
	assert_spin_locked(&b->lock);

	/* This request is completed, so remove it from the tree, mark it as
	 * complete, and *then* wake up the associated task.
	 */
	rb_erase(&wait->node, &b->waiters);
	RB_CLEAR_NODE(&wait->node);

	wake_up_process(wait->tsk); /* implicit smp_wmb() */
}

static bool __intel_engine_add_wait(struct intel_engine_cs *engine,
				    struct intel_wait *wait)
{
	struct intel_breadcrumbs *b = &engine->breadcrumbs;
	struct rb_node **p, *parent, *completed;
	bool first;
	u32 seqno;

	/* Insert the request into the retirement ordered list
	 * of waiters by walking the rbtree. If we are the oldest
	 * seqno in the tree (the first to be retired), then
	 * set ourselves as the bottom-half.
	 *
	 * As we descend the tree, prune completed branches: since we hold the
	 * spinlock, we know that the first_waiter must be delayed and we can
	 * reduce some of the sequential wake up latency if we take action
	 * ourselves and wake up the completed tasks in parallel. Also, by
	 * removing stale elements in the tree, we may be able to reduce the
	 * ping-pong between the old bottom-half and ourselves as first-waiter.
	 */
	first = true;
	parent = NULL;
	completed = NULL;
	seqno = intel_engine_get_seqno(engine);

	/* If the request completed before we managed to grab the spinlock,
	 * return now before adding ourselves to the rbtree. We let the
	 * current bottom-half handle any pending wakeups and instead
	 * try and get out of the way quickly.
	 */
	if (i915_seqno_passed(seqno, wait->seqno)) {
		RB_CLEAR_NODE(&wait->node);
		return first;
	}

	p = &b->waiters.rb_node;
	while (*p) {
		parent = *p;
		if (wait->seqno == to_wait(parent)->seqno) {
			/* We have multiple waiters on the same seqno, select
			 * the highest priority task (that with the smallest
			 * task->prio) to serve as the bottom-half for this
			 * group.
			 */
			if (wait->tsk->prio > to_wait(parent)->tsk->prio) {
				p = &parent->rb_right;
				first = false;
			} else {
				p = &parent->rb_left;
			}
		} else if (i915_seqno_passed(wait->seqno,
					     to_wait(parent)->seqno)) {
			p = &parent->rb_right;
			if (i915_seqno_passed(seqno, to_wait(parent)->seqno))
				completed = parent;
			else
				first = false;
		} else {
			p = &parent->rb_left;
		}
	}
	rb_link_node(&wait->node, parent, p);
	rb_insert_color(&wait->node, &b->waiters);
	GEM_BUG_ON(!first && !b->irq_seqno_bh);

	if (completed) {
		struct rb_node *next = rb_next(completed);

		GEM_BUG_ON(!next && !first);
		if (next && next != &wait->node) {
			GEM_BUG_ON(first);
			b->first_wait = to_wait(next);
			smp_store_mb(b->irq_seqno_bh, b->first_wait->tsk);
			/* As there is a delay between reading the current
			 * seqno, processing the completed tasks and selecting
			 * the next waiter, we may have missed the interrupt
			 * and so need the next bottom-half to wake up.
			 *
			 * Also as we enable the IRQ, we may miss the
			 * interrupt for that seqno, so we have to wake up
			 * the next bottom-half in order to do a coherent check
			 * in case the seqno passed.
			 */
			__intel_breadcrumbs_enable_irq(b);
			if (READ_ONCE(b->irq_posted))
				wake_up_process(to_wait(next)->tsk);
		}

		do {
			struct intel_wait *crumb = to_wait(completed);
			completed = rb_prev(completed);
			__intel_breadcrumbs_finish(b, crumb);
		} while (completed);
	}

	if (first) {
		GEM_BUG_ON(rb_first(&b->waiters) != &wait->node);
		b->first_wait = wait;
		smp_store_mb(b->irq_seqno_bh, wait->tsk);
		/* After assigning ourselves as the new bottom-half, we must
		 * perform a cursory check to prevent a missed interrupt.
		 * Either we miss the interrupt whilst programming the hardware,
		 * or if there was a previous waiter (for a later seqno) they
		 * may be woken instead of us (due to the inherent race
		 * in the unlocked read of b->irq_seqno_bh in the irq handler)
		 * and so we miss the wake up.
		 */
		__intel_breadcrumbs_enable_irq(b);
	}
	GEM_BUG_ON(!b->irq_seqno_bh);
	GEM_BUG_ON(!b->first_wait);
	GEM_BUG_ON(rb_first(&b->waiters) != &b->first_wait->node);

	return first;
}

bool intel_engine_add_wait(struct intel_engine_cs *engine,
			   struct intel_wait *wait)
{
	struct intel_breadcrumbs *b = &engine->breadcrumbs;
	bool first;

	lockmgr(&b->lock, LK_EXCLUSIVE);
	first = __intel_engine_add_wait(engine, wait);
	lockmgr(&b->lock, LK_RELEASE);

	return first;
}

void intel_engine_enable_fake_irq(struct intel_engine_cs *engine)
{
	mod_timer(&engine->breadcrumbs.fake_irq, jiffies + 1);
}
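
/* Helpers for intel_engine_remove_wait() below: when the current bottom-half
 * removes itself, it may wake up a chain of already completed waiters on its
 * way out. chain_wakeup() limits that to waiters whose task->prio is
 * numerically no greater than (i.e. at least as urgent as) the given
 * priority, and wakeup_priority() treats the signaler thread as the most
 * urgent of all (INT_MIN) so that it never spends time waking a chain.
 */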

static inline bool chain_wakeup(struct rb_node *rb, int priority)
{
	return rb && to_wait(rb)->tsk->prio <= priority;
}

static inline int wakeup_priority(struct intel_breadcrumbs *b,
				  struct task_struct *tsk)
{
	if (tsk == b->signaler)
		return INT_MIN;
	else
		return tsk->prio;
}

void intel_engine_remove_wait(struct intel_engine_cs *engine,
			      struct intel_wait *wait)
{
	struct intel_breadcrumbs *b = &engine->breadcrumbs;

	/* Quick check to see if this waiter was already decoupled from
	 * the tree by the bottom-half to avoid contention on the spinlock
	 * by the herd.
	 */
	if (RB_EMPTY_NODE(&wait->node))
		return;

	lockmgr(&b->lock, LK_EXCLUSIVE);

	if (RB_EMPTY_NODE(&wait->node))
		goto out_unlock;

	if (b->first_wait == wait) {
		const int priority = wakeup_priority(b, wait->tsk);
		struct rb_node *next;

		GEM_BUG_ON(b->irq_seqno_bh != wait->tsk);

		/* We are the current bottom-half. Find the next candidate,
		 * the first waiter in the queue on the remaining oldest
		 * request. As multiple seqnos may complete in the time it
		 * takes us to wake up and find the next waiter, we have to
		 * wake up that waiter for it to perform its own coherent
		 * completion check.
		 */
		next = rb_next(&wait->node);
		if (chain_wakeup(next, priority)) {
			/* If the next waiter is already complete,
			 * wake it up and continue on to the next waiter. So
			 * if we have a small herd, they will wake up in
			 * parallel rather than sequentially, which should
			 * reduce the overall latency in waking all the
			 * completed clients.
			 *
			 * However, waking up a chain adds extra latency to
			 * the first_waiter. This is undesirable if that
			 * waiter is a high priority task.
			 */
			u32 seqno = intel_engine_get_seqno(engine);

			while (i915_seqno_passed(seqno, to_wait(next)->seqno)) {
				struct rb_node *n = rb_next(next);

				__intel_breadcrumbs_finish(b, to_wait(next));
				next = n;
				if (!chain_wakeup(next, priority))
					break;
			}
		}

		if (next) {
			/* In our haste, we may have completed the first waiter
			 * before we enabled the interrupt. Do so now as we
			 * have a second waiter for a future seqno. Afterwards,
			 * we have to wake up that waiter in case we missed
			 * the interrupt, or if we have to handle an
			 * exception rather than a seqno completion.
			 */
			b->first_wait = to_wait(next);
			smp_store_mb(b->irq_seqno_bh, b->first_wait->tsk);
			if (b->first_wait->seqno != wait->seqno)
				__intel_breadcrumbs_enable_irq(b);
			wake_up_process(b->irq_seqno_bh);
		} else {
			b->first_wait = NULL;
			WRITE_ONCE(b->irq_seqno_bh, NULL);
			__intel_breadcrumbs_disable_irq(b);
		}
	} else {
		GEM_BUG_ON(rb_first(&b->waiters) == &wait->node);
	}

	GEM_BUG_ON(RB_EMPTY_NODE(&wait->node));
	rb_erase(&wait->node, &b->waiters);

out_unlock:
	GEM_BUG_ON(b->first_wait == wait);
	GEM_BUG_ON(rb_first(&b->waiters) !=
		   (b->first_wait ? &b->first_wait->node : NULL));
	GEM_BUG_ON(!b->irq_seqno_bh ^ RB_EMPTY_ROOT(&b->waiters));
	lockmgr(&b->lock, LK_RELEASE);
}
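
/* An illustrative sketch (not lifted from the in-tree wait loop) of how a
 * waiter typically brackets its wait with the two entry points above.
 * intel_wait_init() is assumed here to record the current task and the
 * target seqno in the intel_wait:
 *
 *	struct intel_wait wait;
 *
 *	intel_wait_init(&wait, seqno);
 *	intel_engine_add_wait(engine, &wait);
 *	for (;;) {
 *		set_current_state(TASK_UNINTERRUPTIBLE);
 *		if (i915_seqno_passed(intel_engine_get_seqno(engine), seqno))
 *			break;
 *		schedule();
 *	}
 *	intel_engine_remove_wait(engine, &wait);
 *	__set_current_state(TASK_RUNNING);
 */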

static bool signal_complete(struct drm_i915_gem_request *request)
{
	if (!request)
		return false;

	/* If another process served as the bottom-half, it may have already
	 * signalled that this wait is complete.
	 */
	if (intel_wait_complete(&request->signaling.wait))
		return true;

	/* Carefully check if the request is complete, giving time for the
	 * seqno to be visible or if the GPU hung.
	 */
	if (__i915_request_irq_complete(request))
		return true;

	return false;
}

static struct drm_i915_gem_request *to_signaler(struct rb_node *rb)
{
	return container_of(rb, struct drm_i915_gem_request, signaling.node);
}

static void signaler_set_rtpriority(void)
{
	struct sched_param param = { .sched_priority = 1 };

	sched_setscheduler_nocheck(current, SCHED_FIFO, &param);
}

static int intel_breadcrumbs_signaler(void *arg)
{
	struct intel_engine_cs *engine = arg;
	struct intel_breadcrumbs *b = &engine->breadcrumbs;
	struct drm_i915_gem_request *request;

	/* Install ourselves with high priority to reduce signalling latency */
	signaler_set_rtpriority();

	do {
		set_current_state(TASK_INTERRUPTIBLE);

		/* We are either woken up by the interrupt bottom-half,
		 * or by a client adding a new signaller. In both cases,
		 * the GPU seqno may have advanced beyond our oldest signal.
		 * If it has, propagate the signal, remove the waiter and
		 * check again with the next oldest signal. Otherwise we
		 * need to wait for a new interrupt from the GPU or for
		 * a new client.
		 */
		request = READ_ONCE(b->first_signal);
		if (signal_complete(request)) {
			/* Wake up all other completed waiters and select the
			 * next bottom-half for the next user interrupt.
			 */
			intel_engine_remove_wait(engine,
						 &request->signaling.wait);

			/* Find the next oldest signal. Note that as we have
			 * not been holding the lock, another client may
			 * have installed an even older signal than the one
			 * we just completed - so double check we are still
			 * the oldest before picking the next one.
			 */
			lockmgr(&b->lock, LK_EXCLUSIVE);
			if (request == b->first_signal) {
				struct rb_node *rb =
					rb_next(&request->signaling.node);
				b->first_signal = rb ? to_signaler(rb) : NULL;
			}
			rb_erase(&request->signaling.node, &b->signals);
			lockmgr(&b->lock, LK_RELEASE);

			i915_gem_request_unreference(request);
		} else {
			if (kthread_should_stop())
				break;

			schedule();
		}
	} while (1);
	__set_current_state(TASK_RUNNING);

	return 0;
}

void intel_engine_enable_signaling(struct drm_i915_gem_request *request)
{
	struct intel_engine_cs *engine = request->engine;
	struct intel_breadcrumbs *b = &engine->breadcrumbs;
	struct rb_node *parent, **p;
	bool first, wakeup;

	if (unlikely(READ_ONCE(request->signaling.wait.tsk)))
		return;

	lockmgr(&b->lock, LK_EXCLUSIVE);
	if (unlikely(request->signaling.wait.tsk)) {
		wakeup = false;
		goto unlock;
	}

	request->signaling.wait.tsk = b->signaler;
	request->signaling.wait.seqno = request->seqno;
	i915_gem_request_reference(request);

	/* First add ourselves into the list of waiters, but register our
	 * bottom-half as the signaller thread. As per usual, only the oldest
	 * waiter (not just signaller) is tasked as the bottom-half waking
	 * up all completed waiters after the user interrupt.
	 *
	 * If we are the oldest waiter, enable the irq (after which we
	 * must double check that the seqno did not complete).
	 */
	wakeup = __intel_engine_add_wait(engine, &request->signaling.wait);

	/* Now insert ourselves into the retirement ordered list of signals
	 * on this engine. We track the oldest seqno as that will be the
	 * first signal to complete.
	 */
	parent = NULL;
	first = true;
	p = &b->signals.rb_node;
	while (*p) {
		parent = *p;
		if (i915_seqno_passed(request->seqno,
				      to_signaler(parent)->seqno)) {
			p = &parent->rb_right;
			first = false;
		} else {
			p = &parent->rb_left;
		}
	}
	rb_link_node(&request->signaling.node, parent, p);
	rb_insert_color(&request->signaling.node, &b->signals);
	if (first)
		smp_store_mb(b->first_signal, request);

unlock:
	lockmgr(&b->lock, LK_RELEASE);

	if (wakeup)
		wake_up_process(b->signaler);
}
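
/* Note that intel_engine_enable_signaling() above may be called more than
 * once for a request: only the first call arms the signal (a non-NULL
 * request->signaling.wait.tsk acts as the already-armed marker), and the
 * reference it takes is dropped by the signaler thread once the request has
 * completed and been removed from the signal tree.
 */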

int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine)
{
	struct intel_breadcrumbs *b = &engine->breadcrumbs;
	struct task_struct *tsk;

	lockinit(&b->lock, "i915_bl", 0, LK_CANRECURSE);
	setup_timer(&b->fake_irq,
		    intel_breadcrumbs_fake_irq,
		    (unsigned long)engine);

	/* Spawn a thread to provide a common bottom-half for all signals.
	 * As this is an asynchronous interface we cannot steal the current
	 * task for handling the bottom-half to the user interrupt, therefore
	 * we create a thread to do the coherent seqno dance after the
	 * interrupt and then signal the waitqueue (via the dma-buf/fence).
	 */
	tsk = kthread_run(intel_breadcrumbs_signaler, engine,
			  "i915/signal:%d", engine->id);
	if (IS_ERR(tsk))
		return PTR_ERR(tsk);

	b->signaler = tsk;

	return 0;
}

void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine)
{
	struct intel_breadcrumbs *b = &engine->breadcrumbs;

	if (!IS_ERR_OR_NULL(b->signaler))
		kthread_stop(b->signaler);

	del_timer_sync(&b->fake_irq);
}

unsigned int intel_kick_waiters(struct drm_i915_private *i915)
{
	struct intel_engine_cs *engine;
	unsigned int mask = 0;

	/* To avoid the task_struct disappearing beneath us as we wake up
	 * the process, we must first inspect the task_struct->state under the
	 * RCU lock, i.e. as we call wake_up_process() we must be holding the
	 * rcu_read_lock().
	 */
	rcu_read_lock();
	for_each_engine(engine, i915)
		if (unlikely(intel_engine_wakeup(engine)))
			mask |= intel_engine_flag(engine);
	rcu_read_unlock();

	return mask;
}

unsigned int intel_kick_signalers(struct drm_i915_private *i915)
{
	struct intel_engine_cs *engine;
	unsigned int mask = 0;

	for_each_engine(engine, i915) {
		if (unlikely(READ_ONCE(engine->breadcrumbs.first_signal))) {
			wake_up_process(engine->breadcrumbs.signaler);
			mask |= intel_engine_flag(engine);
		}
	}

	return mask;
}