/*
 * Copyright 2009 Jerome Glisse.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */
/*
 * Authors:
 *    Jerome Glisse <glisse@freedesktop.org>
 *    Dave Airlie
 *
 * $FreeBSD: head/sys/dev/drm2/radeon/radeon_fence.c 254885 2013-08-25 19:37:15Z dumbbell $
 */

#include <drm/drmP.h>
#include "radeon_reg.h"
#include "radeon.h"
#ifdef DUMBBELL_WIP
#include "radeon_trace.h"
#endif /* DUMBBELL_WIP */

/*
 * Fences
 * Fences mark an event in the GPU's pipeline and are used
 * for GPU/CPU synchronization.  When the fence is written,
 * it is expected that all buffers associated with that fence
 * are no longer in use by the associated ring on the GPU and
 * that the relevant GPU caches have been flushed.  Whether
 * we use a scratch register or a memory location depends on the asic
 * and whether writeback is enabled.
 */

/**
 * radeon_fence_write - write a fence value
 *
 * @rdev: radeon_device pointer
 * @seq: sequence number to write
 * @ring: ring index the fence is associated with
 *
 * Writes a fence value to memory or a scratch register (all asics).
 */
static void radeon_fence_write(struct radeon_device *rdev, u32 seq, int ring)
{
	struct radeon_fence_driver *drv = &rdev->fence_drv[ring];
	if (likely(rdev->wb.enabled || !drv->scratch_reg)) {
		*drv->cpu_addr = cpu_to_le32(seq);
	} else {
		WREG32(drv->scratch_reg, seq);
	}
}

/**
 * radeon_fence_read - read a fence value
 *
 * @rdev: radeon_device pointer
 * @ring: ring index the fence is associated with
 *
 * Reads a fence value from memory or a scratch register (all asics).
 * Returns the value of the fence read from memory or register.
 */
static u32 radeon_fence_read(struct radeon_device *rdev, int ring)
{
	struct radeon_fence_driver *drv = &rdev->fence_drv[ring];
	u32 seq = 0;

	if (likely(rdev->wb.enabled || !drv->scratch_reg)) {
		seq = le32_to_cpu(*drv->cpu_addr);
	} else {
		seq = RREG32(drv->scratch_reg);
	}
	return seq;
}

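/*
 * Illustrative round trip (hypothetical values): the two helpers above
 * store and fetch only the low 32 bits of a sequence number, through
 * either writeback memory or a scratch register, e.g. on the GFX ring:
 *
 *	radeon_fence_write(rdev, 42, RADEON_RING_TYPE_GFX_INDEX);
 *	seq = radeon_fence_read(rdev, RADEON_RING_TYPE_GFX_INDEX); // 42
 */
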
/**
 * radeon_fence_emit - emit a fence on the requested ring
 *
 * @rdev: radeon_device pointer
 * @fence: radeon fence object
 * @ring: ring index the fence is associated with
 *
 * Emits a fence command on the requested ring (all asics).
 * Returns 0 on success, -ENOMEM on failure.
 */
int radeon_fence_emit(struct radeon_device *rdev,
		      struct radeon_fence **fence,
		      int ring)
{
	/* we are protected by the ring emission mutex */
	*fence = kmalloc(sizeof(struct radeon_fence), M_DRM,
			 M_WAITOK);
	if ((*fence) == NULL) {
		return -ENOMEM;
	}
	refcount_init(&((*fence)->kref), 1);
	(*fence)->rdev = rdev;
	(*fence)->seq = ++rdev->fence_drv[ring].sync_seq[ring];
	(*fence)->ring = ring;
	radeon_fence_ring_emit(rdev, ring, *fence);
	return 0;
}

/**
 * radeon_fence_process - process a fence
 *
 * @rdev: radeon_device pointer
 * @ring: ring index the fence is associated with
 *
 * Checks the current fence value and wakes the fence queue
 * if the sequence number has increased (all asics).
 */
void radeon_fence_process(struct radeon_device *rdev, int ring)
{
	uint64_t seq, last_seq, last_emitted;
	unsigned count_loop = 0;
	bool wake = false;

	/* Note there is a scenario here for an infinite loop but it's
	 * very unlikely to happen.  For it to happen, the current polling
	 * process needs to be interrupted by another process, and that
	 * other process needs to update last_seq between the atomic read
	 * and the xchg of the current process.
	 *
	 * Moreover, for this to loop forever there would need to be a
	 * continuous stream of newly signaled fences, i.e. radeon_fence_read
	 * needs to return a different value each time for both the currently
	 * polling process and the other process that updates last_seq
	 * between the atomic read and xchg of the current process.  And the
	 * value the other process sets as last_seq must be higher than the
	 * seq value we just read, which means the current process needs to
	 * be interrupted after radeon_fence_read and before the atomic xchg.
	 *
	 * To be even safer, we count the number of times we loop and bail
	 * out after 10 iterations, accepting the fact that we might have
	 * temporarily set last_seq not to the true last signaled seq but
	 * to an older one.
	 */
	last_seq = atomic64_read(&rdev->fence_drv[ring].last_seq);
	do {
		last_emitted = rdev->fence_drv[ring].sync_seq[ring];
		seq = radeon_fence_read(rdev, ring);
		seq |= last_seq & 0xffffffff00000000LL;
		if (seq < last_seq) {
			seq &= 0xffffffff;
			seq |= last_emitted & 0xffffffff00000000LL;
		}

		if (seq <= last_seq || seq > last_emitted) {
			break;
		}
		/* If we loop over, we don't want to return without
		 * checking if a fence is signaled, as it means that the
		 * seq we just read is different from the previous one.
		 */
		wake = true;
		last_seq = seq;
		if ((count_loop++) > 10) {
			/* We looped too many times; leave with the
			 * fact that we might have set an older fence
			 * seq than the current real last seq as signaled
			 * by the hw.
			 */
			break;
		}
	} while (atomic64_xchg(&rdev->fence_drv[ring].last_seq, seq) > seq);

	if (wake) {
		rdev->fence_drv[ring].last_activity = jiffies;
		wake_up_all(&rdev->fence_queue);
	}
}

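/*
 * Worked example of the sequence extension above (hypothetical values):
 * the hardware only stores the low 32 bits.  With last_seq =
 * 0x00000001fffffffe, last_emitted = 0x0000000200000005, and the
 * hardware now reporting 0x00000003, splicing in the old upper bits
 * gives 0x0000000100000003, which is < last_seq.  That indicates a
 * 32-bit wraparound, so the upper bits are taken from last_emitted
 * instead, yielding the correct 0x0000000200000003.
 */
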
/**
 * radeon_fence_destroy - destroy a fence
 *
 * @fence: radeon fence object
 *
 * Frees the fence object (all asics).
 */
static void radeon_fence_destroy(struct radeon_fence *fence)
{

	drm_free(fence, M_DRM);
}

/**
 * radeon_fence_seq_signaled - check if a fence sequence number has signaled
 *
 * @rdev: radeon device pointer
 * @seq: sequence number
 * @ring: ring index the fence is associated with
 *
 * Check if the last signaled fence sequence number is >= the requested
 * sequence number (all asics).
 * Returns true if the fence has signaled (current fence value
 * is >= requested value) or false if it has not (current fence
 * value is < the requested value).  Helper function for
 * radeon_fence_signaled().
 */
static bool radeon_fence_seq_signaled(struct radeon_device *rdev,
				      u64 seq, unsigned ring)
{
	if (atomic64_read(&rdev->fence_drv[ring].last_seq) >= seq) {
		return true;
	}
	/* poll new last sequence at least once */
	radeon_fence_process(rdev, ring);
	if (atomic64_read(&rdev->fence_drv[ring].last_seq) >= seq) {
		return true;
	}
	return false;
}

/**
 * radeon_fence_signaled - check if a fence has signaled
 *
 * @fence: radeon fence object
 *
 * Check if the requested fence has signaled (all asics).
 * Returns true if the fence has signaled or false if it has not.
 */
bool radeon_fence_signaled(struct radeon_fence *fence)
{
	if (!fence) {
		return true;
	}
	if (fence->seq == RADEON_FENCE_SIGNALED_SEQ) {
		return true;
	}
	if (radeon_fence_seq_signaled(fence->rdev, fence->seq, fence->ring)) {
		fence->seq = RADEON_FENCE_SIGNALED_SEQ;
		return true;
	}
	return false;
}

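/*
 * Hypothetical caller-side sketch: poll first, then block only if the
 * fence is still outstanding:
 *
 *	if (!radeon_fence_signaled(fence))
 *		r = radeon_fence_wait(fence, true);
 */
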
/**
 * radeon_fence_wait_seq - wait for a specific sequence number
 *
 * @rdev: radeon device pointer
 * @target_seq: sequence number we want to wait for
 * @ring: ring index the fence is associated with
 * @intr: use interruptible sleep
 * @lock_ring: whether the ring should be locked or not
 *
 * Wait for the requested sequence number to be written (all asics).
 * @intr selects whether to use interruptible (true) or non-interruptible
 * (false) sleep when waiting for the sequence number.  Helper function
 * for radeon_fence_wait(), et al.
 * Returns 0 if the sequence number has passed, error for all other cases.
 * -EDEADLK is returned when a GPU lockup has been detected and the ring is
 * marked as not ready so no further jobs get scheduled until a successful
 * reset.
 */
static int radeon_fence_wait_seq(struct radeon_device *rdev, u64 target_seq,
				 unsigned ring, bool intr, bool lock_ring)
{
	unsigned long timeout, last_activity;
	uint64_t seq;
	unsigned i;
	bool signaled;
	int r;

	while (target_seq > atomic64_read(&rdev->fence_drv[ring].last_seq)) {
		if (!rdev->ring[ring].ready) {
			return -EBUSY;
		}

		timeout = jiffies - RADEON_FENCE_JIFFIES_TIMEOUT;
		if (time_after(rdev->fence_drv[ring].last_activity, timeout)) {
			/* the normal case, timeout is somewhere before last_activity */
			timeout = rdev->fence_drv[ring].last_activity - timeout;
		} else {
			/* either jiffies wrapped around, or no fence was signaled
			 * in the last 500ms; either way we just wait for the minimum
			 * amount and then check for a lockup
			 */
			timeout = 1;
		}
		seq = atomic64_read(&rdev->fence_drv[ring].last_seq);
		/* Save current last activity value, used to check for GPU lockups */
		last_activity = rdev->fence_drv[ring].last_activity;

		radeon_irq_kms_sw_irq_get(rdev, ring);
		if (intr) {
			r = wait_event_interruptible_timeout(rdev->fence_queue,
				(signaled = radeon_fence_seq_signaled(rdev, target_seq, ring)),
				timeout);
		} else {
			r = wait_event_timeout(rdev->fence_queue,
				(signaled = radeon_fence_seq_signaled(rdev, target_seq, ring)),
				timeout);
		}
		radeon_irq_kms_sw_irq_put(rdev, ring);
		if (unlikely(r < 0)) {
			return r;
		}

		if (unlikely(!signaled)) {
			/* we were interrupted for some reason and fence
			 * isn't signaled yet, resume waiting */
			if (r) {
				continue;
			}

			/* check if sequence value has changed since last_activity */
			if (seq != atomic64_read(&rdev->fence_drv[ring].last_seq)) {
				continue;
			}

			if (lock_ring) {
				lockmgr(&rdev->ring_lock, LK_EXCLUSIVE);
			}

			/* test if somebody else has already decided that this is a lockup */
			if (last_activity != rdev->fence_drv[ring].last_activity) {
				if (lock_ring) {
					lockmgr(&rdev->ring_lock, LK_RELEASE);
				}
				continue;
			}

			if (radeon_ring_is_lockup(rdev, ring, &rdev->ring[ring])) {
				/* good news, we believe it's a lockup */
				dev_warn(rdev->dev, "GPU lockup (waiting for 0x%016jx last fence id 0x%016jx)\n",
					 target_seq, seq);

				/* change last activity so nobody else thinks there is a lockup */
				for (i = 0; i < RADEON_NUM_RINGS; ++i) {
					rdev->fence_drv[i].last_activity = jiffies;
				}

				/* mark the ring as not ready any more */
				rdev->ring[ring].ready = false;
				if (lock_ring) {
					lockmgr(&rdev->ring_lock, LK_RELEASE);
				}
				return -EDEADLK;
			}

			if (lock_ring) {
				lockmgr(&rdev->ring_lock, LK_RELEASE);
			}
		}
	}
	return 0;
}

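/*
 * Worked example of the timeout computation above (hypothetical jiffies
 * values, assuming a 500-tick window, i.e. HZ=1000 and
 * RADEON_FENCE_JIFFIES_TIMEOUT = HZ/2): timeout ends up as
 * last_activity - (jiffies - RADEON_FENCE_JIFFIES_TIMEOUT), the portion
 * of the window still remaining since the last activity.  With
 * jiffies = 10000 and last_activity = 9800, the wait is 300 ticks.
 */
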
/**
 * radeon_fence_wait - wait for a fence to signal
 *
 * @fence: radeon fence object
 * @intr: use interruptible sleep
 *
 * Wait for the requested fence to signal (all asics).
 * @intr selects whether to use interruptible (true) or non-interruptible
 * (false) sleep when waiting for the fence.
 * Returns 0 if the fence has passed, error for all other cases.
 */
int radeon_fence_wait(struct radeon_fence *fence, bool intr)
{
	int r;

	if (fence == NULL) {
		DRM_ERROR("Querying an invalid fence : %p !\n", fence);
		return -EINVAL;
	}

	r = radeon_fence_wait_seq(fence->rdev, fence->seq,
				  fence->ring, intr, true);
	if (r) {
		return r;
	}
	fence->seq = RADEON_FENCE_SIGNALED_SEQ;
	return 0;
}

static bool radeon_fence_any_seq_signaled(struct radeon_device *rdev, u64 *seq)
{
	unsigned i;

	for (i = 0; i < RADEON_NUM_RINGS; ++i) {
		if (seq[i] && radeon_fence_seq_signaled(rdev, seq[i], i)) {
			return true;
		}
	}
	return false;
}

/**
 * radeon_fence_wait_any_seq - wait for a sequence number on any ring
 *
 * @rdev: radeon device pointer
 * @target_seq: sequence number(s) we want to wait for
 * @intr: use interruptible sleep
 *
 * Wait for the requested sequence number(s) to be written by any ring
 * (all asics).  The sequence number array is indexed by ring id.
 * @intr selects whether to use interruptible (true) or non-interruptible
 * (false) sleep when waiting for the sequence number.  Helper function
 * for radeon_fence_wait_any(), et al.
 * Returns 0 if the sequence number has passed, error for all other cases.
 */
static int radeon_fence_wait_any_seq(struct radeon_device *rdev,
				     u64 *target_seq, bool intr)
{
	unsigned long timeout, last_activity, tmp;
	unsigned i, ring = RADEON_NUM_RINGS;
	bool signaled;
	int r;

	for (i = 0, last_activity = 0; i < RADEON_NUM_RINGS; ++i) {
		if (!target_seq[i]) {
			continue;
		}

		/* use the most recent one as indicator */
		if (time_after(rdev->fence_drv[i].last_activity, last_activity)) {
			last_activity = rdev->fence_drv[i].last_activity;
		}

		/* For lockup detection just pick the lowest ring we are
		 * actively waiting for
		 */
		if (i < ring) {
			ring = i;
		}
	}

	/* nothing to wait for? */
	if (ring == RADEON_NUM_RINGS) {
		return -ENOENT;
	}

	while (!radeon_fence_any_seq_signaled(rdev, target_seq)) {
		timeout = jiffies - RADEON_FENCE_JIFFIES_TIMEOUT;
		if (time_after(last_activity, timeout)) {
			/* the normal case, timeout is somewhere before last_activity */
			timeout = last_activity - timeout;
		} else {
			/* either jiffies wrapped around, or no fence was signaled
			 * in the last 500ms; either way we just wait for the minimum
			 * amount and then check for a lockup
			 */
			timeout = 1;
		}

		for (i = 0; i < RADEON_NUM_RINGS; ++i) {
			if (target_seq[i]) {
				radeon_irq_kms_sw_irq_get(rdev, i);
			}
		}
		if (intr) {
			r = wait_event_interruptible_timeout(rdev->fence_queue,
				(signaled = radeon_fence_any_seq_signaled(rdev, target_seq)),
				timeout);
		} else {
			r = wait_event_timeout(rdev->fence_queue,
				(signaled = radeon_fence_any_seq_signaled(rdev, target_seq)),
				timeout);
		}
		for (i = 0; i < RADEON_NUM_RINGS; ++i) {
			if (target_seq[i]) {
				radeon_irq_kms_sw_irq_put(rdev, i);
			}
		}
		if (unlikely(r < 0)) {
			return r;
		}

		if (unlikely(!signaled)) {
			/* we were interrupted for some reason and fence
			 * isn't signaled yet, resume waiting */
			if (r) {
				continue;
			}

			lockmgr(&rdev->ring_lock, LK_EXCLUSIVE);
			for (i = 0, tmp = 0; i < RADEON_NUM_RINGS; ++i) {
				if (time_after(rdev->fence_drv[i].last_activity, tmp)) {
					tmp = rdev->fence_drv[i].last_activity;
				}
			}
			/* test if somebody else has already decided that this is a lockup */
			if (last_activity != tmp) {
				last_activity = tmp;
				lockmgr(&rdev->ring_lock, LK_RELEASE);
				continue;
			}

			if (radeon_ring_is_lockup(rdev, ring, &rdev->ring[ring])) {
				/* good news, we believe it's a lockup */
				dev_warn(rdev->dev, "GPU lockup (waiting for 0x%016jx)\n",
					 target_seq[ring]);

				/* change last activity so nobody else thinks there is a lockup */
				for (i = 0; i < RADEON_NUM_RINGS; ++i) {
					rdev->fence_drv[i].last_activity = jiffies;
				}

				/* mark the ring as not ready any more */
				rdev->ring[ring].ready = false;
				lockmgr(&rdev->ring_lock, LK_RELEASE);
				return -EDEADLK;
			}
			lockmgr(&rdev->ring_lock, LK_RELEASE);
		}
	}
	return 0;
}

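/*
 * Hypothetical example of building the target_seq array for the helper
 * above: wait until either fence 100 on the GFX ring or fence 7 on the
 * DMA ring signals, whichever comes first:
 *
 *	u64 seq[RADEON_NUM_RINGS] = { 0 };
 *
 *	seq[RADEON_RING_TYPE_GFX_INDEX] = 100;
 *	seq[R600_RING_TYPE_DMA_INDEX] = 7;
 *	r = radeon_fence_wait_any_seq(rdev, seq, false);
 */
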
/**
 * radeon_fence_wait_any - wait for a fence to signal on any ring
 *
 * @rdev: radeon device pointer
 * @fences: radeon fence object(s)
 * @intr: use interruptible sleep
 *
 * Wait for any requested fence to signal (all asics).  The fence
 * array is indexed by ring id.  @intr selects whether to use
 * interruptible (true) or non-interruptible (false) sleep when
 * waiting for the fences.  Used by the suballocator.
 * Returns 0 if any fence has passed, error for all other cases.
 */
int radeon_fence_wait_any(struct radeon_device *rdev,
			  struct radeon_fence **fences,
			  bool intr)
{
	uint64_t seq[RADEON_NUM_RINGS];
	unsigned i;
	int r;

	for (i = 0; i < RADEON_NUM_RINGS; ++i) {
		seq[i] = 0;

		if (!fences[i]) {
			continue;
		}

		if (fences[i]->seq == RADEON_FENCE_SIGNALED_SEQ) {
			/* something was already signaled */
			return 0;
		}

		seq[i] = fences[i]->seq;
	}

	r = radeon_fence_wait_any_seq(rdev, seq, intr);
	if (r) {
		return r;
	}
	return 0;
}

/**
 * radeon_fence_wait_next_locked - wait for the next fence to signal
 *
 * @rdev: radeon device pointer
 * @ring: ring index the fence is associated with
 *
 * Wait for the next fence on the requested ring to signal (all asics).
 * Returns 0 if the next fence has passed, error for all other cases.
 * Caller must hold the ring lock.
 */
int radeon_fence_wait_next_locked(struct radeon_device *rdev, int ring)
{
	uint64_t seq;

	seq = atomic64_read(&rdev->fence_drv[ring].last_seq) + 1ULL;
	if (seq >= rdev->fence_drv[ring].sync_seq[ring]) {
		/* nothing to wait for, last_seq is
		 * already the last emitted fence
		 */
		return -ENOENT;
	}
	return radeon_fence_wait_seq(rdev, seq, ring, false, false);
}

/**
 * radeon_fence_wait_empty_locked - wait for all fences to signal
 *
 * @rdev: radeon device pointer
 * @ring: ring index the fence is associated with
 *
 * Wait for all fences on the requested ring to signal (all asics).
 * Returns 0 if the fences have passed, error for all other cases.
 * Caller must hold the ring lock.
 */
int radeon_fence_wait_empty_locked(struct radeon_device *rdev, int ring)
{
	uint64_t seq = rdev->fence_drv[ring].sync_seq[ring];
	int r;

	r = radeon_fence_wait_seq(rdev, seq, ring, false, false);
	if (r) {
		if (r == -EDEADLK) {
			return -EDEADLK;
		}
		dev_err(rdev->dev, "error waiting for ring[%d] to become idle (%d)\n",
			ring, r);
	}
	return 0;
}

/**
 * radeon_fence_ref - take a ref on a fence
 *
 * @fence: radeon fence object
 *
 * Take a reference on a fence (all asics).
 * Returns the fence.
 */
struct radeon_fence *radeon_fence_ref(struct radeon_fence *fence)
{
	refcount_acquire(&fence->kref);
	return fence;
}

/**
 * radeon_fence_unref - remove a ref on a fence
 *
 * @fence: radeon fence object
 *
 * Remove a reference on a fence (all asics).
 */
void radeon_fence_unref(struct radeon_fence **fence)
{
	struct radeon_fence *tmp = *fence;

	*fence = NULL;
	if (tmp) {
		if (refcount_release(&tmp->kref)) {
			radeon_fence_destroy(tmp);
		}
	}
}

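/*
 * Reference-counting sketch for the two helpers above (hypothetical
 * caller):
 *
 *	struct radeon_fence *f = radeon_fence_ref(fence); // refcount +1
 *	...
 *	radeon_fence_unref(&f); // refcount -1, frees on last release
 *				// and always NULLs the caller's pointer
 */
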
/**
 * radeon_fence_count_emitted - get the count of emitted fences
 *
 * @rdev: radeon device pointer
 * @ring: ring index the fence is associated with
 *
 * Get the number of fences emitted on the requested ring (all asics).
 * Returns the number of emitted fences on the ring.  Used by the
 * dynpm code to track ring activity.
 */
unsigned radeon_fence_count_emitted(struct radeon_device *rdev, int ring)
{
	uint64_t emitted;

	/* We are not protected by the ring lock when reading the last
	 * sequence, but it's ok to report a slightly wrong fence count here.
	 */
	radeon_fence_process(rdev, ring);
	emitted = rdev->fence_drv[ring].sync_seq[ring]
		- atomic64_read(&rdev->fence_drv[ring].last_seq);
	/* to avoid 32-bit wraparound */
	if (emitted > 0x10000000) {
		emitted = 0x10000000;
	}
	return (unsigned)emitted;
}

/**
 * radeon_fence_need_sync - do we need a semaphore
 *
 * @fence: radeon fence object
 * @dst_ring: which ring to check against
 *
 * Check if the fence needs to be synced against another ring
 * (all asics).  If so, we need to emit a semaphore.
 * Returns true if we need to sync with another ring, false if
 * not.
 */
bool radeon_fence_need_sync(struct radeon_fence *fence, int dst_ring)
{
	struct radeon_fence_driver *fdrv;

	if (!fence) {
		return false;
	}

	if (fence->ring == dst_ring) {
		return false;
	}

	/* we are protected by the ring mutex */
	fdrv = &fence->rdev->fence_drv[dst_ring];
	if (fence->seq <= fdrv->sync_seq[fence->ring]) {
		return false;
	}

	return true;
}

/**
 * radeon_fence_note_sync - record the sync point
 *
 * @fence: radeon fence object
 * @dst_ring: which ring to check against
 *
 * Note the sequence number at which point the fence will
 * be synced with the requested ring (all asics).
 */
void radeon_fence_note_sync(struct radeon_fence *fence, int dst_ring)
{
	struct radeon_fence_driver *dst, *src;
	unsigned i;

	if (!fence) {
		return;
	}

	if (fence->ring == dst_ring) {
		return;
	}

	/* we are protected by the ring mutex */
	src = &fence->rdev->fence_drv[fence->ring];
	dst = &fence->rdev->fence_drv[dst_ring];
	for (i = 0; i < RADEON_NUM_RINGS; ++i) {
		if (i == dst_ring) {
			continue;
		}
		dst->sync_seq[i] = max(dst->sync_seq[i], src->sync_seq[i]);
	}
}

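/*
 * Hypothetical inter-ring flow using the two helpers above: before ring
 * dst_ring consumes buffers guarded by @fence, check whether a semaphore
 * is required, emit it, then record the new sync point:
 *
 *	if (radeon_fence_need_sync(fence, dst_ring)) {
 *		// emit a semaphore wait on dst_ring here
 *		radeon_fence_note_sync(fence, dst_ring);
 *	}
 */
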
/**
 * radeon_fence_driver_start_ring - make the fence driver
 * ready for use on the requested ring.
 *
 * @rdev: radeon device pointer
 * @ring: ring index to start the fence driver on
 *
 * Make the fence driver ready for processing (all asics).
 * Not all asics have all rings, so each asic will only
 * start the fence driver on the rings it has.
 * Returns 0 for success, errors for failure.
 */
int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring)
{
	uint64_t index;
	int r;

	radeon_scratch_free(rdev, rdev->fence_drv[ring].scratch_reg);
	if (rdev->wb.use_event || !radeon_ring_supports_scratch_reg(rdev, &rdev->ring[ring])) {
		rdev->fence_drv[ring].scratch_reg = 0;
		index = R600_WB_EVENT_OFFSET + ring * 4;
	} else {
		r = radeon_scratch_get(rdev, &rdev->fence_drv[ring].scratch_reg);
		if (r) {
			dev_err(rdev->dev, "fence failed to get scratch register\n");
			return r;
		}
		index = RADEON_WB_SCRATCH_OFFSET +
			rdev->fence_drv[ring].scratch_reg -
			rdev->scratch.reg_base;
	}
	rdev->fence_drv[ring].cpu_addr = &rdev->wb.wb[index/4];
	rdev->fence_drv[ring].gpu_addr = rdev->wb.gpu_addr + index;
	radeon_fence_write(rdev, atomic64_read(&rdev->fence_drv[ring].last_seq), ring);
	rdev->fence_drv[ring].initialized = true;
	dev_info(rdev->dev, "fence driver on ring %d use gpu addr 0x%016jx and cpu addr 0x%p\n",
		 ring, (uintmax_t)rdev->fence_drv[ring].gpu_addr, rdev->fence_drv[ring].cpu_addr);
	return 0;
}

/**
 * radeon_fence_driver_init_ring - init the fence driver
 * for the requested ring.
 *
 * @rdev: radeon device pointer
 * @ring: ring index to start the fence driver on
 *
 * Init the fence driver for the requested ring (all asics).
 * Helper function for radeon_fence_driver_init().
 */
static void radeon_fence_driver_init_ring(struct radeon_device *rdev, int ring)
{
	int i;

	rdev->fence_drv[ring].scratch_reg = -1;
	rdev->fence_drv[ring].cpu_addr = NULL;
	rdev->fence_drv[ring].gpu_addr = 0;
	for (i = 0; i < RADEON_NUM_RINGS; ++i)
		rdev->fence_drv[ring].sync_seq[i] = 0;
	atomic64_set(&rdev->fence_drv[ring].last_seq, 0);
	rdev->fence_drv[ring].last_activity = jiffies;
	rdev->fence_drv[ring].initialized = false;
}

/**
 * radeon_fence_driver_init - init the fence driver
 * for all possible rings.
 *
 * @rdev: radeon device pointer
 *
 * Init the fence driver for all possible rings (all asics).
 * Not all asics have all rings, so each asic will only
 * start the fence driver on the rings it has using
 * radeon_fence_driver_start_ring().
 * Returns 0 for success.
 */
int radeon_fence_driver_init(struct radeon_device *rdev)
{
	int ring;

	init_waitqueue_head(&rdev->fence_queue);
	for (ring = 0; ring < RADEON_NUM_RINGS; ring++) {
		radeon_fence_driver_init_ring(rdev, ring);
	}
	if (radeon_debugfs_fence_init(rdev)) {
		dev_err(rdev->dev, "fence debugfs file creation failed\n");
	}
	return 0;
}

/**
 * radeon_fence_driver_fini - tear down the fence driver
 * for all possible rings.
 *
 * @rdev: radeon device pointer
 *
 * Tear down the fence driver for all possible rings (all asics).
 */
void radeon_fence_driver_fini(struct radeon_device *rdev)
{
	int ring, r;

	lockmgr(&rdev->ring_lock, LK_EXCLUSIVE);
	for (ring = 0; ring < RADEON_NUM_RINGS; ring++) {
		if (!rdev->fence_drv[ring].initialized)
			continue;
		r = radeon_fence_wait_empty_locked(rdev, ring);
		if (r) {
			/* no need to trigger GPU reset as we are unloading */
			radeon_fence_driver_force_completion(rdev);
		}
		wake_up_all(&rdev->fence_queue);
		radeon_scratch_free(rdev, rdev->fence_drv[ring].scratch_reg);
		rdev->fence_drv[ring].initialized = false;
	}
	lockmgr(&rdev->ring_lock, LK_RELEASE);
}

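/*
 * Lifecycle summary for the routines above (as described in their own
 * comments): radeon_fence_driver_init() runs once and prepares every
 * possible ring; radeon_fence_driver_start_ring() then activates the
 * fence driver per ring as each ring comes up; radeon_fence_driver_fini()
 * drains and tears everything down on unload.
 */
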
/**
 * radeon_fence_driver_force_completion - force all fence waiters to complete
 *
 * @rdev: radeon device pointer
 *
 * In case of GPU reset failure, make sure no process keeps waiting on a
 * fence that will never complete.
 */
void radeon_fence_driver_force_completion(struct radeon_device *rdev)
{
	int ring;

	for (ring = 0; ring < RADEON_NUM_RINGS; ring++) {
		if (!rdev->fence_drv[ring].initialized)
			continue;
		radeon_fence_write(rdev, rdev->fence_drv[ring].sync_seq[ring], ring);
	}
}


/*
 * Fence debugfs
 */
#if defined(CONFIG_DEBUG_FS)
static int radeon_debugfs_fence_info(struct seq_file *m, void *data)
{
	struct drm_info_node *node = (struct drm_info_node *)m->private;
	struct drm_device *dev = node->minor->dev;
	struct radeon_device *rdev = dev->dev_private;
	int i, j;

	for (i = 0; i < RADEON_NUM_RINGS; ++i) {
		if (!rdev->fence_drv[i].initialized)
			continue;

		seq_printf(m, "--- ring %d ---\n", i);
		seq_printf(m, "Last signaled fence 0x%016llx\n",
			   (unsigned long long)atomic_load_acq_64(&rdev->fence_drv[i].last_seq));
		seq_printf(m, "Last emitted 0x%016llx\n",
			   rdev->fence_drv[i].sync_seq[i]);

		for (j = 0; j < RADEON_NUM_RINGS; ++j) {
			if (i != j && rdev->fence_drv[j].initialized)
				seq_printf(m, "Last sync to ring %d 0x%016llx\n",
					   j, rdev->fence_drv[i].sync_seq[j]);
		}
	}
	return 0;
}

static struct drm_info_list radeon_debugfs_fence_list[] = {
	{"radeon_fence_info", &radeon_debugfs_fence_info, 0, NULL},
};
#endif

int radeon_debugfs_fence_init(struct radeon_device *rdev)
{
#if defined(CONFIG_DEBUG_FS)
	return radeon_debugfs_add_files(rdev, radeon_debugfs_fence_list, 1);
#else
	return 0;
#endif
}