/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2018 Intel Corporation
 */

#include <linux/prime_numbers.h>

#include "gem/i915_gem_pm.h"
#include "gt/intel_engine_heartbeat.h"
#include "gt/intel_reset.h"

#include "i915_selftest.h"
#include "selftests/i915_random.h"
#include "selftests/igt_flush_test.h"
#include "selftests/igt_live_test.h"
#include "selftests/igt_spinner.h"
#include "selftests/lib_sw_fence.h"

#include "gem/selftests/igt_gem_utils.h"
#include "gem/selftests/mock_context.h"

#define CS_GPR(engine, n) ((engine)->mmio_base + 0x600 + (n) * 4)
#define NUM_GPR_DW (16 * 2) /* each GPR is 2 dwords */

static struct i915_vma *create_scratch(struct intel_gt *gt)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	int err;

	obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	i915_gem_object_set_cache_coherency(obj, I915_CACHING_CACHED);

	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
	if (IS_ERR(vma)) {
		i915_gem_object_put(obj);
		return vma;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
	if (err) {
		i915_gem_object_put(obj);
		return ERR_PTR(err);
	}

	return vma;
}

static void engine_heartbeat_disable(struct intel_engine_cs *engine,
				     unsigned long *saved)
{
	*saved = engine->props.heartbeat_interval_ms;
	engine->props.heartbeat_interval_ms = 0;

	intel_engine_pm_get(engine);
	intel_engine_park_heartbeat(engine);
}

static void engine_heartbeat_enable(struct intel_engine_cs *engine,
				    unsigned long saved)
{
	intel_engine_pm_put(engine);

	engine->props.heartbeat_interval_ms = saved;
}

static int wait_for_submit(struct intel_engine_cs *engine,
			   struct i915_request *rq,
			   unsigned long timeout)
{
	timeout += jiffies;
	do {
		cond_resched();
		intel_engine_flush_submission(engine);

		if (READ_ONCE(engine->execlists.pending[0]))
			continue;

		if (i915_request_is_active(rq))
			return 0;

		if (i915_request_started(rq)) /* that was quick!
*/ 87 return 0; 88 } while (time_before(jiffies, timeout)); 89 90 return -ETIME; 91 } 92 93 static int wait_for_reset(struct intel_engine_cs *engine, 94 struct i915_request *rq, 95 unsigned long timeout) 96 { 97 timeout += jiffies; 98 99 do { 100 cond_resched(); 101 intel_engine_flush_submission(engine); 102 103 if (READ_ONCE(engine->execlists.pending[0])) 104 continue; 105 106 if (i915_request_completed(rq)) 107 break; 108 109 if (READ_ONCE(rq->fence.error)) 110 break; 111 } while (time_before(jiffies, timeout)); 112 113 flush_scheduled_work(); 114 115 if (rq->fence.error != -EIO) { 116 pr_err("%s: hanging request %llx:%lld not reset\n", 117 engine->name, 118 rq->fence.context, 119 rq->fence.seqno); 120 return -EINVAL; 121 } 122 123 /* Give the request a jiffie to complete after flushing the worker */ 124 if (i915_request_wait(rq, 0, 125 max(0l, (long)(timeout - jiffies)) + 1) < 0) { 126 pr_err("%s: hanging request %llx:%lld did not complete\n", 127 engine->name, 128 rq->fence.context, 129 rq->fence.seqno); 130 return -ETIME; 131 } 132 133 return 0; 134 } 135 136 static int live_sanitycheck(void *arg) 137 { 138 struct intel_gt *gt = arg; 139 struct intel_engine_cs *engine; 140 enum intel_engine_id id; 141 struct igt_spinner spin; 142 int err = 0; 143 144 if (!HAS_LOGICAL_RING_CONTEXTS(gt->i915)) 145 return 0; 146 147 if (igt_spinner_init(&spin, gt)) 148 return -ENOMEM; 149 150 for_each_engine(engine, gt, id) { 151 struct intel_context *ce; 152 struct i915_request *rq; 153 154 ce = intel_context_create(engine); 155 if (IS_ERR(ce)) { 156 err = PTR_ERR(ce); 157 break; 158 } 159 160 rq = igt_spinner_create_request(&spin, ce, MI_NOOP); 161 if (IS_ERR(rq)) { 162 err = PTR_ERR(rq); 163 goto out_ctx; 164 } 165 166 i915_request_add(rq); 167 if (!igt_wait_for_spinner(&spin, rq)) { 168 GEM_TRACE("spinner failed to start\n"); 169 GEM_TRACE_DUMP(); 170 intel_gt_set_wedged(gt); 171 err = -EIO; 172 goto out_ctx; 173 } 174 175 igt_spinner_end(&spin); 176 if (igt_flush_test(gt->i915)) { 177 err = -EIO; 178 goto out_ctx; 179 } 180 181 out_ctx: 182 intel_context_put(ce); 183 if (err) 184 break; 185 } 186 187 igt_spinner_fini(&spin); 188 return err; 189 } 190 191 static int live_unlite_restore(struct intel_gt *gt, int prio) 192 { 193 struct intel_engine_cs *engine; 194 enum intel_engine_id id; 195 struct igt_spinner spin; 196 int err = -ENOMEM; 197 198 /* 199 * Check that we can correctly context switch between 2 instances 200 * on the same engine from the same parent context. 201 */ 202 203 if (igt_spinner_init(&spin, gt)) 204 return err; 205 206 err = 0; 207 for_each_engine(engine, gt, id) { 208 struct intel_context *ce[2] = {}; 209 struct i915_request *rq[2]; 210 struct igt_live_test t; 211 unsigned long saved; 212 int n; 213 214 if (prio && !intel_engine_has_preemption(engine)) 215 continue; 216 217 if (!intel_engine_can_store_dword(engine)) 218 continue; 219 220 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) { 221 err = -EIO; 222 break; 223 } 224 engine_heartbeat_disable(engine, &saved); 225 226 for (n = 0; n < ARRAY_SIZE(ce); n++) { 227 struct intel_context *tmp; 228 229 tmp = intel_context_create(engine); 230 if (IS_ERR(tmp)) { 231 err = PTR_ERR(tmp); 232 goto err_ce; 233 } 234 235 err = intel_context_pin(tmp); 236 if (err) { 237 intel_context_put(tmp); 238 goto err_ce; 239 } 240 241 /* 242 * Setup the pair of contexts such that if we 243 * lite-restore using the RING_TAIL from ce[1] it 244 * will execute garbage from ce[0]->ring. 
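			 *
			 * Both rings are filled with POISON_INUSE below, so if
			 * the lite-restore skips the context restore and merely
			 * advances RING_TAIL, the CS chews on the poison and
			 * hangs with an easily recognisable IPEHR of 0x5a5a5a5a.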
245 */ 246 memset(tmp->ring->vaddr, 247 POISON_INUSE, /* IPEHR: 0x5a5a5a5a [hung!] */ 248 tmp->ring->vma->size); 249 250 ce[n] = tmp; 251 } 252 GEM_BUG_ON(!ce[1]->ring->size); 253 intel_ring_reset(ce[1]->ring, ce[1]->ring->size / 2); 254 __execlists_update_reg_state(ce[1], engine, ce[1]->ring->head); 255 256 rq[0] = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK); 257 if (IS_ERR(rq[0])) { 258 err = PTR_ERR(rq[0]); 259 goto err_ce; 260 } 261 262 i915_request_get(rq[0]); 263 i915_request_add(rq[0]); 264 GEM_BUG_ON(rq[0]->postfix > ce[1]->ring->emit); 265 266 if (!igt_wait_for_spinner(&spin, rq[0])) { 267 i915_request_put(rq[0]); 268 goto err_ce; 269 } 270 271 rq[1] = i915_request_create(ce[1]); 272 if (IS_ERR(rq[1])) { 273 err = PTR_ERR(rq[1]); 274 i915_request_put(rq[0]); 275 goto err_ce; 276 } 277 278 if (!prio) { 279 /* 280 * Ensure we do the switch to ce[1] on completion. 281 * 282 * rq[0] is already submitted, so this should reduce 283 * to a no-op (a wait on a request on the same engine 284 * uses the submit fence, not the completion fence), 285 * but it will install a dependency on rq[1] for rq[0] 286 * that will prevent the pair being reordered by 287 * timeslicing. 288 */ 289 i915_request_await_dma_fence(rq[1], &rq[0]->fence); 290 } 291 292 i915_request_get(rq[1]); 293 i915_request_add(rq[1]); 294 GEM_BUG_ON(rq[1]->postfix <= rq[0]->postfix); 295 i915_request_put(rq[0]); 296 297 if (prio) { 298 struct i915_sched_attr attr = { 299 .priority = prio, 300 }; 301 302 /* Alternatively preempt the spinner with ce[1] */ 303 engine->schedule(rq[1], &attr); 304 } 305 306 /* And switch back to ce[0] for good measure */ 307 rq[0] = i915_request_create(ce[0]); 308 if (IS_ERR(rq[0])) { 309 err = PTR_ERR(rq[0]); 310 i915_request_put(rq[1]); 311 goto err_ce; 312 } 313 314 i915_request_await_dma_fence(rq[0], &rq[1]->fence); 315 i915_request_get(rq[0]); 316 i915_request_add(rq[0]); 317 GEM_BUG_ON(rq[0]->postfix > rq[1]->postfix); 318 i915_request_put(rq[1]); 319 i915_request_put(rq[0]); 320 321 err_ce: 322 tasklet_kill(&engine->execlists.tasklet); /* flush submission */ 323 igt_spinner_end(&spin); 324 for (n = 0; n < ARRAY_SIZE(ce); n++) { 325 if (IS_ERR_OR_NULL(ce[n])) 326 break; 327 328 intel_context_unpin(ce[n]); 329 intel_context_put(ce[n]); 330 } 331 332 engine_heartbeat_enable(engine, saved); 333 if (igt_live_test_end(&t)) 334 err = -EIO; 335 if (err) 336 break; 337 } 338 339 igt_spinner_fini(&spin); 340 return err; 341 } 342 343 static int live_unlite_switch(void *arg) 344 { 345 return live_unlite_restore(arg, 0); 346 } 347 348 static int live_unlite_preempt(void *arg) 349 { 350 return live_unlite_restore(arg, I915_USER_PRIORITY(I915_PRIORITY_MAX)); 351 } 352 353 static int live_pin_rewind(void *arg) 354 { 355 struct intel_gt *gt = arg; 356 struct intel_engine_cs *engine; 357 enum intel_engine_id id; 358 int err = 0; 359 360 /* 361 * We have to be careful not to trust intel_ring too much, for example 362 * ring->head is updated upon retire which is out of sync with pinning 363 * the context. Thus we cannot use ring->head to set CTX_RING_HEAD, 364 * or else we risk writing an older, stale value. 365 * 366 * To simulate this, let's apply a bit of deliberate sabotague. 
367 */ 368 369 for_each_engine(engine, gt, id) { 370 struct intel_context *ce; 371 struct i915_request *rq; 372 struct intel_ring *ring; 373 struct igt_live_test t; 374 375 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) { 376 err = -EIO; 377 break; 378 } 379 380 ce = intel_context_create(engine); 381 if (IS_ERR(ce)) { 382 err = PTR_ERR(ce); 383 break; 384 } 385 386 err = intel_context_pin(ce); 387 if (err) { 388 intel_context_put(ce); 389 break; 390 } 391 392 /* Keep the context awake while we play games */ 393 err = i915_active_acquire(&ce->active); 394 if (err) { 395 intel_context_unpin(ce); 396 intel_context_put(ce); 397 break; 398 } 399 ring = ce->ring; 400 401 /* Poison the ring, and offset the next request from HEAD */ 402 memset32(ring->vaddr, STACK_MAGIC, ring->size / sizeof(u32)); 403 ring->emit = ring->size / 2; 404 ring->tail = ring->emit; 405 GEM_BUG_ON(ring->head); 406 407 intel_context_unpin(ce); 408 409 /* Submit a simple nop request */ 410 GEM_BUG_ON(intel_context_is_pinned(ce)); 411 rq = intel_context_create_request(ce); 412 i915_active_release(&ce->active); /* e.g. async retire */ 413 intel_context_put(ce); 414 if (IS_ERR(rq)) { 415 err = PTR_ERR(rq); 416 break; 417 } 418 GEM_BUG_ON(!rq->head); 419 i915_request_add(rq); 420 421 /* Expect not to hang! */ 422 if (igt_live_test_end(&t)) { 423 err = -EIO; 424 break; 425 } 426 } 427 428 return err; 429 } 430 431 static int live_hold_reset(void *arg) 432 { 433 struct intel_gt *gt = arg; 434 struct intel_engine_cs *engine; 435 enum intel_engine_id id; 436 struct igt_spinner spin; 437 int err = 0; 438 439 /* 440 * In order to support offline error capture for fast preempt reset, 441 * we need to decouple the guilty request and ensure that it and its 442 * descendents are not executed while the capture is in progress. 
443 */ 444 445 if (!intel_has_reset_engine(gt)) 446 return 0; 447 448 if (igt_spinner_init(&spin, gt)) 449 return -ENOMEM; 450 451 for_each_engine(engine, gt, id) { 452 struct intel_context *ce; 453 unsigned long heartbeat; 454 struct i915_request *rq; 455 456 ce = intel_context_create(engine); 457 if (IS_ERR(ce)) { 458 err = PTR_ERR(ce); 459 break; 460 } 461 462 engine_heartbeat_disable(engine, &heartbeat); 463 464 rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK); 465 if (IS_ERR(rq)) { 466 err = PTR_ERR(rq); 467 goto out; 468 } 469 i915_request_add(rq); 470 471 if (!igt_wait_for_spinner(&spin, rq)) { 472 intel_gt_set_wedged(gt); 473 err = -ETIME; 474 goto out; 475 } 476 477 /* We have our request executing, now remove it and reset */ 478 479 if (test_and_set_bit(I915_RESET_ENGINE + id, 480 >->reset.flags)) { 481 intel_gt_set_wedged(gt); 482 err = -EBUSY; 483 goto out; 484 } 485 tasklet_disable(&engine->execlists.tasklet); 486 487 engine->execlists.tasklet.func(engine->execlists.tasklet.data); 488 GEM_BUG_ON(execlists_active(&engine->execlists) != rq); 489 490 i915_request_get(rq); 491 execlists_hold(engine, rq); 492 GEM_BUG_ON(!i915_request_on_hold(rq)); 493 494 intel_engine_reset(engine, NULL); 495 GEM_BUG_ON(rq->fence.error != -EIO); 496 497 tasklet_enable(&engine->execlists.tasklet); 498 clear_and_wake_up_bit(I915_RESET_ENGINE + id, 499 >->reset.flags); 500 501 /* Check that we do not resubmit the held request */ 502 if (!i915_request_wait(rq, 0, HZ / 5)) { 503 pr_err("%s: on hold request completed!\n", 504 engine->name); 505 i915_request_put(rq); 506 err = -EIO; 507 goto out; 508 } 509 GEM_BUG_ON(!i915_request_on_hold(rq)); 510 511 /* But is resubmitted on release */ 512 execlists_unhold(engine, rq); 513 if (i915_request_wait(rq, 0, HZ / 5) < 0) { 514 pr_err("%s: held request did not complete!\n", 515 engine->name); 516 intel_gt_set_wedged(gt); 517 err = -ETIME; 518 } 519 i915_request_put(rq); 520 521 out: 522 engine_heartbeat_enable(engine, heartbeat); 523 intel_context_put(ce); 524 if (err) 525 break; 526 } 527 528 igt_spinner_fini(&spin); 529 return err; 530 } 531 532 static const char *error_repr(int err) 533 { 534 return err ? "bad" : "good"; 535 } 536 537 static int live_error_interrupt(void *arg) 538 { 539 static const struct error_phase { 540 enum { GOOD = 0, BAD = -EIO } error[2]; 541 } phases[] = { 542 { { BAD, GOOD } }, 543 { { BAD, BAD } }, 544 { { BAD, GOOD } }, 545 { { GOOD, GOOD } }, /* sentinel */ 546 }; 547 struct intel_gt *gt = arg; 548 struct intel_engine_cs *engine; 549 enum intel_engine_id id; 550 551 /* 552 * We hook up the CS_MASTER_ERROR_INTERRUPT to have forewarning 553 * of invalid commands in user batches that will cause a GPU hang. 554 * This is a faster mechanism than using hangcheck/heartbeats, but 555 * only detects problems the HW knows about -- it will not warn when 556 * we kill the HW! 557 * 558 * To verify our detection and reset, we throw some invalid commands 559 * at the HW and wait for the interrupt. 
560 */ 561 562 if (!intel_has_reset_engine(gt)) 563 return 0; 564 565 for_each_engine(engine, gt, id) { 566 const struct error_phase *p; 567 unsigned long heartbeat; 568 int err = 0; 569 570 engine_heartbeat_disable(engine, &heartbeat); 571 572 for (p = phases; p->error[0] != GOOD; p++) { 573 struct i915_request *client[ARRAY_SIZE(phases->error)]; 574 u32 *cs; 575 int i; 576 577 memset(client, 0, sizeof(*client)); 578 for (i = 0; i < ARRAY_SIZE(client); i++) { 579 struct intel_context *ce; 580 struct i915_request *rq; 581 582 ce = intel_context_create(engine); 583 if (IS_ERR(ce)) { 584 err = PTR_ERR(ce); 585 goto out; 586 } 587 588 rq = intel_context_create_request(ce); 589 intel_context_put(ce); 590 if (IS_ERR(rq)) { 591 err = PTR_ERR(rq); 592 goto out; 593 } 594 595 if (rq->engine->emit_init_breadcrumb) { 596 err = rq->engine->emit_init_breadcrumb(rq); 597 if (err) { 598 i915_request_add(rq); 599 goto out; 600 } 601 } 602 603 cs = intel_ring_begin(rq, 2); 604 if (IS_ERR(cs)) { 605 i915_request_add(rq); 606 err = PTR_ERR(cs); 607 goto out; 608 } 609 610 if (p->error[i]) { 611 *cs++ = 0xdeadbeef; 612 *cs++ = 0xdeadbeef; 613 } else { 614 *cs++ = MI_NOOP; 615 *cs++ = MI_NOOP; 616 } 617 618 client[i] = i915_request_get(rq); 619 i915_request_add(rq); 620 } 621 622 err = wait_for_submit(engine, client[0], HZ / 2); 623 if (err) { 624 pr_err("%s: first request did not start within time!\n", 625 engine->name); 626 err = -ETIME; 627 goto out; 628 } 629 630 for (i = 0; i < ARRAY_SIZE(client); i++) { 631 if (i915_request_wait(client[i], 0, HZ / 5) < 0) 632 pr_debug("%s: %s request incomplete!\n", 633 engine->name, 634 error_repr(p->error[i])); 635 636 if (!i915_request_started(client[i])) { 637 pr_debug("%s: %s request not stated!\n", 638 engine->name, 639 error_repr(p->error[i])); 640 err = -ETIME; 641 goto out; 642 } 643 644 /* Kick the tasklet to process the error */ 645 intel_engine_flush_submission(engine); 646 if (client[i]->fence.error != p->error[i]) { 647 pr_err("%s: %s request completed with wrong error code: %d\n", 648 engine->name, 649 error_repr(p->error[i]), 650 client[i]->fence.error); 651 err = -EINVAL; 652 goto out; 653 } 654 } 655 656 out: 657 for (i = 0; i < ARRAY_SIZE(client); i++) 658 if (client[i]) 659 i915_request_put(client[i]); 660 if (err) { 661 pr_err("%s: failed at phase[%zd] { %d, %d }\n", 662 engine->name, p - phases, 663 p->error[0], p->error[1]); 664 break; 665 } 666 } 667 668 engine_heartbeat_enable(engine, heartbeat); 669 if (err) { 670 intel_gt_set_wedged(gt); 671 return err; 672 } 673 } 674 675 return 0; 676 } 677 678 static int 679 emit_semaphore_chain(struct i915_request *rq, struct i915_vma *vma, int idx) 680 { 681 u32 *cs; 682 683 cs = intel_ring_begin(rq, 10); 684 if (IS_ERR(cs)) 685 return PTR_ERR(cs); 686 687 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; 688 689 *cs++ = MI_SEMAPHORE_WAIT | 690 MI_SEMAPHORE_GLOBAL_GTT | 691 MI_SEMAPHORE_POLL | 692 MI_SEMAPHORE_SAD_NEQ_SDD; 693 *cs++ = 0; 694 *cs++ = i915_ggtt_offset(vma) + 4 * idx; 695 *cs++ = 0; 696 697 if (idx > 0) { 698 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; 699 *cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1); 700 *cs++ = 0; 701 *cs++ = 1; 702 } else { 703 *cs++ = MI_NOOP; 704 *cs++ = MI_NOOP; 705 *cs++ = MI_NOOP; 706 *cs++ = MI_NOOP; 707 } 708 709 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; 710 711 intel_ring_advance(rq, cs); 712 return 0; 713 } 714 715 static struct i915_request * 716 semaphore_queue(struct intel_engine_cs *engine, struct i915_vma *vma, int idx) 717 { 718 struct intel_context *ce; 719 struct 
i915_request *rq; 720 int err; 721 722 ce = intel_context_create(engine); 723 if (IS_ERR(ce)) 724 return ERR_CAST(ce); 725 726 rq = intel_context_create_request(ce); 727 if (IS_ERR(rq)) 728 goto out_ce; 729 730 err = 0; 731 if (rq->engine->emit_init_breadcrumb) 732 err = rq->engine->emit_init_breadcrumb(rq); 733 if (err == 0) 734 err = emit_semaphore_chain(rq, vma, idx); 735 if (err == 0) 736 i915_request_get(rq); 737 i915_request_add(rq); 738 if (err) 739 rq = ERR_PTR(err); 740 741 out_ce: 742 intel_context_put(ce); 743 return rq; 744 } 745 746 static int 747 release_queue(struct intel_engine_cs *engine, 748 struct i915_vma *vma, 749 int idx, int prio) 750 { 751 struct i915_sched_attr attr = { 752 .priority = prio, 753 }; 754 struct i915_request *rq; 755 u32 *cs; 756 757 rq = intel_engine_create_kernel_request(engine); 758 if (IS_ERR(rq)) 759 return PTR_ERR(rq); 760 761 cs = intel_ring_begin(rq, 4); 762 if (IS_ERR(cs)) { 763 i915_request_add(rq); 764 return PTR_ERR(cs); 765 } 766 767 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; 768 *cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1); 769 *cs++ = 0; 770 *cs++ = 1; 771 772 intel_ring_advance(rq, cs); 773 774 i915_request_get(rq); 775 i915_request_add(rq); 776 777 local_bh_disable(); 778 engine->schedule(rq, &attr); 779 local_bh_enable(); /* kick tasklet */ 780 781 i915_request_put(rq); 782 783 return 0; 784 } 785 786 static int 787 slice_semaphore_queue(struct intel_engine_cs *outer, 788 struct i915_vma *vma, 789 int count) 790 { 791 struct intel_engine_cs *engine; 792 struct i915_request *head; 793 enum intel_engine_id id; 794 int err, i, n = 0; 795 796 head = semaphore_queue(outer, vma, n++); 797 if (IS_ERR(head)) 798 return PTR_ERR(head); 799 800 for_each_engine(engine, outer->gt, id) { 801 for (i = 0; i < count; i++) { 802 struct i915_request *rq; 803 804 rq = semaphore_queue(engine, vma, n++); 805 if (IS_ERR(rq)) { 806 err = PTR_ERR(rq); 807 goto out; 808 } 809 810 i915_request_put(rq); 811 } 812 } 813 814 err = release_queue(outer, vma, n, INT_MAX); 815 if (err) 816 goto out; 817 818 if (i915_request_wait(head, 0, 819 2 * RUNTIME_INFO(outer->i915)->num_engines * (count + 2) * (count + 3)) < 0) { 820 pr_err("Failed to slice along semaphore chain of length (%d, %d)!\n", 821 count, n); 822 GEM_TRACE_DUMP(); 823 intel_gt_set_wedged(outer->gt); 824 err = -EIO; 825 } 826 827 out: 828 i915_request_put(head); 829 return err; 830 } 831 832 static int live_timeslice_preempt(void *arg) 833 { 834 struct intel_gt *gt = arg; 835 struct drm_i915_gem_object *obj; 836 struct i915_vma *vma; 837 void *vaddr; 838 int err = 0; 839 int count; 840 841 /* 842 * If a request takes too long, we would like to give other users 843 * a fair go on the GPU. In particular, users may create batches 844 * that wait upon external input, where that input may even be 845 * supplied by another GPU job. To avoid blocking forever, we 846 * need to preempt the current task and replace it with another 847 * ready task. 
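	 *
	 * slice_semaphore_queue() below builds a chain of requests across all
	 * engines using emit_semaphore_chain() above, each one roughly
	 *
	 *	MI_SEMAPHORE_WAIT (POLL, NEQ) on slot[idx]
	 *	MI_STORE_DWORD_IMM slot[idx - 1] = 1
	 *
	 * so the head of the chain can only complete if timeslicing rotates
	 * every waiter onto the HW after release_queue() writes the final slot.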
848 */ 849 if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION)) 850 return 0; 851 852 obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE); 853 if (IS_ERR(obj)) 854 return PTR_ERR(obj); 855 856 vma = i915_vma_instance(obj, >->ggtt->vm, NULL); 857 if (IS_ERR(vma)) { 858 err = PTR_ERR(vma); 859 goto err_obj; 860 } 861 862 vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC); 863 if (IS_ERR(vaddr)) { 864 err = PTR_ERR(vaddr); 865 goto err_obj; 866 } 867 868 err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL); 869 if (err) 870 goto err_map; 871 872 err = i915_vma_sync(vma); 873 if (err) 874 goto err_pin; 875 876 for_each_prime_number_from(count, 1, 16) { 877 struct intel_engine_cs *engine; 878 enum intel_engine_id id; 879 880 for_each_engine(engine, gt, id) { 881 unsigned long saved; 882 883 if (!intel_engine_has_preemption(engine)) 884 continue; 885 886 memset(vaddr, 0, PAGE_SIZE); 887 888 engine_heartbeat_disable(engine, &saved); 889 err = slice_semaphore_queue(engine, vma, count); 890 engine_heartbeat_enable(engine, saved); 891 if (err) 892 goto err_pin; 893 894 if (igt_flush_test(gt->i915)) { 895 err = -EIO; 896 goto err_pin; 897 } 898 } 899 } 900 901 err_pin: 902 i915_vma_unpin(vma); 903 err_map: 904 i915_gem_object_unpin_map(obj); 905 err_obj: 906 i915_gem_object_put(obj); 907 return err; 908 } 909 910 static struct i915_request * 911 create_rewinder(struct intel_context *ce, 912 struct i915_request *wait, 913 void *slot, int idx) 914 { 915 const u32 offset = 916 i915_ggtt_offset(ce->engine->status_page.vma) + 917 offset_in_page(slot); 918 struct i915_request *rq; 919 u32 *cs; 920 int err; 921 922 rq = intel_context_create_request(ce); 923 if (IS_ERR(rq)) 924 return rq; 925 926 if (wait) { 927 err = i915_request_await_dma_fence(rq, &wait->fence); 928 if (err) 929 goto err; 930 } 931 932 cs = intel_ring_begin(rq, 14); 933 if (IS_ERR(cs)) { 934 err = PTR_ERR(cs); 935 goto err; 936 } 937 938 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; 939 *cs++ = MI_NOOP; 940 941 *cs++ = MI_SEMAPHORE_WAIT | 942 MI_SEMAPHORE_GLOBAL_GTT | 943 MI_SEMAPHORE_POLL | 944 MI_SEMAPHORE_SAD_GTE_SDD; 945 *cs++ = idx; 946 *cs++ = offset; 947 *cs++ = 0; 948 949 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT; 950 *cs++ = i915_mmio_reg_offset(RING_TIMESTAMP(rq->engine->mmio_base)); 951 *cs++ = offset + idx * sizeof(u32); 952 *cs++ = 0; 953 954 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; 955 *cs++ = offset; 956 *cs++ = 0; 957 *cs++ = idx + 1; 958 959 intel_ring_advance(rq, cs); 960 961 rq->sched.attr.priority = I915_PRIORITY_MASK; 962 err = 0; 963 err: 964 i915_request_get(rq); 965 i915_request_add(rq); 966 if (err) { 967 i915_request_put(rq); 968 return ERR_PTR(err); 969 } 970 971 return rq; 972 } 973 974 static int live_timeslice_rewind(void *arg) 975 { 976 struct intel_gt *gt = arg; 977 struct intel_engine_cs *engine; 978 enum intel_engine_id id; 979 980 /* 981 * The usual presumption on timeslice expiration is that we replace 982 * the active context with another. However, given a chain of 983 * dependencies we may end up with replacing the context with itself, 984 * but only a few of those requests, forcing us to rewind the 985 * RING_TAIL of the original request. 
986 */ 987 if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION)) 988 return 0; 989 990 for_each_engine(engine, gt, id) { 991 enum { A1, A2, B1 }; 992 enum { X = 1, Z, Y }; 993 struct i915_request *rq[3] = {}; 994 struct intel_context *ce; 995 unsigned long heartbeat; 996 unsigned long timeslice; 997 int i, err = 0; 998 u32 *slot; 999 1000 if (!intel_engine_has_timeslices(engine)) 1001 continue; 1002 1003 /* 1004 * A:rq1 -- semaphore wait, timestamp X 1005 * A:rq2 -- write timestamp Y 1006 * 1007 * B:rq1 [await A:rq1] -- write timestamp Z 1008 * 1009 * Force timeslice, release semaphore. 1010 * 1011 * Expect execution/evaluation order XZY 1012 */ 1013 1014 engine_heartbeat_disable(engine, &heartbeat); 1015 timeslice = xchg(&engine->props.timeslice_duration_ms, 1); 1016 1017 slot = memset32(engine->status_page.addr + 1000, 0, 4); 1018 1019 ce = intel_context_create(engine); 1020 if (IS_ERR(ce)) { 1021 err = PTR_ERR(ce); 1022 goto err; 1023 } 1024 1025 rq[0] = create_rewinder(ce, NULL, slot, X); 1026 if (IS_ERR(rq[0])) { 1027 intel_context_put(ce); 1028 goto err; 1029 } 1030 1031 rq[1] = create_rewinder(ce, NULL, slot, Y); 1032 intel_context_put(ce); 1033 if (IS_ERR(rq[1])) 1034 goto err; 1035 1036 err = wait_for_submit(engine, rq[1], HZ / 2); 1037 if (err) { 1038 pr_err("%s: failed to submit first context\n", 1039 engine->name); 1040 goto err; 1041 } 1042 1043 ce = intel_context_create(engine); 1044 if (IS_ERR(ce)) { 1045 err = PTR_ERR(ce); 1046 goto err; 1047 } 1048 1049 rq[2] = create_rewinder(ce, rq[0], slot, Z); 1050 intel_context_put(ce); 1051 if (IS_ERR(rq[2])) 1052 goto err; 1053 1054 err = wait_for_submit(engine, rq[2], HZ / 2); 1055 if (err) { 1056 pr_err("%s: failed to submit second context\n", 1057 engine->name); 1058 goto err; 1059 } 1060 GEM_BUG_ON(!timer_pending(&engine->execlists.timer)); 1061 1062 /* ELSP[] = { { A:rq1, A:rq2 }, { B:rq1 } } */ 1063 if (i915_request_is_active(rq[A2])) { /* semaphore yielded! */ 1064 /* Wait for the timeslice to kick in */ 1065 del_timer(&engine->execlists.timer); 1066 tasklet_hi_schedule(&engine->execlists.tasklet); 1067 intel_engine_flush_submission(engine); 1068 } 1069 /* -> ELSP[] = { { A:rq1 }, { B:rq1 } } */ 1070 GEM_BUG_ON(!i915_request_is_active(rq[A1])); 1071 GEM_BUG_ON(!i915_request_is_active(rq[B1])); 1072 GEM_BUG_ON(i915_request_is_active(rq[A2])); 1073 1074 /* Release the hounds! 
*/ 1075 slot[0] = 1; 1076 wmb(); /* "pairs" with GPU; paranoid kick of internal CPU$ */ 1077 1078 for (i = 1; i <= 3; i++) { 1079 unsigned long timeout = jiffies + HZ / 2; 1080 1081 while (!READ_ONCE(slot[i]) && 1082 time_before(jiffies, timeout)) 1083 ; 1084 1085 if (!time_before(jiffies, timeout)) { 1086 pr_err("%s: rq[%d] timed out\n", 1087 engine->name, i - 1); 1088 err = -ETIME; 1089 goto err; 1090 } 1091 1092 pr_debug("%s: slot[%d]:%x\n", engine->name, i, slot[i]); 1093 } 1094 1095 /* XZY: XZ < XY */ 1096 if (slot[Z] - slot[X] >= slot[Y] - slot[X]) { 1097 pr_err("%s: timeslicing did not run context B [%u] before A [%u]!\n", 1098 engine->name, 1099 slot[Z] - slot[X], 1100 slot[Y] - slot[X]); 1101 err = -EINVAL; 1102 } 1103 1104 err: 1105 memset32(&slot[0], -1, 4); 1106 wmb(); 1107 1108 engine->props.timeslice_duration_ms = timeslice; 1109 engine_heartbeat_enable(engine, heartbeat); 1110 for (i = 0; i < 3; i++) 1111 i915_request_put(rq[i]); 1112 if (igt_flush_test(gt->i915)) 1113 err = -EIO; 1114 if (err) 1115 return err; 1116 } 1117 1118 return 0; 1119 } 1120 1121 static struct i915_request *nop_request(struct intel_engine_cs *engine) 1122 { 1123 struct i915_request *rq; 1124 1125 rq = intel_engine_create_kernel_request(engine); 1126 if (IS_ERR(rq)) 1127 return rq; 1128 1129 i915_request_get(rq); 1130 i915_request_add(rq); 1131 1132 return rq; 1133 } 1134 1135 static long timeslice_threshold(const struct intel_engine_cs *engine) 1136 { 1137 return 2 * msecs_to_jiffies_timeout(timeslice(engine)) + 1; 1138 } 1139 1140 static int live_timeslice_queue(void *arg) 1141 { 1142 struct intel_gt *gt = arg; 1143 struct drm_i915_gem_object *obj; 1144 struct intel_engine_cs *engine; 1145 enum intel_engine_id id; 1146 struct i915_vma *vma; 1147 void *vaddr; 1148 int err = 0; 1149 1150 /* 1151 * Make sure that even if ELSP[0] and ELSP[1] are filled with 1152 * timeslicing between them disabled, we *do* enable timeslicing 1153 * if the queue demands it. (Normally, we do not submit if 1154 * ELSP[1] is already occupied, so must rely on timeslicing to 1155 * eject ELSP[0] in favour of the queue.) 
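	 *
	 * The arrangement below is: ELSP[0] holds a semaphore spinner bumped
	 * to maximum priority, ELSP[1] holds a nop kernel request, and the
	 * semaphore release sits in the priority queue at matching priority,
	 * so only the timeslice timer can pull it onto the HW.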
1156 */ 1157 if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION)) 1158 return 0; 1159 1160 obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE); 1161 if (IS_ERR(obj)) 1162 return PTR_ERR(obj); 1163 1164 vma = i915_vma_instance(obj, >->ggtt->vm, NULL); 1165 if (IS_ERR(vma)) { 1166 err = PTR_ERR(vma); 1167 goto err_obj; 1168 } 1169 1170 vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC); 1171 if (IS_ERR(vaddr)) { 1172 err = PTR_ERR(vaddr); 1173 goto err_obj; 1174 } 1175 1176 err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL); 1177 if (err) 1178 goto err_map; 1179 1180 err = i915_vma_sync(vma); 1181 if (err) 1182 goto err_pin; 1183 1184 for_each_engine(engine, gt, id) { 1185 struct i915_sched_attr attr = { 1186 .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX), 1187 }; 1188 struct i915_request *rq, *nop; 1189 unsigned long saved; 1190 1191 if (!intel_engine_has_preemption(engine)) 1192 continue; 1193 1194 engine_heartbeat_disable(engine, &saved); 1195 memset(vaddr, 0, PAGE_SIZE); 1196 1197 /* ELSP[0]: semaphore wait */ 1198 rq = semaphore_queue(engine, vma, 0); 1199 if (IS_ERR(rq)) { 1200 err = PTR_ERR(rq); 1201 goto err_heartbeat; 1202 } 1203 engine->schedule(rq, &attr); 1204 err = wait_for_submit(engine, rq, HZ / 2); 1205 if (err) { 1206 pr_err("%s: Timed out trying to submit semaphores\n", 1207 engine->name); 1208 goto err_rq; 1209 } 1210 1211 /* ELSP[1]: nop request */ 1212 nop = nop_request(engine); 1213 if (IS_ERR(nop)) { 1214 err = PTR_ERR(nop); 1215 goto err_rq; 1216 } 1217 err = wait_for_submit(engine, nop, HZ / 2); 1218 i915_request_put(nop); 1219 if (err) { 1220 pr_err("%s: Timed out trying to submit nop\n", 1221 engine->name); 1222 goto err_rq; 1223 } 1224 1225 GEM_BUG_ON(i915_request_completed(rq)); 1226 GEM_BUG_ON(execlists_active(&engine->execlists) != rq); 1227 1228 /* Queue: semaphore signal, matching priority as semaphore */ 1229 err = release_queue(engine, vma, 1, effective_prio(rq)); 1230 if (err) 1231 goto err_rq; 1232 1233 intel_engine_flush_submission(engine); 1234 if (!READ_ONCE(engine->execlists.timer.expires) && 1235 !i915_request_completed(rq)) { 1236 struct drm_printer p = 1237 drm_info_printer(gt->i915->drm.dev); 1238 1239 GEM_TRACE_ERR("%s: Failed to enable timeslicing!\n", 1240 engine->name); 1241 intel_engine_dump(engine, &p, 1242 "%s\n", engine->name); 1243 GEM_TRACE_DUMP(); 1244 1245 memset(vaddr, 0xff, PAGE_SIZE); 1246 err = -EINVAL; 1247 } 1248 1249 /* Timeslice every jiffy, so within 2 we should signal */ 1250 if (i915_request_wait(rq, 0, timeslice_threshold(engine)) < 0) { 1251 struct drm_printer p = 1252 drm_info_printer(gt->i915->drm.dev); 1253 1254 pr_err("%s: Failed to timeslice into queue\n", 1255 engine->name); 1256 intel_engine_dump(engine, &p, 1257 "%s\n", engine->name); 1258 1259 memset(vaddr, 0xff, PAGE_SIZE); 1260 err = -EIO; 1261 } 1262 err_rq: 1263 i915_request_put(rq); 1264 err_heartbeat: 1265 engine_heartbeat_enable(engine, saved); 1266 if (err) 1267 break; 1268 } 1269 1270 err_pin: 1271 i915_vma_unpin(vma); 1272 err_map: 1273 i915_gem_object_unpin_map(obj); 1274 err_obj: 1275 i915_gem_object_put(obj); 1276 return err; 1277 } 1278 1279 static int live_busywait_preempt(void *arg) 1280 { 1281 struct intel_gt *gt = arg; 1282 struct i915_gem_context *ctx_hi, *ctx_lo; 1283 struct intel_engine_cs *engine; 1284 struct drm_i915_gem_object *obj; 1285 struct i915_vma *vma; 1286 enum intel_engine_id id; 1287 int err = -ENOMEM; 1288 u32 *map; 1289 1290 /* 1291 * Verify that even without HAS_LOGICAL_RING_PREEMPTION, we can 1292 * preempt the busywaits used 
	 * to synchronise between rings.
	 */

	ctx_hi = kernel_context(gt->i915);
	if (!ctx_hi)
		return -ENOMEM;
	ctx_hi->sched.priority =
		I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);

	ctx_lo = kernel_context(gt->i915);
	if (!ctx_lo)
		goto err_ctx_hi;
	ctx_lo->sched.priority =
		I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);

	obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
	if (IS_ERR(obj)) {
		err = PTR_ERR(obj);
		goto err_ctx_lo;
	}

	map = i915_gem_object_pin_map(obj, I915_MAP_WC);
	if (IS_ERR(map)) {
		err = PTR_ERR(map);
		goto err_obj;
	}

	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err_map;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
	if (err)
		goto err_map;

	err = i915_vma_sync(vma);
	if (err)
		goto err_vma;

	for_each_engine(engine, gt, id) {
		struct i915_request *lo, *hi;
		struct igt_live_test t;
		u32 *cs;

		if (!intel_engine_has_preemption(engine))
			continue;

		if (!intel_engine_can_store_dword(engine))
			continue;

		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
			err = -EIO;
			goto err_vma;
		}

		/*
		 * We create two requests. The low priority request
		 * busywaits on a semaphore (inside the ringbuffer where
		 * it should be preemptible) and the high priority request
		 * uses a MI_STORE_DWORD_IMM to update the semaphore value
		 * allowing the first request to complete. If preemption
		 * fails, we hang instead.
		 */

		lo = igt_request_alloc(ctx_lo, engine);
		if (IS_ERR(lo)) {
			err = PTR_ERR(lo);
			goto err_vma;
		}

		cs = intel_ring_begin(lo, 8);
		if (IS_ERR(cs)) {
			err = PTR_ERR(cs);
			i915_request_add(lo);
			goto err_vma;
		}

		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
		*cs++ = i915_ggtt_offset(vma);
		*cs++ = 0;
		*cs++ = 1;

		/* XXX Do we need a flush + invalidate here?
*/ 1377 1378 *cs++ = MI_SEMAPHORE_WAIT | 1379 MI_SEMAPHORE_GLOBAL_GTT | 1380 MI_SEMAPHORE_POLL | 1381 MI_SEMAPHORE_SAD_EQ_SDD; 1382 *cs++ = 0; 1383 *cs++ = i915_ggtt_offset(vma); 1384 *cs++ = 0; 1385 1386 intel_ring_advance(lo, cs); 1387 1388 i915_request_get(lo); 1389 i915_request_add(lo); 1390 1391 if (wait_for(READ_ONCE(*map), 10)) { 1392 i915_request_put(lo); 1393 err = -ETIMEDOUT; 1394 goto err_vma; 1395 } 1396 1397 /* Low priority request should be busywaiting now */ 1398 if (i915_request_wait(lo, 0, 1) != -ETIME) { 1399 i915_request_put(lo); 1400 pr_err("%s: Busywaiting request did not!\n", 1401 engine->name); 1402 err = -EIO; 1403 goto err_vma; 1404 } 1405 1406 hi = igt_request_alloc(ctx_hi, engine); 1407 if (IS_ERR(hi)) { 1408 err = PTR_ERR(hi); 1409 i915_request_put(lo); 1410 goto err_vma; 1411 } 1412 1413 cs = intel_ring_begin(hi, 4); 1414 if (IS_ERR(cs)) { 1415 err = PTR_ERR(cs); 1416 i915_request_add(hi); 1417 i915_request_put(lo); 1418 goto err_vma; 1419 } 1420 1421 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; 1422 *cs++ = i915_ggtt_offset(vma); 1423 *cs++ = 0; 1424 *cs++ = 0; 1425 1426 intel_ring_advance(hi, cs); 1427 i915_request_add(hi); 1428 1429 if (i915_request_wait(lo, 0, HZ / 5) < 0) { 1430 struct drm_printer p = drm_info_printer(gt->i915->drm.dev); 1431 1432 pr_err("%s: Failed to preempt semaphore busywait!\n", 1433 engine->name); 1434 1435 intel_engine_dump(engine, &p, "%s\n", engine->name); 1436 GEM_TRACE_DUMP(); 1437 1438 i915_request_put(lo); 1439 intel_gt_set_wedged(gt); 1440 err = -EIO; 1441 goto err_vma; 1442 } 1443 GEM_BUG_ON(READ_ONCE(*map)); 1444 i915_request_put(lo); 1445 1446 if (igt_live_test_end(&t)) { 1447 err = -EIO; 1448 goto err_vma; 1449 } 1450 } 1451 1452 err = 0; 1453 err_vma: 1454 i915_vma_unpin(vma); 1455 err_map: 1456 i915_gem_object_unpin_map(obj); 1457 err_obj: 1458 i915_gem_object_put(obj); 1459 err_ctx_lo: 1460 kernel_context_close(ctx_lo); 1461 err_ctx_hi: 1462 kernel_context_close(ctx_hi); 1463 return err; 1464 } 1465 1466 static struct i915_request * 1467 spinner_create_request(struct igt_spinner *spin, 1468 struct i915_gem_context *ctx, 1469 struct intel_engine_cs *engine, 1470 u32 arb) 1471 { 1472 struct intel_context *ce; 1473 struct i915_request *rq; 1474 1475 ce = i915_gem_context_get_engine(ctx, engine->legacy_idx); 1476 if (IS_ERR(ce)) 1477 return ERR_CAST(ce); 1478 1479 rq = igt_spinner_create_request(spin, ce, arb); 1480 intel_context_put(ce); 1481 return rq; 1482 } 1483 1484 static int live_preempt(void *arg) 1485 { 1486 struct intel_gt *gt = arg; 1487 struct i915_gem_context *ctx_hi, *ctx_lo; 1488 struct igt_spinner spin_hi, spin_lo; 1489 struct intel_engine_cs *engine; 1490 enum intel_engine_id id; 1491 int err = -ENOMEM; 1492 1493 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) 1494 return 0; 1495 1496 if (!(gt->i915->caps.scheduler & I915_SCHEDULER_CAP_PREEMPTION)) 1497 pr_err("Logical preemption supported, but not exposed\n"); 1498 1499 if (igt_spinner_init(&spin_hi, gt)) 1500 return -ENOMEM; 1501 1502 if (igt_spinner_init(&spin_lo, gt)) 1503 goto err_spin_hi; 1504 1505 ctx_hi = kernel_context(gt->i915); 1506 if (!ctx_hi) 1507 goto err_spin_lo; 1508 ctx_hi->sched.priority = 1509 I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY); 1510 1511 ctx_lo = kernel_context(gt->i915); 1512 if (!ctx_lo) 1513 goto err_ctx_hi; 1514 ctx_lo->sched.priority = 1515 I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY); 1516 1517 for_each_engine(engine, gt, id) { 1518 struct igt_live_test t; 1519 struct i915_request *rq; 1520 1521 if 
(!intel_engine_has_preemption(engine)) 1522 continue; 1523 1524 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) { 1525 err = -EIO; 1526 goto err_ctx_lo; 1527 } 1528 1529 rq = spinner_create_request(&spin_lo, ctx_lo, engine, 1530 MI_ARB_CHECK); 1531 if (IS_ERR(rq)) { 1532 err = PTR_ERR(rq); 1533 goto err_ctx_lo; 1534 } 1535 1536 i915_request_add(rq); 1537 if (!igt_wait_for_spinner(&spin_lo, rq)) { 1538 GEM_TRACE("lo spinner failed to start\n"); 1539 GEM_TRACE_DUMP(); 1540 intel_gt_set_wedged(gt); 1541 err = -EIO; 1542 goto err_ctx_lo; 1543 } 1544 1545 rq = spinner_create_request(&spin_hi, ctx_hi, engine, 1546 MI_ARB_CHECK); 1547 if (IS_ERR(rq)) { 1548 igt_spinner_end(&spin_lo); 1549 err = PTR_ERR(rq); 1550 goto err_ctx_lo; 1551 } 1552 1553 i915_request_add(rq); 1554 if (!igt_wait_for_spinner(&spin_hi, rq)) { 1555 GEM_TRACE("hi spinner failed to start\n"); 1556 GEM_TRACE_DUMP(); 1557 intel_gt_set_wedged(gt); 1558 err = -EIO; 1559 goto err_ctx_lo; 1560 } 1561 1562 igt_spinner_end(&spin_hi); 1563 igt_spinner_end(&spin_lo); 1564 1565 if (igt_live_test_end(&t)) { 1566 err = -EIO; 1567 goto err_ctx_lo; 1568 } 1569 } 1570 1571 err = 0; 1572 err_ctx_lo: 1573 kernel_context_close(ctx_lo); 1574 err_ctx_hi: 1575 kernel_context_close(ctx_hi); 1576 err_spin_lo: 1577 igt_spinner_fini(&spin_lo); 1578 err_spin_hi: 1579 igt_spinner_fini(&spin_hi); 1580 return err; 1581 } 1582 1583 static int live_late_preempt(void *arg) 1584 { 1585 struct intel_gt *gt = arg; 1586 struct i915_gem_context *ctx_hi, *ctx_lo; 1587 struct igt_spinner spin_hi, spin_lo; 1588 struct intel_engine_cs *engine; 1589 struct i915_sched_attr attr = {}; 1590 enum intel_engine_id id; 1591 int err = -ENOMEM; 1592 1593 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) 1594 return 0; 1595 1596 if (igt_spinner_init(&spin_hi, gt)) 1597 return -ENOMEM; 1598 1599 if (igt_spinner_init(&spin_lo, gt)) 1600 goto err_spin_hi; 1601 1602 ctx_hi = kernel_context(gt->i915); 1603 if (!ctx_hi) 1604 goto err_spin_lo; 1605 1606 ctx_lo = kernel_context(gt->i915); 1607 if (!ctx_lo) 1608 goto err_ctx_hi; 1609 1610 /* Make sure ctx_lo stays before ctx_hi until we trigger preemption. 
*/ 1611 ctx_lo->sched.priority = I915_USER_PRIORITY(1); 1612 1613 for_each_engine(engine, gt, id) { 1614 struct igt_live_test t; 1615 struct i915_request *rq; 1616 1617 if (!intel_engine_has_preemption(engine)) 1618 continue; 1619 1620 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) { 1621 err = -EIO; 1622 goto err_ctx_lo; 1623 } 1624 1625 rq = spinner_create_request(&spin_lo, ctx_lo, engine, 1626 MI_ARB_CHECK); 1627 if (IS_ERR(rq)) { 1628 err = PTR_ERR(rq); 1629 goto err_ctx_lo; 1630 } 1631 1632 i915_request_add(rq); 1633 if (!igt_wait_for_spinner(&spin_lo, rq)) { 1634 pr_err("First context failed to start\n"); 1635 goto err_wedged; 1636 } 1637 1638 rq = spinner_create_request(&spin_hi, ctx_hi, engine, 1639 MI_NOOP); 1640 if (IS_ERR(rq)) { 1641 igt_spinner_end(&spin_lo); 1642 err = PTR_ERR(rq); 1643 goto err_ctx_lo; 1644 } 1645 1646 i915_request_add(rq); 1647 if (igt_wait_for_spinner(&spin_hi, rq)) { 1648 pr_err("Second context overtook first?\n"); 1649 goto err_wedged; 1650 } 1651 1652 attr.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX); 1653 engine->schedule(rq, &attr); 1654 1655 if (!igt_wait_for_spinner(&spin_hi, rq)) { 1656 pr_err("High priority context failed to preempt the low priority context\n"); 1657 GEM_TRACE_DUMP(); 1658 goto err_wedged; 1659 } 1660 1661 igt_spinner_end(&spin_hi); 1662 igt_spinner_end(&spin_lo); 1663 1664 if (igt_live_test_end(&t)) { 1665 err = -EIO; 1666 goto err_ctx_lo; 1667 } 1668 } 1669 1670 err = 0; 1671 err_ctx_lo: 1672 kernel_context_close(ctx_lo); 1673 err_ctx_hi: 1674 kernel_context_close(ctx_hi); 1675 err_spin_lo: 1676 igt_spinner_fini(&spin_lo); 1677 err_spin_hi: 1678 igt_spinner_fini(&spin_hi); 1679 return err; 1680 1681 err_wedged: 1682 igt_spinner_end(&spin_hi); 1683 igt_spinner_end(&spin_lo); 1684 intel_gt_set_wedged(gt); 1685 err = -EIO; 1686 goto err_ctx_lo; 1687 } 1688 1689 struct preempt_client { 1690 struct igt_spinner spin; 1691 struct i915_gem_context *ctx; 1692 }; 1693 1694 static int preempt_client_init(struct intel_gt *gt, struct preempt_client *c) 1695 { 1696 c->ctx = kernel_context(gt->i915); 1697 if (!c->ctx) 1698 return -ENOMEM; 1699 1700 if (igt_spinner_init(&c->spin, gt)) 1701 goto err_ctx; 1702 1703 return 0; 1704 1705 err_ctx: 1706 kernel_context_close(c->ctx); 1707 return -ENOMEM; 1708 } 1709 1710 static void preempt_client_fini(struct preempt_client *c) 1711 { 1712 igt_spinner_fini(&c->spin); 1713 kernel_context_close(c->ctx); 1714 } 1715 1716 static int live_nopreempt(void *arg) 1717 { 1718 struct intel_gt *gt = arg; 1719 struct intel_engine_cs *engine; 1720 struct preempt_client a, b; 1721 enum intel_engine_id id; 1722 int err = -ENOMEM; 1723 1724 /* 1725 * Verify that we can disable preemption for an individual request 1726 * that may be being observed and not want to be interrupted. 1727 */ 1728 1729 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) 1730 return 0; 1731 1732 if (preempt_client_init(gt, &a)) 1733 return -ENOMEM; 1734 if (preempt_client_init(gt, &b)) 1735 goto err_client_a; 1736 b.ctx->sched.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX); 1737 1738 for_each_engine(engine, gt, id) { 1739 struct i915_request *rq_a, *rq_b; 1740 1741 if (!intel_engine_has_preemption(engine)) 1742 continue; 1743 1744 engine->execlists.preempt_hang.count = 0; 1745 1746 rq_a = spinner_create_request(&a.spin, 1747 a.ctx, engine, 1748 MI_ARB_CHECK); 1749 if (IS_ERR(rq_a)) { 1750 err = PTR_ERR(rq_a); 1751 goto err_client_b; 1752 } 1753 1754 /* Low priority client, but unpreemptable! 
*/ 1755 __set_bit(I915_FENCE_FLAG_NOPREEMPT, &rq_a->fence.flags); 1756 1757 i915_request_add(rq_a); 1758 if (!igt_wait_for_spinner(&a.spin, rq_a)) { 1759 pr_err("First client failed to start\n"); 1760 goto err_wedged; 1761 } 1762 1763 rq_b = spinner_create_request(&b.spin, 1764 b.ctx, engine, 1765 MI_ARB_CHECK); 1766 if (IS_ERR(rq_b)) { 1767 err = PTR_ERR(rq_b); 1768 goto err_client_b; 1769 } 1770 1771 i915_request_add(rq_b); 1772 1773 /* B is much more important than A! (But A is unpreemptable.) */ 1774 GEM_BUG_ON(rq_prio(rq_b) <= rq_prio(rq_a)); 1775 1776 /* Wait long enough for preemption and timeslicing */ 1777 if (igt_wait_for_spinner(&b.spin, rq_b)) { 1778 pr_err("Second client started too early!\n"); 1779 goto err_wedged; 1780 } 1781 1782 igt_spinner_end(&a.spin); 1783 1784 if (!igt_wait_for_spinner(&b.spin, rq_b)) { 1785 pr_err("Second client failed to start\n"); 1786 goto err_wedged; 1787 } 1788 1789 igt_spinner_end(&b.spin); 1790 1791 if (engine->execlists.preempt_hang.count) { 1792 pr_err("Preemption recorded x%d; should have been suppressed!\n", 1793 engine->execlists.preempt_hang.count); 1794 err = -EINVAL; 1795 goto err_wedged; 1796 } 1797 1798 if (igt_flush_test(gt->i915)) 1799 goto err_wedged; 1800 } 1801 1802 err = 0; 1803 err_client_b: 1804 preempt_client_fini(&b); 1805 err_client_a: 1806 preempt_client_fini(&a); 1807 return err; 1808 1809 err_wedged: 1810 igt_spinner_end(&b.spin); 1811 igt_spinner_end(&a.spin); 1812 intel_gt_set_wedged(gt); 1813 err = -EIO; 1814 goto err_client_b; 1815 } 1816 1817 struct live_preempt_cancel { 1818 struct intel_engine_cs *engine; 1819 struct preempt_client a, b; 1820 }; 1821 1822 static int __cancel_active0(struct live_preempt_cancel *arg) 1823 { 1824 struct i915_request *rq; 1825 struct igt_live_test t; 1826 int err; 1827 1828 /* Preempt cancel of ELSP0 */ 1829 GEM_TRACE("%s(%s)\n", __func__, arg->engine->name); 1830 if (igt_live_test_begin(&t, arg->engine->i915, 1831 __func__, arg->engine->name)) 1832 return -EIO; 1833 1834 rq = spinner_create_request(&arg->a.spin, 1835 arg->a.ctx, arg->engine, 1836 MI_ARB_CHECK); 1837 if (IS_ERR(rq)) 1838 return PTR_ERR(rq); 1839 1840 clear_bit(CONTEXT_BANNED, &rq->context->flags); 1841 i915_request_get(rq); 1842 i915_request_add(rq); 1843 if (!igt_wait_for_spinner(&arg->a.spin, rq)) { 1844 err = -EIO; 1845 goto out; 1846 } 1847 1848 intel_context_set_banned(rq->context); 1849 err = intel_engine_pulse(arg->engine); 1850 if (err) 1851 goto out; 1852 1853 err = wait_for_reset(arg->engine, rq, HZ / 2); 1854 if (err) { 1855 pr_err("Cancelled inflight0 request did not reset\n"); 1856 goto out; 1857 } 1858 1859 out: 1860 i915_request_put(rq); 1861 if (igt_live_test_end(&t)) 1862 err = -EIO; 1863 return err; 1864 } 1865 1866 static int __cancel_active1(struct live_preempt_cancel *arg) 1867 { 1868 struct i915_request *rq[2] = {}; 1869 struct igt_live_test t; 1870 int err; 1871 1872 /* Preempt cancel of ELSP1 */ 1873 GEM_TRACE("%s(%s)\n", __func__, arg->engine->name); 1874 if (igt_live_test_begin(&t, arg->engine->i915, 1875 __func__, arg->engine->name)) 1876 return -EIO; 1877 1878 rq[0] = spinner_create_request(&arg->a.spin, 1879 arg->a.ctx, arg->engine, 1880 MI_NOOP); /* no preemption */ 1881 if (IS_ERR(rq[0])) 1882 return PTR_ERR(rq[0]); 1883 1884 clear_bit(CONTEXT_BANNED, &rq[0]->context->flags); 1885 i915_request_get(rq[0]); 1886 i915_request_add(rq[0]); 1887 if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) { 1888 err = -EIO; 1889 goto out; 1890 } 1891 1892 rq[1] = spinner_create_request(&arg->b.spin, 1893 
arg->b.ctx, arg->engine, 1894 MI_ARB_CHECK); 1895 if (IS_ERR(rq[1])) { 1896 err = PTR_ERR(rq[1]); 1897 goto out; 1898 } 1899 1900 clear_bit(CONTEXT_BANNED, &rq[1]->context->flags); 1901 i915_request_get(rq[1]); 1902 err = i915_request_await_dma_fence(rq[1], &rq[0]->fence); 1903 i915_request_add(rq[1]); 1904 if (err) 1905 goto out; 1906 1907 intel_context_set_banned(rq[1]->context); 1908 err = intel_engine_pulse(arg->engine); 1909 if (err) 1910 goto out; 1911 1912 igt_spinner_end(&arg->a.spin); 1913 err = wait_for_reset(arg->engine, rq[1], HZ / 2); 1914 if (err) 1915 goto out; 1916 1917 if (rq[0]->fence.error != 0) { 1918 pr_err("Normal inflight0 request did not complete\n"); 1919 err = -EINVAL; 1920 goto out; 1921 } 1922 1923 if (rq[1]->fence.error != -EIO) { 1924 pr_err("Cancelled inflight1 request did not report -EIO\n"); 1925 err = -EINVAL; 1926 goto out; 1927 } 1928 1929 out: 1930 i915_request_put(rq[1]); 1931 i915_request_put(rq[0]); 1932 if (igt_live_test_end(&t)) 1933 err = -EIO; 1934 return err; 1935 } 1936 1937 static int __cancel_queued(struct live_preempt_cancel *arg) 1938 { 1939 struct i915_request *rq[3] = {}; 1940 struct igt_live_test t; 1941 int err; 1942 1943 /* Full ELSP and one in the wings */ 1944 GEM_TRACE("%s(%s)\n", __func__, arg->engine->name); 1945 if (igt_live_test_begin(&t, arg->engine->i915, 1946 __func__, arg->engine->name)) 1947 return -EIO; 1948 1949 rq[0] = spinner_create_request(&arg->a.spin, 1950 arg->a.ctx, arg->engine, 1951 MI_ARB_CHECK); 1952 if (IS_ERR(rq[0])) 1953 return PTR_ERR(rq[0]); 1954 1955 clear_bit(CONTEXT_BANNED, &rq[0]->context->flags); 1956 i915_request_get(rq[0]); 1957 i915_request_add(rq[0]); 1958 if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) { 1959 err = -EIO; 1960 goto out; 1961 } 1962 1963 rq[1] = igt_request_alloc(arg->b.ctx, arg->engine); 1964 if (IS_ERR(rq[1])) { 1965 err = PTR_ERR(rq[1]); 1966 goto out; 1967 } 1968 1969 clear_bit(CONTEXT_BANNED, &rq[1]->context->flags); 1970 i915_request_get(rq[1]); 1971 err = i915_request_await_dma_fence(rq[1], &rq[0]->fence); 1972 i915_request_add(rq[1]); 1973 if (err) 1974 goto out; 1975 1976 rq[2] = spinner_create_request(&arg->b.spin, 1977 arg->a.ctx, arg->engine, 1978 MI_ARB_CHECK); 1979 if (IS_ERR(rq[2])) { 1980 err = PTR_ERR(rq[2]); 1981 goto out; 1982 } 1983 1984 i915_request_get(rq[2]); 1985 err = i915_request_await_dma_fence(rq[2], &rq[1]->fence); 1986 i915_request_add(rq[2]); 1987 if (err) 1988 goto out; 1989 1990 intel_context_set_banned(rq[2]->context); 1991 err = intel_engine_pulse(arg->engine); 1992 if (err) 1993 goto out; 1994 1995 err = wait_for_reset(arg->engine, rq[2], HZ / 2); 1996 if (err) 1997 goto out; 1998 1999 if (rq[0]->fence.error != -EIO) { 2000 pr_err("Cancelled inflight0 request did not report -EIO\n"); 2001 err = -EINVAL; 2002 goto out; 2003 } 2004 2005 if (rq[1]->fence.error != 0) { 2006 pr_err("Normal inflight1 request did not complete\n"); 2007 err = -EINVAL; 2008 goto out; 2009 } 2010 2011 if (rq[2]->fence.error != -EIO) { 2012 pr_err("Cancelled queued request did not report -EIO\n"); 2013 err = -EINVAL; 2014 goto out; 2015 } 2016 2017 out: 2018 i915_request_put(rq[2]); 2019 i915_request_put(rq[1]); 2020 i915_request_put(rq[0]); 2021 if (igt_live_test_end(&t)) 2022 err = -EIO; 2023 return err; 2024 } 2025 2026 static int __cancel_hostile(struct live_preempt_cancel *arg) 2027 { 2028 struct i915_request *rq; 2029 int err; 2030 2031 /* Preempt cancel non-preemptible spinner in ELSP0 */ 2032 if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT)) 2033 return 0; 2034 2035 
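	/*
	 * The spinner below is created with MI_NOOP, i.e. with arbitration
	 * disabled, so once its context is banned only the preempt-timeout
	 * can evict it from ELSP[0] by forcing an engine reset; hence the
	 * CONFIG_DRM_I915_PREEMPT_TIMEOUT check above.
	 */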
GEM_TRACE("%s(%s)\n", __func__, arg->engine->name); 2036 rq = spinner_create_request(&arg->a.spin, 2037 arg->a.ctx, arg->engine, 2038 MI_NOOP); /* preemption disabled */ 2039 if (IS_ERR(rq)) 2040 return PTR_ERR(rq); 2041 2042 clear_bit(CONTEXT_BANNED, &rq->context->flags); 2043 i915_request_get(rq); 2044 i915_request_add(rq); 2045 if (!igt_wait_for_spinner(&arg->a.spin, rq)) { 2046 err = -EIO; 2047 goto out; 2048 } 2049 2050 intel_context_set_banned(rq->context); 2051 err = intel_engine_pulse(arg->engine); /* force reset */ 2052 if (err) 2053 goto out; 2054 2055 err = wait_for_reset(arg->engine, rq, HZ / 2); 2056 if (err) { 2057 pr_err("Cancelled inflight0 request did not reset\n"); 2058 goto out; 2059 } 2060 2061 out: 2062 i915_request_put(rq); 2063 if (igt_flush_test(arg->engine->i915)) 2064 err = -EIO; 2065 return err; 2066 } 2067 2068 static int live_preempt_cancel(void *arg) 2069 { 2070 struct intel_gt *gt = arg; 2071 struct live_preempt_cancel data; 2072 enum intel_engine_id id; 2073 int err = -ENOMEM; 2074 2075 /* 2076 * To cancel an inflight context, we need to first remove it from the 2077 * GPU. That sounds like preemption! Plus a little bit of bookkeeping. 2078 */ 2079 2080 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) 2081 return 0; 2082 2083 if (preempt_client_init(gt, &data.a)) 2084 return -ENOMEM; 2085 if (preempt_client_init(gt, &data.b)) 2086 goto err_client_a; 2087 2088 for_each_engine(data.engine, gt, id) { 2089 if (!intel_engine_has_preemption(data.engine)) 2090 continue; 2091 2092 err = __cancel_active0(&data); 2093 if (err) 2094 goto err_wedged; 2095 2096 err = __cancel_active1(&data); 2097 if (err) 2098 goto err_wedged; 2099 2100 err = __cancel_queued(&data); 2101 if (err) 2102 goto err_wedged; 2103 2104 err = __cancel_hostile(&data); 2105 if (err) 2106 goto err_wedged; 2107 } 2108 2109 err = 0; 2110 err_client_b: 2111 preempt_client_fini(&data.b); 2112 err_client_a: 2113 preempt_client_fini(&data.a); 2114 return err; 2115 2116 err_wedged: 2117 GEM_TRACE_DUMP(); 2118 igt_spinner_end(&data.b.spin); 2119 igt_spinner_end(&data.a.spin); 2120 intel_gt_set_wedged(gt); 2121 goto err_client_b; 2122 } 2123 2124 static int live_suppress_self_preempt(void *arg) 2125 { 2126 struct intel_gt *gt = arg; 2127 struct intel_engine_cs *engine; 2128 struct i915_sched_attr attr = { 2129 .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX) 2130 }; 2131 struct preempt_client a, b; 2132 enum intel_engine_id id; 2133 int err = -ENOMEM; 2134 2135 /* 2136 * Verify that if a preemption request does not cause a change in 2137 * the current execution order, the preempt-to-idle injection is 2138 * skipped and that we do not accidentally apply it after the CS 2139 * completion event. 
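	 *
	 * The loop below keeps re-prioritising the request that is already
	 * executing and then verifies engine->execlists.preempt_hang.count
	 * stayed zero, i.e. that no pointless preempt-to-idle cycle was
	 * injected on its behalf.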
2140 */ 2141 2142 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) 2143 return 0; 2144 2145 if (intel_uc_uses_guc_submission(>->uc)) 2146 return 0; /* presume black blox */ 2147 2148 if (intel_vgpu_active(gt->i915)) 2149 return 0; /* GVT forces single port & request submission */ 2150 2151 if (preempt_client_init(gt, &a)) 2152 return -ENOMEM; 2153 if (preempt_client_init(gt, &b)) 2154 goto err_client_a; 2155 2156 for_each_engine(engine, gt, id) { 2157 struct i915_request *rq_a, *rq_b; 2158 int depth; 2159 2160 if (!intel_engine_has_preemption(engine)) 2161 continue; 2162 2163 if (igt_flush_test(gt->i915)) 2164 goto err_wedged; 2165 2166 intel_engine_pm_get(engine); 2167 engine->execlists.preempt_hang.count = 0; 2168 2169 rq_a = spinner_create_request(&a.spin, 2170 a.ctx, engine, 2171 MI_NOOP); 2172 if (IS_ERR(rq_a)) { 2173 err = PTR_ERR(rq_a); 2174 intel_engine_pm_put(engine); 2175 goto err_client_b; 2176 } 2177 2178 i915_request_add(rq_a); 2179 if (!igt_wait_for_spinner(&a.spin, rq_a)) { 2180 pr_err("First client failed to start\n"); 2181 intel_engine_pm_put(engine); 2182 goto err_wedged; 2183 } 2184 2185 /* Keep postponing the timer to avoid premature slicing */ 2186 mod_timer(&engine->execlists.timer, jiffies + HZ); 2187 for (depth = 0; depth < 8; depth++) { 2188 rq_b = spinner_create_request(&b.spin, 2189 b.ctx, engine, 2190 MI_NOOP); 2191 if (IS_ERR(rq_b)) { 2192 err = PTR_ERR(rq_b); 2193 intel_engine_pm_put(engine); 2194 goto err_client_b; 2195 } 2196 i915_request_add(rq_b); 2197 2198 GEM_BUG_ON(i915_request_completed(rq_a)); 2199 engine->schedule(rq_a, &attr); 2200 igt_spinner_end(&a.spin); 2201 2202 if (!igt_wait_for_spinner(&b.spin, rq_b)) { 2203 pr_err("Second client failed to start\n"); 2204 intel_engine_pm_put(engine); 2205 goto err_wedged; 2206 } 2207 2208 swap(a, b); 2209 rq_a = rq_b; 2210 } 2211 igt_spinner_end(&a.spin); 2212 2213 if (engine->execlists.preempt_hang.count) { 2214 pr_err("Preemption on %s recorded x%d, depth %d; should have been suppressed!\n", 2215 engine->name, 2216 engine->execlists.preempt_hang.count, 2217 depth); 2218 intel_engine_pm_put(engine); 2219 err = -EINVAL; 2220 goto err_client_b; 2221 } 2222 2223 intel_engine_pm_put(engine); 2224 if (igt_flush_test(gt->i915)) 2225 goto err_wedged; 2226 } 2227 2228 err = 0; 2229 err_client_b: 2230 preempt_client_fini(&b); 2231 err_client_a: 2232 preempt_client_fini(&a); 2233 return err; 2234 2235 err_wedged: 2236 igt_spinner_end(&b.spin); 2237 igt_spinner_end(&a.spin); 2238 intel_gt_set_wedged(gt); 2239 err = -EIO; 2240 goto err_client_b; 2241 } 2242 2243 static int __i915_sw_fence_call 2244 dummy_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) 2245 { 2246 return NOTIFY_DONE; 2247 } 2248 2249 static struct i915_request *dummy_request(struct intel_engine_cs *engine) 2250 { 2251 struct i915_request *rq; 2252 2253 rq = kzalloc(sizeof(*rq), GFP_KERNEL); 2254 if (!rq) 2255 return NULL; 2256 2257 rq->engine = engine; 2258 2259 mtx_init(&rq->lock, IPL_TTY); 2260 INIT_LIST_HEAD(&rq->fence.cb_list); 2261 rq->fence.lock = &rq->lock; 2262 rq->fence.ops = &i915_fence_ops; 2263 2264 i915_sched_node_init(&rq->sched); 2265 2266 /* mark this request as permanently incomplete */ 2267 rq->fence.seqno = 1; 2268 BUILD_BUG_ON(sizeof(rq->fence.seqno) != 8); /* upper 32b == 0 */ 2269 rq->hwsp_seqno = (u32 *)&rq->fence.seqno + 1; 2270 GEM_BUG_ON(i915_request_completed(rq)); 2271 2272 i915_sw_fence_init(&rq->submit, dummy_notify); 2273 set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags); 2274 2275 mtx_init(&rq->lock, 
IPL_TTY); 2276 rq->fence.lock = &rq->lock; 2277 INIT_LIST_HEAD(&rq->fence.cb_list); 2278 2279 return rq; 2280 } 2281 2282 static void dummy_request_free(struct i915_request *dummy) 2283 { 2284 /* We have to fake the CS interrupt to kick the next request */ 2285 i915_sw_fence_commit(&dummy->submit); 2286 2287 i915_request_mark_complete(dummy); 2288 dma_fence_signal(&dummy->fence); 2289 2290 i915_sched_node_fini(&dummy->sched); 2291 i915_sw_fence_fini(&dummy->submit); 2292 2293 dma_fence_free(&dummy->fence); 2294 } 2295 2296 static int live_suppress_wait_preempt(void *arg) 2297 { 2298 struct intel_gt *gt = arg; 2299 struct preempt_client client[4]; 2300 struct i915_request *rq[ARRAY_SIZE(client)] = {}; 2301 struct intel_engine_cs *engine; 2302 enum intel_engine_id id; 2303 int err = -ENOMEM; 2304 int i; 2305 2306 /* 2307 * Waiters are given a little priority nudge, but not enough 2308 * to actually cause any preemption. Double check that we do 2309 * not needlessly generate preempt-to-idle cycles. 2310 */ 2311 2312 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) 2313 return 0; 2314 2315 if (preempt_client_init(gt, &client[0])) /* ELSP[0] */ 2316 return -ENOMEM; 2317 if (preempt_client_init(gt, &client[1])) /* ELSP[1] */ 2318 goto err_client_0; 2319 if (preempt_client_init(gt, &client[2])) /* head of queue */ 2320 goto err_client_1; 2321 if (preempt_client_init(gt, &client[3])) /* bystander */ 2322 goto err_client_2; 2323 2324 for_each_engine(engine, gt, id) { 2325 int depth; 2326 2327 if (!intel_engine_has_preemption(engine)) 2328 continue; 2329 2330 if (!engine->emit_init_breadcrumb) 2331 continue; 2332 2333 for (depth = 0; depth < ARRAY_SIZE(client); depth++) { 2334 struct i915_request *dummy; 2335 2336 engine->execlists.preempt_hang.count = 0; 2337 2338 dummy = dummy_request(engine); 2339 if (!dummy) 2340 goto err_client_3; 2341 2342 for (i = 0; i < ARRAY_SIZE(client); i++) { 2343 struct i915_request *this; 2344 2345 this = spinner_create_request(&client[i].spin, 2346 client[i].ctx, engine, 2347 MI_NOOP); 2348 if (IS_ERR(this)) { 2349 err = PTR_ERR(this); 2350 goto err_wedged; 2351 } 2352 2353 /* Disable NEWCLIENT promotion */ 2354 __i915_active_fence_set(&i915_request_timeline(this)->last_request, 2355 &dummy->fence); 2356 2357 rq[i] = i915_request_get(this); 2358 i915_request_add(this); 2359 } 2360 2361 dummy_request_free(dummy); 2362 2363 GEM_BUG_ON(i915_request_completed(rq[0])); 2364 if (!igt_wait_for_spinner(&client[0].spin, rq[0])) { 2365 pr_err("%s: First client failed to start\n", 2366 engine->name); 2367 goto err_wedged; 2368 } 2369 GEM_BUG_ON(!i915_request_started(rq[0])); 2370 2371 if (i915_request_wait(rq[depth], 2372 I915_WAIT_PRIORITY, 2373 1) != -ETIME) { 2374 pr_err("%s: Waiter depth:%d completed!\n", 2375 engine->name, depth); 2376 goto err_wedged; 2377 } 2378 2379 for (i = 0; i < ARRAY_SIZE(client); i++) { 2380 igt_spinner_end(&client[i].spin); 2381 i915_request_put(rq[i]); 2382 rq[i] = NULL; 2383 } 2384 2385 if (igt_flush_test(gt->i915)) 2386 goto err_wedged; 2387 2388 if (engine->execlists.preempt_hang.count) { 2389 pr_err("%s: Preemption recorded x%d, depth %d; should have been suppressed!\n", 2390 engine->name, 2391 engine->execlists.preempt_hang.count, 2392 depth); 2393 err = -EINVAL; 2394 goto err_client_3; 2395 } 2396 } 2397 } 2398 2399 err = 0; 2400 err_client_3: 2401 preempt_client_fini(&client[3]); 2402 err_client_2: 2403 preempt_client_fini(&client[2]); 2404 err_client_1: 2405 preempt_client_fini(&client[1]); 2406 err_client_0: 2407 
preempt_client_fini(&client[0]); 2408 return err; 2409 2410 err_wedged: 2411 for (i = 0; i < ARRAY_SIZE(client); i++) { 2412 igt_spinner_end(&client[i].spin); 2413 i915_request_put(rq[i]); 2414 } 2415 intel_gt_set_wedged(gt); 2416 err = -EIO; 2417 goto err_client_3; 2418 } 2419 2420 static int live_chain_preempt(void *arg) 2421 { 2422 struct intel_gt *gt = arg; 2423 struct intel_engine_cs *engine; 2424 struct preempt_client hi, lo; 2425 enum intel_engine_id id; 2426 int err = -ENOMEM; 2427 2428 /* 2429 * Build a chain AB...BA between two contexts (A, B) and request 2430 * preemption of the last request. It should then complete before 2431 * the previously submitted spinner in B. 2432 */ 2433 2434 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) 2435 return 0; 2436 2437 if (preempt_client_init(gt, &hi)) 2438 return -ENOMEM; 2439 2440 if (preempt_client_init(gt, &lo)) 2441 goto err_client_hi; 2442 2443 for_each_engine(engine, gt, id) { 2444 struct i915_sched_attr attr = { 2445 .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX), 2446 }; 2447 struct igt_live_test t; 2448 struct i915_request *rq; 2449 int ring_size, count, i; 2450 2451 if (!intel_engine_has_preemption(engine)) 2452 continue; 2453 2454 rq = spinner_create_request(&lo.spin, 2455 lo.ctx, engine, 2456 MI_ARB_CHECK); 2457 if (IS_ERR(rq)) 2458 goto err_wedged; 2459 2460 i915_request_get(rq); 2461 i915_request_add(rq); 2462 2463 ring_size = rq->wa_tail - rq->head; 2464 if (ring_size < 0) 2465 ring_size += rq->ring->size; 2466 ring_size = rq->ring->size / ring_size; 2467 pr_debug("%s(%s): Using maximum of %d requests\n", 2468 __func__, engine->name, ring_size); 2469 2470 igt_spinner_end(&lo.spin); 2471 if (i915_request_wait(rq, 0, HZ / 2) < 0) { 2472 pr_err("Timed out waiting to flush %s\n", engine->name); 2473 i915_request_put(rq); 2474 goto err_wedged; 2475 } 2476 i915_request_put(rq); 2477 2478 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) { 2479 err = -EIO; 2480 goto err_wedged; 2481 } 2482 2483 for_each_prime_number_from(count, 1, ring_size) { 2484 rq = spinner_create_request(&hi.spin, 2485 hi.ctx, engine, 2486 MI_ARB_CHECK); 2487 if (IS_ERR(rq)) 2488 goto err_wedged; 2489 i915_request_add(rq); 2490 if (!igt_wait_for_spinner(&hi.spin, rq)) 2491 goto err_wedged; 2492 2493 rq = spinner_create_request(&lo.spin, 2494 lo.ctx, engine, 2495 MI_ARB_CHECK); 2496 if (IS_ERR(rq)) 2497 goto err_wedged; 2498 i915_request_add(rq); 2499 2500 for (i = 0; i < count; i++) { 2501 rq = igt_request_alloc(lo.ctx, engine); 2502 if (IS_ERR(rq)) 2503 goto err_wedged; 2504 i915_request_add(rq); 2505 } 2506 2507 rq = igt_request_alloc(hi.ctx, engine); 2508 if (IS_ERR(rq)) 2509 goto err_wedged; 2510 2511 i915_request_get(rq); 2512 i915_request_add(rq); 2513 engine->schedule(rq, &attr); 2514 2515 igt_spinner_end(&hi.spin); 2516 if (i915_request_wait(rq, 0, HZ / 5) < 0) { 2517 struct drm_printer p = 2518 drm_info_printer(gt->i915->drm.dev); 2519 2520 pr_err("Failed to preempt over chain of %d\n", 2521 count); 2522 intel_engine_dump(engine, &p, 2523 "%s\n", engine->name); 2524 i915_request_put(rq); 2525 goto err_wedged; 2526 } 2527 igt_spinner_end(&lo.spin); 2528 i915_request_put(rq); 2529 2530 rq = igt_request_alloc(lo.ctx, engine); 2531 if (IS_ERR(rq)) 2532 goto err_wedged; 2533 2534 i915_request_get(rq); 2535 i915_request_add(rq); 2536 2537 if (i915_request_wait(rq, 0, HZ / 5) < 0) { 2538 struct drm_printer p = 2539 drm_info_printer(gt->i915->drm.dev); 2540 2541 pr_err("Failed to flush low priority chain of %d requests\n", 2542 count); 2543 
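				/*
				 * Dump the engine state to the kernel log so the
				 * stuck low-priority chain can be examined
				 * post-mortem before wedging the GT.
				 */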
intel_engine_dump(engine, &p, 2544 "%s\n", engine->name); 2545 2546 i915_request_put(rq); 2547 goto err_wedged; 2548 } 2549 i915_request_put(rq); 2550 } 2551 2552 if (igt_live_test_end(&t)) { 2553 err = -EIO; 2554 goto err_wedged; 2555 } 2556 } 2557 2558 err = 0; 2559 err_client_lo: 2560 preempt_client_fini(&lo); 2561 err_client_hi: 2562 preempt_client_fini(&hi); 2563 return err; 2564 2565 err_wedged: 2566 igt_spinner_end(&hi.spin); 2567 igt_spinner_end(&lo.spin); 2568 intel_gt_set_wedged(gt); 2569 err = -EIO; 2570 goto err_client_lo; 2571 } 2572 2573 static int create_gang(struct intel_engine_cs *engine, 2574 struct i915_request **prev) 2575 { 2576 struct drm_i915_gem_object *obj; 2577 struct intel_context *ce; 2578 struct i915_request *rq; 2579 struct i915_vma *vma; 2580 u32 *cs; 2581 int err; 2582 2583 ce = intel_context_create(engine); 2584 if (IS_ERR(ce)) 2585 return PTR_ERR(ce); 2586 2587 obj = i915_gem_object_create_internal(engine->i915, 4096); 2588 if (IS_ERR(obj)) { 2589 err = PTR_ERR(obj); 2590 goto err_ce; 2591 } 2592 2593 vma = i915_vma_instance(obj, ce->vm, NULL); 2594 if (IS_ERR(vma)) { 2595 err = PTR_ERR(vma); 2596 goto err_obj; 2597 } 2598 2599 err = i915_vma_pin(vma, 0, 0, PIN_USER); 2600 if (err) 2601 goto err_obj; 2602 2603 cs = i915_gem_object_pin_map(obj, I915_MAP_WC); 2604 if (IS_ERR(cs)) 2605 goto err_obj; 2606 2607 /* Semaphore target: spin until zero */ 2608 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; 2609 2610 *cs++ = MI_SEMAPHORE_WAIT | 2611 MI_SEMAPHORE_POLL | 2612 MI_SEMAPHORE_SAD_EQ_SDD; 2613 *cs++ = 0; 2614 *cs++ = lower_32_bits(vma->node.start); 2615 *cs++ = upper_32_bits(vma->node.start); 2616 2617 if (*prev) { 2618 u64 offset = (*prev)->batch->node.start; 2619 2620 /* Terminate the spinner in the next lower priority batch. */ 2621 *cs++ = MI_STORE_DWORD_IMM_GEN4; 2622 *cs++ = lower_32_bits(offset); 2623 *cs++ = upper_32_bits(offset); 2624 *cs++ = 0; 2625 } 2626 2627 *cs++ = MI_BATCH_BUFFER_END; 2628 i915_gem_object_flush_map(obj); 2629 i915_gem_object_unpin_map(obj); 2630 2631 rq = intel_context_create_request(ce); 2632 if (IS_ERR(rq)) 2633 goto err_obj; 2634 2635 rq->batch = vma; 2636 i915_request_get(rq); 2637 2638 i915_vma_lock(vma); 2639 err = i915_request_await_object(rq, vma->obj, false); 2640 if (!err) 2641 err = i915_vma_move_to_active(vma, rq, 0); 2642 if (!err) 2643 err = rq->engine->emit_bb_start(rq, 2644 vma->node.start, 2645 PAGE_SIZE, 0); 2646 i915_vma_unlock(vma); 2647 i915_request_add(rq); 2648 if (err) 2649 goto err_rq; 2650 2651 i915_gem_object_put(obj); 2652 intel_context_put(ce); 2653 2654 rq->client_link.next = &(*prev)->client_link; 2655 *prev = rq; 2656 return 0; 2657 2658 err_rq: 2659 i915_request_put(rq); 2660 err_obj: 2661 i915_gem_object_put(obj); 2662 err_ce: 2663 intel_context_put(ce); 2664 return err; 2665 } 2666 2667 static int live_preempt_gang(void *arg) 2668 { 2669 struct intel_gt *gt = arg; 2670 struct intel_engine_cs *engine; 2671 enum intel_engine_id id; 2672 2673 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) 2674 return 0; 2675 2676 /* 2677 * Build as long a chain of preempters as we can, with each 2678 * request higher priority than the last. Once we are ready, we release 2679 * the last batch which then precolates down the chain, each releasing 2680 * the next oldest in turn. The intent is to simply push as hard as we 2681 * can with the number of preemptions, trying to exceed narrow HW 2682 * limits. At a minimum, we insist that we can sort all the user 2683 * high priority levels into execution order. 
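	 *
	 * For reference, each step below bumps the user priority by one, so
	 * the effective queue priority is roughly prio << I915_USER_PRIORITY_SHIFT;
	 * the chain is capped once prio would overflow that shifted range.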
2684 */ 2685 2686 for_each_engine(engine, gt, id) { 2687 struct i915_request *rq = NULL; 2688 struct igt_live_test t; 2689 IGT_TIMEOUT(end_time); 2690 int prio = 0; 2691 int err = 0; 2692 u32 *cs; 2693 2694 if (!intel_engine_has_preemption(engine)) 2695 continue; 2696 2697 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) 2698 return -EIO; 2699 2700 do { 2701 struct i915_sched_attr attr = { 2702 .priority = I915_USER_PRIORITY(prio++), 2703 }; 2704 2705 err = create_gang(engine, &rq); 2706 if (err) 2707 break; 2708 2709 /* Submit each spinner at increasing priority */ 2710 engine->schedule(rq, &attr); 2711 2712 if (prio <= I915_PRIORITY_MAX) 2713 continue; 2714 2715 if (prio > (INT_MAX >> I915_USER_PRIORITY_SHIFT)) 2716 break; 2717 2718 if (__igt_timeout(end_time, NULL)) 2719 break; 2720 } while (1); 2721 pr_debug("%s: Preempt chain of %d requests\n", 2722 engine->name, prio); 2723 2724 /* 2725 * Such that the last spinner is the highest priority and 2726 * should execute first. When that spinner completes, 2727 * it will terminate the next lowest spinner until there 2728 * are no more spinners and the gang is complete. 2729 */ 2730 cs = i915_gem_object_pin_map(rq->batch->obj, I915_MAP_WC); 2731 if (!IS_ERR(cs)) { 2732 *cs = 0; 2733 i915_gem_object_unpin_map(rq->batch->obj); 2734 } else { 2735 err = PTR_ERR(cs); 2736 intel_gt_set_wedged(gt); 2737 } 2738 2739 while (rq) { /* wait for each rq from highest to lowest prio */ 2740 struct i915_request *n = 2741 list_next_entry(rq, client_link); 2742 2743 if (err == 0 && i915_request_wait(rq, 0, HZ / 5) < 0) { 2744 struct drm_printer p = 2745 drm_info_printer(engine->i915->drm.dev); 2746 2747 pr_err("Failed to flush chain of %d requests, at %d\n", 2748 prio, rq_prio(rq) >> I915_USER_PRIORITY_SHIFT); 2749 intel_engine_dump(engine, &p, 2750 "%s\n", engine->name); 2751 2752 err = -ETIME; 2753 } 2754 2755 i915_request_put(rq); 2756 rq = n; 2757 } 2758 2759 if (igt_live_test_end(&t)) 2760 err = -EIO; 2761 if (err) 2762 return err; 2763 } 2764 2765 return 0; 2766 } 2767 2768 static int live_preempt_timeout(void *arg) 2769 { 2770 struct intel_gt *gt = arg; 2771 struct i915_gem_context *ctx_hi, *ctx_lo; 2772 struct igt_spinner spin_lo; 2773 struct intel_engine_cs *engine; 2774 enum intel_engine_id id; 2775 int err = -ENOMEM; 2776 2777 /* 2778 * Check that we force preemption to occur by cancelling the previous 2779 * context if it refuses to yield the GPU. 
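	 * The preempt timeout is temporarily dropped to a single jiffy so that
	 * the non-preemptible spinner (submitted with arbitration disabled) is
	 * forcibly reset almost immediately.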
2780 */ 2781 if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT)) 2782 return 0; 2783 2784 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) 2785 return 0; 2786 2787 if (!intel_has_reset_engine(gt)) 2788 return 0; 2789 2790 if (igt_spinner_init(&spin_lo, gt)) 2791 return -ENOMEM; 2792 2793 ctx_hi = kernel_context(gt->i915); 2794 if (!ctx_hi) 2795 goto err_spin_lo; 2796 ctx_hi->sched.priority = 2797 I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY); 2798 2799 ctx_lo = kernel_context(gt->i915); 2800 if (!ctx_lo) 2801 goto err_ctx_hi; 2802 ctx_lo->sched.priority = 2803 I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY); 2804 2805 for_each_engine(engine, gt, id) { 2806 unsigned long saved_timeout; 2807 struct i915_request *rq; 2808 2809 if (!intel_engine_has_preemption(engine)) 2810 continue; 2811 2812 rq = spinner_create_request(&spin_lo, ctx_lo, engine, 2813 MI_NOOP); /* preemption disabled */ 2814 if (IS_ERR(rq)) { 2815 err = PTR_ERR(rq); 2816 goto err_ctx_lo; 2817 } 2818 2819 i915_request_add(rq); 2820 if (!igt_wait_for_spinner(&spin_lo, rq)) { 2821 intel_gt_set_wedged(gt); 2822 err = -EIO; 2823 goto err_ctx_lo; 2824 } 2825 2826 rq = igt_request_alloc(ctx_hi, engine); 2827 if (IS_ERR(rq)) { 2828 igt_spinner_end(&spin_lo); 2829 err = PTR_ERR(rq); 2830 goto err_ctx_lo; 2831 } 2832 2833 /* Flush the previous CS ack before changing timeouts */ 2834 while (READ_ONCE(engine->execlists.pending[0])) 2835 cpu_relax(); 2836 2837 saved_timeout = engine->props.preempt_timeout_ms; 2838 engine->props.preempt_timeout_ms = 1; /* in ms, -> 1 jiffie */ 2839 2840 i915_request_get(rq); 2841 i915_request_add(rq); 2842 2843 intel_engine_flush_submission(engine); 2844 engine->props.preempt_timeout_ms = saved_timeout; 2845 2846 if (i915_request_wait(rq, 0, HZ / 10) < 0) { 2847 intel_gt_set_wedged(gt); 2848 i915_request_put(rq); 2849 err = -ETIME; 2850 goto err_ctx_lo; 2851 } 2852 2853 igt_spinner_end(&spin_lo); 2854 i915_request_put(rq); 2855 } 2856 2857 err = 0; 2858 err_ctx_lo: 2859 kernel_context_close(ctx_lo); 2860 err_ctx_hi: 2861 kernel_context_close(ctx_hi); 2862 err_spin_lo: 2863 igt_spinner_fini(&spin_lo); 2864 return err; 2865 } 2866 2867 static int random_range(struct rnd_state *rnd, int min, int max) 2868 { 2869 return i915_prandom_u32_max_state(max - min, rnd) + min; 2870 } 2871 2872 static int random_priority(struct rnd_state *rnd) 2873 { 2874 return random_range(rnd, I915_PRIORITY_MIN, I915_PRIORITY_MAX); 2875 } 2876 2877 struct preempt_smoke { 2878 struct intel_gt *gt; 2879 struct i915_gem_context **contexts; 2880 struct intel_engine_cs *engine; 2881 struct drm_i915_gem_object *batch; 2882 unsigned int ncontext; 2883 struct rnd_state prng; 2884 unsigned long count; 2885 }; 2886 2887 static struct i915_gem_context *smoke_context(struct preempt_smoke *smoke) 2888 { 2889 return smoke->contexts[i915_prandom_u32_max_state(smoke->ncontext, 2890 &smoke->prng)]; 2891 } 2892 2893 static int smoke_submit(struct preempt_smoke *smoke, 2894 struct i915_gem_context *ctx, int prio, 2895 struct drm_i915_gem_object *batch) 2896 { 2897 struct i915_request *rq; 2898 struct i915_vma *vma = NULL; 2899 int err = 0; 2900 2901 if (batch) { 2902 struct i915_address_space *vm; 2903 2904 vm = i915_gem_context_get_vm_rcu(ctx); 2905 vma = i915_vma_instance(batch, vm, NULL); 2906 i915_vm_put(vm); 2907 if (IS_ERR(vma)) 2908 return PTR_ERR(vma); 2909 2910 err = i915_vma_pin(vma, 0, 0, PIN_USER); 2911 if (err) 2912 return err; 2913 } 2914 2915 ctx->sched.priority = prio; 2916 2917 rq = igt_request_alloc(ctx, smoke->engine); 2918 if 
(IS_ERR(rq)) { 2919 err = PTR_ERR(rq); 2920 goto unpin; 2921 } 2922 2923 if (vma) { 2924 i915_vma_lock(vma); 2925 err = i915_request_await_object(rq, vma->obj, false); 2926 if (!err) 2927 err = i915_vma_move_to_active(vma, rq, 0); 2928 if (!err) 2929 err = rq->engine->emit_bb_start(rq, 2930 vma->node.start, 2931 PAGE_SIZE, 0); 2932 i915_vma_unlock(vma); 2933 } 2934 2935 i915_request_add(rq); 2936 2937 unpin: 2938 if (vma) 2939 i915_vma_unpin(vma); 2940 2941 return err; 2942 } 2943 2944 static int smoke_crescendo_thread(void *arg) 2945 { 2946 struct preempt_smoke *smoke = arg; 2947 IGT_TIMEOUT(end_time); 2948 unsigned long count; 2949 2950 count = 0; 2951 do { 2952 struct i915_gem_context *ctx = smoke_context(smoke); 2953 int err; 2954 2955 err = smoke_submit(smoke, 2956 ctx, count % I915_PRIORITY_MAX, 2957 smoke->batch); 2958 if (err) 2959 return err; 2960 2961 count++; 2962 } while (!__igt_timeout(end_time, NULL)); 2963 2964 smoke->count = count; 2965 return 0; 2966 } 2967 2968 static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags) 2969 #define BATCH BIT(0) 2970 { 2971 struct task_struct *tsk[I915_NUM_ENGINES] = {}; 2972 struct preempt_smoke arg[I915_NUM_ENGINES]; 2973 struct intel_engine_cs *engine; 2974 enum intel_engine_id id; 2975 unsigned long count; 2976 int err = 0; 2977 2978 for_each_engine(engine, smoke->gt, id) { 2979 arg[id] = *smoke; 2980 arg[id].engine = engine; 2981 if (!(flags & BATCH)) 2982 arg[id].batch = NULL; 2983 arg[id].count = 0; 2984 2985 tsk[id] = kthread_run(smoke_crescendo_thread, &arg, 2986 "igt/smoke:%d", id); 2987 if (IS_ERR(tsk[id])) { 2988 err = PTR_ERR(tsk[id]); 2989 break; 2990 } 2991 get_task_struct(tsk[id]); 2992 } 2993 2994 yield(); /* start all threads before we kthread_stop() */ 2995 2996 count = 0; 2997 for_each_engine(engine, smoke->gt, id) { 2998 int status; 2999 3000 if (IS_ERR_OR_NULL(tsk[id])) 3001 continue; 3002 3003 status = kthread_stop(tsk[id]); 3004 if (status && !err) 3005 err = status; 3006 3007 count += arg[id].count; 3008 3009 put_task_struct(tsk[id]); 3010 } 3011 3012 pr_info("Submitted %lu crescendo:%x requests across %d engines and %d contexts\n", 3013 count, flags, 3014 RUNTIME_INFO(smoke->gt->i915)->num_engines, smoke->ncontext); 3015 return 0; 3016 } 3017 3018 static int smoke_random(struct preempt_smoke *smoke, unsigned int flags) 3019 { 3020 enum intel_engine_id id; 3021 IGT_TIMEOUT(end_time); 3022 unsigned long count; 3023 3024 count = 0; 3025 do { 3026 for_each_engine(smoke->engine, smoke->gt, id) { 3027 struct i915_gem_context *ctx = smoke_context(smoke); 3028 int err; 3029 3030 err = smoke_submit(smoke, 3031 ctx, random_priority(&smoke->prng), 3032 flags & BATCH ? 
smoke->batch : NULL); 3033 if (err) 3034 return err; 3035 3036 count++; 3037 } 3038 } while (!__igt_timeout(end_time, NULL)); 3039 3040 pr_info("Submitted %lu random:%x requests across %d engines and %d contexts\n", 3041 count, flags, 3042 RUNTIME_INFO(smoke->gt->i915)->num_engines, smoke->ncontext); 3043 return 0; 3044 } 3045 3046 static int live_preempt_smoke(void *arg) 3047 { 3048 struct preempt_smoke smoke = { 3049 .gt = arg, 3050 .prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed), 3051 .ncontext = 1024, 3052 }; 3053 const unsigned int phase[] = { 0, BATCH }; 3054 struct igt_live_test t; 3055 int err = -ENOMEM; 3056 u32 *cs; 3057 int n; 3058 3059 if (!HAS_LOGICAL_RING_PREEMPTION(smoke.gt->i915)) 3060 return 0; 3061 3062 smoke.contexts = kmalloc_array(smoke.ncontext, 3063 sizeof(*smoke.contexts), 3064 GFP_KERNEL); 3065 if (!smoke.contexts) 3066 return -ENOMEM; 3067 3068 smoke.batch = 3069 i915_gem_object_create_internal(smoke.gt->i915, PAGE_SIZE); 3070 if (IS_ERR(smoke.batch)) { 3071 err = PTR_ERR(smoke.batch); 3072 goto err_free; 3073 } 3074 3075 cs = i915_gem_object_pin_map(smoke.batch, I915_MAP_WB); 3076 if (IS_ERR(cs)) { 3077 err = PTR_ERR(cs); 3078 goto err_batch; 3079 } 3080 for (n = 0; n < PAGE_SIZE / sizeof(*cs) - 1; n++) 3081 cs[n] = MI_ARB_CHECK; 3082 cs[n] = MI_BATCH_BUFFER_END; 3083 i915_gem_object_flush_map(smoke.batch); 3084 i915_gem_object_unpin_map(smoke.batch); 3085 3086 if (igt_live_test_begin(&t, smoke.gt->i915, __func__, "all")) { 3087 err = -EIO; 3088 goto err_batch; 3089 } 3090 3091 for (n = 0; n < smoke.ncontext; n++) { 3092 smoke.contexts[n] = kernel_context(smoke.gt->i915); 3093 if (!smoke.contexts[n]) 3094 goto err_ctx; 3095 } 3096 3097 for (n = 0; n < ARRAY_SIZE(phase); n++) { 3098 err = smoke_crescendo(&smoke, phase[n]); 3099 if (err) 3100 goto err_ctx; 3101 3102 err = smoke_random(&smoke, phase[n]); 3103 if (err) 3104 goto err_ctx; 3105 } 3106 3107 err_ctx: 3108 if (igt_live_test_end(&t)) 3109 err = -EIO; 3110 3111 for (n = 0; n < smoke.ncontext; n++) { 3112 if (!smoke.contexts[n]) 3113 break; 3114 kernel_context_close(smoke.contexts[n]); 3115 } 3116 3117 err_batch: 3118 i915_gem_object_put(smoke.batch); 3119 err_free: 3120 kfree(smoke.contexts); 3121 3122 return err; 3123 } 3124 3125 static int nop_virtual_engine(struct intel_gt *gt, 3126 struct intel_engine_cs **siblings, 3127 unsigned int nsibling, 3128 unsigned int nctx, 3129 unsigned int flags) 3130 #define CHAIN BIT(0) 3131 { 3132 IGT_TIMEOUT(end_time); 3133 struct i915_request *request[16] = {}; 3134 struct intel_context *ve[16]; 3135 unsigned long n, prime, nc; 3136 struct igt_live_test t; 3137 ktime_t times[2] = {}; 3138 int err; 3139 3140 GEM_BUG_ON(!nctx || nctx > ARRAY_SIZE(ve)); 3141 3142 for (n = 0; n < nctx; n++) { 3143 ve[n] = intel_execlists_create_virtual(siblings, nsibling); 3144 if (IS_ERR(ve[n])) { 3145 err = PTR_ERR(ve[n]); 3146 nctx = n; 3147 goto out; 3148 } 3149 3150 err = intel_context_pin(ve[n]); 3151 if (err) { 3152 intel_context_put(ve[n]); 3153 nctx = n; 3154 goto out; 3155 } 3156 } 3157 3158 err = igt_live_test_begin(&t, gt->i915, __func__, ve[0]->engine->name); 3159 if (err) 3160 goto out; 3161 3162 for_each_prime_number_from(prime, 1, 8192) { 3163 times[1] = ktime_get_raw(); 3164 3165 if (flags & CHAIN) { 3166 for (nc = 0; nc < nctx; nc++) { 3167 for (n = 0; n < prime; n++) { 3168 struct i915_request *rq; 3169 3170 rq = i915_request_create(ve[nc]); 3171 if (IS_ERR(rq)) { 3172 err = PTR_ERR(rq); 3173 goto out; 3174 } 3175 3176 if (request[nc]) 3177 
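						/* drop the reference taken on the previous pass */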
i915_request_put(request[nc]); 3178 request[nc] = i915_request_get(rq); 3179 i915_request_add(rq); 3180 } 3181 } 3182 } else { 3183 for (n = 0; n < prime; n++) { 3184 for (nc = 0; nc < nctx; nc++) { 3185 struct i915_request *rq; 3186 3187 rq = i915_request_create(ve[nc]); 3188 if (IS_ERR(rq)) { 3189 err = PTR_ERR(rq); 3190 goto out; 3191 } 3192 3193 if (request[nc]) 3194 i915_request_put(request[nc]); 3195 request[nc] = i915_request_get(rq); 3196 i915_request_add(rq); 3197 } 3198 } 3199 } 3200 3201 for (nc = 0; nc < nctx; nc++) { 3202 if (i915_request_wait(request[nc], 0, HZ / 10) < 0) { 3203 pr_err("%s(%s): wait for %llx:%lld timed out\n", 3204 __func__, ve[0]->engine->name, 3205 request[nc]->fence.context, 3206 request[nc]->fence.seqno); 3207 3208 GEM_TRACE("%s(%s) failed at request %llx:%lld\n", 3209 __func__, ve[0]->engine->name, 3210 request[nc]->fence.context, 3211 request[nc]->fence.seqno); 3212 GEM_TRACE_DUMP(); 3213 intel_gt_set_wedged(gt); 3214 break; 3215 } 3216 } 3217 3218 times[1] = ktime_sub(ktime_get_raw(), times[1]); 3219 if (prime == 1) 3220 times[0] = times[1]; 3221 3222 for (nc = 0; nc < nctx; nc++) { 3223 i915_request_put(request[nc]); 3224 request[nc] = NULL; 3225 } 3226 3227 if (__igt_timeout(end_time, NULL)) 3228 break; 3229 } 3230 3231 err = igt_live_test_end(&t); 3232 if (err) 3233 goto out; 3234 3235 pr_info("Requestx%d latencies on %s: 1 = %lluns, %lu = %lluns\n", 3236 nctx, ve[0]->engine->name, ktime_to_ns(times[0]), 3237 prime, div64_u64(ktime_to_ns(times[1]), prime)); 3238 3239 out: 3240 if (igt_flush_test(gt->i915)) 3241 err = -EIO; 3242 3243 for (nc = 0; nc < nctx; nc++) { 3244 i915_request_put(request[nc]); 3245 intel_context_unpin(ve[nc]); 3246 intel_context_put(ve[nc]); 3247 } 3248 return err; 3249 } 3250 3251 static int live_virtual_engine(void *arg) 3252 { 3253 struct intel_gt *gt = arg; 3254 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; 3255 struct intel_engine_cs *engine; 3256 enum intel_engine_id id; 3257 unsigned int class, inst; 3258 int err; 3259 3260 if (intel_uc_uses_guc_submission(>->uc)) 3261 return 0; 3262 3263 for_each_engine(engine, gt, id) { 3264 err = nop_virtual_engine(gt, &engine, 1, 1, 0); 3265 if (err) { 3266 pr_err("Failed to wrap engine %s: err=%d\n", 3267 engine->name, err); 3268 return err; 3269 } 3270 } 3271 3272 for (class = 0; class <= MAX_ENGINE_CLASS; class++) { 3273 int nsibling, n; 3274 3275 nsibling = 0; 3276 for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) { 3277 if (!gt->engine_class[class][inst]) 3278 continue; 3279 3280 siblings[nsibling++] = gt->engine_class[class][inst]; 3281 } 3282 if (nsibling < 2) 3283 continue; 3284 3285 for (n = 1; n <= nsibling + 1; n++) { 3286 err = nop_virtual_engine(gt, siblings, nsibling, 3287 n, 0); 3288 if (err) 3289 return err; 3290 } 3291 3292 err = nop_virtual_engine(gt, siblings, nsibling, n, CHAIN); 3293 if (err) 3294 return err; 3295 } 3296 3297 return 0; 3298 } 3299 3300 static int mask_virtual_engine(struct intel_gt *gt, 3301 struct intel_engine_cs **siblings, 3302 unsigned int nsibling) 3303 { 3304 struct i915_request *request[MAX_ENGINE_INSTANCE + 1]; 3305 struct intel_context *ve; 3306 struct igt_live_test t; 3307 unsigned int n; 3308 int err; 3309 3310 /* 3311 * Check that by setting the execution mask on a request, we can 3312 * restrict it to our desired engine within the virtual engine. 
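	 * Each request is pinned to a single physical sibling by setting its
	 * execution_mask to that engine's mask bit before it is submitted.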
3313 */ 3314 3315 ve = intel_execlists_create_virtual(siblings, nsibling); 3316 if (IS_ERR(ve)) { 3317 err = PTR_ERR(ve); 3318 goto out_close; 3319 } 3320 3321 err = intel_context_pin(ve); 3322 if (err) 3323 goto out_put; 3324 3325 err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name); 3326 if (err) 3327 goto out_unpin; 3328 3329 for (n = 0; n < nsibling; n++) { 3330 request[n] = i915_request_create(ve); 3331 if (IS_ERR(request[n])) { 3332 err = PTR_ERR(request[n]); 3333 nsibling = n; 3334 goto out; 3335 } 3336 3337 /* Reverse order as it's more likely to be unnatural */ 3338 request[n]->execution_mask = siblings[nsibling - n - 1]->mask; 3339 3340 i915_request_get(request[n]); 3341 i915_request_add(request[n]); 3342 } 3343 3344 for (n = 0; n < nsibling; n++) { 3345 if (i915_request_wait(request[n], 0, HZ / 10) < 0) { 3346 pr_err("%s(%s): wait for %llx:%lld timed out\n", 3347 __func__, ve->engine->name, 3348 request[n]->fence.context, 3349 request[n]->fence.seqno); 3350 3351 GEM_TRACE("%s(%s) failed at request %llx:%lld\n", 3352 __func__, ve->engine->name, 3353 request[n]->fence.context, 3354 request[n]->fence.seqno); 3355 GEM_TRACE_DUMP(); 3356 intel_gt_set_wedged(gt); 3357 err = -EIO; 3358 goto out; 3359 } 3360 3361 if (request[n]->engine != siblings[nsibling - n - 1]) { 3362 pr_err("Executed on wrong sibling '%s', expected '%s'\n", 3363 request[n]->engine->name, 3364 siblings[nsibling - n - 1]->name); 3365 err = -EINVAL; 3366 goto out; 3367 } 3368 } 3369 3370 err = igt_live_test_end(&t); 3371 out: 3372 if (igt_flush_test(gt->i915)) 3373 err = -EIO; 3374 3375 for (n = 0; n < nsibling; n++) 3376 i915_request_put(request[n]); 3377 3378 out_unpin: 3379 intel_context_unpin(ve); 3380 out_put: 3381 intel_context_put(ve); 3382 out_close: 3383 return err; 3384 } 3385 3386 static int live_virtual_mask(void *arg) 3387 { 3388 struct intel_gt *gt = arg; 3389 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; 3390 unsigned int class, inst; 3391 int err; 3392 3393 if (intel_uc_uses_guc_submission(>->uc)) 3394 return 0; 3395 3396 for (class = 0; class <= MAX_ENGINE_CLASS; class++) { 3397 unsigned int nsibling; 3398 3399 nsibling = 0; 3400 for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) { 3401 if (!gt->engine_class[class][inst]) 3402 break; 3403 3404 siblings[nsibling++] = gt->engine_class[class][inst]; 3405 } 3406 if (nsibling < 2) 3407 continue; 3408 3409 err = mask_virtual_engine(gt, siblings, nsibling); 3410 if (err) 3411 return err; 3412 } 3413 3414 return 0; 3415 } 3416 3417 static int preserved_virtual_engine(struct intel_gt *gt, 3418 struct intel_engine_cs **siblings, 3419 unsigned int nsibling) 3420 { 3421 struct i915_request *last = NULL; 3422 struct intel_context *ve; 3423 struct i915_vma *scratch; 3424 struct igt_live_test t; 3425 unsigned int n; 3426 int err = 0; 3427 u32 *cs; 3428 3429 scratch = create_scratch(siblings[0]->gt); 3430 if (IS_ERR(scratch)) 3431 return PTR_ERR(scratch); 3432 3433 err = i915_vma_sync(scratch); 3434 if (err) 3435 goto out_scratch; 3436 3437 ve = intel_execlists_create_virtual(siblings, nsibling); 3438 if (IS_ERR(ve)) { 3439 err = PTR_ERR(ve); 3440 goto out_scratch; 3441 } 3442 3443 err = intel_context_pin(ve); 3444 if (err) 3445 goto out_put; 3446 3447 err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name); 3448 if (err) 3449 goto out_unpin; 3450 3451 for (n = 0; n < NUM_GPR_DW; n++) { 3452 struct intel_engine_cs *engine = siblings[n % nsibling]; 3453 struct i915_request *rq; 3454 3455 rq = i915_request_create(ve); 3456 if 
(IS_ERR(rq)) { 3457 err = PTR_ERR(rq); 3458 goto out_end; 3459 } 3460 3461 i915_request_put(last); 3462 last = i915_request_get(rq); 3463 3464 cs = intel_ring_begin(rq, 8); 3465 if (IS_ERR(cs)) { 3466 i915_request_add(rq); 3467 err = PTR_ERR(cs); 3468 goto out_end; 3469 } 3470 3471 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT; 3472 *cs++ = CS_GPR(engine, n); 3473 *cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32); 3474 *cs++ = 0; 3475 3476 *cs++ = MI_LOAD_REGISTER_IMM(1); 3477 *cs++ = CS_GPR(engine, (n + 1) % NUM_GPR_DW); 3478 *cs++ = n + 1; 3479 3480 *cs++ = MI_NOOP; 3481 intel_ring_advance(rq, cs); 3482 3483 /* Restrict this request to run on a particular engine */ 3484 rq->execution_mask = engine->mask; 3485 i915_request_add(rq); 3486 } 3487 3488 if (i915_request_wait(last, 0, HZ / 5) < 0) { 3489 err = -ETIME; 3490 goto out_end; 3491 } 3492 3493 cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB); 3494 if (IS_ERR(cs)) { 3495 err = PTR_ERR(cs); 3496 goto out_end; 3497 } 3498 3499 for (n = 0; n < NUM_GPR_DW; n++) { 3500 if (cs[n] != n) { 3501 pr_err("Incorrect value[%d] found for GPR[%d]\n", 3502 cs[n], n); 3503 err = -EINVAL; 3504 break; 3505 } 3506 } 3507 3508 i915_gem_object_unpin_map(scratch->obj); 3509 3510 out_end: 3511 if (igt_live_test_end(&t)) 3512 err = -EIO; 3513 i915_request_put(last); 3514 out_unpin: 3515 intel_context_unpin(ve); 3516 out_put: 3517 intel_context_put(ve); 3518 out_scratch: 3519 i915_vma_unpin_and_release(&scratch, 0); 3520 return err; 3521 } 3522 3523 static int live_virtual_preserved(void *arg) 3524 { 3525 struct intel_gt *gt = arg; 3526 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; 3527 unsigned int class, inst; 3528 3529 /* 3530 * Check that the context image retains non-privileged (user) registers 3531 * from one engine to the next. For this we check that the CS_GPR 3532 * are preserved. 3533 */ 3534 3535 if (intel_uc_uses_guc_submission(>->uc)) 3536 return 0; 3537 3538 /* As we use CS_GPR we cannot run before they existed on all engines. */ 3539 if (INTEL_GEN(gt->i915) < 9) 3540 return 0; 3541 3542 for (class = 0; class <= MAX_ENGINE_CLASS; class++) { 3543 int nsibling, err; 3544 3545 nsibling = 0; 3546 for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) { 3547 if (!gt->engine_class[class][inst]) 3548 continue; 3549 3550 siblings[nsibling++] = gt->engine_class[class][inst]; 3551 } 3552 if (nsibling < 2) 3553 continue; 3554 3555 err = preserved_virtual_engine(gt, siblings, nsibling); 3556 if (err) 3557 return err; 3558 } 3559 3560 return 0; 3561 } 3562 3563 static int bond_virtual_engine(struct intel_gt *gt, 3564 unsigned int class, 3565 struct intel_engine_cs **siblings, 3566 unsigned int nsibling, 3567 unsigned int flags) 3568 #define BOND_SCHEDULE BIT(0) 3569 { 3570 struct intel_engine_cs *master; 3571 struct i915_request *rq[16]; 3572 enum intel_engine_id id; 3573 struct igt_spinner spin; 3574 unsigned long n; 3575 int err; 3576 3577 /* 3578 * A set of bonded requests is intended to be run concurrently 3579 * across a number of engines. We use one request per-engine 3580 * and a magic fence to schedule each of the bonded requests 3581 * at the same time. A consequence of our current scheduler is that 3582 * we only move requests to the HW ready queue when the request 3583 * becomes ready, that is when all of its prerequisite fences have 3584 * been signaled. As one of those fences is the master submit fence, 3585 * there is a delay on all secondary fences as the HW may be 3586 * currently busy. 
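	 * (In other words, the bonded secondaries only become runnable once
	 * the master request has actually been submitted to the hardware.)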
	 * Equally, as all the requests are independent, they may have other
	 * fences that delay individual request submission to HW. Ergo, we do
	 * not guarantee that all requests are immediately submitted to HW at
	 * the same time, just that if the rules are abided by, they are ready
	 * at the same time as the first is submitted. Userspace can embed
	 * semaphores in its batch to ensure parallel execution of its phases
	 * as it requires. Though naturally it gets requested that perhaps the
	 * scheduler should take care of parallel execution, even across
	 * preemption events on different HW. (The proper answer is of course
	 * "lalalala".)
	 *
	 * With the submit-fence, we have identified three possible phases
	 * of synchronisation depending on the master fence: queued (not
	 * ready), executing, and signaled. The first two are quite simple
	 * and checked below. However, the signaled master fence handling is
	 * contentious. Currently we do not distinguish between a signaled
	 * fence and an expired fence, as once signaled it does not convey
	 * any information about the previous execution. It may even be freed
	 * and hence checking later it may not exist at all. Ergo we currently
	 * do not apply the bonding constraint for an already signaled fence,
	 * as our expectation is that it should not constrain the secondaries
	 * and is outside of the scope of the bonded request API (i.e. all
	 * userspace requests are meant to be running in parallel). As
	 * it imposes no constraint, and is effectively a no-op, we do not
	 * check below as normal execution flows are checked extensively above.
	 *
	 * XXX Is the degenerate handling of signaled submit fences the
	 * expected behaviour for userspace?
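	 *
	 * In short, only the queued and executing phases of the master fence
	 * are exercised below; a signaled (or expired) submit fence imposes
	 * no bonding constraint on the secondaries.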
3614 */ 3615 3616 GEM_BUG_ON(nsibling >= ARRAY_SIZE(rq) - 1); 3617 3618 if (igt_spinner_init(&spin, gt)) 3619 return -ENOMEM; 3620 3621 err = 0; 3622 rq[0] = ERR_PTR(-ENOMEM); 3623 for_each_engine(master, gt, id) { 3624 struct i915_sw_fence fence = {}; 3625 struct intel_context *ce; 3626 3627 if (master->class == class) 3628 continue; 3629 3630 ce = intel_context_create(master); 3631 if (IS_ERR(ce)) { 3632 err = PTR_ERR(ce); 3633 goto out; 3634 } 3635 3636 memset_p((void *)rq, ERR_PTR(-EINVAL), ARRAY_SIZE(rq)); 3637 3638 rq[0] = igt_spinner_create_request(&spin, ce, MI_NOOP); 3639 intel_context_put(ce); 3640 if (IS_ERR(rq[0])) { 3641 err = PTR_ERR(rq[0]); 3642 goto out; 3643 } 3644 i915_request_get(rq[0]); 3645 3646 if (flags & BOND_SCHEDULE) { 3647 onstack_fence_init(&fence); 3648 err = i915_sw_fence_await_sw_fence_gfp(&rq[0]->submit, 3649 &fence, 3650 GFP_KERNEL); 3651 } 3652 3653 i915_request_add(rq[0]); 3654 if (err < 0) 3655 goto out; 3656 3657 if (!(flags & BOND_SCHEDULE) && 3658 !igt_wait_for_spinner(&spin, rq[0])) { 3659 err = -EIO; 3660 goto out; 3661 } 3662 3663 for (n = 0; n < nsibling; n++) { 3664 struct intel_context *ve; 3665 3666 ve = intel_execlists_create_virtual(siblings, nsibling); 3667 if (IS_ERR(ve)) { 3668 err = PTR_ERR(ve); 3669 onstack_fence_fini(&fence); 3670 goto out; 3671 } 3672 3673 err = intel_virtual_engine_attach_bond(ve->engine, 3674 master, 3675 siblings[n]); 3676 if (err) { 3677 intel_context_put(ve); 3678 onstack_fence_fini(&fence); 3679 goto out; 3680 } 3681 3682 err = intel_context_pin(ve); 3683 intel_context_put(ve); 3684 if (err) { 3685 onstack_fence_fini(&fence); 3686 goto out; 3687 } 3688 3689 rq[n + 1] = i915_request_create(ve); 3690 intel_context_unpin(ve); 3691 if (IS_ERR(rq[n + 1])) { 3692 err = PTR_ERR(rq[n + 1]); 3693 onstack_fence_fini(&fence); 3694 goto out; 3695 } 3696 i915_request_get(rq[n + 1]); 3697 3698 err = i915_request_await_execution(rq[n + 1], 3699 &rq[0]->fence, 3700 ve->engine->bond_execute); 3701 i915_request_add(rq[n + 1]); 3702 if (err < 0) { 3703 onstack_fence_fini(&fence); 3704 goto out; 3705 } 3706 } 3707 onstack_fence_fini(&fence); 3708 intel_engine_flush_submission(master); 3709 igt_spinner_end(&spin); 3710 3711 if (i915_request_wait(rq[0], 0, HZ / 10) < 0) { 3712 pr_err("Master request did not execute (on %s)!\n", 3713 rq[0]->engine->name); 3714 err = -EIO; 3715 goto out; 3716 } 3717 3718 for (n = 0; n < nsibling; n++) { 3719 if (i915_request_wait(rq[n + 1], 0, 3720 MAX_SCHEDULE_TIMEOUT) < 0) { 3721 err = -EIO; 3722 goto out; 3723 } 3724 3725 if (rq[n + 1]->engine != siblings[n]) { 3726 pr_err("Bonded request did not execute on target engine: expected %s, used %s; master was %s\n", 3727 siblings[n]->name, 3728 rq[n + 1]->engine->name, 3729 rq[0]->engine->name); 3730 err = -EINVAL; 3731 goto out; 3732 } 3733 } 3734 3735 for (n = 0; !IS_ERR(rq[n]); n++) 3736 i915_request_put(rq[n]); 3737 rq[0] = ERR_PTR(-ENOMEM); 3738 } 3739 3740 out: 3741 for (n = 0; !IS_ERR(rq[n]); n++) 3742 i915_request_put(rq[n]); 3743 if (igt_flush_test(gt->i915)) 3744 err = -EIO; 3745 3746 igt_spinner_fini(&spin); 3747 return err; 3748 } 3749 3750 static int live_virtual_bond(void *arg) 3751 { 3752 static const struct phase { 3753 const char *name; 3754 unsigned int flags; 3755 } phases[] = { 3756 { "", 0 }, 3757 { "schedule", BOND_SCHEDULE }, 3758 { }, 3759 }; 3760 struct intel_gt *gt = arg; 3761 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; 3762 unsigned int class, inst; 3763 int err; 3764 3765 if (intel_uc_uses_guc_submission(>->uc)) 
3766 return 0; 3767 3768 for (class = 0; class <= MAX_ENGINE_CLASS; class++) { 3769 const struct phase *p; 3770 int nsibling; 3771 3772 nsibling = 0; 3773 for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) { 3774 if (!gt->engine_class[class][inst]) 3775 break; 3776 3777 GEM_BUG_ON(nsibling == ARRAY_SIZE(siblings)); 3778 siblings[nsibling++] = gt->engine_class[class][inst]; 3779 } 3780 if (nsibling < 2) 3781 continue; 3782 3783 for (p = phases; p->name; p++) { 3784 err = bond_virtual_engine(gt, 3785 class, siblings, nsibling, 3786 p->flags); 3787 if (err) { 3788 pr_err("%s(%s): failed class=%d, nsibling=%d, err=%d\n", 3789 __func__, p->name, class, nsibling, err); 3790 return err; 3791 } 3792 } 3793 } 3794 3795 return 0; 3796 } 3797 3798 static int reset_virtual_engine(struct intel_gt *gt, 3799 struct intel_engine_cs **siblings, 3800 unsigned int nsibling) 3801 { 3802 struct intel_engine_cs *engine; 3803 struct intel_context *ve; 3804 unsigned long *heartbeat; 3805 struct igt_spinner spin; 3806 struct i915_request *rq; 3807 unsigned int n; 3808 int err = 0; 3809 3810 /* 3811 * In order to support offline error capture for fast preempt reset, 3812 * we need to decouple the guilty request and ensure that it and its 3813 * descendents are not executed while the capture is in progress. 3814 */ 3815 3816 heartbeat = kmalloc_array(nsibling, sizeof(*heartbeat), GFP_KERNEL); 3817 if (!heartbeat) 3818 return -ENOMEM; 3819 3820 if (igt_spinner_init(&spin, gt)) { 3821 err = -ENOMEM; 3822 goto out_free; 3823 } 3824 3825 ve = intel_execlists_create_virtual(siblings, nsibling); 3826 if (IS_ERR(ve)) { 3827 err = PTR_ERR(ve); 3828 goto out_spin; 3829 } 3830 3831 for (n = 0; n < nsibling; n++) 3832 engine_heartbeat_disable(siblings[n], &heartbeat[n]); 3833 3834 rq = igt_spinner_create_request(&spin, ve, MI_ARB_CHECK); 3835 if (IS_ERR(rq)) { 3836 err = PTR_ERR(rq); 3837 goto out_heartbeat; 3838 } 3839 i915_request_add(rq); 3840 3841 if (!igt_wait_for_spinner(&spin, rq)) { 3842 intel_gt_set_wedged(gt); 3843 err = -ETIME; 3844 goto out_heartbeat; 3845 } 3846 3847 engine = rq->engine; 3848 GEM_BUG_ON(engine == ve->engine); 3849 3850 /* Take ownership of the reset and tasklet */ 3851 if (test_and_set_bit(I915_RESET_ENGINE + engine->id, 3852 >->reset.flags)) { 3853 intel_gt_set_wedged(gt); 3854 err = -EBUSY; 3855 goto out_heartbeat; 3856 } 3857 tasklet_disable(&engine->execlists.tasklet); 3858 3859 engine->execlists.tasklet.func(engine->execlists.tasklet.data); 3860 GEM_BUG_ON(execlists_active(&engine->execlists) != rq); 3861 3862 /* Fake a preemption event; failed of course */ 3863 spin_lock_irq(&engine->active.lock); 3864 __unwind_incomplete_requests(engine); 3865 spin_unlock_irq(&engine->active.lock); 3866 GEM_BUG_ON(rq->engine != ve->engine); 3867 3868 /* Reset the engine while keeping our active request on hold */ 3869 execlists_hold(engine, rq); 3870 GEM_BUG_ON(!i915_request_on_hold(rq)); 3871 3872 intel_engine_reset(engine, NULL); 3873 GEM_BUG_ON(rq->fence.error != -EIO); 3874 3875 /* Release our grasp on the engine, letting CS flow again */ 3876 tasklet_enable(&engine->execlists.tasklet); 3877 clear_and_wake_up_bit(I915_RESET_ENGINE + engine->id, >->reset.flags); 3878 3879 /* Check that we do not resubmit the held request */ 3880 i915_request_get(rq); 3881 if (!i915_request_wait(rq, 0, HZ / 5)) { 3882 pr_err("%s: on hold request completed!\n", 3883 engine->name); 3884 intel_gt_set_wedged(gt); 3885 err = -EIO; 3886 goto out_rq; 3887 } 3888 GEM_BUG_ON(!i915_request_on_hold(rq)); 3889 3890 /* But is 
resubmitted on release */ 3891 execlists_unhold(engine, rq); 3892 if (i915_request_wait(rq, 0, HZ / 5) < 0) { 3893 pr_err("%s: held request did not complete!\n", 3894 engine->name); 3895 intel_gt_set_wedged(gt); 3896 err = -ETIME; 3897 } 3898 3899 out_rq: 3900 i915_request_put(rq); 3901 out_heartbeat: 3902 for (n = 0; n < nsibling; n++) 3903 engine_heartbeat_enable(siblings[n], heartbeat[n]); 3904 3905 intel_context_put(ve); 3906 out_spin: 3907 igt_spinner_fini(&spin); 3908 out_free: 3909 kfree(heartbeat); 3910 return err; 3911 } 3912 3913 static int live_virtual_reset(void *arg) 3914 { 3915 struct intel_gt *gt = arg; 3916 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; 3917 unsigned int class, inst; 3918 3919 /* 3920 * Check that we handle a reset event within a virtual engine. 3921 * Only the physical engine is reset, but we have to check the flow 3922 * of the virtual requests around the reset, and make sure it is not 3923 * forgotten. 3924 */ 3925 3926 if (intel_uc_uses_guc_submission(>->uc)) 3927 return 0; 3928 3929 if (!intel_has_reset_engine(gt)) 3930 return 0; 3931 3932 for (class = 0; class <= MAX_ENGINE_CLASS; class++) { 3933 int nsibling, err; 3934 3935 nsibling = 0; 3936 for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) { 3937 if (!gt->engine_class[class][inst]) 3938 continue; 3939 3940 siblings[nsibling++] = gt->engine_class[class][inst]; 3941 } 3942 if (nsibling < 2) 3943 continue; 3944 3945 err = reset_virtual_engine(gt, siblings, nsibling); 3946 if (err) 3947 return err; 3948 } 3949 3950 return 0; 3951 } 3952 3953 int intel_execlists_live_selftests(struct drm_i915_private *i915) 3954 { 3955 static const struct i915_subtest tests[] = { 3956 SUBTEST(live_sanitycheck), 3957 SUBTEST(live_unlite_switch), 3958 SUBTEST(live_unlite_preempt), 3959 SUBTEST(live_pin_rewind), 3960 SUBTEST(live_hold_reset), 3961 SUBTEST(live_error_interrupt), 3962 SUBTEST(live_timeslice_preempt), 3963 SUBTEST(live_timeslice_rewind), 3964 SUBTEST(live_timeslice_queue), 3965 SUBTEST(live_busywait_preempt), 3966 SUBTEST(live_preempt), 3967 SUBTEST(live_late_preempt), 3968 SUBTEST(live_nopreempt), 3969 SUBTEST(live_preempt_cancel), 3970 SUBTEST(live_suppress_self_preempt), 3971 SUBTEST(live_suppress_wait_preempt), 3972 SUBTEST(live_chain_preempt), 3973 SUBTEST(live_preempt_gang), 3974 SUBTEST(live_preempt_timeout), 3975 SUBTEST(live_preempt_smoke), 3976 SUBTEST(live_virtual_engine), 3977 SUBTEST(live_virtual_mask), 3978 SUBTEST(live_virtual_preserved), 3979 SUBTEST(live_virtual_bond), 3980 SUBTEST(live_virtual_reset), 3981 }; 3982 3983 if (!HAS_EXECLISTS(i915)) 3984 return 0; 3985 3986 if (intel_gt_is_wedged(&i915->gt)) 3987 return 0; 3988 3989 return intel_gt_live_subtests(tests, &i915->gt); 3990 } 3991 3992 static void hexdump(const void *buf, size_t len) 3993 { 3994 const size_t rowsize = 8 * sizeof(u32); 3995 const void *prev = NULL; 3996 bool skip = false; 3997 size_t pos; 3998 3999 for (pos = 0; pos < len; pos += rowsize) { 4000 char line[128]; 4001 4002 if (prev && !memcmp(prev, buf + pos, rowsize)) { 4003 if (!skip) { 4004 pr_info("*\n"); 4005 skip = true; 4006 } 4007 continue; 4008 } 4009 4010 WARN_ON_ONCE(hex_dump_to_buffer(buf + pos, len - pos, 4011 rowsize, sizeof(u32), 4012 line, sizeof(line), 4013 false) >= sizeof(line)); 4014 pr_info("[%04zx] %s\n", pos, line); 4015 4016 prev = buf + pos; 4017 skip = false; 4018 } 4019 } 4020 4021 static int emit_semaphore_signal(struct intel_context *ce, void *slot) 4022 { 4023 const u32 offset = 4024 
i915_ggtt_offset(ce->engine->status_page.vma) + 4025 offset_in_page(slot); 4026 struct i915_request *rq; 4027 u32 *cs; 4028 4029 rq = intel_context_create_request(ce); 4030 if (IS_ERR(rq)) 4031 return PTR_ERR(rq); 4032 4033 cs = intel_ring_begin(rq, 4); 4034 if (IS_ERR(cs)) { 4035 i915_request_add(rq); 4036 return PTR_ERR(cs); 4037 } 4038 4039 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; 4040 *cs++ = offset; 4041 *cs++ = 0; 4042 *cs++ = 1; 4043 4044 intel_ring_advance(rq, cs); 4045 4046 rq->sched.attr.priority = I915_PRIORITY_BARRIER; 4047 i915_request_add(rq); 4048 return 0; 4049 } 4050 4051 static int context_flush(struct intel_context *ce, long timeout) 4052 { 4053 struct i915_request *rq; 4054 struct dma_fence *fence; 4055 int err = 0; 4056 4057 rq = intel_engine_create_kernel_request(ce->engine); 4058 if (IS_ERR(rq)) 4059 return PTR_ERR(rq); 4060 4061 fence = i915_active_fence_get(&ce->timeline->last_request); 4062 if (fence) { 4063 i915_request_await_dma_fence(rq, fence); 4064 dma_fence_put(fence); 4065 } 4066 4067 rq = i915_request_get(rq); 4068 i915_request_add(rq); 4069 if (i915_request_wait(rq, 0, timeout) < 0) 4070 err = -ETIME; 4071 i915_request_put(rq); 4072 4073 rmb(); /* We know the request is written, make sure all state is too! */ 4074 return err; 4075 } 4076 4077 static int live_lrc_layout(void *arg) 4078 { 4079 struct intel_gt *gt = arg; 4080 struct intel_engine_cs *engine; 4081 enum intel_engine_id id; 4082 u32 *lrc; 4083 int err; 4084 4085 /* 4086 * Check the registers offsets we use to create the initial reg state 4087 * match the layout saved by HW. 4088 */ 4089 4090 lrc = kmalloc(PAGE_SIZE, GFP_KERNEL); 4091 if (!lrc) 4092 return -ENOMEM; 4093 4094 err = 0; 4095 for_each_engine(engine, gt, id) { 4096 u32 *hw; 4097 int dw; 4098 4099 if (!engine->default_state) 4100 continue; 4101 4102 hw = i915_gem_object_pin_map(engine->default_state, 4103 I915_MAP_WB); 4104 if (IS_ERR(hw)) { 4105 err = PTR_ERR(hw); 4106 break; 4107 } 4108 hw += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw); 4109 4110 execlists_init_reg_state(memset(lrc, POISON_INUSE, PAGE_SIZE), 4111 engine->kernel_context, 4112 engine, 4113 engine->kernel_context->ring, 4114 true); 4115 4116 dw = 0; 4117 do { 4118 u32 lri = hw[dw]; 4119 4120 if (lri == 0) { 4121 dw++; 4122 continue; 4123 } 4124 4125 if (lrc[dw] == 0) { 4126 pr_debug("%s: skipped instruction %x at dword %d\n", 4127 engine->name, lri, dw); 4128 dw++; 4129 continue; 4130 } 4131 4132 if ((lri & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) { 4133 pr_err("%s: Expected LRI command at dword %d, found %08x\n", 4134 engine->name, dw, lri); 4135 err = -EINVAL; 4136 break; 4137 } 4138 4139 if (lrc[dw] != lri) { 4140 pr_err("%s: LRI command mismatch at dword %d, expected %08x found %08x\n", 4141 engine->name, dw, lri, lrc[dw]); 4142 err = -EINVAL; 4143 break; 4144 } 4145 4146 lri &= 0x7f; 4147 lri++; 4148 dw++; 4149 4150 while (lri) { 4151 if (hw[dw] != lrc[dw]) { 4152 pr_err("%s: Different registers found at dword %d, expected %x, found %x\n", 4153 engine->name, dw, hw[dw], lrc[dw]); 4154 err = -EINVAL; 4155 break; 4156 } 4157 4158 /* 4159 * Skip over the actual register value as we 4160 * expect that to differ. 
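				 * Only the register offset entries are
				 * required to match the HW image here.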
4161 */ 4162 dw += 2; 4163 lri -= 2; 4164 } 4165 } while ((lrc[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END); 4166 4167 if (err) { 4168 pr_info("%s: HW register image:\n", engine->name); 4169 hexdump(hw, PAGE_SIZE); 4170 4171 pr_info("%s: SW register image:\n", engine->name); 4172 hexdump(lrc, PAGE_SIZE); 4173 } 4174 4175 i915_gem_object_unpin_map(engine->default_state); 4176 if (err) 4177 break; 4178 } 4179 4180 kfree(lrc); 4181 return err; 4182 } 4183 4184 static int find_offset(const u32 *lri, u32 offset) 4185 { 4186 int i; 4187 4188 for (i = 0; i < PAGE_SIZE / sizeof(u32); i++) 4189 if (lri[i] == offset) 4190 return i; 4191 4192 return -1; 4193 } 4194 4195 static int live_lrc_fixed(void *arg) 4196 { 4197 struct intel_gt *gt = arg; 4198 struct intel_engine_cs *engine; 4199 enum intel_engine_id id; 4200 int err = 0; 4201 4202 /* 4203 * Check the assumed register offsets match the actual locations in 4204 * the context image. 4205 */ 4206 4207 for_each_engine(engine, gt, id) { 4208 const struct { 4209 u32 reg; 4210 u32 offset; 4211 const char *name; 4212 } tbl[] = { 4213 { 4214 i915_mmio_reg_offset(RING_START(engine->mmio_base)), 4215 CTX_RING_START - 1, 4216 "RING_START" 4217 }, 4218 { 4219 i915_mmio_reg_offset(RING_CTL(engine->mmio_base)), 4220 CTX_RING_CTL - 1, 4221 "RING_CTL" 4222 }, 4223 { 4224 i915_mmio_reg_offset(RING_HEAD(engine->mmio_base)), 4225 CTX_RING_HEAD - 1, 4226 "RING_HEAD" 4227 }, 4228 { 4229 i915_mmio_reg_offset(RING_TAIL(engine->mmio_base)), 4230 CTX_RING_TAIL - 1, 4231 "RING_TAIL" 4232 }, 4233 { 4234 i915_mmio_reg_offset(RING_MI_MODE(engine->mmio_base)), 4235 lrc_ring_mi_mode(engine), 4236 "RING_MI_MODE" 4237 }, 4238 { 4239 i915_mmio_reg_offset(RING_BBSTATE(engine->mmio_base)), 4240 CTX_BB_STATE - 1, 4241 "BB_STATE" 4242 }, 4243 { 4244 i915_mmio_reg_offset(RING_CTX_TIMESTAMP(engine->mmio_base)), 4245 CTX_TIMESTAMP - 1, 4246 "RING_CTX_TIMESTAMP" 4247 }, 4248 { }, 4249 }, *t; 4250 u32 *hw; 4251 4252 if (!engine->default_state) 4253 continue; 4254 4255 hw = i915_gem_object_pin_map(engine->default_state, 4256 I915_MAP_WB); 4257 if (IS_ERR(hw)) { 4258 err = PTR_ERR(hw); 4259 break; 4260 } 4261 hw += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw); 4262 4263 for (t = tbl; t->name; t++) { 4264 int dw = find_offset(hw, t->reg); 4265 4266 if (dw != t->offset) { 4267 pr_err("%s: Offset for %s [0x%x] mismatch, found %x, expected %x\n", 4268 engine->name, 4269 t->name, 4270 t->reg, 4271 dw, 4272 t->offset); 4273 err = -EINVAL; 4274 } 4275 } 4276 4277 i915_gem_object_unpin_map(engine->default_state); 4278 } 4279 4280 return err; 4281 } 4282 4283 static int __live_lrc_state(struct intel_engine_cs *engine, 4284 struct i915_vma *scratch) 4285 { 4286 struct intel_context *ce; 4287 struct i915_request *rq; 4288 enum { 4289 RING_START_IDX = 0, 4290 RING_TAIL_IDX, 4291 MAX_IDX 4292 }; 4293 u32 expected[MAX_IDX]; 4294 u32 *cs; 4295 int err; 4296 int n; 4297 4298 ce = intel_context_create(engine); 4299 if (IS_ERR(ce)) 4300 return PTR_ERR(ce); 4301 4302 err = intel_context_pin(ce); 4303 if (err) 4304 goto err_put; 4305 4306 rq = i915_request_create(ce); 4307 if (IS_ERR(rq)) { 4308 err = PTR_ERR(rq); 4309 goto err_unpin; 4310 } 4311 4312 cs = intel_ring_begin(rq, 4 * MAX_IDX); 4313 if (IS_ERR(cs)) { 4314 err = PTR_ERR(cs); 4315 i915_request_add(rq); 4316 goto err_unpin; 4317 } 4318 4319 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT; 4320 *cs++ = i915_mmio_reg_offset(RING_START(engine->mmio_base)); 4321 *cs++ = i915_ggtt_offset(scratch) + RING_START_IDX * sizeof(u32); 4322 *cs++ = 0; 4323 4324 
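	/*
	 * The context should report RING_START as the ggtt offset of this
	 * context's ring; record that as the expected readback value.
	 */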
expected[RING_START_IDX] = i915_ggtt_offset(ce->ring->vma); 4325 4326 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT; 4327 *cs++ = i915_mmio_reg_offset(RING_TAIL(engine->mmio_base)); 4328 *cs++ = i915_ggtt_offset(scratch) + RING_TAIL_IDX * sizeof(u32); 4329 *cs++ = 0; 4330 4331 i915_vma_lock(scratch); 4332 err = i915_request_await_object(rq, scratch->obj, true); 4333 if (!err) 4334 err = i915_vma_move_to_active(scratch, rq, EXEC_OBJECT_WRITE); 4335 i915_vma_unlock(scratch); 4336 4337 i915_request_get(rq); 4338 i915_request_add(rq); 4339 if (err) 4340 goto err_rq; 4341 4342 intel_engine_flush_submission(engine); 4343 expected[RING_TAIL_IDX] = ce->ring->tail; 4344 4345 if (i915_request_wait(rq, 0, HZ / 5) < 0) { 4346 err = -ETIME; 4347 goto err_rq; 4348 } 4349 4350 cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB); 4351 if (IS_ERR(cs)) { 4352 err = PTR_ERR(cs); 4353 goto err_rq; 4354 } 4355 4356 for (n = 0; n < MAX_IDX; n++) { 4357 if (cs[n] != expected[n]) { 4358 pr_err("%s: Stored register[%d] value[0x%x] did not match expected[0x%x]\n", 4359 engine->name, n, cs[n], expected[n]); 4360 err = -EINVAL; 4361 break; 4362 } 4363 } 4364 4365 i915_gem_object_unpin_map(scratch->obj); 4366 4367 err_rq: 4368 i915_request_put(rq); 4369 err_unpin: 4370 intel_context_unpin(ce); 4371 err_put: 4372 intel_context_put(ce); 4373 return err; 4374 } 4375 4376 static int live_lrc_state(void *arg) 4377 { 4378 struct intel_gt *gt = arg; 4379 struct intel_engine_cs *engine; 4380 struct i915_vma *scratch; 4381 enum intel_engine_id id; 4382 int err = 0; 4383 4384 /* 4385 * Check the live register state matches what we expect for this 4386 * intel_context. 4387 */ 4388 4389 scratch = create_scratch(gt); 4390 if (IS_ERR(scratch)) 4391 return PTR_ERR(scratch); 4392 4393 for_each_engine(engine, gt, id) { 4394 err = __live_lrc_state(engine, scratch); 4395 if (err) 4396 break; 4397 } 4398 4399 if (igt_flush_test(gt->i915)) 4400 err = -EIO; 4401 4402 i915_vma_unpin_and_release(&scratch, 0); 4403 return err; 4404 } 4405 4406 static int gpr_make_dirty(struct intel_context *ce) 4407 { 4408 struct i915_request *rq; 4409 u32 *cs; 4410 int n; 4411 4412 rq = intel_context_create_request(ce); 4413 if (IS_ERR(rq)) 4414 return PTR_ERR(rq); 4415 4416 cs = intel_ring_begin(rq, 2 * NUM_GPR_DW + 2); 4417 if (IS_ERR(cs)) { 4418 i915_request_add(rq); 4419 return PTR_ERR(cs); 4420 } 4421 4422 *cs++ = MI_LOAD_REGISTER_IMM(NUM_GPR_DW); 4423 for (n = 0; n < NUM_GPR_DW; n++) { 4424 *cs++ = CS_GPR(ce->engine, n); 4425 *cs++ = STACK_MAGIC; 4426 } 4427 *cs++ = MI_NOOP; 4428 4429 intel_ring_advance(rq, cs); 4430 4431 rq->sched.attr.priority = I915_PRIORITY_BARRIER; 4432 i915_request_add(rq); 4433 4434 return 0; 4435 } 4436 4437 static struct i915_request * 4438 __gpr_read(struct intel_context *ce, struct i915_vma *scratch, u32 *slot) 4439 { 4440 const u32 offset = 4441 i915_ggtt_offset(ce->engine->status_page.vma) + 4442 offset_in_page(slot); 4443 struct i915_request *rq; 4444 u32 *cs; 4445 int err; 4446 int n; 4447 4448 rq = intel_context_create_request(ce); 4449 if (IS_ERR(rq)) 4450 return rq; 4451 4452 cs = intel_ring_begin(rq, 6 + 4 * NUM_GPR_DW); 4453 if (IS_ERR(cs)) { 4454 i915_request_add(rq); 4455 return ERR_CAST(cs); 4456 } 4457 4458 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; 4459 *cs++ = MI_NOOP; 4460 4461 *cs++ = MI_SEMAPHORE_WAIT | 4462 MI_SEMAPHORE_GLOBAL_GTT | 4463 MI_SEMAPHORE_POLL | 4464 MI_SEMAPHORE_SAD_NEQ_SDD; 4465 *cs++ = 0; 4466 *cs++ = offset; 4467 *cs++ = 0; 4468 4469 for (n = 0; n < NUM_GPR_DW; n++) { 4470 *cs++ = 
MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT; 4471 *cs++ = CS_GPR(ce->engine, n); 4472 *cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32); 4473 *cs++ = 0; 4474 } 4475 4476 i915_vma_lock(scratch); 4477 err = i915_request_await_object(rq, scratch->obj, true); 4478 if (!err) 4479 err = i915_vma_move_to_active(scratch, rq, EXEC_OBJECT_WRITE); 4480 i915_vma_unlock(scratch); 4481 4482 i915_request_get(rq); 4483 i915_request_add(rq); 4484 if (err) { 4485 i915_request_put(rq); 4486 rq = ERR_PTR(err); 4487 } 4488 4489 return rq; 4490 } 4491 4492 static int __live_lrc_gpr(struct intel_engine_cs *engine, 4493 struct i915_vma *scratch, 4494 bool preempt) 4495 { 4496 u32 *slot = memset32(engine->status_page.addr + 1000, 0, 4); 4497 struct intel_context *ce; 4498 struct i915_request *rq; 4499 u32 *cs; 4500 int err; 4501 int n; 4502 4503 if (INTEL_GEN(engine->i915) < 9 && engine->class != RENDER_CLASS) 4504 return 0; /* GPR only on rcs0 for gen8 */ 4505 4506 err = gpr_make_dirty(engine->kernel_context); 4507 if (err) 4508 return err; 4509 4510 ce = intel_context_create(engine); 4511 if (IS_ERR(ce)) 4512 return PTR_ERR(ce); 4513 4514 rq = __gpr_read(ce, scratch, slot); 4515 if (IS_ERR(rq)) { 4516 err = PTR_ERR(rq); 4517 goto err_put; 4518 } 4519 4520 err = wait_for_submit(engine, rq, HZ / 2); 4521 if (err) 4522 goto err_rq; 4523 4524 if (preempt) { 4525 err = gpr_make_dirty(engine->kernel_context); 4526 if (err) 4527 goto err_rq; 4528 4529 err = emit_semaphore_signal(engine->kernel_context, slot); 4530 if (err) 4531 goto err_rq; 4532 } else { 4533 slot[0] = 1; 4534 wmb(); 4535 } 4536 4537 if (i915_request_wait(rq, 0, HZ / 5) < 0) { 4538 err = -ETIME; 4539 goto err_rq; 4540 } 4541 4542 cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB); 4543 if (IS_ERR(cs)) { 4544 err = PTR_ERR(cs); 4545 goto err_rq; 4546 } 4547 4548 for (n = 0; n < NUM_GPR_DW; n++) { 4549 if (cs[n]) { 4550 pr_err("%s: GPR[%d].%s was not zero, found 0x%08x!\n", 4551 engine->name, 4552 n / 2, n & 1 ? "udw" : "ldw", 4553 cs[n]); 4554 err = -EINVAL; 4555 break; 4556 } 4557 } 4558 4559 i915_gem_object_unpin_map(scratch->obj); 4560 4561 err_rq: 4562 memset32(&slot[0], -1, 4); 4563 wmb(); 4564 i915_request_put(rq); 4565 err_put: 4566 intel_context_put(ce); 4567 return err; 4568 } 4569 4570 static int live_lrc_gpr(void *arg) 4571 { 4572 struct intel_gt *gt = arg; 4573 struct intel_engine_cs *engine; 4574 struct i915_vma *scratch; 4575 enum intel_engine_id id; 4576 int err = 0; 4577 4578 /* 4579 * Check that GPR registers are cleared in new contexts as we need 4580 * to avoid leaking any information from previous contexts. 
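	 * The GPRs are deliberately dirtied via the kernel context first, so a
	 * freshly created context must read back zeroes whether it is flushed
	 * in by a simple semaphore release or by a preempting request.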
4581 */ 4582 4583 scratch = create_scratch(gt); 4584 if (IS_ERR(scratch)) 4585 return PTR_ERR(scratch); 4586 4587 for_each_engine(engine, gt, id) { 4588 unsigned long heartbeat; 4589 4590 engine_heartbeat_disable(engine, &heartbeat); 4591 4592 err = __live_lrc_gpr(engine, scratch, false); 4593 if (err) 4594 goto err; 4595 4596 err = __live_lrc_gpr(engine, scratch, true); 4597 if (err) 4598 goto err; 4599 4600 err: 4601 engine_heartbeat_enable(engine, heartbeat); 4602 if (igt_flush_test(gt->i915)) 4603 err = -EIO; 4604 if (err) 4605 break; 4606 } 4607 4608 i915_vma_unpin_and_release(&scratch, 0); 4609 return err; 4610 } 4611 4612 static struct i915_request * 4613 create_timestamp(struct intel_context *ce, void *slot, int idx) 4614 { 4615 const u32 offset = 4616 i915_ggtt_offset(ce->engine->status_page.vma) + 4617 offset_in_page(slot); 4618 struct i915_request *rq; 4619 u32 *cs; 4620 int err; 4621 4622 rq = intel_context_create_request(ce); 4623 if (IS_ERR(rq)) 4624 return rq; 4625 4626 cs = intel_ring_begin(rq, 10); 4627 if (IS_ERR(cs)) { 4628 err = PTR_ERR(cs); 4629 goto err; 4630 } 4631 4632 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; 4633 *cs++ = MI_NOOP; 4634 4635 *cs++ = MI_SEMAPHORE_WAIT | 4636 MI_SEMAPHORE_GLOBAL_GTT | 4637 MI_SEMAPHORE_POLL | 4638 MI_SEMAPHORE_SAD_NEQ_SDD; 4639 *cs++ = 0; 4640 *cs++ = offset; 4641 *cs++ = 0; 4642 4643 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT; 4644 *cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(rq->engine->mmio_base)); 4645 *cs++ = offset + idx * sizeof(u32); 4646 *cs++ = 0; 4647 4648 intel_ring_advance(rq, cs); 4649 4650 rq->sched.attr.priority = I915_PRIORITY_MASK; 4651 err = 0; 4652 err: 4653 i915_request_get(rq); 4654 i915_request_add(rq); 4655 if (err) { 4656 i915_request_put(rq); 4657 return ERR_PTR(err); 4658 } 4659 4660 return rq; 4661 } 4662 4663 struct lrc_timestamp { 4664 struct intel_engine_cs *engine; 4665 struct intel_context *ce[2]; 4666 u32 poison; 4667 }; 4668 4669 static bool timestamp_advanced(u32 start, u32 end) 4670 { 4671 return (s32)(end - start) > 0; 4672 } 4673 4674 static int __lrc_timestamp(const struct lrc_timestamp *arg, bool preempt) 4675 { 4676 u32 *slot = memset32(arg->engine->status_page.addr + 1000, 0, 4); 4677 struct i915_request *rq; 4678 u32 timestamp; 4679 int err = 0; 4680 4681 arg->ce[0]->lrc_reg_state[CTX_TIMESTAMP] = arg->poison; 4682 rq = create_timestamp(arg->ce[0], slot, 1); 4683 if (IS_ERR(rq)) 4684 return PTR_ERR(rq); 4685 4686 err = wait_for_submit(rq->engine, rq, HZ / 2); 4687 if (err) 4688 goto err; 4689 4690 if (preempt) { 4691 arg->ce[1]->lrc_reg_state[CTX_TIMESTAMP] = 0xdeadbeef; 4692 err = emit_semaphore_signal(arg->ce[1], slot); 4693 if (err) 4694 goto err; 4695 } else { 4696 slot[0] = 1; 4697 wmb(); 4698 } 4699 4700 /* And wait for switch to kernel (to save our context to memory) */ 4701 err = context_flush(arg->ce[0], HZ / 2); 4702 if (err) 4703 goto err; 4704 4705 if (!timestamp_advanced(arg->poison, slot[1])) { 4706 pr_err("%s(%s): invalid timestamp on restore, context:%x, request:%x\n", 4707 arg->engine->name, preempt ? "preempt" : "simple", 4708 arg->poison, slot[1]); 4709 err = -EINVAL; 4710 } 4711 4712 timestamp = READ_ONCE(arg->ce[0]->lrc_reg_state[CTX_TIMESTAMP]); 4713 if (!timestamp_advanced(slot[1], timestamp)) { 4714 pr_err("%s(%s): invalid timestamp on save, request:%x, context:%x\n", 4715 arg->engine->name, preempt ? 
"preempt" : "simple", 4716 slot[1], timestamp); 4717 err = -EINVAL; 4718 } 4719 4720 err: 4721 memset32(slot, -1, 4); 4722 i915_request_put(rq); 4723 return err; 4724 } 4725 4726 static int live_lrc_timestamp(void *arg) 4727 { 4728 struct lrc_timestamp data = {}; 4729 struct intel_gt *gt = arg; 4730 enum intel_engine_id id; 4731 const u32 poison[] = { 4732 0, 4733 S32_MAX, 4734 (u32)S32_MAX + 1, 4735 U32_MAX, 4736 }; 4737 4738 /* 4739 * We want to verify that the timestamp is saved and restore across 4740 * context switches and is monotonic. 4741 * 4742 * So we do this with a little bit of LRC poisoning to check various 4743 * boundary conditions, and see what happens if we preempt the context 4744 * with a second request (carrying more poison into the timestamp). 4745 */ 4746 4747 for_each_engine(data.engine, gt, id) { 4748 unsigned long heartbeat; 4749 int i, err = 0; 4750 4751 engine_heartbeat_disable(data.engine, &heartbeat); 4752 4753 for (i = 0; i < ARRAY_SIZE(data.ce); i++) { 4754 struct intel_context *tmp; 4755 4756 tmp = intel_context_create(data.engine); 4757 if (IS_ERR(tmp)) { 4758 err = PTR_ERR(tmp); 4759 goto err; 4760 } 4761 4762 err = intel_context_pin(tmp); 4763 if (err) { 4764 intel_context_put(tmp); 4765 goto err; 4766 } 4767 4768 data.ce[i] = tmp; 4769 } 4770 4771 for (i = 0; i < ARRAY_SIZE(poison); i++) { 4772 data.poison = poison[i]; 4773 4774 err = __lrc_timestamp(&data, false); 4775 if (err) 4776 break; 4777 4778 err = __lrc_timestamp(&data, true); 4779 if (err) 4780 break; 4781 } 4782 4783 err: 4784 engine_heartbeat_enable(data.engine, heartbeat); 4785 for (i = 0; i < ARRAY_SIZE(data.ce); i++) { 4786 if (!data.ce[i]) 4787 break; 4788 4789 intel_context_unpin(data.ce[i]); 4790 intel_context_put(data.ce[i]); 4791 } 4792 4793 if (igt_flush_test(gt->i915)) 4794 err = -EIO; 4795 if (err) 4796 return err; 4797 } 4798 4799 return 0; 4800 } 4801 4802 static struct i915_vma * 4803 create_user_vma(struct i915_address_space *vm, unsigned long size) 4804 { 4805 struct drm_i915_gem_object *obj; 4806 struct i915_vma *vma; 4807 int err; 4808 4809 obj = i915_gem_object_create_internal(vm->i915, size); 4810 if (IS_ERR(obj)) 4811 return ERR_CAST(obj); 4812 4813 vma = i915_vma_instance(obj, vm, NULL); 4814 if (IS_ERR(vma)) { 4815 i915_gem_object_put(obj); 4816 return vma; 4817 } 4818 4819 err = i915_vma_pin(vma, 0, 0, PIN_USER); 4820 if (err) { 4821 i915_gem_object_put(obj); 4822 return ERR_PTR(err); 4823 } 4824 4825 return vma; 4826 } 4827 4828 static struct i915_vma * 4829 store_context(struct intel_context *ce, struct i915_vma *scratch) 4830 { 4831 struct i915_vma *batch; 4832 u32 dw, x, *cs, *hw; 4833 4834 batch = create_user_vma(ce->vm, SZ_64K); 4835 if (IS_ERR(batch)) 4836 return batch; 4837 4838 cs = i915_gem_object_pin_map(batch->obj, I915_MAP_WC); 4839 if (IS_ERR(cs)) { 4840 i915_vma_put(batch); 4841 return ERR_CAST(cs); 4842 } 4843 4844 x = 0; 4845 dw = 0; 4846 hw = ce->engine->pinned_default_state; 4847 hw += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw); 4848 do { 4849 u32 len = hw[dw] & 0x7f; 4850 4851 if (hw[dw] == 0) { 4852 dw++; 4853 continue; 4854 } 4855 4856 if ((hw[dw] & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) { 4857 dw += len + 2; 4858 continue; 4859 } 4860 4861 dw++; 4862 len = (len + 1) / 2; 4863 while (len--) { 4864 *cs++ = MI_STORE_REGISTER_MEM_GEN8; 4865 *cs++ = hw[dw]; 4866 *cs++ = lower_32_bits(scratch->node.start + x); 4867 *cs++ = upper_32_bits(scratch->node.start + x); 4868 4869 dw += 2; 4870 x += 4; 4871 } 4872 } while (dw < PAGE_SIZE / sizeof(u32) && 4873 
static struct i915_vma *
store_context(struct intel_context *ce, struct i915_vma *scratch)
{
	struct i915_vma *batch;
	u32 dw, x, *cs, *hw;

	batch = create_user_vma(ce->vm, SZ_64K);
	if (IS_ERR(batch))
		return batch;

	cs = i915_gem_object_pin_map(batch->obj, I915_MAP_WC);
	if (IS_ERR(cs)) {
		i915_vma_put(batch);
		return ERR_CAST(cs);
	}

	x = 0;
	dw = 0;
	hw = ce->engine->pinned_default_state;
	hw += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw);
	do {
		u32 len = hw[dw] & 0x7f;

		if (hw[dw] == 0) {
			dw++;
			continue;
		}

		if ((hw[dw] & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) {
			dw += len + 2;
			continue;
		}

		dw++;
		len = (len + 1) / 2;
		while (len--) {
			*cs++ = MI_STORE_REGISTER_MEM_GEN8;
			*cs++ = hw[dw];
			*cs++ = lower_32_bits(scratch->node.start + x);
			*cs++ = upper_32_bits(scratch->node.start + x);

			dw += 2;
			x += 4;
		}
	} while (dw < PAGE_SIZE / sizeof(u32) &&
		 (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);

	*cs++ = MI_BATCH_BUFFER_END;

	i915_gem_object_flush_map(batch->obj);
	i915_gem_object_unpin_map(batch->obj);

	return batch;
}

static int move_to_active(struct i915_request *rq,
			  struct i915_vma *vma,
			  unsigned int flags)
{
	int err;

	i915_vma_lock(vma);
	err = i915_request_await_object(rq, vma->obj, flags);
	if (!err)
		err = i915_vma_move_to_active(vma, rq, flags);
	i915_vma_unlock(vma);

	return err;
}

static struct i915_request *
record_registers(struct intel_context *ce,
		 struct i915_vma *before,
		 struct i915_vma *after,
		 u32 *sema)
{
	struct i915_vma *b_before, *b_after;
	struct i915_request *rq;
	u32 *cs;
	int err;

	b_before = store_context(ce, before);
	if (IS_ERR(b_before))
		return ERR_CAST(b_before);

	b_after = store_context(ce, after);
	if (IS_ERR(b_after)) {
		rq = ERR_CAST(b_after);
		goto err_before;
	}

	rq = intel_context_create_request(ce);
	if (IS_ERR(rq))
		goto err_after;

	err = move_to_active(rq, before, EXEC_OBJECT_WRITE);
	if (err)
		goto err_rq;

	err = move_to_active(rq, b_before, 0);
	if (err)
		goto err_rq;

	err = move_to_active(rq, after, EXEC_OBJECT_WRITE);
	if (err)
		goto err_rq;

	err = move_to_active(rq, b_after, 0);
	if (err)
		goto err_rq;

	cs = intel_ring_begin(rq, 14);
	if (IS_ERR(cs)) {
		err = PTR_ERR(cs);
		goto err_rq;
	}

	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
	*cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8);
	*cs++ = lower_32_bits(b_before->node.start);
	*cs++ = upper_32_bits(b_before->node.start);

	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
	*cs++ = MI_SEMAPHORE_WAIT |
		MI_SEMAPHORE_GLOBAL_GTT |
		MI_SEMAPHORE_POLL |
		MI_SEMAPHORE_SAD_NEQ_SDD;
	*cs++ = 0;
	*cs++ = i915_ggtt_offset(ce->engine->status_page.vma) +
		offset_in_page(sema);
	*cs++ = 0;
	*cs++ = MI_NOOP;

	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
	*cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8);
	*cs++ = lower_32_bits(b_after->node.start);
	*cs++ = upper_32_bits(b_after->node.start);

	intel_ring_advance(rq, cs);

	WRITE_ONCE(*sema, 0);
	i915_request_get(rq);
	i915_request_add(rq);
err_after:
	i915_vma_put(b_after);
err_before:
	i915_vma_put(b_before);
	return rq;

err_rq:
	i915_request_add(rq);
	rq = ERR_PTR(err);
	goto err_after;
}
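
/*
 * The write-side counterpart to store_context(): walk the same LRI packets
 * in the default context image, but emit MI_LOAD_REGISTER_IMM with the
 * poison value for every register in the list. Executed from a second,
 * unprivileged context, this is our attempt to overwrite the first
 * context's register state.
 */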
static struct i915_vma *load_context(struct intel_context *ce, u32 poison)
{
	struct i915_vma *batch;
	u32 dw, *cs, *hw;

	batch = create_user_vma(ce->vm, SZ_64K);
	if (IS_ERR(batch))
		return batch;

	cs = i915_gem_object_pin_map(batch->obj, I915_MAP_WC);
	if (IS_ERR(cs)) {
		i915_vma_put(batch);
		return ERR_CAST(cs);
	}

	dw = 0;
	hw = ce->engine->pinned_default_state;
	hw += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw);
	do {
		u32 len = hw[dw] & 0x7f;

		if (hw[dw] == 0) {
			dw++;
			continue;
		}

		if ((hw[dw] & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) {
			dw += len + 2;
			continue;
		}

		dw++;
		len = (len + 1) / 2;
		*cs++ = MI_LOAD_REGISTER_IMM(len);
		while (len--) {
			*cs++ = hw[dw];
			*cs++ = poison;
			dw += 2;
		}
	} while (dw < PAGE_SIZE / sizeof(u32) &&
		 (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);

	*cs++ = MI_BATCH_BUFFER_END;

	i915_gem_object_flush_map(batch->obj);
	i915_gem_object_unpin_map(batch->obj);

	return batch;
}

static int poison_registers(struct intel_context *ce, u32 poison, u32 *sema)
{
	struct i915_request *rq;
	struct i915_vma *batch;
	u32 *cs;
	int err;

	batch = load_context(ce, poison);
	if (IS_ERR(batch))
		return PTR_ERR(batch);

	rq = intel_context_create_request(ce);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_batch;
	}

	err = move_to_active(rq, batch, 0);
	if (err)
		goto err_rq;

	cs = intel_ring_begin(rq, 8);
	if (IS_ERR(cs)) {
		err = PTR_ERR(cs);
		goto err_rq;
	}

	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
	*cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8);
	*cs++ = lower_32_bits(batch->node.start);
	*cs++ = upper_32_bits(batch->node.start);

	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
	*cs++ = i915_ggtt_offset(ce->engine->status_page.vma) +
		offset_in_page(sema);
	*cs++ = 0;
	*cs++ = 1;

	intel_ring_advance(rq, cs);

	rq->sched.attr.priority = I915_PRIORITY_BARRIER;
err_rq:
	i915_request_add(rq);
err_batch:
	i915_vma_put(batch);
	return err;
}

static bool is_moving(u32 a, u32 b)
{
	return a != b;
}
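
/*
 * Compare the two register dumps. ref[0]/ref[1] are the before/after
 * snapshots of an undisturbed run, result[0]/result[1] the snapshots of a
 * run that was poisoned in between. A register is only reported if it was
 * stable in the reference run (!is_moving()) yet differs between the two
 * runs; RING_HEAD and RING_TAIL are expected to change and are ignored.
 * The value held in the saved context image (lrc[]) is printed alongside
 * any mismatch to aid debugging.
 */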
static int compare_isolation(struct intel_engine_cs *engine,
			     struct i915_vma *ref[2],
			     struct i915_vma *result[2],
			     struct intel_context *ce,
			     u32 poison)
{
	u32 x, dw, *hw, *lrc;
	u32 *A[2], *B[2];
	int err = 0;

	A[0] = i915_gem_object_pin_map(ref[0]->obj, I915_MAP_WC);
	if (IS_ERR(A[0]))
		return PTR_ERR(A[0]);

	A[1] = i915_gem_object_pin_map(ref[1]->obj, I915_MAP_WC);
	if (IS_ERR(A[1])) {
		err = PTR_ERR(A[1]);
		goto err_A0;
	}

	B[0] = i915_gem_object_pin_map(result[0]->obj, I915_MAP_WC);
	if (IS_ERR(B[0])) {
		err = PTR_ERR(B[0]);
		goto err_A1;
	}

	B[1] = i915_gem_object_pin_map(result[1]->obj, I915_MAP_WC);
	if (IS_ERR(B[1])) {
		err = PTR_ERR(B[1]);
		goto err_B0;
	}

	lrc = i915_gem_object_pin_map(ce->state->obj,
				      i915_coherent_map_type(engine->i915));
	if (IS_ERR(lrc)) {
		err = PTR_ERR(lrc);
		goto err_B1;
	}
	lrc += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw);

	x = 0;
	dw = 0;
	hw = engine->pinned_default_state;
	hw += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw);
	do {
		u32 len = hw[dw] & 0x7f;

		if (hw[dw] == 0) {
			dw++;
			continue;
		}

		if ((hw[dw] & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) {
			dw += len + 2;
			continue;
		}

		dw++;
		len = (len + 1) / 2;
		while (len--) {
			if (!is_moving(A[0][x], A[1][x]) &&
			    (A[0][x] != B[0][x] || A[1][x] != B[1][x])) {
				switch (hw[dw] & 4095) {
				case 0x30: /* RING_HEAD */
				case 0x34: /* RING_TAIL */
					break;

				default:
					pr_err("%s[%d]: Mismatch for register %4x, default %08x, reference %08x, result (%08x, %08x), poison %08x, context %08x\n",
					       engine->name, dw,
					       hw[dw], hw[dw + 1],
					       A[0][x], B[0][x], B[1][x],
					       poison, lrc[dw + 1]);
					err = -EINVAL;
					break;
				}
			}
			dw += 2;
			x++;
		}
	} while (dw < PAGE_SIZE / sizeof(u32) &&
		 (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);

	i915_gem_object_unpin_map(ce->state->obj);
err_B1:
	i915_gem_object_unpin_map(result[1]->obj);
err_B0:
	i915_gem_object_unpin_map(result[0]->obj);
err_A1:
	i915_gem_object_unpin_map(ref[1]->obj);
err_A0:
	i915_gem_object_unpin_map(ref[0]->obj);
	return err;
}

static int __lrc_isolation(struct intel_engine_cs *engine, u32 poison)
{
	u32 *sema = memset32(engine->status_page.addr + 1000, 0, 1);
	struct i915_vma *ref[2], *result[2];
	struct intel_context *A, *B;
	struct i915_request *rq;
	int err;

	A = intel_context_create(engine);
	if (IS_ERR(A))
		return PTR_ERR(A);

	B = intel_context_create(engine);
	if (IS_ERR(B)) {
		err = PTR_ERR(B);
		goto err_A;
	}

	ref[0] = create_user_vma(A->vm, SZ_64K);
	if (IS_ERR(ref[0])) {
		err = PTR_ERR(ref[0]);
		goto err_B;
	}

	ref[1] = create_user_vma(A->vm, SZ_64K);
	if (IS_ERR(ref[1])) {
		err = PTR_ERR(ref[1]);
		goto err_ref0;
	}

	rq = record_registers(A, ref[0], ref[1], sema);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_ref1;
	}

	WRITE_ONCE(*sema, 1);
	wmb();

	if (i915_request_wait(rq, 0, HZ / 2) < 0) {
		i915_request_put(rq);
		err = -ETIME;
		goto err_ref1;
	}
	i915_request_put(rq);

	result[0] = create_user_vma(A->vm, SZ_64K);
	if (IS_ERR(result[0])) {
		err = PTR_ERR(result[0]);
		goto err_ref1;
	}

	result[1] = create_user_vma(A->vm, SZ_64K);
	if (IS_ERR(result[1])) {
		err = PTR_ERR(result[1]);
		goto err_result0;
	}

	rq = record_registers(A, result[0], result[1], sema);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_result1;
	}

	err = poison_registers(B, poison, sema);
	if (err) {
		WRITE_ONCE(*sema, -1);
		i915_request_put(rq);
		goto err_result1;
	}

	if (i915_request_wait(rq, 0, HZ / 2) < 0) {
		i915_request_put(rq);
		err = -ETIME;
		goto err_result1;
	}
	i915_request_put(rq);

	err = compare_isolation(engine, ref, result, A, poison);

err_result1:
	i915_vma_put(result[1]);
err_result0:
	i915_vma_put(result[0]);
err_ref1:
	i915_vma_put(ref[1]);
err_ref0:
	i915_vma_put(ref[0]);
err_B:
	intel_context_put(B);
err_A:
	intel_context_put(A);
	return err;
}
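
/*
 * A couple of engine/gen combinations are known to fail the isolation
 * check, presumably because their default context image does not describe
 * every register that is actually saved and restored; skip them unless
 * CONFIG_DRM_I915_SELFTEST_BROKEN is enabled.
 */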
static bool skip_isolation(const struct intel_engine_cs *engine)
{
	if (engine->class == COPY_ENGINE_CLASS && INTEL_GEN(engine->i915) == 9)
		return true;

	if (engine->class == RENDER_CLASS && INTEL_GEN(engine->i915) == 11)
		return true;

	return false;
}

static int live_lrc_isolation(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	const u32 poison[] = {
		STACK_MAGIC,
		0x3a3a3a3a,
		0x5c5c5c5c,
		0xffffffff,
		0xffff0000,
	};

	/*
	 * Our goal is to try and verify that per-context state cannot be
	 * tampered with by another non-privileged client.
	 *
	 * We take the list of context registers from the LRI in the default
	 * context image and attempt to modify that list from a remote context.
	 */

	for_each_engine(engine, gt, id) {
		int err = 0;
		int i;

		/* Just don't even ask */
		if (!IS_ENABLED(CONFIG_DRM_I915_SELFTEST_BROKEN) &&
		    skip_isolation(engine))
			continue;

		intel_engine_pm_get(engine);
		if (engine->pinned_default_state) {
			for (i = 0; i < ARRAY_SIZE(poison); i++) {
				err = __lrc_isolation(engine, poison[i]);
				if (err)
					break;

				err = __lrc_isolation(engine, ~poison[i]);
				if (err)
					break;
			}
		}
		intel_engine_pm_put(engine);
		if (igt_flush_test(gt->i915))
			err = -EIO;
		if (err)
			return err;
	}

	return 0;
}

static void garbage_reset(struct intel_engine_cs *engine,
			  struct i915_request *rq)
{
	const unsigned int bit = I915_RESET_ENGINE + engine->id;
	unsigned long *lock = &engine->gt->reset.flags;

	if (test_and_set_bit(bit, lock))
		return;

	tasklet_disable(&engine->execlists.tasklet);

	if (!rq->fence.error)
		intel_engine_reset(engine, NULL);

	tasklet_enable(&engine->execlists.tasklet);
	clear_and_wake_up_bit(bit, lock);
}

static struct i915_request *garbage(struct intel_context *ce,
				    struct rnd_state *prng)
{
	struct i915_request *rq;
	int err;

	err = intel_context_pin(ce);
	if (err)
		return ERR_PTR(err);

	prandom_bytes_state(prng,
			    ce->lrc_reg_state,
			    ce->engine->context_size -
			    LRC_STATE_PN * PAGE_SIZE);

	rq = intel_context_create_request(ce);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_unpin;
	}

	i915_request_get(rq);
	i915_request_add(rq);
	return rq;

err_unpin:
	intel_context_unpin(ce);
	return ERR_PTR(err);
}
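
/*
 * Submit a request on a context whose register state has been replaced
 * with random garbage, then ban the context and reset the engine. We
 * expect the hanging request to be flagged with a fence error and to
 * still signal completion: a corrupted context image must not take the
 * rest of the GPU down with it.
 */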
static int __lrc_garbage(struct intel_engine_cs *engine, struct rnd_state *prng)
{
	struct intel_context *ce;
	struct i915_request *hang;
	int err = 0;

	ce = intel_context_create(engine);
	if (IS_ERR(ce))
		return PTR_ERR(ce);

	hang = garbage(ce, prng);
	if (IS_ERR(hang)) {
		err = PTR_ERR(hang);
		goto err_ce;
	}

	if (wait_for_submit(engine, hang, HZ / 2)) {
		i915_request_put(hang);
		err = -ETIME;
		goto err_ce;
	}

	intel_context_set_banned(ce);
	garbage_reset(engine, hang);

	intel_engine_flush_submission(engine);
	if (!hang->fence.error) {
		i915_request_put(hang);
		pr_err("%s: corrupted context was not reset\n",
		       engine->name);
		err = -EINVAL;
		goto err_ce;
	}

	if (i915_request_wait(hang, 0, HZ / 2) < 0) {
		pr_err("%s: corrupted context did not recover\n",
		       engine->name);
		i915_request_put(hang);
		err = -EIO;
		goto err_ce;
	}
	i915_request_put(hang);

err_ce:
	intel_context_put(ce);
	return err;
}

static int live_lrc_garbage(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	/*
	 * Verify that we can recover if one context state is completely
	 * corrupted.
	 */

	if (!IS_ENABLED(CONFIG_DRM_I915_SELFTEST_BROKEN))
		return 0;

	for_each_engine(engine, gt, id) {
		I915_RND_STATE(prng);
		int err = 0, i;

		if (!intel_has_reset_engine(engine->gt))
			continue;

		intel_engine_pm_get(engine);
		for (i = 0; i < 3; i++) {
			err = __lrc_garbage(engine, &prng);
			if (err)
				break;
		}
		intel_engine_pm_put(engine);

		if (igt_flush_test(gt->i915))
			err = -EIO;
		if (err)
			return err;
	}

	return 0;
}

static int __live_pphwsp_runtime(struct intel_engine_cs *engine)
{
	struct intel_context *ce;
	struct i915_request *rq;
	IGT_TIMEOUT(end_time);
	int err;

	ce = intel_context_create(engine);
	if (IS_ERR(ce))
		return PTR_ERR(ce);

	ce->runtime.num_underflow = 0;
	ce->runtime.max_underflow = 0;

	do {
		unsigned int loop = 1024;

		while (loop) {
			rq = intel_context_create_request(ce);
			if (IS_ERR(rq)) {
				err = PTR_ERR(rq);
				goto err_rq;
			}

			if (--loop == 0)
				i915_request_get(rq);

			i915_request_add(rq);
		}

		if (__igt_timeout(end_time, NULL))
			break;

		i915_request_put(rq);
	} while (1);

	err = i915_request_wait(rq, 0, HZ / 5);
	if (err < 0) {
		pr_err("%s: request not completed!\n", engine->name);
		goto err_wait;
	}

	igt_flush_test(engine->i915);

	pr_info("%s: pphwsp runtime %lluns, average %lluns\n",
		engine->name,
		intel_context_get_total_runtime_ns(ce),
		intel_context_get_avg_runtime_ns(ce));

	err = 0;
	if (ce->runtime.num_underflow) {
		pr_err("%s: pphwsp underflow %u time(s), max %u cycles!\n",
		       engine->name,
		       ce->runtime.num_underflow,
		       ce->runtime.max_underflow);
		GEM_TRACE_DUMP();
		err = -EOVERFLOW;
	}

err_wait:
	i915_request_put(rq);
err_rq:
	intel_context_put(ce);
	return err;
}

static int live_pphwsp_runtime(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * Check that the cumulative context runtime, as stored in the
	 * pphwsp[16], is monotonic.
	 */

	for_each_engine(engine, gt, id) {
		err = __live_pphwsp_runtime(engine);
		if (err)
			break;
	}

	if (igt_flush_test(gt->i915))
		err = -EIO;

	return err;
}

int intel_lrc_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(live_lrc_layout),
		SUBTEST(live_lrc_fixed),
		SUBTEST(live_lrc_state),
		SUBTEST(live_lrc_gpr),
		SUBTEST(live_lrc_isolation),
		SUBTEST(live_lrc_timestamp),
		SUBTEST(live_lrc_garbage),
		SUBTEST(live_pphwsp_runtime),
	};

	if (!HAS_LOGICAL_RING_CONTEXTS(i915))
		return 0;

	return intel_gt_live_subtests(tests, &i915->gt);
}