1 // SPDX-License-Identifier: MIT
2 /*
3 * Copyright © 2018 Intel Corporation
4 */
5
6 #include <linux/prime_numbers.h>
7
8 #include "gem/i915_gem_pm.h"
9 #include "gt/intel_engine_heartbeat.h"
10 #include "gt/intel_reset.h"
11 #include "gt/selftest_engine_heartbeat.h"
12
13 #include "i915_selftest.h"
14 #include "selftests/i915_random.h"
15 #include "selftests/igt_flush_test.h"
16 #include "selftests/igt_live_test.h"
17 #include "selftests/igt_spinner.h"
18 #include "selftests/lib_sw_fence.h"
19
20 #include "gem/selftests/igt_gem_utils.h"
21 #include "gem/selftests/mock_context.h"
22
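/*
 * Note: the command streamer exposes 16 general purpose registers (GPR0-15)
 * starting at offset 0x600 from the engine's mmio_base; each GPR is 64 bits
 * wide, i.e. two dwords, hence NUM_GPR_DW below.
 */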
23 #define CS_GPR(engine, n) ((engine)->mmio_base + 0x600 + (n) * 4)
24 #define NUM_GPR 16
25 #define NUM_GPR_DW (NUM_GPR * 2) /* each GPR is 2 dwords */
26
27 static bool is_active(struct i915_request *rq)
28 {
29 if (i915_request_is_active(rq))
30 return true;
31
32 if (i915_request_on_hold(rq))
33 return true;
34
35 if (i915_request_has_initial_breadcrumb(rq) && i915_request_started(rq))
36 return true;
37
38 return false;
39 }
40
41 static int wait_for_submit(struct intel_engine_cs *engine,
42 struct i915_request *rq,
43 unsigned long timeout)
44 {
45 /* Ignore our own attempts to suppress excess tasklets */
46 tasklet_hi_schedule(&engine->execlists.tasklet);
47
48 timeout += jiffies;
49 do {
50 bool done = time_after(jiffies, timeout);
51
52 if (i915_request_completed(rq)) /* that was quick! */
53 return 0;
54
55 /* Wait until the HW has acknowledged the submission (or err) */
56 intel_engine_flush_submission(engine);
57 if (!READ_ONCE(engine->execlists.pending[0]) && is_active(rq))
58 return 0;
59
60 if (done)
61 return -ETIME;
62
63 cond_resched();
64 } while (1);
65 }
66
67 static int wait_for_reset(struct intel_engine_cs *engine,
68 struct i915_request *rq,
69 unsigned long timeout)
70 {
71 timeout += jiffies;
72
73 do {
74 cond_resched();
75 intel_engine_flush_submission(engine);
76
77 if (READ_ONCE(engine->execlists.pending[0]))
78 continue;
79
80 if (i915_request_completed(rq))
81 break;
82
83 if (READ_ONCE(rq->fence.error))
84 break;
85 } while (time_before(jiffies, timeout));
86
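/*
 * The reset of the hung request may be completed from a scheduled worker
 * (e.g. following the heartbeat pulse), so flush pending work before
 * inspecting the fence error.
 */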
87 flush_scheduled_work();
88
89 if (rq->fence.error != -EIO) {
90 pr_err("%s: hanging request %llx:%lld not reset\n",
91 engine->name,
92 rq->fence.context,
93 rq->fence.seqno);
94 return -EINVAL;
95 }
96
97 /* Give the request a jiffie to complete after flushing the worker */
98 if (i915_request_wait(rq, 0,
99 max(0l, (long)(timeout - jiffies)) + 1) < 0) {
100 pr_err("%s: hanging request %llx:%lld did not complete\n",
101 engine->name,
102 rq->fence.context,
103 rq->fence.seqno);
104 return -ETIME;
105 }
106
107 return 0;
108 }
109
110 static int live_sanitycheck(void *arg)
111 {
112 struct intel_gt *gt = arg;
113 struct intel_engine_cs *engine;
114 enum intel_engine_id id;
115 struct igt_spinner spin;
116 int err = 0;
117
118 if (!HAS_LOGICAL_RING_CONTEXTS(gt->i915))
119 return 0;
120
121 if (igt_spinner_init(&spin, gt))
122 return -ENOMEM;
123
124 for_each_engine(engine, gt, id) {
125 struct intel_context *ce;
126 struct i915_request *rq;
127
128 ce = intel_context_create(engine);
129 if (IS_ERR(ce)) {
130 err = PTR_ERR(ce);
131 break;
132 }
133
134 rq = igt_spinner_create_request(&spin, ce, MI_NOOP);
135 if (IS_ERR(rq)) {
136 err = PTR_ERR(rq);
137 goto out_ctx;
138 }
139
140 i915_request_add(rq);
141 if (!igt_wait_for_spinner(&spin, rq)) {
142 GEM_TRACE("spinner failed to start\n");
143 GEM_TRACE_DUMP();
144 intel_gt_set_wedged(gt);
145 err = -EIO;
146 goto out_ctx;
147 }
148
149 igt_spinner_end(&spin);
150 if (igt_flush_test(gt->i915)) {
151 err = -EIO;
152 goto out_ctx;
153 }
154
155 out_ctx:
156 intel_context_put(ce);
157 if (err)
158 break;
159 }
160
161 igt_spinner_fini(&spin);
162 return err;
163 }
164
165 static int live_unlite_restore(struct intel_gt *gt, int prio)
166 {
167 struct intel_engine_cs *engine;
168 enum intel_engine_id id;
169 struct igt_spinner spin;
170 int err = -ENOMEM;
171
172 /*
173 * Check that we can correctly context switch between 2 instances
174 * on the same engine from the same parent context.
175 */
176
177 if (igt_spinner_init(&spin, gt))
178 return err;
179
180 err = 0;
181 for_each_engine(engine, gt, id) {
182 struct intel_context *ce[2] = {};
183 struct i915_request *rq[2];
184 struct igt_live_test t;
185 int n;
186
187 if (prio && !intel_engine_has_preemption(engine))
188 continue;
189
190 if (!intel_engine_can_store_dword(engine))
191 continue;
192
193 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
194 err = -EIO;
195 break;
196 }
197 st_engine_heartbeat_disable(engine);
198
199 for (n = 0; n < ARRAY_SIZE(ce); n++) {
200 struct intel_context *tmp;
201
202 tmp = intel_context_create(engine);
203 if (IS_ERR(tmp)) {
204 err = PTR_ERR(tmp);
205 goto err_ce;
206 }
207
208 err = intel_context_pin(tmp);
209 if (err) {
210 intel_context_put(tmp);
211 goto err_ce;
212 }
213
214 /*
215 * Setup the pair of contexts such that if we
216 * lite-restore using the RING_TAIL from ce[1] it
217 * will execute garbage from ce[0]->ring.
218 */
219 memset(tmp->ring->vaddr,
220 POISON_INUSE, /* IPEHR: 0x5a5a5a5a [hung!] */
221 tmp->ring->vma->size);
222
223 ce[n] = tmp;
224 }
225 GEM_BUG_ON(!ce[1]->ring->size);
226 intel_ring_reset(ce[1]->ring, ce[1]->ring->size / 2);
227 lrc_update_regs(ce[1], engine, ce[1]->ring->head);
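/*
 * With ce[1]'s registers advanced to the half-way point, mistakenly
 * lite-restoring ce[0] with ce[1]'s RING_TAIL would execute the
 * POISON_INUSE garbage left in ce[0]->ring above.
 */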
228
229 rq[0] = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK);
230 if (IS_ERR(rq[0])) {
231 err = PTR_ERR(rq[0]);
232 goto err_ce;
233 }
234
235 i915_request_get(rq[0]);
236 i915_request_add(rq[0]);
237 GEM_BUG_ON(rq[0]->postfix > ce[1]->ring->emit);
238
239 if (!igt_wait_for_spinner(&spin, rq[0])) {
240 i915_request_put(rq[0]);
241 goto err_ce;
242 }
243
244 rq[1] = i915_request_create(ce[1]);
245 if (IS_ERR(rq[1])) {
246 err = PTR_ERR(rq[1]);
247 i915_request_put(rq[0]);
248 goto err_ce;
249 }
250
251 if (!prio) {
252 /*
253 * Ensure we do the switch to ce[1] on completion.
254 *
255 * rq[0] is already submitted, so this should reduce
256 * to a no-op (a wait on a request on the same engine
257 * uses the submit fence, not the completion fence),
258 * but it will install a dependency on rq[1] for rq[0]
259 * that will prevent the pair being reordered by
260 * timeslicing.
261 */
262 i915_request_await_dma_fence(rq[1], &rq[0]->fence);
263 }
264
265 i915_request_get(rq[1]);
266 i915_request_add(rq[1]);
267 GEM_BUG_ON(rq[1]->postfix <= rq[0]->postfix);
268 i915_request_put(rq[0]);
269
270 if (prio) {
271 struct i915_sched_attr attr = {
272 .priority = prio,
273 };
274
275 /* Alternatively preempt the spinner with ce[1] */
276 engine->schedule(rq[1], &attr);
277 }
278
279 /* And switch back to ce[0] for good measure */
280 rq[0] = i915_request_create(ce[0]);
281 if (IS_ERR(rq[0])) {
282 err = PTR_ERR(rq[0]);
283 i915_request_put(rq[1]);
284 goto err_ce;
285 }
286
287 i915_request_await_dma_fence(rq[0], &rq[1]->fence);
288 i915_request_get(rq[0]);
289 i915_request_add(rq[0]);
290 GEM_BUG_ON(rq[0]->postfix > rq[1]->postfix);
291 i915_request_put(rq[1]);
292 i915_request_put(rq[0]);
293
294 err_ce:
295 intel_engine_flush_submission(engine);
296 igt_spinner_end(&spin);
297 for (n = 0; n < ARRAY_SIZE(ce); n++) {
298 if (IS_ERR_OR_NULL(ce[n]))
299 break;
300
301 intel_context_unpin(ce[n]);
302 intel_context_put(ce[n]);
303 }
304
305 st_engine_heartbeat_enable(engine);
306 if (igt_live_test_end(&t))
307 err = -EIO;
308 if (err)
309 break;
310 }
311
312 igt_spinner_fini(&spin);
313 return err;
314 }
315
316 static int live_unlite_switch(void *arg)
317 {
318 return live_unlite_restore(arg, 0);
319 }
320
321 static int live_unlite_preempt(void *arg)
322 {
323 return live_unlite_restore(arg, I915_PRIORITY_MAX);
324 }
325
326 static int live_unlite_ring(void *arg)
327 {
328 struct intel_gt *gt = arg;
329 struct intel_engine_cs *engine;
330 struct igt_spinner spin;
331 enum intel_engine_id id;
332 int err = 0;
333
334 /*
335 * Set up a preemption event that will cause almost the entire ring
336 * to be unwound, potentially fooling our intel_ring_direction()
337 * into emitting a forward lite-restore instead of the rollback.
338 */
339
340 if (igt_spinner_init(&spin, gt))
341 return -ENOMEM;
342
343 for_each_engine(engine, gt, id) {
344 struct intel_context *ce[2] = {};
345 struct i915_request *rq;
346 struct igt_live_test t;
347 int n;
348
349 if (!intel_engine_has_preemption(engine))
350 continue;
351
352 if (!intel_engine_can_store_dword(engine))
353 continue;
354
355 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
356 err = -EIO;
357 break;
358 }
359 st_engine_heartbeat_disable(engine);
360
361 for (n = 0; n < ARRAY_SIZE(ce); n++) {
362 struct intel_context *tmp;
363
364 tmp = intel_context_create(engine);
365 if (IS_ERR(tmp)) {
366 err = PTR_ERR(tmp);
367 goto err_ce;
368 }
369
370 err = intel_context_pin(tmp);
371 if (err) {
372 intel_context_put(tmp);
373 goto err_ce;
374 }
375
376 memset32(tmp->ring->vaddr,
377 0xdeadbeef, /* trigger a hang if executed */
378 tmp->ring->vma->size / sizeof(u32));
379
380 ce[n] = tmp;
381 }
382
383 /* Create max prio spinner, followed by N low prio nops */
384 rq = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK);
385 if (IS_ERR(rq)) {
386 err = PTR_ERR(rq);
387 goto err_ce;
388 }
389
390 i915_request_get(rq);
391 rq->sched.attr.priority = I915_PRIORITY_BARRIER;
392 i915_request_add(rq);
393
394 if (!igt_wait_for_spinner(&spin, rq)) {
395 intel_gt_set_wedged(gt);
396 i915_request_put(rq);
397 err = -ETIME;
398 goto err_ce;
399 }
400
401 /* Fill the ring until we cause a wrap */
402 n = 0;
403 while (intel_ring_direction(ce[0]->ring,
404 rq->wa_tail,
405 ce[0]->ring->tail) <= 0) {
406 struct i915_request *tmp;
407
408 tmp = intel_context_create_request(ce[0]);
409 if (IS_ERR(tmp)) {
410 err = PTR_ERR(tmp);
411 i915_request_put(rq);
412 goto err_ce;
413 }
414
415 i915_request_add(tmp);
416 intel_engine_flush_submission(engine);
417 n++;
418 }
419 intel_engine_flush_submission(engine);
420 pr_debug("%s: Filled ring with %d nop tails {size:%x, tail:%x, emit:%x, rq.tail:%x}\n",
421 engine->name, n,
422 ce[0]->ring->size,
423 ce[0]->ring->tail,
424 ce[0]->ring->emit,
425 rq->tail);
426 GEM_BUG_ON(intel_ring_direction(ce[0]->ring,
427 rq->tail,
428 ce[0]->ring->tail) <= 0);
429 i915_request_put(rq);
430
431 /* Create a second ring to preempt the first ring after rq[0] */
432 rq = intel_context_create_request(ce[1]);
433 if (IS_ERR(rq)) {
434 err = PTR_ERR(rq);
435 goto err_ce;
436 }
437
438 rq->sched.attr.priority = I915_PRIORITY_BARRIER;
439 i915_request_get(rq);
440 i915_request_add(rq);
441
442 err = wait_for_submit(engine, rq, HZ / 2);
443 i915_request_put(rq);
444 if (err) {
445 pr_err("%s: preemption request was not submitted\n",
446 engine->name);
447 err = -ETIME;
448 }
449
450 pr_debug("%s: ring[0]:{ tail:%x, emit:%x }, ring[1]:{ tail:%x, emit:%x }\n",
451 engine->name,
452 ce[0]->ring->tail, ce[0]->ring->emit,
453 ce[1]->ring->tail, ce[1]->ring->emit);
454
455 err_ce:
456 intel_engine_flush_submission(engine);
457 igt_spinner_end(&spin);
458 for (n = 0; n < ARRAY_SIZE(ce); n++) {
459 if (IS_ERR_OR_NULL(ce[n]))
460 break;
461
462 intel_context_unpin(ce[n]);
463 intel_context_put(ce[n]);
464 }
465 st_engine_heartbeat_enable(engine);
466 if (igt_live_test_end(&t))
467 err = -EIO;
468 if (err)
469 break;
470 }
471
472 igt_spinner_fini(&spin);
473 return err;
474 }
475
476 static int live_pin_rewind(void *arg)
477 {
478 struct intel_gt *gt = arg;
479 struct intel_engine_cs *engine;
480 enum intel_engine_id id;
481 int err = 0;
482
483 /*
484 * We have to be careful not to trust intel_ring too much; for example,
485 * ring->head is updated upon retire, which is out of sync with pinning
486 * the context. Thus we cannot use ring->head to set CTX_RING_HEAD,
487 * or else we risk writing an older, stale value.
488 *
489 * To simulate this, let's apply a bit of deliberate sabotage.
490 */
491
492 for_each_engine(engine, gt, id) {
493 struct intel_context *ce;
494 struct i915_request *rq;
495 struct intel_ring *ring;
496 struct igt_live_test t;
497
498 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
499 err = -EIO;
500 break;
501 }
502
503 ce = intel_context_create(engine);
504 if (IS_ERR(ce)) {
505 err = PTR_ERR(ce);
506 break;
507 }
508
509 err = intel_context_pin(ce);
510 if (err) {
511 intel_context_put(ce);
512 break;
513 }
514
515 /* Keep the context awake while we play games */
516 err = i915_active_acquire(&ce->active);
517 if (err) {
518 intel_context_unpin(ce);
519 intel_context_put(ce);
520 break;
521 }
522 ring = ce->ring;
523
524 /* Poison the ring, and offset the next request from HEAD */
525 memset32(ring->vaddr, STACK_MAGIC, ring->size / sizeof(u32));
526 ring->emit = ring->size / 2;
527 ring->tail = ring->emit;
528 GEM_BUG_ON(ring->head);
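/*
 * The stale ring->head (0) now points into the poisoned area; if the
 * context image were programmed from it, the GPU would execute the
 * STACK_MAGIC poison instead of the request emitted at ring->emit.
 */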
529
530 intel_context_unpin(ce);
531
532 /* Submit a simple nop request */
533 GEM_BUG_ON(intel_context_is_pinned(ce));
534 rq = intel_context_create_request(ce);
535 i915_active_release(&ce->active); /* e.g. async retire */
536 intel_context_put(ce);
537 if (IS_ERR(rq)) {
538 err = PTR_ERR(rq);
539 break;
540 }
541 GEM_BUG_ON(!rq->head);
542 i915_request_add(rq);
543
544 /* Expect not to hang! */
545 if (igt_live_test_end(&t)) {
546 err = -EIO;
547 break;
548 }
549 }
550
551 return err;
552 }
553
554 static int live_hold_reset(void *arg)
555 {
556 struct intel_gt *gt = arg;
557 struct intel_engine_cs *engine;
558 enum intel_engine_id id;
559 struct igt_spinner spin;
560 int err = 0;
561
562 /*
563 * In order to support offline error capture for fast preempt reset,
564 * we need to decouple the guilty request and ensure that it and its
565 * descendants are not executed while the capture is in progress.
566 */
567
568 if (!intel_has_reset_engine(gt))
569 return 0;
570
571 if (igt_spinner_init(&spin, gt))
572 return -ENOMEM;
573
574 for_each_engine(engine, gt, id) {
575 struct intel_context *ce;
576 struct i915_request *rq;
577
578 ce = intel_context_create(engine);
579 if (IS_ERR(ce)) {
580 err = PTR_ERR(ce);
581 break;
582 }
583
584 st_engine_heartbeat_disable(engine);
585
586 rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
587 if (IS_ERR(rq)) {
588 err = PTR_ERR(rq);
589 goto out;
590 }
591 i915_request_add(rq);
592
593 if (!igt_wait_for_spinner(&spin, rq)) {
594 intel_gt_set_wedged(gt);
595 err = -ETIME;
596 goto out;
597 }
598
599 /* We have our request executing, now remove it and reset */
600
601 local_bh_disable();
602 if (test_and_set_bit(I915_RESET_ENGINE + id,
603 &gt->reset.flags)) {
604 local_bh_enable();
605 intel_gt_set_wedged(gt);
606 err = -EBUSY;
607 goto out;
608 }
609 tasklet_disable(&engine->execlists.tasklet);
610
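/*
 * Run the submission tasklet by hand (it is disabled above) so that
 * the spinner is promoted into ELSP[0] before we reset the engine.
 */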
611 engine->execlists.tasklet.callback(&engine->execlists.tasklet);
612 GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
613
614 i915_request_get(rq);
615 execlists_hold(engine, rq);
616 GEM_BUG_ON(!i915_request_on_hold(rq));
617
618 __intel_engine_reset_bh(engine, NULL);
619 GEM_BUG_ON(rq->fence.error != -EIO);
620
621 tasklet_enable(&engine->execlists.tasklet);
622 clear_and_wake_up_bit(I915_RESET_ENGINE + id,
623 &gt->reset.flags);
624 local_bh_enable();
625
626 /* Check that we do not resubmit the held request */
627 if (!i915_request_wait(rq, 0, HZ / 5)) {
628 pr_err("%s: on hold request completed!\n",
629 engine->name);
630 i915_request_put(rq);
631 err = -EIO;
632 goto out;
633 }
634 GEM_BUG_ON(!i915_request_on_hold(rq));
635
636 /* But is resubmitted on release */
637 execlists_unhold(engine, rq);
638 if (i915_request_wait(rq, 0, HZ / 5) < 0) {
639 pr_err("%s: held request did not complete!\n",
640 engine->name);
641 intel_gt_set_wedged(gt);
642 err = -ETIME;
643 }
644 i915_request_put(rq);
645
646 out:
647 st_engine_heartbeat_enable(engine);
648 intel_context_put(ce);
649 if (err)
650 break;
651 }
652
653 igt_spinner_fini(&spin);
654 return err;
655 }
656
657 static const char *error_repr(int err)
658 {
659 return err ? "bad" : "good";
660 }
661
662 static int live_error_interrupt(void *arg)
663 {
664 static const struct error_phase {
665 enum { GOOD = 0, BAD = -EIO } error[2];
666 } phases[] = {
667 { { BAD, GOOD } },
668 { { BAD, BAD } },
669 { { BAD, GOOD } },
670 { { GOOD, GOOD } }, /* sentinel */
671 };
672 struct intel_gt *gt = arg;
673 struct intel_engine_cs *engine;
674 enum intel_engine_id id;
675
676 /*
677 * We hook up the CS_MASTER_ERROR_INTERRUPT to have forewarning
678 * of invalid commands in user batches that will cause a GPU hang.
679 * This is a faster mechanism than using hangcheck/heartbeats, but
680 * only detects problems the HW knows about -- it will not warn when
681 * we kill the HW!
682 *
683 * To verify our detection and reset, we throw some invalid commands
684 * at the HW and wait for the interrupt.
685 */
686
687 if (!intel_has_reset_engine(gt))
688 return 0;
689
690 for_each_engine(engine, gt, id) {
691 const struct error_phase *p;
692 int err = 0;
693
694 st_engine_heartbeat_disable(engine);
695
696 for (p = phases; p->error[0] != GOOD; p++) {
697 struct i915_request *client[ARRAY_SIZE(phases->error)];
698 u32 *cs;
699 int i;
700
701 memset(client, 0, sizeof(client)); /* zero the whole array of request pointers */
702 for (i = 0; i < ARRAY_SIZE(client); i++) {
703 struct intel_context *ce;
704 struct i915_request *rq;
705
706 ce = intel_context_create(engine);
707 if (IS_ERR(ce)) {
708 err = PTR_ERR(ce);
709 goto out;
710 }
711
712 rq = intel_context_create_request(ce);
713 intel_context_put(ce);
714 if (IS_ERR(rq)) {
715 err = PTR_ERR(rq);
716 goto out;
717 }
718
719 if (rq->engine->emit_init_breadcrumb) {
720 err = rq->engine->emit_init_breadcrumb(rq);
721 if (err) {
722 i915_request_add(rq);
723 goto out;
724 }
725 }
726
727 cs = intel_ring_begin(rq, 2);
728 if (IS_ERR(cs)) {
729 i915_request_add(rq);
730 err = PTR_ERR(cs);
731 goto out;
732 }
733
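/* 0xdeadbeef is not a valid command: the CS should raise an error interrupt */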
734 if (p->error[i]) {
735 *cs++ = 0xdeadbeef;
736 *cs++ = 0xdeadbeef;
737 } else {
738 *cs++ = MI_NOOP;
739 *cs++ = MI_NOOP;
740 }
741
742 client[i] = i915_request_get(rq);
743 i915_request_add(rq);
744 }
745
746 err = wait_for_submit(engine, client[0], HZ / 2);
747 if (err) {
748 pr_err("%s: first request did not start within time!\n",
749 engine->name);
750 err = -ETIME;
751 goto out;
752 }
753
754 for (i = 0; i < ARRAY_SIZE(client); i++) {
755 if (i915_request_wait(client[i], 0, HZ / 5) < 0)
756 pr_debug("%s: %s request incomplete!\n",
757 engine->name,
758 error_repr(p->error[i]));
759
760 if (!i915_request_started(client[i])) {
761 pr_err("%s: %s request not started!\n",
762 engine->name,
763 error_repr(p->error[i]));
764 err = -ETIME;
765 goto out;
766 }
767
768 /* Kick the tasklet to process the error */
769 intel_engine_flush_submission(engine);
770 if (client[i]->fence.error != p->error[i]) {
771 pr_err("%s: %s request (%s) with wrong error code: %d\n",
772 engine->name,
773 error_repr(p->error[i]),
774 i915_request_completed(client[i]) ? "completed" : "running",
775 client[i]->fence.error);
776 err = -EINVAL;
777 goto out;
778 }
779 }
780
781 out:
782 for (i = 0; i < ARRAY_SIZE(client); i++)
783 if (client[i])
784 i915_request_put(client[i]);
785 if (err) {
786 pr_err("%s: failed at phase[%zd] { %d, %d }\n",
787 engine->name, p - phases,
788 p->error[0], p->error[1]);
789 break;
790 }
791 }
792
793 st_engine_heartbeat_enable(engine);
794 if (err) {
795 intel_gt_set_wedged(gt);
796 return err;
797 }
798 }
799
800 return 0;
801 }
802
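/*
 * Each link in the chain busy-waits (MI_SEMAPHORE_WAIT, polling for a
 * non-zero value) on its own dword of the semaphore page and, once
 * released, releases the previous link by writing 1 into slot idx - 1.
 * Arbitration is enabled around the wait so the spinning request can
 * still be preempted or timesliced.
 */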
803 static int
804 emit_semaphore_chain(struct i915_request *rq, struct i915_vma *vma, int idx)
805 {
806 u32 *cs;
807
808 cs = intel_ring_begin(rq, 10);
809 if (IS_ERR(cs))
810 return PTR_ERR(cs);
811
812 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
813
814 *cs++ = MI_SEMAPHORE_WAIT |
815 MI_SEMAPHORE_GLOBAL_GTT |
816 MI_SEMAPHORE_POLL |
817 MI_SEMAPHORE_SAD_NEQ_SDD;
818 *cs++ = 0;
819 *cs++ = i915_ggtt_offset(vma) + 4 * idx;
820 *cs++ = 0;
821
822 if (idx > 0) {
823 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
824 *cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1);
825 *cs++ = 0;
826 *cs++ = 1;
827 } else {
828 *cs++ = MI_NOOP;
829 *cs++ = MI_NOOP;
830 *cs++ = MI_NOOP;
831 *cs++ = MI_NOOP;
832 }
833
834 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
835
836 intel_ring_advance(rq, cs);
837 return 0;
838 }
839
840 static struct i915_request *
841 semaphore_queue(struct intel_engine_cs *engine, struct i915_vma *vma, int idx)
842 {
843 struct intel_context *ce;
844 struct i915_request *rq;
845 int err;
846
847 ce = intel_context_create(engine);
848 if (IS_ERR(ce))
849 return ERR_CAST(ce);
850
851 rq = intel_context_create_request(ce);
852 if (IS_ERR(rq))
853 goto out_ce;
854
855 err = 0;
856 if (rq->engine->emit_init_breadcrumb)
857 err = rq->engine->emit_init_breadcrumb(rq);
858 if (err == 0)
859 err = emit_semaphore_chain(rq, vma, idx);
860 if (err == 0)
861 i915_request_get(rq);
862 i915_request_add(rq);
863 if (err)
864 rq = ERR_PTR(err);
865
866 out_ce:
867 intel_context_put(ce);
868 return rq;
869 }
870
871 static int
872 release_queue(struct intel_engine_cs *engine,
873 struct i915_vma *vma,
874 int idx, int prio)
875 {
876 struct i915_sched_attr attr = {
877 .priority = prio,
878 };
879 struct i915_request *rq;
880 u32 *cs;
881
882 rq = intel_engine_create_kernel_request(engine);
883 if (IS_ERR(rq))
884 return PTR_ERR(rq);
885
886 cs = intel_ring_begin(rq, 4);
887 if (IS_ERR(cs)) {
888 i915_request_add(rq);
889 return PTR_ERR(cs);
890 }
891
892 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
893 *cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1);
894 *cs++ = 0;
895 *cs++ = 1;
896
897 intel_ring_advance(rq, cs);
898
899 i915_request_get(rq);
900 i915_request_add(rq);
901
902 local_bh_disable();
903 engine->schedule(rq, &attr);
904 local_bh_enable(); /* kick tasklet */
905
906 i915_request_put(rq);
907
908 return 0;
909 }
910
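/*
 * Build a chain of semaphore waiters across every engine, headed on
 * @outer. Only the final release_queue() unblocks the tail of the chain,
 * so the head can complete only if timeslicing rotates through each
 * spinning waiter in turn.
 */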
911 static int
912 slice_semaphore_queue(struct intel_engine_cs *outer,
913 struct i915_vma *vma,
914 int count)
915 {
916 struct intel_engine_cs *engine;
917 struct i915_request *head;
918 enum intel_engine_id id;
919 int err, i, n = 0;
920
921 head = semaphore_queue(outer, vma, n++);
922 if (IS_ERR(head))
923 return PTR_ERR(head);
924
925 for_each_engine(engine, outer->gt, id) {
926 if (!intel_engine_has_preemption(engine))
927 continue;
928
929 for (i = 0; i < count; i++) {
930 struct i915_request *rq;
931
932 rq = semaphore_queue(engine, vma, n++);
933 if (IS_ERR(rq)) {
934 err = PTR_ERR(rq);
935 goto out;
936 }
937
938 i915_request_put(rq);
939 }
940 }
941
942 err = release_queue(outer, vma, n, I915_PRIORITY_BARRIER);
943 if (err)
944 goto out;
945
946 if (i915_request_wait(head, 0,
947 2 * outer->gt->info.num_engines * (count + 2) * (count + 3)) < 0) {
948 pr_err("%s: Failed to slice along semaphore chain of length (%d, %d)!\n",
949 outer->name, count, n);
950 GEM_TRACE_DUMP();
951 intel_gt_set_wedged(outer->gt);
952 err = -EIO;
953 }
954
955 out:
956 i915_request_put(head);
957 return err;
958 }
959
960 static int live_timeslice_preempt(void *arg)
961 {
962 struct intel_gt *gt = arg;
963 struct drm_i915_gem_object *obj;
964 struct intel_engine_cs *engine;
965 enum intel_engine_id id;
966 struct i915_vma *vma;
967 void *vaddr;
968 int err = 0;
969
970 /*
971 * If a request takes too long, we would like to give other users
972 * a fair go on the GPU. In particular, users may create batches
973 * that wait upon external input, where that input may even be
974 * supplied by another GPU job. To avoid blocking forever, we
975 * need to preempt the current task and replace it with another
976 * ready task.
977 */
978 if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
979 return 0;
980
981 obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
982 if (IS_ERR(obj))
983 return PTR_ERR(obj);
984
985 vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
986 if (IS_ERR(vma)) {
987 err = PTR_ERR(vma);
988 goto err_obj;
989 }
990
991 vaddr = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
992 if (IS_ERR(vaddr)) {
993 err = PTR_ERR(vaddr);
994 goto err_obj;
995 }
996
997 err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
998 if (err)
999 goto err_map;
1000
1001 err = i915_vma_sync(vma);
1002 if (err)
1003 goto err_pin;
1004
1005 for_each_engine(engine, gt, id) {
1006 if (!intel_engine_has_preemption(engine))
1007 continue;
1008
1009 memset(vaddr, 0, PAGE_SIZE);
1010
1011 st_engine_heartbeat_disable(engine);
1012 err = slice_semaphore_queue(engine, vma, 5);
1013 st_engine_heartbeat_enable(engine);
1014 if (err)
1015 goto err_pin;
1016
1017 if (igt_flush_test(gt->i915)) {
1018 err = -EIO;
1019 goto err_pin;
1020 }
1021 }
1022
1023 err_pin:
1024 i915_vma_unpin(vma);
1025 err_map:
1026 i915_gem_object_unpin_map(obj);
1027 err_obj:
1028 i915_gem_object_put(obj);
1029 return err;
1030 }
1031
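/*
 * Each rewinder request waits for slot[0] to reach its index, records
 * RING_TIMESTAMP into slot[idx] and then bumps slot[0] to idx + 1,
 * letting the test recover the order in which the requests actually
 * executed on the HW.
 */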
1032 static struct i915_request *
1033 create_rewinder(struct intel_context *ce,
1034 struct i915_request *wait,
1035 void *slot, int idx)
1036 {
1037 const u32 offset =
1038 i915_ggtt_offset(ce->engine->status_page.vma) +
1039 offset_in_page(slot);
1040 struct i915_request *rq;
1041 u32 *cs;
1042 int err;
1043
1044 rq = intel_context_create_request(ce);
1045 if (IS_ERR(rq))
1046 return rq;
1047
1048 if (wait) {
1049 err = i915_request_await_dma_fence(rq, &wait->fence);
1050 if (err)
1051 goto err;
1052 }
1053
1054 cs = intel_ring_begin(rq, 14);
1055 if (IS_ERR(cs)) {
1056 err = PTR_ERR(cs);
1057 goto err;
1058 }
1059
1060 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
1061 *cs++ = MI_NOOP;
1062
1063 *cs++ = MI_SEMAPHORE_WAIT |
1064 MI_SEMAPHORE_GLOBAL_GTT |
1065 MI_SEMAPHORE_POLL |
1066 MI_SEMAPHORE_SAD_GTE_SDD;
1067 *cs++ = idx;
1068 *cs++ = offset;
1069 *cs++ = 0;
1070
1071 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
1072 *cs++ = i915_mmio_reg_offset(RING_TIMESTAMP(rq->engine->mmio_base));
1073 *cs++ = offset + idx * sizeof(u32);
1074 *cs++ = 0;
1075
1076 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
1077 *cs++ = offset;
1078 *cs++ = 0;
1079 *cs++ = idx + 1;
1080
1081 intel_ring_advance(rq, cs);
1082
1083 err = 0;
1084 err:
1085 i915_request_get(rq);
1086 i915_request_add(rq);
1087 if (err) {
1088 i915_request_put(rq);
1089 return ERR_PTR(err);
1090 }
1091
1092 return rq;
1093 }
1094
1095 static int live_timeslice_rewind(void *arg)
1096 {
1097 struct intel_gt *gt = arg;
1098 struct intel_engine_cs *engine;
1099 enum intel_engine_id id;
1100
1101 /*
1102 * The usual presumption on timeslice expiration is that we replace
1103 * the active context with another. However, given a chain of
1104 * dependencies we may end up replacing the context with itself,
1105 * but resubmitting only some of its requests, forcing us to rewind the
1106 * RING_TAIL of the original request.
1107 */
1108 if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
1109 return 0;
1110
1111 for_each_engine(engine, gt, id) {
1112 enum { A1, A2, B1 };
1113 enum { X = 1, Z, Y };
1114 struct i915_request *rq[3] = {};
1115 struct intel_context *ce;
1116 unsigned long timeslice;
1117 int i, err = 0;
1118 u32 *slot;
1119
1120 if (!intel_engine_has_timeslices(engine))
1121 continue;
1122
1123 /*
1124 * A:rq1 -- semaphore wait, timestamp X
1125 * A:rq2 -- write timestamp Y
1126 *
1127 * B:rq1 [await A:rq1] -- write timestamp Z
1128 *
1129 * Force timeslice, release semaphore.
1130 *
1131 * Expect execution/evaluation order XZY
1132 */
1133
1134 st_engine_heartbeat_disable(engine);
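/* Shrink the timeslice to 1ms so that expiry kicks in almost immediately */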
1135 timeslice = xchg(&engine->props.timeslice_duration_ms, 1);
1136
1137 slot = memset32(engine->status_page.addr + 1000, 0, 4);
1138
1139 ce = intel_context_create(engine);
1140 if (IS_ERR(ce)) {
1141 err = PTR_ERR(ce);
1142 goto err;
1143 }
1144
1145 rq[A1] = create_rewinder(ce, NULL, slot, X);
1146 if (IS_ERR(rq[A1])) {
1147 intel_context_put(ce);
1148 goto err;
1149 }
1150
1151 rq[A2] = create_rewinder(ce, NULL, slot, Y);
1152 intel_context_put(ce);
1153 if (IS_ERR(rq[A2]))
1154 goto err;
1155
1156 err = wait_for_submit(engine, rq[A2], HZ / 2);
1157 if (err) {
1158 pr_err("%s: failed to submit first context\n",
1159 engine->name);
1160 goto err;
1161 }
1162
1163 ce = intel_context_create(engine);
1164 if (IS_ERR(ce)) {
1165 err = PTR_ERR(ce);
1166 goto err;
1167 }
1168
1169 rq[B1] = create_rewinder(ce, rq[A1], slot, Z);
1170 intel_context_put(ce);
1171 if (IS_ERR(rq[2]))
1172 goto err;
1173
1174 err = wait_for_submit(engine, rq[B1], HZ / 2);
1175 if (err) {
1176 pr_err("%s: failed to submit second context\n",
1177 engine->name);
1178 goto err;
1179 }
1180
1181 /* ELSP[] = { { A:rq1, A:rq2 }, { B:rq1 } } */
1182 ENGINE_TRACE(engine, "forcing tasklet for rewind\n");
1183 while (i915_request_is_active(rq[A2])) { /* semaphore yield! */
1184 /* Wait for the timeslice to kick in */
1185 del_timer(&engine->execlists.timer);
1186 tasklet_hi_schedule(&engine->execlists.tasklet);
1187 intel_engine_flush_submission(engine);
1188 }
1189 /* -> ELSP[] = { { A:rq1 }, { B:rq1 } } */
1190 GEM_BUG_ON(!i915_request_is_active(rq[A1]));
1191 GEM_BUG_ON(!i915_request_is_active(rq[B1]));
1192 GEM_BUG_ON(i915_request_is_active(rq[A2]));
1193
1194 /* Release the hounds! */
1195 slot[0] = 1;
1196 wmb(); /* "pairs" with GPU; paranoid kick of internal CPU$ */
1197
1198 for (i = 1; i <= 3; i++) {
1199 unsigned long timeout = jiffies + HZ / 2;
1200
1201 while (!READ_ONCE(slot[i]) &&
1202 time_before(jiffies, timeout))
1203 ;
1204
1205 if (!time_before(jiffies, timeout)) {
1206 pr_err("%s: rq[%d] timed out\n",
1207 engine->name, i - 1);
1208 err = -ETIME;
1209 goto err;
1210 }
1211
1212 pr_debug("%s: slot[%d]:%x\n", engine->name, i, slot[i]);
1213 }
1214
1215 /* XZY: XZ < XY */
1216 if (slot[Z] - slot[X] >= slot[Y] - slot[X]) {
1217 pr_err("%s: timeslicing did not run context B [%u] before A [%u]!\n",
1218 engine->name,
1219 slot[Z] - slot[X],
1220 slot[Y] - slot[X]);
1221 err = -EINVAL;
1222 }
1223
1224 err:
1225 memset32(&slot[0], -1, 4);
1226 wmb();
1227
1228 engine->props.timeslice_duration_ms = timeslice;
1229 st_engine_heartbeat_enable(engine);
1230 for (i = 0; i < 3; i++)
1231 i915_request_put(rq[i]);
1232 if (igt_flush_test(gt->i915))
1233 err = -EIO;
1234 if (err)
1235 return err;
1236 }
1237
1238 return 0;
1239 }
1240
1241 static struct i915_request *nop_request(struct intel_engine_cs *engine)
1242 {
1243 struct i915_request *rq;
1244
1245 rq = intel_engine_create_kernel_request(engine);
1246 if (IS_ERR(rq))
1247 return rq;
1248
1249 i915_request_get(rq);
1250 i915_request_add(rq);
1251
1252 return rq;
1253 }
1254
1255 static long slice_timeout(struct intel_engine_cs *engine)
1256 {
1257 long timeout;
1258
1259 /* Enough time for a timeslice to kick in, and kick out */
1260 timeout = 2 * msecs_to_jiffies_timeout(timeslice(engine));
1261
1262 /* Enough time for the nop request to complete */
1263 timeout += HZ / 5;
1264
1265 return timeout + 1;
1266 }
1267
1268 static int live_timeslice_queue(void *arg)
1269 {
1270 struct intel_gt *gt = arg;
1271 struct drm_i915_gem_object *obj;
1272 struct intel_engine_cs *engine;
1273 enum intel_engine_id id;
1274 struct i915_vma *vma;
1275 void *vaddr;
1276 int err = 0;
1277
1278 /*
1279 * Make sure that even if ELSP[0] and ELSP[1] are filled with
1280 * timeslicing between them disabled, we *do* enable timeslicing
1281 * if the queue demands it. (Normally, we do not submit if
1282 * ELSP[1] is already occupied, so we must rely on timeslicing to
1283 * eject ELSP[0] in favour of the queue.)
1284 */
1285 if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
1286 return 0;
1287
1288 obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
1289 if (IS_ERR(obj))
1290 return PTR_ERR(obj);
1291
1292 vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
1293 if (IS_ERR(vma)) {
1294 err = PTR_ERR(vma);
1295 goto err_obj;
1296 }
1297
1298 vaddr = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
1299 if (IS_ERR(vaddr)) {
1300 err = PTR_ERR(vaddr);
1301 goto err_obj;
1302 }
1303
1304 err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
1305 if (err)
1306 goto err_map;
1307
1308 err = i915_vma_sync(vma);
1309 if (err)
1310 goto err_pin;
1311
1312 for_each_engine(engine, gt, id) {
1313 struct i915_sched_attr attr = { .priority = I915_PRIORITY_MAX };
1314 struct i915_request *rq, *nop;
1315
1316 if (!intel_engine_has_preemption(engine))
1317 continue;
1318
1319 st_engine_heartbeat_disable(engine);
1320 memset(vaddr, 0, PAGE_SIZE);
1321
1322 /* ELSP[0]: semaphore wait */
1323 rq = semaphore_queue(engine, vma, 0);
1324 if (IS_ERR(rq)) {
1325 err = PTR_ERR(rq);
1326 goto err_heartbeat;
1327 }
1328 engine->schedule(rq, &attr);
1329 err = wait_for_submit(engine, rq, HZ / 2);
1330 if (err) {
1331 pr_err("%s: Timed out trying to submit semaphores\n",
1332 engine->name);
1333 goto err_rq;
1334 }
1335
1336 /* ELSP[1]: nop request */
1337 nop = nop_request(engine);
1338 if (IS_ERR(nop)) {
1339 err = PTR_ERR(nop);
1340 goto err_rq;
1341 }
1342 err = wait_for_submit(engine, nop, HZ / 2);
1343 i915_request_put(nop);
1344 if (err) {
1345 pr_err("%s: Timed out trying to submit nop\n",
1346 engine->name);
1347 goto err_rq;
1348 }
1349
1350 GEM_BUG_ON(i915_request_completed(rq));
1351 GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
1352
1353 /* Queue: semaphore signal, matching priority as semaphore */
1354 err = release_queue(engine, vma, 1, effective_prio(rq));
1355 if (err)
1356 goto err_rq;
1357
1358 /* Wait until we ack the release_queue and start timeslicing */
1359 do {
1360 cond_resched();
1361 intel_engine_flush_submission(engine);
1362 } while (READ_ONCE(engine->execlists.pending[0]));
1363
1364 /* Timeslice every jiffy, so within 2 we should signal */
1365 if (i915_request_wait(rq, 0, slice_timeout(engine)) < 0) {
1366 struct drm_printer p =
1367 drm_info_printer(gt->i915->drm.dev);
1368
1369 pr_err("%s: Failed to timeslice into queue\n",
1370 engine->name);
1371 intel_engine_dump(engine, &p,
1372 "%s\n", engine->name);
1373
1374 memset(vaddr, 0xff, PAGE_SIZE);
1375 err = -EIO;
1376 }
1377 err_rq:
1378 i915_request_put(rq);
1379 err_heartbeat:
1380 st_engine_heartbeat_enable(engine);
1381 if (err)
1382 break;
1383 }
1384
1385 err_pin:
1386 i915_vma_unpin(vma);
1387 err_map:
1388 i915_gem_object_unpin_map(obj);
1389 err_obj:
1390 i915_gem_object_put(obj);
1391 return err;
1392 }
1393
1394 static int live_timeslice_nopreempt(void *arg)
1395 {
1396 struct intel_gt *gt = arg;
1397 struct intel_engine_cs *engine;
1398 enum intel_engine_id id;
1399 struct igt_spinner spin;
1400 int err = 0;
1401
1402 /*
1403 * We should not timeslice into a request that is marked with
1404 * I915_REQUEST_NOPREEMPT.
1405 */
1406 if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
1407 return 0;
1408
1409 if (igt_spinner_init(&spin, gt))
1410 return -ENOMEM;
1411
1412 for_each_engine(engine, gt, id) {
1413 struct intel_context *ce;
1414 struct i915_request *rq;
1415 unsigned long timeslice;
1416
1417 if (!intel_engine_has_preemption(engine))
1418 continue;
1419
1420 ce = intel_context_create(engine);
1421 if (IS_ERR(ce)) {
1422 err = PTR_ERR(ce);
1423 break;
1424 }
1425
1426 st_engine_heartbeat_disable(engine);
1427 timeslice = xchg(&engine->props.timeslice_duration_ms, 1);
1428
1429 /* Create an unpreemptible spinner */
1430
1431 rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
1432 intel_context_put(ce);
1433 if (IS_ERR(rq)) {
1434 err = PTR_ERR(rq);
1435 goto out_heartbeat;
1436 }
1437
1438 i915_request_get(rq);
1439 i915_request_add(rq);
1440
1441 if (!igt_wait_for_spinner(&spin, rq)) {
1442 i915_request_put(rq);
1443 err = -ETIME;
1444 goto out_spin;
1445 }
1446
1447 set_bit(I915_FENCE_FLAG_NOPREEMPT, &rq->fence.flags);
1448 i915_request_put(rq);
1449
1450 /* Followed by a maximum priority barrier (heartbeat) */
1451
1452 ce = intel_context_create(engine);
1453 if (IS_ERR(ce)) {
1454 err = PTR_ERR(ce);
1455 goto out_spin;
1456 }
1457
1458 rq = intel_context_create_request(ce);
1459 intel_context_put(ce);
1460 if (IS_ERR(rq)) {
1461 err = PTR_ERR(rq);
1462 goto out_spin;
1463 }
1464
1465 rq->sched.attr.priority = I915_PRIORITY_BARRIER;
1466 i915_request_get(rq);
1467 i915_request_add(rq);
1468
1469 /*
1470 * Wait until the barrier is in ELSP, and we know timeslicing
1471 * will have been activated.
1472 */
1473 if (wait_for_submit(engine, rq, HZ / 2)) {
1474 i915_request_put(rq);
1475 err = -ETIME;
1476 goto out_spin;
1477 }
1478
1479 /*
1480 * Since the ELSP[0] request is unpreemptible, it should not
1481 * allow the maximum priority barrier through. Wait long
1482 * enough to see if it is timesliced in by mistake.
1483 */
1484 if (i915_request_wait(rq, 0, slice_timeout(engine)) >= 0) {
1485 pr_err("%s: I915_PRIORITY_BARRIER request completed, bypassing no-preempt request\n",
1486 engine->name);
1487 err = -EINVAL;
1488 }
1489 i915_request_put(rq);
1490
1491 out_spin:
1492 igt_spinner_end(&spin);
1493 out_heartbeat:
1494 xchg(&engine->props.timeslice_duration_ms, timeslice);
1495 st_engine_heartbeat_enable(engine);
1496 if (err)
1497 break;
1498
1499 if (igt_flush_test(gt->i915)) {
1500 err = -EIO;
1501 break;
1502 }
1503 }
1504
1505 igt_spinner_fini(&spin);
1506 return err;
1507 }
1508
1509 static int live_busywait_preempt(void *arg)
1510 {
1511 struct intel_gt *gt = arg;
1512 struct i915_gem_context *ctx_hi, *ctx_lo;
1513 struct intel_engine_cs *engine;
1514 struct drm_i915_gem_object *obj;
1515 struct i915_vma *vma;
1516 enum intel_engine_id id;
1517 int err = -ENOMEM;
1518 u32 *map;
1519
1520 /*
1521 * Verify that even without HAS_LOGICAL_RING_PREEMPTION, we can
1522 * preempt the busywaits used to synchronise between rings.
1523 */
1524
1525 ctx_hi = kernel_context(gt->i915);
1526 if (!ctx_hi)
1527 return -ENOMEM;
1528 ctx_hi->sched.priority = I915_CONTEXT_MAX_USER_PRIORITY;
1529
1530 ctx_lo = kernel_context(gt->i915);
1531 if (!ctx_lo)
1532 goto err_ctx_hi;
1533 ctx_lo->sched.priority = I915_CONTEXT_MIN_USER_PRIORITY;
1534
1535 obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
1536 if (IS_ERR(obj)) {
1537 err = PTR_ERR(obj);
1538 goto err_ctx_lo;
1539 }
1540
1541 map = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
1542 if (IS_ERR(map)) {
1543 err = PTR_ERR(map);
1544 goto err_obj;
1545 }
1546
1547 vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
1548 if (IS_ERR(vma)) {
1549 err = PTR_ERR(vma);
1550 goto err_map;
1551 }
1552
1553 err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
1554 if (err)
1555 goto err_map;
1556
1557 err = i915_vma_sync(vma);
1558 if (err)
1559 goto err_vma;
1560
1561 for_each_engine(engine, gt, id) {
1562 struct i915_request *lo, *hi;
1563 struct igt_live_test t;
1564 u32 *cs;
1565
1566 if (!intel_engine_has_preemption(engine))
1567 continue;
1568
1569 if (!intel_engine_can_store_dword(engine))
1570 continue;
1571
1572 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
1573 err = -EIO;
1574 goto err_vma;
1575 }
1576
1577 /*
1578 * We create two requests. The low priority request
1579 * busywaits on a semaphore (inside the ringbuffer where
1580 * it should be preemptible) and the high priority request
1581 * uses a MI_STORE_DWORD_IMM to update the semaphore value
1582 * allowing the first request to complete. If preemption
1583 * fails, we hang instead.
1584 */
1585
1586 lo = igt_request_alloc(ctx_lo, engine);
1587 if (IS_ERR(lo)) {
1588 err = PTR_ERR(lo);
1589 goto err_vma;
1590 }
1591
1592 cs = intel_ring_begin(lo, 8);
1593 if (IS_ERR(cs)) {
1594 err = PTR_ERR(cs);
1595 i915_request_add(lo);
1596 goto err_vma;
1597 }
1598
1599 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
1600 *cs++ = i915_ggtt_offset(vma);
1601 *cs++ = 0;
1602 *cs++ = 1;
1603
1604 /* XXX Do we need a flush + invalidate here? */
1605
1606 *cs++ = MI_SEMAPHORE_WAIT |
1607 MI_SEMAPHORE_GLOBAL_GTT |
1608 MI_SEMAPHORE_POLL |
1609 MI_SEMAPHORE_SAD_EQ_SDD;
1610 *cs++ = 0;
1611 *cs++ = i915_ggtt_offset(vma);
1612 *cs++ = 0;
1613
1614 intel_ring_advance(lo, cs);
1615
1616 i915_request_get(lo);
1617 i915_request_add(lo);
1618
1619 if (wait_for(READ_ONCE(*map), 10)) {
1620 i915_request_put(lo);
1621 err = -ETIMEDOUT;
1622 goto err_vma;
1623 }
1624
1625 /* Low priority request should be busywaiting now */
1626 if (i915_request_wait(lo, 0, 1) != -ETIME) {
1627 i915_request_put(lo);
1628 pr_err("%s: Busywaiting request did not!\n",
1629 engine->name);
1630 err = -EIO;
1631 goto err_vma;
1632 }
1633
1634 hi = igt_request_alloc(ctx_hi, engine);
1635 if (IS_ERR(hi)) {
1636 err = PTR_ERR(hi);
1637 i915_request_put(lo);
1638 goto err_vma;
1639 }
1640
1641 cs = intel_ring_begin(hi, 4);
1642 if (IS_ERR(cs)) {
1643 err = PTR_ERR(cs);
1644 i915_request_add(hi);
1645 i915_request_put(lo);
1646 goto err_vma;
1647 }
1648
1649 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
1650 *cs++ = i915_ggtt_offset(vma);
1651 *cs++ = 0;
1652 *cs++ = 0;
1653
1654 intel_ring_advance(hi, cs);
1655 i915_request_add(hi);
1656
1657 if (i915_request_wait(lo, 0, HZ / 5) < 0) {
1658 struct drm_printer p = drm_info_printer(gt->i915->drm.dev);
1659
1660 pr_err("%s: Failed to preempt semaphore busywait!\n",
1661 engine->name);
1662
1663 intel_engine_dump(engine, &p, "%s\n", engine->name);
1664 GEM_TRACE_DUMP();
1665
1666 i915_request_put(lo);
1667 intel_gt_set_wedged(gt);
1668 err = -EIO;
1669 goto err_vma;
1670 }
1671 GEM_BUG_ON(READ_ONCE(*map));
1672 i915_request_put(lo);
1673
1674 if (igt_live_test_end(&t)) {
1675 err = -EIO;
1676 goto err_vma;
1677 }
1678 }
1679
1680 err = 0;
1681 err_vma:
1682 i915_vma_unpin(vma);
1683 err_map:
1684 i915_gem_object_unpin_map(obj);
1685 err_obj:
1686 i915_gem_object_put(obj);
1687 err_ctx_lo:
1688 kernel_context_close(ctx_lo);
1689 err_ctx_hi:
1690 kernel_context_close(ctx_hi);
1691 return err;
1692 }
1693
1694 static struct i915_request *
1695 spinner_create_request(struct igt_spinner *spin,
1696 struct i915_gem_context *ctx,
1697 struct intel_engine_cs *engine,
1698 u32 arb)
1699 {
1700 struct intel_context *ce;
1701 struct i915_request *rq;
1702
1703 ce = i915_gem_context_get_engine(ctx, engine->legacy_idx);
1704 if (IS_ERR(ce))
1705 return ERR_CAST(ce);
1706
1707 rq = igt_spinner_create_request(spin, ce, arb);
1708 intel_context_put(ce);
1709 return rq;
1710 }
1711
1712 static int live_preempt(void *arg)
1713 {
1714 struct intel_gt *gt = arg;
1715 struct i915_gem_context *ctx_hi, *ctx_lo;
1716 struct igt_spinner spin_hi, spin_lo;
1717 struct intel_engine_cs *engine;
1718 enum intel_engine_id id;
1719 int err = -ENOMEM;
1720
1721 if (igt_spinner_init(&spin_hi, gt))
1722 return -ENOMEM;
1723
1724 if (igt_spinner_init(&spin_lo, gt))
1725 goto err_spin_hi;
1726
1727 ctx_hi = kernel_context(gt->i915);
1728 if (!ctx_hi)
1729 goto err_spin_lo;
1730 ctx_hi->sched.priority = I915_CONTEXT_MAX_USER_PRIORITY;
1731
1732 ctx_lo = kernel_context(gt->i915);
1733 if (!ctx_lo)
1734 goto err_ctx_hi;
1735 ctx_lo->sched.priority = I915_CONTEXT_MIN_USER_PRIORITY;
1736
1737 for_each_engine(engine, gt, id) {
1738 struct igt_live_test t;
1739 struct i915_request *rq;
1740
1741 if (!intel_engine_has_preemption(engine))
1742 continue;
1743
1744 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
1745 err = -EIO;
1746 goto err_ctx_lo;
1747 }
1748
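/*
 * Start a low priority spinner, then submit a high priority spinner;
 * with preemption working, the second spinner starts while the first
 * is still spinning on the GPU.
 */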
1749 rq = spinner_create_request(&spin_lo, ctx_lo, engine,
1750 MI_ARB_CHECK);
1751 if (IS_ERR(rq)) {
1752 err = PTR_ERR(rq);
1753 goto err_ctx_lo;
1754 }
1755
1756 i915_request_add(rq);
1757 if (!igt_wait_for_spinner(&spin_lo, rq)) {
1758 GEM_TRACE("lo spinner failed to start\n");
1759 GEM_TRACE_DUMP();
1760 intel_gt_set_wedged(gt);
1761 err = -EIO;
1762 goto err_ctx_lo;
1763 }
1764
1765 rq = spinner_create_request(&spin_hi, ctx_hi, engine,
1766 MI_ARB_CHECK);
1767 if (IS_ERR(rq)) {
1768 igt_spinner_end(&spin_lo);
1769 err = PTR_ERR(rq);
1770 goto err_ctx_lo;
1771 }
1772
1773 i915_request_add(rq);
1774 if (!igt_wait_for_spinner(&spin_hi, rq)) {
1775 GEM_TRACE("hi spinner failed to start\n");
1776 GEM_TRACE_DUMP();
1777 intel_gt_set_wedged(gt);
1778 err = -EIO;
1779 goto err_ctx_lo;
1780 }
1781
1782 igt_spinner_end(&spin_hi);
1783 igt_spinner_end(&spin_lo);
1784
1785 if (igt_live_test_end(&t)) {
1786 err = -EIO;
1787 goto err_ctx_lo;
1788 }
1789 }
1790
1791 err = 0;
1792 err_ctx_lo:
1793 kernel_context_close(ctx_lo);
1794 err_ctx_hi:
1795 kernel_context_close(ctx_hi);
1796 err_spin_lo:
1797 igt_spinner_fini(&spin_lo);
1798 err_spin_hi:
1799 igt_spinner_fini(&spin_hi);
1800 return err;
1801 }
1802
1803 static int live_late_preempt(void *arg)
1804 {
1805 struct intel_gt *gt = arg;
1806 struct i915_gem_context *ctx_hi, *ctx_lo;
1807 struct igt_spinner spin_hi, spin_lo;
1808 struct intel_engine_cs *engine;
1809 struct i915_sched_attr attr = {};
1810 enum intel_engine_id id;
1811 int err = -ENOMEM;
1812
1813 if (igt_spinner_init(&spin_hi, gt))
1814 return -ENOMEM;
1815
1816 if (igt_spinner_init(&spin_lo, gt))
1817 goto err_spin_hi;
1818
1819 ctx_hi = kernel_context(gt->i915);
1820 if (!ctx_hi)
1821 goto err_spin_lo;
1822
1823 ctx_lo = kernel_context(gt->i915);
1824 if (!ctx_lo)
1825 goto err_ctx_hi;
1826
1827 /* Make sure ctx_lo stays before ctx_hi until we trigger preemption. */
1828 ctx_lo->sched.priority = 1;
1829
1830 for_each_engine(engine, gt, id) {
1831 struct igt_live_test t;
1832 struct i915_request *rq;
1833
1834 if (!intel_engine_has_preemption(engine))
1835 continue;
1836
1837 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
1838 err = -EIO;
1839 goto err_ctx_lo;
1840 }
1841
1842 rq = spinner_create_request(&spin_lo, ctx_lo, engine,
1843 MI_ARB_CHECK);
1844 if (IS_ERR(rq)) {
1845 err = PTR_ERR(rq);
1846 goto err_ctx_lo;
1847 }
1848
1849 i915_request_add(rq);
1850 if (!igt_wait_for_spinner(&spin_lo, rq)) {
1851 pr_err("First context failed to start\n");
1852 goto err_wedged;
1853 }
1854
1855 rq = spinner_create_request(&spin_hi, ctx_hi, engine,
1856 MI_NOOP);
1857 if (IS_ERR(rq)) {
1858 igt_spinner_end(&spin_lo);
1859 err = PTR_ERR(rq);
1860 goto err_ctx_lo;
1861 }
1862
1863 i915_request_add(rq);
1864 if (igt_wait_for_spinner(&spin_hi, rq)) {
1865 pr_err("Second context overtook first?\n");
1866 goto err_wedged;
1867 }
1868
1869 attr.priority = I915_PRIORITY_MAX;
1870 engine->schedule(rq, &attr);
1871
1872 if (!igt_wait_for_spinner(&spin_hi, rq)) {
1873 pr_err("High priority context failed to preempt the low priority context\n");
1874 GEM_TRACE_DUMP();
1875 goto err_wedged;
1876 }
1877
1878 igt_spinner_end(&spin_hi);
1879 igt_spinner_end(&spin_lo);
1880
1881 if (igt_live_test_end(&t)) {
1882 err = -EIO;
1883 goto err_ctx_lo;
1884 }
1885 }
1886
1887 err = 0;
1888 err_ctx_lo:
1889 kernel_context_close(ctx_lo);
1890 err_ctx_hi:
1891 kernel_context_close(ctx_hi);
1892 err_spin_lo:
1893 igt_spinner_fini(&spin_lo);
1894 err_spin_hi:
1895 igt_spinner_fini(&spin_hi);
1896 return err;
1897
1898 err_wedged:
1899 igt_spinner_end(&spin_hi);
1900 igt_spinner_end(&spin_lo);
1901 intel_gt_set_wedged(gt);
1902 err = -EIO;
1903 goto err_ctx_lo;
1904 }
1905
1906 struct preempt_client {
1907 struct igt_spinner spin;
1908 struct i915_gem_context *ctx;
1909 };
1910
1911 static int preempt_client_init(struct intel_gt *gt, struct preempt_client *c)
1912 {
1913 c->ctx = kernel_context(gt->i915);
1914 if (!c->ctx)
1915 return -ENOMEM;
1916
1917 if (igt_spinner_init(&c->spin, gt))
1918 goto err_ctx;
1919
1920 return 0;
1921
1922 err_ctx:
1923 kernel_context_close(c->ctx);
1924 return -ENOMEM;
1925 }
1926
1927 static void preempt_client_fini(struct preempt_client *c)
1928 {
1929 igt_spinner_fini(&c->spin);
1930 kernel_context_close(c->ctx);
1931 }
1932
1933 static int live_nopreempt(void *arg)
1934 {
1935 struct intel_gt *gt = arg;
1936 struct intel_engine_cs *engine;
1937 struct preempt_client a, b;
1938 enum intel_engine_id id;
1939 int err = -ENOMEM;
1940
1941 /*
1942 * Verify that we can disable preemption for an individual request
1943 * that may be under observation and must not be interrupted.
1944 */
1945
1946 if (preempt_client_init(gt, &a))
1947 return -ENOMEM;
1948 if (preempt_client_init(gt, &b))
1949 goto err_client_a;
1950 b.ctx->sched.priority = I915_PRIORITY_MAX;
1951
1952 for_each_engine(engine, gt, id) {
1953 struct i915_request *rq_a, *rq_b;
1954
1955 if (!intel_engine_has_preemption(engine))
1956 continue;
1957
1958 engine->execlists.preempt_hang.count = 0;
1959
1960 rq_a = spinner_create_request(&a.spin,
1961 a.ctx, engine,
1962 MI_ARB_CHECK);
1963 if (IS_ERR(rq_a)) {
1964 err = PTR_ERR(rq_a);
1965 goto err_client_b;
1966 }
1967
1968 /* Low priority client, but unpreemptable! */
1969 __set_bit(I915_FENCE_FLAG_NOPREEMPT, &rq_a->fence.flags);
1970
1971 i915_request_add(rq_a);
1972 if (!igt_wait_for_spinner(&a.spin, rq_a)) {
1973 pr_err("First client failed to start\n");
1974 goto err_wedged;
1975 }
1976
1977 rq_b = spinner_create_request(&b.spin,
1978 b.ctx, engine,
1979 MI_ARB_CHECK);
1980 if (IS_ERR(rq_b)) {
1981 err = PTR_ERR(rq_b);
1982 goto err_client_b;
1983 }
1984
1985 i915_request_add(rq_b);
1986
1987 /* B is much more important than A! (But A is unpreemptable.) */
1988 GEM_BUG_ON(rq_prio(rq_b) <= rq_prio(rq_a));
1989
1990 /* Wait long enough for preemption and timeslicing */
1991 if (igt_wait_for_spinner(&b.spin, rq_b)) {
1992 pr_err("Second client started too early!\n");
1993 goto err_wedged;
1994 }
1995
1996 igt_spinner_end(&a.spin);
1997
1998 if (!igt_wait_for_spinner(&b.spin, rq_b)) {
1999 pr_err("Second client failed to start\n");
2000 goto err_wedged;
2001 }
2002
2003 igt_spinner_end(&b.spin);
2004
2005 if (engine->execlists.preempt_hang.count) {
2006 pr_err("Preemption recorded x%d; should have been suppressed!\n",
2007 engine->execlists.preempt_hang.count);
2008 err = -EINVAL;
2009 goto err_wedged;
2010 }
2011
2012 if (igt_flush_test(gt->i915))
2013 goto err_wedged;
2014 }
2015
2016 err = 0;
2017 err_client_b:
2018 preempt_client_fini(&b);
2019 err_client_a:
2020 preempt_client_fini(&a);
2021 return err;
2022
2023 err_wedged:
2024 igt_spinner_end(&b.spin);
2025 igt_spinner_end(&a.spin);
2026 intel_gt_set_wedged(gt);
2027 err = -EIO;
2028 goto err_client_b;
2029 }
2030
2031 struct live_preempt_cancel {
2032 struct intel_engine_cs *engine;
2033 struct preempt_client a, b;
2034 };
2035
2036 static int __cancel_active0(struct live_preempt_cancel *arg)
2037 {
2038 struct i915_request *rq;
2039 struct igt_live_test t;
2040 int err;
2041
2042 /* Preempt cancel of ELSP0 */
2043 GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
2044 if (igt_live_test_begin(&t, arg->engine->i915,
2045 __func__, arg->engine->name))
2046 return -EIO;
2047
2048 rq = spinner_create_request(&arg->a.spin,
2049 arg->a.ctx, arg->engine,
2050 MI_ARB_CHECK);
2051 if (IS_ERR(rq))
2052 return PTR_ERR(rq);
2053
2054 clear_bit(CONTEXT_BANNED, &rq->context->flags);
2055 i915_request_get(rq);
2056 i915_request_add(rq);
2057 if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
2058 err = -EIO;
2059 goto out;
2060 }
2061
2062 intel_context_set_banned(rq->context);
2063 err = intel_engine_pulse(arg->engine);
2064 if (err)
2065 goto out;
2066
2067 err = wait_for_reset(arg->engine, rq, HZ / 2);
2068 if (err) {
2069 pr_err("Cancelled inflight0 request did not reset\n");
2070 goto out;
2071 }
2072
2073 out:
2074 i915_request_put(rq);
2075 if (igt_live_test_end(&t))
2076 err = -EIO;
2077 return err;
2078 }
2079
2080 static int __cancel_active1(struct live_preempt_cancel *arg)
2081 {
2082 struct i915_request *rq[2] = {};
2083 struct igt_live_test t;
2084 int err;
2085
2086 /* Preempt cancel of ELSP1 */
2087 GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
2088 if (igt_live_test_begin(&t, arg->engine->i915,
2089 __func__, arg->engine->name))
2090 return -EIO;
2091
2092 rq[0] = spinner_create_request(&arg->a.spin,
2093 arg->a.ctx, arg->engine,
2094 MI_NOOP); /* no preemption */
2095 if (IS_ERR(rq[0]))
2096 return PTR_ERR(rq[0]);
2097
2098 clear_bit(CONTEXT_BANNED, &rq[0]->context->flags);
2099 i915_request_get(rq[0]);
2100 i915_request_add(rq[0]);
2101 if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) {
2102 err = -EIO;
2103 goto out;
2104 }
2105
2106 rq[1] = spinner_create_request(&arg->b.spin,
2107 arg->b.ctx, arg->engine,
2108 MI_ARB_CHECK);
2109 if (IS_ERR(rq[1])) {
2110 err = PTR_ERR(rq[1]);
2111 goto out;
2112 }
2113
2114 clear_bit(CONTEXT_BANNED, &rq[1]->context->flags);
2115 i915_request_get(rq[1]);
2116 err = i915_request_await_dma_fence(rq[1], &rq[0]->fence);
2117 i915_request_add(rq[1]);
2118 if (err)
2119 goto out;
2120
2121 intel_context_set_banned(rq[1]->context);
2122 err = intel_engine_pulse(arg->engine);
2123 if (err)
2124 goto out;
2125
2126 igt_spinner_end(&arg->a.spin);
2127 err = wait_for_reset(arg->engine, rq[1], HZ / 2);
2128 if (err)
2129 goto out;
2130
2131 if (rq[0]->fence.error != 0) {
2132 pr_err("Normal inflight0 request did not complete\n");
2133 err = -EINVAL;
2134 goto out;
2135 }
2136
2137 if (rq[1]->fence.error != -EIO) {
2138 pr_err("Cancelled inflight1 request did not report -EIO\n");
2139 err = -EINVAL;
2140 goto out;
2141 }
2142
2143 out:
2144 i915_request_put(rq[1]);
2145 i915_request_put(rq[0]);
2146 if (igt_live_test_end(&t))
2147 err = -EIO;
2148 return err;
2149 }
2150
2151 static int __cancel_queued(struct live_preempt_cancel *arg)
2152 {
2153 struct i915_request *rq[3] = {};
2154 struct igt_live_test t;
2155 int err;
2156
2157 /* Full ELSP and one in the wings */
2158 GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
2159 if (igt_live_test_begin(&t, arg->engine->i915,
2160 __func__, arg->engine->name))
2161 return -EIO;
2162
2163 rq[0] = spinner_create_request(&arg->a.spin,
2164 arg->a.ctx, arg->engine,
2165 MI_ARB_CHECK);
2166 if (IS_ERR(rq[0]))
2167 return PTR_ERR(rq[0]);
2168
2169 clear_bit(CONTEXT_BANNED, &rq[0]->context->flags);
2170 i915_request_get(rq[0]);
2171 i915_request_add(rq[0]);
2172 if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) {
2173 err = -EIO;
2174 goto out;
2175 }
2176
2177 rq[1] = igt_request_alloc(arg->b.ctx, arg->engine);
2178 if (IS_ERR(rq[1])) {
2179 err = PTR_ERR(rq[1]);
2180 goto out;
2181 }
2182
2183 clear_bit(CONTEXT_BANNED, &rq[1]->context->flags);
2184 i915_request_get(rq[1]);
2185 err = i915_request_await_dma_fence(rq[1], &rq[0]->fence);
2186 i915_request_add(rq[1]);
2187 if (err)
2188 goto out;
2189
2190 rq[2] = spinner_create_request(&arg->b.spin,
2191 arg->a.ctx, arg->engine,
2192 MI_ARB_CHECK);
2193 if (IS_ERR(rq[2])) {
2194 err = PTR_ERR(rq[2]);
2195 goto out;
2196 }
2197
2198 i915_request_get(rq[2]);
2199 err = i915_request_await_dma_fence(rq[2], &rq[1]->fence);
2200 i915_request_add(rq[2]);
2201 if (err)
2202 goto out;
2203
2204 intel_context_set_banned(rq[2]->context);
2205 err = intel_engine_pulse(arg->engine);
2206 if (err)
2207 goto out;
2208
2209 err = wait_for_reset(arg->engine, rq[2], HZ / 2);
2210 if (err)
2211 goto out;
2212
2213 if (rq[0]->fence.error != -EIO) {
2214 pr_err("Cancelled inflight0 request did not report -EIO\n");
2215 err = -EINVAL;
2216 goto out;
2217 }
2218
2219 if (rq[1]->fence.error != 0) {
2220 pr_err("Normal inflight1 request did not complete\n");
2221 err = -EINVAL;
2222 goto out;
2223 }
2224
2225 if (rq[2]->fence.error != -EIO) {
2226 pr_err("Cancelled queued request did not report -EIO\n");
2227 err = -EINVAL;
2228 goto out;
2229 }
2230
2231 out:
2232 i915_request_put(rq[2]);
2233 i915_request_put(rq[1]);
2234 i915_request_put(rq[0]);
2235 if (igt_live_test_end(&t))
2236 err = -EIO;
2237 return err;
2238 }
2239
2240 static int __cancel_hostile(struct live_preempt_cancel *arg)
2241 {
2242 struct i915_request *rq;
2243 int err;
2244
2245 /* Preempt cancel non-preemptible spinner in ELSP0 */
2246 if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT))
2247 return 0;
2248
2249 if (!intel_has_reset_engine(arg->engine->gt))
2250 return 0;
2251
2252 GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
2253 rq = spinner_create_request(&arg->a.spin,
2254 arg->a.ctx, arg->engine,
2255 MI_NOOP); /* preemption disabled */
2256 if (IS_ERR(rq))
2257 return PTR_ERR(rq);
2258
2259 clear_bit(CONTEXT_BANNED, &rq->context->flags);
2260 i915_request_get(rq);
2261 i915_request_add(rq);
2262 if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
2263 err = -EIO;
2264 goto out;
2265 }
2266
2267 intel_context_set_banned(rq->context);
2268 err = intel_engine_pulse(arg->engine); /* force reset */
2269 if (err)
2270 goto out;
2271
2272 err = wait_for_reset(arg->engine, rq, HZ / 2);
2273 if (err) {
2274 pr_err("Cancelled inflight0 request did not reset\n");
2275 goto out;
2276 }
2277
2278 out:
2279 i915_request_put(rq);
2280 if (igt_flush_test(arg->engine->i915))
2281 err = -EIO;
2282 return err;
2283 }
2284
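/*
 * Selftest fault injection used by __cancel_fail(): while armed
 * (probability 999, unlimited repeats) the reset attempted on a preempt
 * timeout is made to fail, forcing recovery to escalate to the
 * heartbeat; cancel_reset_timeout() disarms the injection again.
 */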
2285 static void force_reset_timeout(struct intel_engine_cs *engine)
2286 {
2287 engine->reset_timeout.probability = 999;
2288 atomic_set(&engine->reset_timeout.times, -1);
2289 }
2290
2291 static void cancel_reset_timeout(struct intel_engine_cs *engine)
2292 {
2293 memset(&engine->reset_timeout, 0, sizeof(engine->reset_timeout));
2294 }
2295
2296 static int __cancel_fail(struct live_preempt_cancel *arg)
2297 {
2298 struct intel_engine_cs *engine = arg->engine;
2299 struct i915_request *rq;
2300 int err;
2301
2302 if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT))
2303 return 0;
2304
2305 if (!intel_has_reset_engine(engine->gt))
2306 return 0;
2307
2308 GEM_TRACE("%s(%s)\n", __func__, engine->name);
2309 rq = spinner_create_request(&arg->a.spin,
2310 arg->a.ctx, engine,
2311 MI_NOOP); /* preemption disabled */
2312 if (IS_ERR(rq))
2313 return PTR_ERR(rq);
2314
2315 clear_bit(CONTEXT_BANNED, &rq->context->flags);
2316 i915_request_get(rq);
2317 i915_request_add(rq);
2318 if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
2319 err = -EIO;
2320 goto out;
2321 }
2322
2323 intel_context_set_banned(rq->context);
2324
2325 err = intel_engine_pulse(engine);
2326 if (err)
2327 goto out;
2328
2329 force_reset_timeout(engine);
2330
2331 /* force preempt reset [failure] */
2332 while (!engine->execlists.pending[0])
2333 intel_engine_flush_submission(engine);
2334 del_timer_sync(&engine->execlists.preempt);
2335 intel_engine_flush_submission(engine);
2336
2337 cancel_reset_timeout(engine);
2338
2339 /* after failure, require heartbeats to reset device */
2340 intel_engine_set_heartbeat(engine, 1);
2341 err = wait_for_reset(engine, rq, HZ / 2);
2342 intel_engine_set_heartbeat(engine,
2343 engine->defaults.heartbeat_interval_ms);
2344 if (err) {
2345 pr_err("Cancelled inflight0 request did not reset\n");
2346 goto out;
2347 }
2348
2349 out:
2350 i915_request_put(rq);
2351 if (igt_flush_test(engine->i915))
2352 err = -EIO;
2353 return err;
2354 }
2355
2356 static int live_preempt_cancel(void *arg)
2357 {
2358 struct intel_gt *gt = arg;
2359 struct live_preempt_cancel data;
2360 enum intel_engine_id id;
2361 int err = -ENOMEM;
2362
2363 /*
2364 * To cancel an inflight context, we need to first remove it from the
2365 * GPU. That sounds like preemption! Plus a little bit of bookkeeping.
2366 */
2367
2368 if (preempt_client_init(gt, &data.a))
2369 return -ENOMEM;
2370 if (preempt_client_init(gt, &data.b))
2371 goto err_client_a;
2372
2373 for_each_engine(data.engine, gt, id) {
2374 if (!intel_engine_has_preemption(data.engine))
2375 continue;
2376
2377 err = __cancel_active0(&data);
2378 if (err)
2379 goto err_wedged;
2380
2381 err = __cancel_active1(&data);
2382 if (err)
2383 goto err_wedged;
2384
2385 err = __cancel_queued(&data);
2386 if (err)
2387 goto err_wedged;
2388
2389 err = __cancel_hostile(&data);
2390 if (err)
2391 goto err_wedged;
2392
2393 err = __cancel_fail(&data);
2394 if (err)
2395 goto err_wedged;
2396 }
2397
2398 err = 0;
2399 err_client_b:
2400 preempt_client_fini(&data.b);
2401 err_client_a:
2402 preempt_client_fini(&data.a);
2403 return err;
2404
2405 err_wedged:
2406 GEM_TRACE_DUMP();
2407 igt_spinner_end(&data.b.spin);
2408 igt_spinner_end(&data.a.spin);
2409 intel_gt_set_wedged(gt);
2410 goto err_client_b;
2411 }
2412
2413 static int live_suppress_self_preempt(void *arg)
2414 {
2415 struct i915_sched_attr attr = { .priority = I915_PRIORITY_MAX };
2416 struct intel_gt *gt = arg;
2417 struct intel_engine_cs *engine;
2418 struct preempt_client a, b;
2419 enum intel_engine_id id;
2420 int err = -ENOMEM;
2421
2422 /*
2423 * Verify that if a preemption request does not cause a change in
2424 * the current execution order, the preempt-to-idle injection is
2425 * skipped and that we do not accidentally apply it after the CS
2426 * completion event.
2427 */
2428
2429 if (intel_uc_uses_guc_submission(&gt->uc))
2430 return 0; /* presume black box */
2431
2432 if (intel_vgpu_active(gt->i915))
2433 return 0; /* GVT forces single port & request submission */
2434
2435 if (preempt_client_init(gt, &a))
2436 return -ENOMEM;
2437 if (preempt_client_init(gt, &b))
2438 goto err_client_a;
2439
2440 for_each_engine(engine, gt, id) {
2441 struct i915_request *rq_a, *rq_b;
2442 int depth;
2443
2444 if (!intel_engine_has_preemption(engine))
2445 continue;
2446
2447 if (igt_flush_test(gt->i915))
2448 goto err_wedged;
2449
2450 st_engine_heartbeat_disable(engine);
2451 engine->execlists.preempt_hang.count = 0;
2452
2453 rq_a = spinner_create_request(&a.spin,
2454 a.ctx, engine,
2455 MI_NOOP);
2456 if (IS_ERR(rq_a)) {
2457 err = PTR_ERR(rq_a);
2458 st_engine_heartbeat_enable(engine);
2459 goto err_client_b;
2460 }
2461
2462 i915_request_add(rq_a);
2463 if (!igt_wait_for_spinner(&a.spin, rq_a)) {
2464 pr_err("First client failed to start\n");
2465 st_engine_heartbeat_enable(engine);
2466 goto err_wedged;
2467 }
2468
2469 /* Keep postponing the timer to avoid premature slicing */
2470 mod_timer(&engine->execlists.timer, jiffies + HZ);
2471 for (depth = 0; depth < 8; depth++) {
2472 rq_b = spinner_create_request(&b.spin,
2473 b.ctx, engine,
2474 MI_NOOP);
2475 if (IS_ERR(rq_b)) {
2476 err = PTR_ERR(rq_b);
2477 st_engine_heartbeat_enable(engine);
2478 goto err_client_b;
2479 }
2480 i915_request_add(rq_b);
2481
2482 GEM_BUG_ON(i915_request_completed(rq_a));
2483 engine->schedule(rq_a, &attr);
2484 igt_spinner_end(&a.spin);
2485
2486 if (!igt_wait_for_spinner(&b.spin, rq_b)) {
2487 pr_err("Second client failed to start\n");
2488 st_engine_heartbeat_enable(engine);
2489 goto err_wedged;
2490 }
2491
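/*
 * Swap clients for the next pass: the spinner that just started becomes
 * the already-running request we promote, and that promotion must never
 * be accounted as a preemption event.
 */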
2492 swap(a, b);
2493 rq_a = rq_b;
2494 }
2495 igt_spinner_end(&a.spin);
2496
2497 if (engine->execlists.preempt_hang.count) {
2498 pr_err("Preemption on %s recorded x%d, depth %d; should have been suppressed!\n",
2499 engine->name,
2500 engine->execlists.preempt_hang.count,
2501 depth);
2502 st_engine_heartbeat_enable(engine);
2503 err = -EINVAL;
2504 goto err_client_b;
2505 }
2506
2507 st_engine_heartbeat_enable(engine);
2508 if (igt_flush_test(gt->i915))
2509 goto err_wedged;
2510 }
2511
2512 err = 0;
2513 err_client_b:
2514 preempt_client_fini(&b);
2515 err_client_a:
2516 preempt_client_fini(&a);
2517 return err;
2518
2519 err_wedged:
2520 igt_spinner_end(&b.spin);
2521 igt_spinner_end(&a.spin);
2522 intel_gt_set_wedged(gt);
2523 err = -EIO;
2524 goto err_client_b;
2525 }
2526
2527 static int live_chain_preempt(void *arg)
2528 {
2529 struct intel_gt *gt = arg;
2530 struct intel_engine_cs *engine;
2531 struct preempt_client hi, lo;
2532 enum intel_engine_id id;
2533 int err = -ENOMEM;
2534
2535 /*
2536 * Build a chain AB...BA between two contexts (A, B) and request
2537 * preemption of the last request. It should then complete before
2538 * the previously submitted spinner in B.
2539 */
2540
2541 if (preempt_client_init(gt, &hi))
2542 return -ENOMEM;
2543
2544 if (preempt_client_init(gt, &lo))
2545 goto err_client_hi;
2546
2547 for_each_engine(engine, gt, id) {
2548 struct i915_sched_attr attr = { .priority = I915_PRIORITY_MAX };
2549 struct igt_live_test t;
2550 struct i915_request *rq;
2551 int ring_size, count, i;
2552
2553 if (!intel_engine_has_preemption(engine))
2554 continue;
2555
2556 rq = spinner_create_request(&lo.spin,
2557 lo.ctx, engine,
2558 MI_ARB_CHECK);
2559 if (IS_ERR(rq))
2560 goto err_wedged;
2561
2562 i915_request_get(rq);
2563 i915_request_add(rq);
2564
2565 ring_size = rq->wa_tail - rq->head;
2566 if (ring_size < 0)
2567 ring_size += rq->ring->size;
2568 ring_size = rq->ring->size / ring_size;
2569 pr_debug("%s(%s): Using maximum of %d requests\n",
2570 __func__, engine->name, ring_size);
2571
2572 igt_spinner_end(&lo.spin);
2573 if (i915_request_wait(rq, 0, HZ / 2) < 0) {
2574 pr_err("Timed out waiting to flush %s\n", engine->name);
2575 i915_request_put(rq);
2576 goto err_wedged;
2577 }
2578 i915_request_put(rq);
2579
2580 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
2581 err = -EIO;
2582 goto err_wedged;
2583 }
2584
2585 for_each_prime_number_from(count, 1, ring_size) {
2586 rq = spinner_create_request(&hi.spin,
2587 hi.ctx, engine,
2588 MI_ARB_CHECK);
2589 if (IS_ERR(rq))
2590 goto err_wedged;
2591 i915_request_add(rq);
2592 if (!igt_wait_for_spinner(&hi.spin, rq))
2593 goto err_wedged;
2594
2595 rq = spinner_create_request(&lo.spin,
2596 lo.ctx, engine,
2597 MI_ARB_CHECK);
2598 if (IS_ERR(rq))
2599 goto err_wedged;
2600 i915_request_add(rq);
2601
2602 for (i = 0; i < count; i++) {
2603 rq = igt_request_alloc(lo.ctx, engine);
2604 if (IS_ERR(rq))
2605 goto err_wedged;
2606 i915_request_add(rq);
2607 }
2608
2609 rq = igt_request_alloc(hi.ctx, engine);
2610 if (IS_ERR(rq))
2611 goto err_wedged;
2612
2613 i915_request_get(rq);
2614 i915_request_add(rq);
2615 engine->schedule(rq, &attr);
2616
2617 igt_spinner_end(&hi.spin);
2618 if (i915_request_wait(rq, 0, HZ / 5) < 0) {
2619 struct drm_printer p =
2620 drm_info_printer(gt->i915->drm.dev);
2621
2622 pr_err("Failed to preempt over chain of %d\n",
2623 count);
2624 intel_engine_dump(engine, &p,
2625 "%s\n", engine->name);
2626 i915_request_put(rq);
2627 goto err_wedged;
2628 }
2629 igt_spinner_end(&lo.spin);
2630 i915_request_put(rq);
2631
2632 rq = igt_request_alloc(lo.ctx, engine);
2633 if (IS_ERR(rq))
2634 goto err_wedged;
2635
2636 i915_request_get(rq);
2637 i915_request_add(rq);
2638
2639 if (i915_request_wait(rq, 0, HZ / 5) < 0) {
2640 struct drm_printer p =
2641 drm_info_printer(gt->i915->drm.dev);
2642
2643 pr_err("Failed to flush low priority chain of %d requests\n",
2644 count);
2645 intel_engine_dump(engine, &p,
2646 "%s\n", engine->name);
2647
2648 i915_request_put(rq);
2649 goto err_wedged;
2650 }
2651 i915_request_put(rq);
2652 }
2653
2654 if (igt_live_test_end(&t)) {
2655 err = -EIO;
2656 goto err_wedged;
2657 }
2658 }
2659
2660 err = 0;
2661 err_client_lo:
2662 preempt_client_fini(&lo);
2663 err_client_hi:
2664 preempt_client_fini(&hi);
2665 return err;
2666
2667 err_wedged:
2668 igt_spinner_end(&hi.spin);
2669 igt_spinner_end(&lo.spin);
2670 intel_gt_set_wedged(gt);
2671 err = -EIO;
2672 goto err_client_lo;
2673 }
2674
2675 static int create_gang(struct intel_engine_cs *engine,
2676 struct i915_request **prev)
2677 {
2678 struct drm_i915_gem_object *obj;
2679 struct intel_context *ce;
2680 struct i915_request *rq;
2681 struct i915_vma *vma;
2682 u32 *cs;
2683 int err;
2684
2685 ce = intel_context_create(engine);
2686 if (IS_ERR(ce))
2687 return PTR_ERR(ce);
2688
2689 obj = i915_gem_object_create_internal(engine->i915, 4096);
2690 if (IS_ERR(obj)) {
2691 err = PTR_ERR(obj);
2692 goto err_ce;
2693 }
2694
2695 vma = i915_vma_instance(obj, ce->vm, NULL);
2696 if (IS_ERR(vma)) {
2697 err = PTR_ERR(vma);
2698 goto err_obj;
2699 }
2700
2701 err = i915_vma_pin(vma, 0, 0, PIN_USER);
2702 if (err)
2703 goto err_obj;
2704
2705 cs = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
2706 if (IS_ERR(cs)) {
2707 err = PTR_ERR(cs);
2708 goto err_obj;
2709 }
2710
2711 /* Semaphore target: spin until zero */
2712 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
2713
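/*
 * Each batch busy-waits on its own first dword (initially the non-zero
 * MI_ARB_ON_OFF) until the next batch in the gang, or the CPU for the
 * last one created, overwrites it with zero.
 */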
2714 *cs++ = MI_SEMAPHORE_WAIT |
2715 MI_SEMAPHORE_POLL |
2716 MI_SEMAPHORE_SAD_EQ_SDD;
2717 *cs++ = 0;
2718 *cs++ = lower_32_bits(vma->node.start);
2719 *cs++ = upper_32_bits(vma->node.start);
2720
2721 if (*prev) {
2722 u64 offset = (*prev)->batch->node.start;
2723
2724 /* Terminate the spinner in the next lower priority batch. */
2725 *cs++ = MI_STORE_DWORD_IMM_GEN4;
2726 *cs++ = lower_32_bits(offset);
2727 *cs++ = upper_32_bits(offset);
2728 *cs++ = 0;
2729 }
2730
2731 *cs++ = MI_BATCH_BUFFER_END;
2732 i915_gem_object_flush_map(obj);
2733 i915_gem_object_unpin_map(obj);
2734
2735 rq = intel_context_create_request(ce);
2736 if (IS_ERR(rq)) {
2737 err = PTR_ERR(rq);
2738 goto err_obj;
2739 }
2740
2741 rq->batch = i915_vma_get(vma);
2742 i915_request_get(rq);
2743
2744 i915_vma_lock(vma);
2745 err = i915_request_await_object(rq, vma->obj, false);
2746 if (!err)
2747 err = i915_vma_move_to_active(vma, rq, 0);
2748 if (!err)
2749 err = rq->engine->emit_bb_start(rq,
2750 vma->node.start,
2751 PAGE_SIZE, 0);
2752 i915_vma_unlock(vma);
2753 i915_request_add(rq);
2754 if (err)
2755 goto err_rq;
2756
2757 i915_gem_object_put(obj);
2758 intel_context_put(ce);
2759
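/*
 * Chain the gang together, borrowing the selftest-only mock.link hook,
 * so the caller can walk from the most recently created request back
 * down to the oldest.
 */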
2760 rq->mock.link.next = &(*prev)->mock.link;
2761 *prev = rq;
2762 return 0;
2763
2764 err_rq:
2765 i915_vma_put(rq->batch);
2766 i915_request_put(rq);
2767 err_obj:
2768 i915_gem_object_put(obj);
2769 err_ce:
2770 intel_context_put(ce);
2771 return err;
2772 }
2773
2774 static int __live_preempt_ring(struct intel_engine_cs *engine,
2775 struct igt_spinner *spin,
2776 int queue_sz, int ring_sz)
2777 {
2778 struct intel_context *ce[2] = {};
2779 struct i915_request *rq;
2780 struct igt_live_test t;
2781 int err = 0;
2782 int n;
2783
2784 if (igt_live_test_begin(&t, engine->i915, __func__, engine->name))
2785 return -EIO;
2786
2787 for (n = 0; n < ARRAY_SIZE(ce); n++) {
2788 struct intel_context *tmp;
2789
2790 tmp = intel_context_create(engine);
2791 if (IS_ERR(tmp)) {
2792 err = PTR_ERR(tmp);
2793 goto err_ce;
2794 }
2795
2796 tmp->ring = __intel_context_ring_size(ring_sz);
2797
2798 err = intel_context_pin(tmp);
2799 if (err) {
2800 intel_context_put(tmp);
2801 goto err_ce;
2802 }
2803
2804 memset32(tmp->ring->vaddr,
2805 0xdeadbeef, /* trigger a hang if executed */
2806 tmp->ring->vma->size / sizeof(u32));
2807
2808 ce[n] = tmp;
2809 }
2810
2811 rq = igt_spinner_create_request(spin, ce[0], MI_ARB_CHECK);
2812 if (IS_ERR(rq)) {
2813 err = PTR_ERR(rq);
2814 goto err_ce;
2815 }
2816
2817 i915_request_get(rq);
2818 rq->sched.attr.priority = I915_PRIORITY_BARRIER;
2819 i915_request_add(rq);
2820
2821 if (!igt_wait_for_spinner(spin, rq)) {
2822 intel_gt_set_wedged(engine->gt);
2823 i915_request_put(rq);
2824 err = -ETIME;
2825 goto err_ce;
2826 }
2827
2828 /* Fill the ring until we cause a wrap */
2829 n = 0;
2830 while (ce[0]->ring->tail - rq->wa_tail <= queue_sz) {
2831 struct i915_request *tmp;
2832
2833 tmp = intel_context_create_request(ce[0]);
2834 if (IS_ERR(tmp)) {
2835 err = PTR_ERR(tmp);
2836 i915_request_put(rq);
2837 goto err_ce;
2838 }
2839
2840 i915_request_add(tmp);
2841 intel_engine_flush_submission(engine);
2842 n++;
2843 }
2844 intel_engine_flush_submission(engine);
2845 pr_debug("%s: Filled %d with %d nop tails {size:%x, tail:%x, emit:%x, rq.tail:%x}\n",
2846 engine->name, queue_sz, n,
2847 ce[0]->ring->size,
2848 ce[0]->ring->tail,
2849 ce[0]->ring->emit,
2850 rq->tail);
2851 i915_request_put(rq);
2852
2853 /* Create a second request to preempt the first ring */
2854 rq = intel_context_create_request(ce[1]);
2855 if (IS_ERR(rq)) {
2856 err = PTR_ERR(rq);
2857 goto err_ce;
2858 }
2859
2860 rq->sched.attr.priority = I915_PRIORITY_BARRIER;
2861 i915_request_get(rq);
2862 i915_request_add(rq);
2863
2864 err = wait_for_submit(engine, rq, HZ / 2);
2865 i915_request_put(rq);
2866 if (err) {
2867 pr_err("%s: preemption request was not submitted\n",
2868 engine->name);
2869 err = -ETIME;
2870 }
2871
2872 pr_debug("%s: ring[0]:{ tail:%x, emit:%x }, ring[1]:{ tail:%x, emit:%x }\n",
2873 engine->name,
2874 ce[0]->ring->tail, ce[0]->ring->emit,
2875 ce[1]->ring->tail, ce[1]->ring->emit);
2876
2877 err_ce:
2878 intel_engine_flush_submission(engine);
2879 igt_spinner_end(spin);
2880 for (n = 0; n < ARRAY_SIZE(ce); n++) {
2881 if (IS_ERR_OR_NULL(ce[n]))
2882 break;
2883
2884 intel_context_unpin(ce[n]);
2885 intel_context_put(ce[n]);
2886 }
2887 if (igt_live_test_end(&t))
2888 err = -EIO;
2889 return err;
2890 }
2891
2892 static int live_preempt_ring(void *arg)
2893 {
2894 struct intel_gt *gt = arg;
2895 struct intel_engine_cs *engine;
2896 struct igt_spinner spin;
2897 enum intel_engine_id id;
2898 int err = 0;
2899
2900 /*
2901 * Check that we roll back large chunks of a ring in order to do a
2902 * preemption event. Similar to live_unlite_ring, but looking at
2903 * ring size rather than the impact of intel_ring_direction().
2904 */
2905
2906 if (igt_spinner_init(&spin, gt))
2907 return -ENOMEM;
2908
2909 for_each_engine(engine, gt, id) {
2910 int n;
2911
2912 if (!intel_engine_has_preemption(engine))
2913 continue;
2914
2915 if (!intel_engine_can_store_dword(engine))
2916 continue;
2917
2918 st_engine_heartbeat_disable(engine);
2919
2920 for (n = 0; n <= 3; n++) {
2921 err = __live_preempt_ring(engine, &spin,
2922 n * SZ_4K / 4, SZ_4K);
2923 if (err)
2924 break;
2925 }
2926
2927 st_engine_heartbeat_enable(engine);
2928 if (err)
2929 break;
2930 }
2931
2932 igt_spinner_fini(&spin);
2933 return err;
2934 }
2935
2936 static int live_preempt_gang(void *arg)
2937 {
2938 struct intel_gt *gt = arg;
2939 struct intel_engine_cs *engine;
2940 enum intel_engine_id id;
2941
2942 /*
2943 * Build as long a chain of preempters as we can, with each
2944 * request higher priority than the last. Once we are ready, we release
2945 * the last batch which then percolates down the chain, each releasing
2946 * the next oldest in turn. The intent is to simply push as hard as we
2947 * can with the number of preemptions, trying to exceed narrow HW
2948 * limits. At a minimum, we insist that we can sort all the user
2949 * high priority levels into execution order.
2950 */
2951
2952 for_each_engine(engine, gt, id) {
2953 struct i915_request *rq = NULL;
2954 struct igt_live_test t;
2955 IGT_TIMEOUT(end_time);
2956 int prio = 0;
2957 int err = 0;
2958 u32 *cs;
2959
2960 if (!intel_engine_has_preemption(engine))
2961 continue;
2962
2963 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name))
2964 return -EIO;
2965
2966 do {
2967 struct i915_sched_attr attr = { .priority = prio++ };
2968
2969 err = create_gang(engine, &rq);
2970 if (err)
2971 break;
2972
2973 /* Submit each spinner at increasing priority */
2974 engine->schedule(rq, &attr);
2975 } while (prio <= I915_PRIORITY_MAX &&
2976 !__igt_timeout(end_time, NULL));
2977 pr_debug("%s: Preempt chain of %d requests\n",
2978 engine->name, prio);
2979
2980 /*
2981 * The last spinner is therefore the highest priority and
2982 * should execute first. When that spinner completes,
2983 * it will terminate the next lowest spinner until there
2984 * are no more spinners and the gang is complete.
2985 */
2986 cs = i915_gem_object_pin_map_unlocked(rq->batch->obj, I915_MAP_WC);
2987 if (!IS_ERR(cs)) {
2988 *cs = 0;
2989 i915_gem_object_unpin_map(rq->batch->obj);
2990 } else {
2991 err = PTR_ERR(cs);
2992 intel_gt_set_wedged(gt);
2993 }
2994
2995 while (rq) { /* wait for each rq from highest to lowest prio */
2996 struct i915_request *n = list_next_entry(rq, mock.link);
2997
2998 if (err == 0 && i915_request_wait(rq, 0, HZ / 5) < 0) {
2999 struct drm_printer p =
3000 drm_info_printer(engine->i915->drm.dev);
3001
3002 pr_err("Failed to flush chain of %d requests, at %d\n",
3003 prio, rq_prio(rq));
3004 intel_engine_dump(engine, &p,
3005 "%s\n", engine->name);
3006
3007 err = -ETIME;
3008 }
3009
3010 i915_vma_put(rq->batch);
3011 i915_request_put(rq);
3012 rq = n;
3013 }
3014
3015 if (igt_live_test_end(&t))
3016 err = -EIO;
3017 if (err)
3018 return err;
3019 }
3020
3021 return 0;
3022 }
3023
3024 static struct i915_vma *
3025 create_gpr_user(struct intel_engine_cs *engine,
3026 struct i915_vma *result,
3027 unsigned int offset)
3028 {
3029 struct drm_i915_gem_object *obj;
3030 struct i915_vma *vma;
3031 u32 *cs;
3032 int err;
3033 int i;
3034
3035 obj = i915_gem_object_create_internal(engine->i915, 4096);
3036 if (IS_ERR(obj))
3037 return ERR_CAST(obj);
3038
3039 vma = i915_vma_instance(obj, result->vm, NULL);
3040 if (IS_ERR(vma)) {
3041 i915_gem_object_put(obj);
3042 return vma;
3043 }
3044
3045 err = i915_vma_pin(vma, 0, 0, PIN_USER);
3046 if (err) {
3047 i915_vma_put(vma);
3048 return ERR_PTR(err);
3049 }
3050
3051 cs = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
3052 if (IS_ERR(cs)) {
3053 i915_vma_put(vma);
3054 return ERR_CAST(cs);
3055 }
3056
3057 /* All GPRs are clear for new contexts. We use GPR(0) as a constant */
3058 *cs++ = MI_LOAD_REGISTER_IMM(1);
3059 *cs++ = CS_GPR(engine, 0);
3060 *cs++ = 1;
3061
3062 for (i = 1; i < NUM_GPR; i++) {
3063 u64 addr;
3064
3065 /*
3066 * Perform: GPR[i]++
3067 *
3068 * As we read and write into the context saved GPR[i], if
3069 * we restart this batch buffer from an earlier point, we
3070 * will repeat the increment and store a value > 1.
3071 */
3072 *cs++ = MI_MATH(4);
3073 *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(i));
3074 *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(0));
3075 *cs++ = MI_MATH_ADD;
3076 *cs++ = MI_MATH_STORE(MI_MATH_REG(i), MI_MATH_REG_ACCU);
3077
3078 addr = result->node.start + offset + i * sizeof(*cs);
3079 *cs++ = MI_STORE_REGISTER_MEM_GEN8;
3080 *cs++ = CS_GPR(engine, 2 * i);
3081 *cs++ = lower_32_bits(addr);
3082 *cs++ = upper_32_bits(addr);
3083
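/*
 * Pause until the shared result page reads back at least i; while
 * waiting here the batch may be preempted by preempt_user() and must
 * later resume from this point rather than restart and re-increment.
 */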
3084 *cs++ = MI_SEMAPHORE_WAIT |
3085 MI_SEMAPHORE_POLL |
3086 MI_SEMAPHORE_SAD_GTE_SDD;
3087 *cs++ = i;
3088 *cs++ = lower_32_bits(result->node.start);
3089 *cs++ = upper_32_bits(result->node.start);
3090 }
3091
3092 *cs++ = MI_BATCH_BUFFER_END;
3093 i915_gem_object_flush_map(obj);
3094 i915_gem_object_unpin_map(obj);
3095
3096 return vma;
3097 }
3098
3099 static struct i915_vma *create_global(struct intel_gt *gt, size_t sz)
3100 {
3101 struct drm_i915_gem_object *obj;
3102 struct i915_vma *vma;
3103 int err;
3104
3105 obj = i915_gem_object_create_internal(gt->i915, sz);
3106 if (IS_ERR(obj))
3107 return ERR_CAST(obj);
3108
3109 vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
3110 if (IS_ERR(vma)) {
3111 i915_gem_object_put(obj);
3112 return vma;
3113 }
3114
3115 err = i915_ggtt_pin(vma, NULL, 0, 0);
3116 if (err) {
3117 i915_vma_put(vma);
3118 return ERR_PTR(err);
3119 }
3120
3121 return vma;
3122 }
3123
3124 static struct i915_request *
3125 create_gpr_client(struct intel_engine_cs *engine,
3126 struct i915_vma *global,
3127 unsigned int offset)
3128 {
3129 struct i915_vma *batch, *vma;
3130 struct intel_context *ce;
3131 struct i915_request *rq;
3132 int err;
3133
3134 ce = intel_context_create(engine);
3135 if (IS_ERR(ce))
3136 return ERR_CAST(ce);
3137
3138 vma = i915_vma_instance(global->obj, ce->vm, NULL);
3139 if (IS_ERR(vma)) {
3140 err = PTR_ERR(vma);
3141 goto out_ce;
3142 }
3143
3144 err = i915_vma_pin(vma, 0, 0, PIN_USER);
3145 if (err)
3146 goto out_ce;
3147
3148 batch = create_gpr_user(engine, vma, offset);
3149 if (IS_ERR(batch)) {
3150 err = PTR_ERR(batch);
3151 goto out_vma;
3152 }
3153
3154 rq = intel_context_create_request(ce);
3155 if (IS_ERR(rq)) {
3156 err = PTR_ERR(rq);
3157 goto out_batch;
3158 }
3159
3160 i915_vma_lock(vma);
3161 err = i915_request_await_object(rq, vma->obj, false);
3162 if (!err)
3163 err = i915_vma_move_to_active(vma, rq, 0);
3164 i915_vma_unlock(vma);
3165
3166 i915_vma_lock(batch);
3167 if (!err)
3168 err = i915_request_await_object(rq, batch->obj, false);
3169 if (!err)
3170 err = i915_vma_move_to_active(batch, rq, 0);
3171 if (!err)
3172 err = rq->engine->emit_bb_start(rq,
3173 batch->node.start,
3174 PAGE_SIZE, 0);
3175 i915_vma_unlock(batch);
3176 i915_vma_unpin(batch);
3177
3178 if (!err)
3179 i915_request_get(rq);
3180 i915_request_add(rq);
3181
3182 out_batch:
3183 i915_vma_put(batch);
3184 out_vma:
3185 i915_vma_unpin(vma);
3186 out_ce:
3187 intel_context_put(ce);
3188 return err ? ERR_PTR(err) : rq;
3189 }
3190
3191 static int preempt_user(struct intel_engine_cs *engine,
3192 struct i915_vma *global,
3193 int id)
3194 {
3195 struct i915_sched_attr attr = {
3196 .priority = I915_PRIORITY_MAX
3197 };
3198 struct i915_request *rq;
3199 int err = 0;
3200 u32 *cs;
3201
3202 rq = intel_engine_create_kernel_request(engine);
3203 if (IS_ERR(rq))
3204 return PTR_ERR(rq);
3205
3206 cs = intel_ring_begin(rq, 4);
3207 if (IS_ERR(cs)) {
3208 i915_request_add(rq);
3209 return PTR_ERR(cs);
3210 }
3211
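/*
 * Release the clients' semaphore waits for step 'id' by storing the new
 * value into the first dword of the shared result page.
 */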
3212 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
3213 *cs++ = i915_ggtt_offset(global);
3214 *cs++ = 0;
3215 *cs++ = id;
3216
3217 intel_ring_advance(rq, cs);
3218
3219 i915_request_get(rq);
3220 i915_request_add(rq);
3221
3222 engine->schedule(rq, &attr);
3223
3224 if (i915_request_wait(rq, 0, HZ / 2) < 0)
3225 err = -ETIME;
3226 i915_request_put(rq);
3227
3228 return err;
3229 }
3230
3231 static int live_preempt_user(void *arg)
3232 {
3233 struct intel_gt *gt = arg;
3234 struct intel_engine_cs *engine;
3235 struct i915_vma *global;
3236 enum intel_engine_id id;
3237 u32 *result;
3238 int err = 0;
3239
3240 /*
3241 * In our other tests, we look at preemption in carefully
3242 * controlled conditions in the ringbuffer. Since most of the
3243 * time is spent in user batches, most of our preemptions naturally
3244 * occur there. We want to verify that when we preempt inside a batch
3245 * we continue on from the current instruction and do not roll back
3246 * to the start, or another earlier arbitration point.
3247 *
3248 * To verify this, we create a batch which is a mixture of
3249 * MI_MATH (gpr++) MI_SRM (gpr) and preemption points. Then with
3250 * a few preempting contexts thrown into the mix, we look for any
3251 * repeated instructions (which show up as incorrect values).
3252 */
3253
3254 global = create_global(gt, 4096);
3255 if (IS_ERR(global))
3256 return PTR_ERR(global);
3257
3258 result = i915_gem_object_pin_map_unlocked(global->obj, I915_MAP_WC);
3259 if (IS_ERR(result)) {
3260 i915_vma_unpin_and_release(&global, 0);
3261 return PTR_ERR(result);
3262 }
3263
3264 for_each_engine(engine, gt, id) {
3265 struct i915_request *client[3] = {};
3266 struct igt_live_test t;
3267 int i;
3268
3269 if (!intel_engine_has_preemption(engine))
3270 continue;
3271
3272 if (IS_GEN(gt->i915, 8) && engine->class != RENDER_CLASS)
3273 continue; /* we need per-context GPR */
3274
3275 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
3276 err = -EIO;
3277 break;
3278 }
3279
3280 memset(result, 0, 4096);
3281
3282 for (i = 0; i < ARRAY_SIZE(client); i++) {
3283 struct i915_request *rq;
3284
3285 rq = create_gpr_client(engine, global,
3286 NUM_GPR * i * sizeof(u32));
3287 if (IS_ERR(rq)) {
3288 err = PTR_ERR(rq);
3289 goto end_test;
3290 }
3291
3292 client[i] = rq;
3293 }
3294
3295 /* Continuously preempt the set of 3 running contexts */
3296 for (i = 1; i <= NUM_GPR; i++) {
3297 err = preempt_user(engine, global, i);
3298 if (err)
3299 goto end_test;
3300 }
3301
3302 if (READ_ONCE(result[0]) != NUM_GPR) {
3303 pr_err("%s: Failed to release semaphore\n",
3304 engine->name);
3305 err = -EIO;
3306 goto end_test;
3307 }
3308
3309 for (i = 0; i < ARRAY_SIZE(client); i++) {
3310 int gpr;
3311
3312 if (i915_request_wait(client[i], 0, HZ / 2) < 0) {
3313 err = -ETIME;
3314 goto end_test;
3315 }
3316
3317 for (gpr = 1; gpr < NUM_GPR; gpr++) {
3318 if (result[NUM_GPR * i + gpr] != 1) {
3319 pr_err("%s: Invalid result, client %d, gpr %d, result: %d\n",
3320 engine->name,
3321 i, gpr, result[NUM_GPR * i + gpr]);
3322 err = -EINVAL;
3323 goto end_test;
3324 }
3325 }
3326 }
3327
3328 end_test:
3329 for (i = 0; i < ARRAY_SIZE(client); i++) {
3330 if (!client[i])
3331 break;
3332
3333 i915_request_put(client[i]);
3334 }
3335
3336 /* Flush the semaphores on error */
3337 smp_store_mb(result[0], -1);
3338 if (igt_live_test_end(&t))
3339 err = -EIO;
3340 if (err)
3341 break;
3342 }
3343
3344 i915_vma_unpin_and_release(&global, I915_VMA_RELEASE_MAP);
3345 return err;
3346 }
3347
3348 static int live_preempt_timeout(void *arg)
3349 {
3350 struct intel_gt *gt = arg;
3351 struct i915_gem_context *ctx_hi, *ctx_lo;
3352 struct igt_spinner spin_lo;
3353 struct intel_engine_cs *engine;
3354 enum intel_engine_id id;
3355 int err = -ENOMEM;
3356
3357 /*
3358 * Check that we force preemption to occur by cancelling the previous
3359 * context if it refuses to yield the GPU.
3360 */
3361 if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT))
3362 return 0;
3363
3364 if (!intel_has_reset_engine(gt))
3365 return 0;
3366
3367 if (igt_spinner_init(&spin_lo, gt))
3368 return -ENOMEM;
3369
3370 ctx_hi = kernel_context(gt->i915);
3371 if (!ctx_hi)
3372 goto err_spin_lo;
3373 ctx_hi->sched.priority = I915_CONTEXT_MAX_USER_PRIORITY;
3374
3375 ctx_lo = kernel_context(gt->i915);
3376 if (!ctx_lo)
3377 goto err_ctx_hi;
3378 ctx_lo->sched.priority = I915_CONTEXT_MIN_USER_PRIORITY;
3379
3380 for_each_engine(engine, gt, id) {
3381 unsigned long saved_timeout;
3382 struct i915_request *rq;
3383
3384 if (!intel_engine_has_preemption(engine))
3385 continue;
3386
3387 rq = spinner_create_request(&spin_lo, ctx_lo, engine,
3388 MI_NOOP); /* preemption disabled */
3389 if (IS_ERR(rq)) {
3390 err = PTR_ERR(rq);
3391 goto err_ctx_lo;
3392 }
3393
3394 i915_request_add(rq);
3395 if (!igt_wait_for_spinner(&spin_lo, rq)) {
3396 intel_gt_set_wedged(gt);
3397 err = -EIO;
3398 goto err_ctx_lo;
3399 }
3400
3401 rq = igt_request_alloc(ctx_hi, engine);
3402 if (IS_ERR(rq)) {
3403 igt_spinner_end(&spin_lo);
3404 err = PTR_ERR(rq);
3405 goto err_ctx_lo;
3406 }
3407
3408 /* Flush the previous CS ack before changing timeouts */
3409 while (READ_ONCE(engine->execlists.pending[0]))
3410 cpu_relax();
3411
3412 saved_timeout = engine->props.preempt_timeout_ms;
3413 engine->props.preempt_timeout_ms = 1; /* in ms, -> 1 jiffie */
3414
3415 i915_request_get(rq);
3416 i915_request_add(rq);
3417
3418 intel_engine_flush_submission(engine);
3419 engine->props.preempt_timeout_ms = saved_timeout;
3420
3421 if (i915_request_wait(rq, 0, HZ / 10) < 0) {
3422 intel_gt_set_wedged(gt);
3423 i915_request_put(rq);
3424 err = -ETIME;
3425 goto err_ctx_lo;
3426 }
3427
3428 igt_spinner_end(&spin_lo);
3429 i915_request_put(rq);
3430 }
3431
3432 err = 0;
3433 err_ctx_lo:
3434 kernel_context_close(ctx_lo);
3435 err_ctx_hi:
3436 kernel_context_close(ctx_hi);
3437 err_spin_lo:
3438 igt_spinner_fini(&spin_lo);
3439 return err;
3440 }
3441
3442 static int random_range(struct rnd_state *rnd, int min, int max)
3443 {
3444 return i915_prandom_u32_max_state(max - min, rnd) + min;
3445 }
3446
3447 static int random_priority(struct rnd_state *rnd)
3448 {
3449 return random_range(rnd, I915_PRIORITY_MIN, I915_PRIORITY_MAX);
3450 }
3451
3452 struct preempt_smoke {
3453 struct intel_gt *gt;
3454 struct i915_gem_context **contexts;
3455 struct intel_engine_cs *engine;
3456 struct drm_i915_gem_object *batch;
3457 unsigned int ncontext;
3458 struct rnd_state prng;
3459 unsigned long count;
3460 };
3461
3462 static struct i915_gem_context *smoke_context(struct preempt_smoke *smoke)
3463 {
3464 return smoke->contexts[i915_prandom_u32_max_state(smoke->ncontext,
3465 &smoke->prng)];
3466 }
3467
3468 static int smoke_submit(struct preempt_smoke *smoke,
3469 struct i915_gem_context *ctx, int prio,
3470 struct drm_i915_gem_object *batch)
3471 {
3472 struct i915_request *rq;
3473 struct i915_vma *vma = NULL;
3474 int err = 0;
3475
3476 if (batch) {
3477 struct i915_address_space *vm;
3478
3479 vm = i915_gem_context_get_vm_rcu(ctx);
3480 vma = i915_vma_instance(batch, vm, NULL);
3481 i915_vm_put(vm);
3482 if (IS_ERR(vma))
3483 return PTR_ERR(vma);
3484
3485 err = i915_vma_pin(vma, 0, 0, PIN_USER);
3486 if (err)
3487 return err;
3488 }
3489
3490 ctx->sched.priority = prio;
3491
3492 rq = igt_request_alloc(ctx, smoke->engine);
3493 if (IS_ERR(rq)) {
3494 err = PTR_ERR(rq);
3495 goto unpin;
3496 }
3497
3498 if (vma) {
3499 i915_vma_lock(vma);
3500 err = i915_request_await_object(rq, vma->obj, false);
3501 if (!err)
3502 err = i915_vma_move_to_active(vma, rq, 0);
3503 if (!err)
3504 err = rq->engine->emit_bb_start(rq,
3505 vma->node.start,
3506 PAGE_SIZE, 0);
3507 i915_vma_unlock(vma);
3508 }
3509
3510 i915_request_add(rq);
3511
3512 unpin:
3513 if (vma)
3514 i915_vma_unpin(vma);
3515
3516 return err;
3517 }
3518
3519 static int smoke_crescendo_thread(void *arg)
3520 {
3521 struct preempt_smoke *smoke = arg;
3522 IGT_TIMEOUT(end_time);
3523 unsigned long count;
3524
3525 count = 0;
3526 do {
3527 struct i915_gem_context *ctx = smoke_context(smoke);
3528 int err;
3529
3530 err = smoke_submit(smoke,
3531 ctx, count % I915_PRIORITY_MAX,
3532 smoke->batch);
3533 if (err)
3534 return err;
3535
3536 count++;
3537 } while (count < smoke->ncontext && !__igt_timeout(end_time, NULL));
3538
3539 smoke->count = count;
3540 return 0;
3541 }
3542
3543 static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags)
3544 #define BATCH BIT(0)
3545 {
3546 struct task_struct *tsk[I915_NUM_ENGINES] = {};
3547 struct preempt_smoke arg[I915_NUM_ENGINES];
3548 struct intel_engine_cs *engine;
3549 enum intel_engine_id id;
3550 unsigned long count;
3551 int err = 0;
3552
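/*
 * One submission thread per engine, each submitting random contexts at
 * a steadily climbing priority (modulo I915_PRIORITY_MAX) so that later
 * requests keep preempting the earlier ones.
 */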
3553 for_each_engine(engine, smoke->gt, id) {
3554 arg[id] = *smoke;
3555 arg[id].engine = engine;
3556 if (!(flags & BATCH))
3557 arg[id].batch = NULL;
3558 arg[id].count = 0;
3559
3560 tsk[id] = kthread_run(smoke_crescendo_thread, &arg[id],
3561 "igt/smoke:%d", id);
3562 if (IS_ERR(tsk[id])) {
3563 err = PTR_ERR(tsk[id]);
3564 break;
3565 }
3566 get_task_struct(tsk[id]);
3567 }
3568
3569 yield(); /* start all threads before we kthread_stop() */
3570
3571 count = 0;
3572 for_each_engine(engine, smoke->gt, id) {
3573 int status;
3574
3575 if (IS_ERR_OR_NULL(tsk[id]))
3576 continue;
3577
3578 status = kthread_stop(tsk[id]);
3579 if (status && !err)
3580 err = status;
3581
3582 count += arg[id].count;
3583
3584 put_task_struct(tsk[id]);
3585 }
3586
3587 pr_info("Submitted %lu crescendo:%x requests across %d engines and %d contexts\n",
3588 count, flags, smoke->gt->info.num_engines, smoke->ncontext);
3589 return err;
3590 }
3591
3592 static int smoke_random(struct preempt_smoke *smoke, unsigned int flags)
3593 {
3594 enum intel_engine_id id;
3595 IGT_TIMEOUT(end_time);
3596 unsigned long count;
3597
3598 count = 0;
3599 do {
3600 for_each_engine(smoke->engine, smoke->gt, id) {
3601 struct i915_gem_context *ctx = smoke_context(smoke);
3602 int err;
3603
3604 err = smoke_submit(smoke,
3605 ctx, random_priority(&smoke->prng),
3606 flags & BATCH ? smoke->batch : NULL);
3607 if (err)
3608 return err;
3609
3610 count++;
3611 }
3612 } while (count < smoke->ncontext && !__igt_timeout(end_time, NULL));
3613
3614 pr_info("Submitted %lu random:%x requests across %d engines and %d contexts\n",
3615 count, flags, smoke->gt->info.num_engines, smoke->ncontext);
3616 return 0;
3617 }
3618
3619 static int live_preempt_smoke(void *arg)
3620 {
3621 struct preempt_smoke smoke = {
3622 .gt = arg,
3623 .prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed),
3624 .ncontext = 256,
3625 };
3626 const unsigned int phase[] = { 0, BATCH };
3627 struct igt_live_test t;
3628 int err = -ENOMEM;
3629 u32 *cs;
3630 int n;
3631
3632 smoke.contexts = kmalloc_array(smoke.ncontext,
3633 sizeof(*smoke.contexts),
3634 GFP_KERNEL);
3635 if (!smoke.contexts)
3636 return -ENOMEM;
3637
3638 smoke.batch =
3639 i915_gem_object_create_internal(smoke.gt->i915, PAGE_SIZE);
3640 if (IS_ERR(smoke.batch)) {
3641 err = PTR_ERR(smoke.batch);
3642 goto err_free;
3643 }
3644
3645 cs = i915_gem_object_pin_map_unlocked(smoke.batch, I915_MAP_WB);
3646 if (IS_ERR(cs)) {
3647 err = PTR_ERR(cs);
3648 goto err_batch;
3649 }
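/*
 * Fill the batch with arbitration points so that it may be preempted
 * after any instruction, maximising the preemption opportunities during
 * the smoke test.
 */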
3650 for (n = 0; n < PAGE_SIZE / sizeof(*cs) - 1; n++)
3651 cs[n] = MI_ARB_CHECK;
3652 cs[n] = MI_BATCH_BUFFER_END;
3653 i915_gem_object_flush_map(smoke.batch);
3654 i915_gem_object_unpin_map(smoke.batch);
3655
3656 if (igt_live_test_begin(&t, smoke.gt->i915, __func__, "all")) {
3657 err = -EIO;
3658 goto err_batch;
3659 }
3660
3661 for (n = 0; n < smoke.ncontext; n++) {
3662 smoke.contexts[n] = kernel_context(smoke.gt->i915);
3663 if (!smoke.contexts[n])
3664 goto err_ctx;
3665 }
3666
3667 for (n = 0; n < ARRAY_SIZE(phase); n++) {
3668 err = smoke_crescendo(&smoke, phase[n]);
3669 if (err)
3670 goto err_ctx;
3671
3672 err = smoke_random(&smoke, phase[n]);
3673 if (err)
3674 goto err_ctx;
3675 }
3676
3677 err_ctx:
3678 if (igt_live_test_end(&t))
3679 err = -EIO;
3680
3681 for (n = 0; n < smoke.ncontext; n++) {
3682 if (!smoke.contexts[n])
3683 break;
3684 kernel_context_close(smoke.contexts[n]);
3685 }
3686
3687 err_batch:
3688 i915_gem_object_put(smoke.batch);
3689 err_free:
3690 kfree(smoke.contexts);
3691
3692 return err;
3693 }
3694
3695 static int nop_virtual_engine(struct intel_gt *gt,
3696 struct intel_engine_cs **siblings,
3697 unsigned int nsibling,
3698 unsigned int nctx,
3699 unsigned int flags)
3700 #define CHAIN BIT(0)
3701 {
3702 IGT_TIMEOUT(end_time);
3703 struct i915_request *request[16] = {};
3704 struct intel_context *ve[16];
3705 unsigned long n, prime, nc;
3706 struct igt_live_test t;
3707 ktime_t times[2] = {};
3708 int err;
3709
3710 GEM_BUG_ON(!nctx || nctx > ARRAY_SIZE(ve));
3711
3712 for (n = 0; n < nctx; n++) {
3713 ve[n] = intel_execlists_create_virtual(siblings, nsibling);
3714 if (IS_ERR(ve[n])) {
3715 err = PTR_ERR(ve[n]);
3716 nctx = n;
3717 goto out;
3718 }
3719
3720 err = intel_context_pin(ve[n]);
3721 if (err) {
3722 intel_context_put(ve[n]);
3723 nctx = n;
3724 goto out;
3725 }
3726 }
3727
3728 err = igt_live_test_begin(&t, gt->i915, __func__, ve[0]->engine->name);
3729 if (err)
3730 goto out;
3731
3732 for_each_prime_number_from(prime, 1, 8192) {
3733 times[1] = ktime_get_raw();
3734
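/*
 * With CHAIN we submit all requests for one virtual context before
 * moving on to the next; otherwise we interleave one request per
 * context on each pass.
 */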
3735 if (flags & CHAIN) {
3736 for (nc = 0; nc < nctx; nc++) {
3737 for (n = 0; n < prime; n++) {
3738 struct i915_request *rq;
3739
3740 rq = i915_request_create(ve[nc]);
3741 if (IS_ERR(rq)) {
3742 err = PTR_ERR(rq);
3743 goto out;
3744 }
3745
3746 if (request[nc])
3747 i915_request_put(request[nc]);
3748 request[nc] = i915_request_get(rq);
3749 i915_request_add(rq);
3750 }
3751 }
3752 } else {
3753 for (n = 0; n < prime; n++) {
3754 for (nc = 0; nc < nctx; nc++) {
3755 struct i915_request *rq;
3756
3757 rq = i915_request_create(ve[nc]);
3758 if (IS_ERR(rq)) {
3759 err = PTR_ERR(rq);
3760 goto out;
3761 }
3762
3763 if (request[nc])
3764 i915_request_put(request[nc]);
3765 request[nc] = i915_request_get(rq);
3766 i915_request_add(rq);
3767 }
3768 }
3769 }
3770
3771 for (nc = 0; nc < nctx; nc++) {
3772 if (i915_request_wait(request[nc], 0, HZ / 10) < 0) {
3773 pr_err("%s(%s): wait for %llx:%lld timed out\n",
3774 __func__, ve[0]->engine->name,
3775 request[nc]->fence.context,
3776 request[nc]->fence.seqno);
3777
3778 GEM_TRACE("%s(%s) failed at request %llx:%lld\n",
3779 __func__, ve[0]->engine->name,
3780 request[nc]->fence.context,
3781 request[nc]->fence.seqno);
3782 GEM_TRACE_DUMP();
3783 intel_gt_set_wedged(gt);
3784 break;
3785 }
3786 }
3787
3788 times[1] = ktime_sub(ktime_get_raw(), times[1]);
3789 if (prime == 1)
3790 times[0] = times[1];
3791
3792 for (nc = 0; nc < nctx; nc++) {
3793 i915_request_put(request[nc]);
3794 request[nc] = NULL;
3795 }
3796
3797 if (__igt_timeout(end_time, NULL))
3798 break;
3799 }
3800
3801 err = igt_live_test_end(&t);
3802 if (err)
3803 goto out;
3804
3805 pr_info("Requestx%d latencies on %s: 1 = %lluns, %lu = %lluns\n",
3806 nctx, ve[0]->engine->name, ktime_to_ns(times[0]),
3807 prime, div64_u64(ktime_to_ns(times[1]), prime));
3808
3809 out:
3810 if (igt_flush_test(gt->i915))
3811 err = -EIO;
3812
3813 for (nc = 0; nc < nctx; nc++) {
3814 i915_request_put(request[nc]);
3815 intel_context_unpin(ve[nc]);
3816 intel_context_put(ve[nc]);
3817 }
3818 return err;
3819 }
3820
3821 static unsigned int
3822 __select_siblings(struct intel_gt *gt,
3823 unsigned int class,
3824 struct intel_engine_cs **siblings,
3825 bool (*filter)(const struct intel_engine_cs *))
3826 {
3827 unsigned int n = 0;
3828 unsigned int inst;
3829
3830 for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
3831 if (!gt->engine_class[class][inst])
3832 continue;
3833
3834 if (filter && !filter(gt->engine_class[class][inst]))
3835 continue;
3836
3837 siblings[n++] = gt->engine_class[class][inst];
3838 }
3839
3840 return n;
3841 }
3842
3843 static unsigned int
3844 select_siblings(struct intel_gt *gt,
3845 unsigned int class,
3846 struct intel_engine_cs **siblings)
3847 {
3848 return __select_siblings(gt, class, siblings, NULL);
3849 }
3850
3851 static int live_virtual_engine(void *arg)
3852 {
3853 struct intel_gt *gt = arg;
3854 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
3855 struct intel_engine_cs *engine;
3856 enum intel_engine_id id;
3857 unsigned int class;
3858 int err;
3859
3860 if (intel_uc_uses_guc_submission(&gt->uc))
3861 return 0;
3862
3863 for_each_engine(engine, gt, id) {
3864 err = nop_virtual_engine(gt, &engine, 1, 1, 0);
3865 if (err) {
3866 pr_err("Failed to wrap engine %s: err=%d\n",
3867 engine->name, err);
3868 return err;
3869 }
3870 }
3871
3872 for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
3873 int nsibling, n;
3874
3875 nsibling = select_siblings(gt, class, siblings);
3876 if (nsibling < 2)
3877 continue;
3878
3879 for (n = 1; n <= nsibling + 1; n++) {
3880 err = nop_virtual_engine(gt, siblings, nsibling,
3881 n, 0);
3882 if (err)
3883 return err;
3884 }
3885
3886 err = nop_virtual_engine(gt, siblings, nsibling, n, CHAIN);
3887 if (err)
3888 return err;
3889 }
3890
3891 return 0;
3892 }
3893
3894 static int mask_virtual_engine(struct intel_gt *gt,
3895 struct intel_engine_cs **siblings,
3896 unsigned int nsibling)
3897 {
3898 struct i915_request *request[MAX_ENGINE_INSTANCE + 1];
3899 struct intel_context *ve;
3900 struct igt_live_test t;
3901 unsigned int n;
3902 int err;
3903
3904 /*
3905 * Check that by setting the execution mask on a request, we can
3906 * restrict it to our desired engine within the virtual engine.
3907 */
3908
3909 ve = intel_execlists_create_virtual(siblings, nsibling);
3910 if (IS_ERR(ve)) {
3911 err = PTR_ERR(ve);
3912 goto out_close;
3913 }
3914
3915 err = intel_context_pin(ve);
3916 if (err)
3917 goto out_put;
3918
3919 err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name);
3920 if (err)
3921 goto out_unpin;
3922
3923 for (n = 0; n < nsibling; n++) {
3924 request[n] = i915_request_create(ve);
3925 if (IS_ERR(request[n])) {
3926 err = PTR_ERR(request[n]);
3927 nsibling = n;
3928 goto out;
3929 }
3930
3931 /* Reverse order as it's more likely to be unnatural */
3932 request[n]->execution_mask = siblings[nsibling - n - 1]->mask;
3933
3934 i915_request_get(request[n]);
3935 i915_request_add(request[n]);
3936 }
3937
3938 for (n = 0; n < nsibling; n++) {
3939 if (i915_request_wait(request[n], 0, HZ / 10) < 0) {
3940 pr_err("%s(%s): wait for %llx:%lld timed out\n",
3941 __func__, ve->engine->name,
3942 request[n]->fence.context,
3943 request[n]->fence.seqno);
3944
3945 GEM_TRACE("%s(%s) failed at request %llx:%lld\n",
3946 __func__, ve->engine->name,
3947 request[n]->fence.context,
3948 request[n]->fence.seqno);
3949 GEM_TRACE_DUMP();
3950 intel_gt_set_wedged(gt);
3951 err = -EIO;
3952 goto out;
3953 }
3954
3955 if (request[n]->engine != siblings[nsibling - n - 1]) {
3956 pr_err("Executed on wrong sibling '%s', expected '%s'\n",
3957 request[n]->engine->name,
3958 siblings[nsibling - n - 1]->name);
3959 err = -EINVAL;
3960 goto out;
3961 }
3962 }
3963
3964 err = igt_live_test_end(&t);
3965 out:
3966 if (igt_flush_test(gt->i915))
3967 err = -EIO;
3968
3969 for (n = 0; n < nsibling; n++)
3970 i915_request_put(request[n]);
3971
3972 out_unpin:
3973 intel_context_unpin(ve);
3974 out_put:
3975 intel_context_put(ve);
3976 out_close:
3977 return err;
3978 }
3979
3980 static int live_virtual_mask(void *arg)
3981 {
3982 struct intel_gt *gt = arg;
3983 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
3984 unsigned int class;
3985 int err;
3986
3987 if (intel_uc_uses_guc_submission(&gt->uc))
3988 return 0;
3989
3990 for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
3991 unsigned int nsibling;
3992
3993 nsibling = select_siblings(gt, class, siblings);
3994 if (nsibling < 2)
3995 continue;
3996
3997 err = mask_virtual_engine(gt, siblings, nsibling);
3998 if (err)
3999 return err;
4000 }
4001
4002 return 0;
4003 }
4004
4005 static int slicein_virtual_engine(struct intel_gt *gt,
4006 struct intel_engine_cs **siblings,
4007 unsigned int nsibling)
4008 {
4009 const long timeout = slice_timeout(siblings[0]);
4010 struct intel_context *ce;
4011 struct i915_request *rq;
4012 struct igt_spinner spin;
4013 unsigned int n;
4014 int err = 0;
4015
4016 /*
4017 * Virtual requests must take part in timeslicing on the target engines.
4018 */
4019
4020 if (igt_spinner_init(&spin, gt))
4021 return -ENOMEM;
4022
4023 for (n = 0; n < nsibling; n++) {
4024 ce = intel_context_create(siblings[n]);
4025 if (IS_ERR(ce)) {
4026 err = PTR_ERR(ce);
4027 goto out;
4028 }
4029
4030 rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
4031 intel_context_put(ce);
4032 if (IS_ERR(rq)) {
4033 err = PTR_ERR(rq);
4034 goto out;
4035 }
4036
4037 i915_request_add(rq);
4038 }
4039
4040 ce = intel_execlists_create_virtual(siblings, nsibling);
4041 if (IS_ERR(ce)) {
4042 err = PTR_ERR(ce);
4043 goto out;
4044 }
4045
4046 rq = intel_context_create_request(ce);
4047 intel_context_put(ce);
4048 if (IS_ERR(rq)) {
4049 err = PTR_ERR(rq);
4050 goto out;
4051 }
4052
4053 i915_request_get(rq);
4054 i915_request_add(rq);
4055 if (i915_request_wait(rq, 0, timeout) < 0) {
4056 GEM_TRACE_ERR("%s(%s) failed to slice in virtual request\n",
4057 __func__, rq->engine->name);
4058 GEM_TRACE_DUMP();
4059 intel_gt_set_wedged(gt);
4060 err = -EIO;
4061 }
4062 i915_request_put(rq);
4063
4064 out:
4065 igt_spinner_end(&spin);
4066 if (igt_flush_test(gt->i915))
4067 err = -EIO;
4068 igt_spinner_fini(&spin);
4069 return err;
4070 }
4071
4072 static int sliceout_virtual_engine(struct intel_gt *gt,
4073 struct intel_engine_cs **siblings,
4074 unsigned int nsibling)
4075 {
4076 const long timeout = slice_timeout(siblings[0]);
4077 struct intel_context *ce;
4078 struct i915_request *rq;
4079 struct igt_spinner spin;
4080 unsigned int n;
4081 int err = 0;
4082
4083 /*
4084 * Virtual requests must allow others a fair timeslice.
4085 */
4086
4087 if (igt_spinner_init(&spin, gt))
4088 return -ENOMEM;
4089
4090 /* XXX We do not handle oversubscription and fairness with normal rq */
4091 for (n = 0; n < nsibling; n++) {
4092 ce = intel_execlists_create_virtual(siblings, nsibling);
4093 if (IS_ERR(ce)) {
4094 err = PTR_ERR(ce);
4095 goto out;
4096 }
4097
4098 rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
4099 intel_context_put(ce);
4100 if (IS_ERR(rq)) {
4101 err = PTR_ERR(rq);
4102 goto out;
4103 }
4104
4105 i915_request_add(rq);
4106 }
4107
4108 for (n = 0; !err && n < nsibling; n++) {
4109 ce = intel_context_create(siblings[n]);
4110 if (IS_ERR(ce)) {
4111 err = PTR_ERR(ce);
4112 goto out;
4113 }
4114
4115 rq = intel_context_create_request(ce);
4116 intel_context_put(ce);
4117 if (IS_ERR(rq)) {
4118 err = PTR_ERR(rq);
4119 goto out;
4120 }
4121
4122 i915_request_get(rq);
4123 i915_request_add(rq);
4124 if (i915_request_wait(rq, 0, timeout) < 0) {
4125 GEM_TRACE_ERR("%s(%s) failed to slice out virtual request\n",
4126 __func__, siblings[n]->name);
4127 GEM_TRACE_DUMP();
4128 intel_gt_set_wedged(gt);
4129 err = -EIO;
4130 }
4131 i915_request_put(rq);
4132 }
4133
4134 out:
4135 igt_spinner_end(&spin);
4136 if (igt_flush_test(gt->i915))
4137 err = -EIO;
4138 igt_spinner_fini(&spin);
4139 return err;
4140 }
4141
4142 static int live_virtual_slice(void *arg)
4143 {
4144 struct intel_gt *gt = arg;
4145 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
4146 unsigned int class;
4147 int err;
4148
4149 if (intel_uc_uses_guc_submission(&gt->uc))
4150 return 0;
4151
4152 for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
4153 unsigned int nsibling;
4154
4155 nsibling = __select_siblings(gt, class, siblings,
4156 intel_engine_has_timeslices);
4157 if (nsibling < 2)
4158 continue;
4159
4160 err = slicein_virtual_engine(gt, siblings, nsibling);
4161 if (err)
4162 return err;
4163
4164 err = sliceout_virtual_engine(gt, siblings, nsibling);
4165 if (err)
4166 return err;
4167 }
4168
4169 return 0;
4170 }
4171
4172 static int preserved_virtual_engine(struct intel_gt *gt,
4173 struct intel_engine_cs **siblings,
4174 unsigned int nsibling)
4175 {
4176 struct i915_request *last = NULL;
4177 struct intel_context *ve;
4178 struct i915_vma *scratch;
4179 struct igt_live_test t;
4180 unsigned int n;
4181 int err = 0;
4182 u32 *cs;
4183
4184 scratch =
4185 __vm_create_scratch_for_read_pinned(&siblings[0]->gt->ggtt->vm,
4186 PAGE_SIZE);
4187 if (IS_ERR(scratch))
4188 return PTR_ERR(scratch);
4189
4190 err = i915_vma_sync(scratch);
4191 if (err)
4192 goto out_scratch;
4193
4194 ve = intel_execlists_create_virtual(siblings, nsibling);
4195 if (IS_ERR(ve)) {
4196 err = PTR_ERR(ve);
4197 goto out_scratch;
4198 }
4199
4200 err = intel_context_pin(ve);
4201 if (err)
4202 goto out_put;
4203
4204 err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name);
4205 if (err)
4206 goto out_unpin;
4207
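/*
 * Each request is pinned to a different sibling: it reads back the GPR
 * primed by the previous request (expected to hold n) into the scratch
 * page and then loads n + 1 into the next GPR. If the GPRs survive
 * migration between engines, scratch[n] == n for every slot.
 */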
4208 for (n = 0; n < NUM_GPR_DW; n++) {
4209 struct intel_engine_cs *engine = siblings[n % nsibling];
4210 struct i915_request *rq;
4211
4212 rq = i915_request_create(ve);
4213 if (IS_ERR(rq)) {
4214 err = PTR_ERR(rq);
4215 goto out_end;
4216 }
4217
4218 i915_request_put(last);
4219 last = i915_request_get(rq);
4220
4221 cs = intel_ring_begin(rq, 8);
4222 if (IS_ERR(cs)) {
4223 i915_request_add(rq);
4224 err = PTR_ERR(cs);
4225 goto out_end;
4226 }
4227
4228 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
4229 *cs++ = CS_GPR(engine, n);
4230 *cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32);
4231 *cs++ = 0;
4232
4233 *cs++ = MI_LOAD_REGISTER_IMM(1);
4234 *cs++ = CS_GPR(engine, (n + 1) % NUM_GPR_DW);
4235 *cs++ = n + 1;
4236
4237 *cs++ = MI_NOOP;
4238 intel_ring_advance(rq, cs);
4239
4240 /* Restrict this request to run on a particular engine */
4241 rq->execution_mask = engine->mask;
4242 i915_request_add(rq);
4243 }
4244
4245 if (i915_request_wait(last, 0, HZ / 5) < 0) {
4246 err = -ETIME;
4247 goto out_end;
4248 }
4249
4250 cs = i915_gem_object_pin_map_unlocked(scratch->obj, I915_MAP_WB);
4251 if (IS_ERR(cs)) {
4252 err = PTR_ERR(cs);
4253 goto out_end;
4254 }
4255
4256 for (n = 0; n < NUM_GPR_DW; n++) {
4257 if (cs[n] != n) {
4258 pr_err("Incorrect value[%d] found for GPR[%d]\n",
4259 cs[n], n);
4260 err = -EINVAL;
4261 break;
4262 }
4263 }
4264
4265 i915_gem_object_unpin_map(scratch->obj);
4266
4267 out_end:
4268 if (igt_live_test_end(&t))
4269 err = -EIO;
4270 i915_request_put(last);
4271 out_unpin:
4272 intel_context_unpin(ve);
4273 out_put:
4274 intel_context_put(ve);
4275 out_scratch:
4276 i915_vma_unpin_and_release(&scratch, 0);
4277 return err;
4278 }
4279
4280 static int live_virtual_preserved(void *arg)
4281 {
4282 struct intel_gt *gt = arg;
4283 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
4284 unsigned int class;
4285
4286 /*
4287 * Check that the context image retains non-privileged (user) registers
4288 * from one engine to the next. For this we check that the CS_GPR
4289 * are preserved.
4290 */
4291
4292 if (intel_uc_uses_guc_submission(&gt->uc))
4293 return 0;
4294
4295 /* As we use CS_GPR we cannot run before they existed on all engines. */
4296 if (INTEL_GEN(gt->i915) < 9)
4297 return 0;
4298
4299 for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
4300 int nsibling, err;
4301
4302 nsibling = select_siblings(gt, class, siblings);
4303 if (nsibling < 2)
4304 continue;
4305
4306 err = preserved_virtual_engine(gt, siblings, nsibling);
4307 if (err)
4308 return err;
4309 }
4310
4311 return 0;
4312 }
4313
4314 static int bond_virtual_engine(struct intel_gt *gt,
4315 unsigned int class,
4316 struct intel_engine_cs **siblings,
4317 unsigned int nsibling,
4318 unsigned int flags)
4319 #define BOND_SCHEDULE BIT(0)
4320 {
4321 struct intel_engine_cs *master;
4322 struct i915_request *rq[16];
4323 enum intel_engine_id id;
4324 struct igt_spinner spin;
4325 unsigned long n;
4326 int err;
4327
4328 /*
4329 * A set of bonded requests is intended to be run concurrently
4330 * across a number of engines. We use one request per-engine
4331 * and a magic fence to schedule each of the bonded requests
4332 * at the same time. A consequence of our current scheduler is that
4333 * we only move requests to the HW ready queue when the request
4334 * becomes ready, that is when all of its prerequisite fences have
4335 * been signaled. As one of those fences is the master submit fence,
4336 * there is a delay on all secondary fences as the HW may be
4337 * currently busy. Equally, as all the requests are independent,
4338 * they may have other fences that delay individual request
4339 * submission to HW. Ergo, we do not guarantee that all requests are
4340 * immediately submitted to HW at the same time, just that if the
4341 * rules are abided by, they are ready at the same time as the
4342 * first is submitted. Userspace can embed semaphores in its batch
4343 * to ensure parallel execution of its phases as it requires.
4344 * Though naturally it gets requested that perhaps the scheduler should
4345 * take care of parallel execution, even across preemption events on
4346 * different HW. (The proper answer is of course "lalalala".)
4347 *
4348 * With the submit-fence, we have identified three possible phases
4349 * of synchronisation depending on the master fence: queued (not
4350 * ready), executing, and signaled. The first two are quite simple
4351 * and checked below. However, the signaled master fence handling is
4352 * contentious. Currently we do not distinguish between a signaled
4353 * fence and an expired fence, as once signaled it does not convey
4354 * any information about the previous execution. It may even be freed
4355 * and hence checking later it may not exist at all. Ergo we currently
4356 * do not apply the bonding constraint for an already signaled fence,
4357 * as our expectation is that it should not constrain the secondaries
4358 * and is outside of the scope of the bonded request API (i.e. all
4359 * userspace requests are meant to be running in parallel). As
4360 * it imposes no constraint, and is effectively a no-op, we do not
4361 * check below as normal execution flows are checked extensively above.
4362 *
4363 * XXX Is the degenerate handling of signaled submit fences the
4364 * expected behaviour for userpace?
4365 */
4366
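	/*
	 * In the loop below, @flags selects which of the first two phases is
	 * exercised: with no flags the master is already spinning on the HW
	 * ("executing") when the bonded requests are constructed; with
	 * BOND_SCHEDULE the master is held back by an onstack fence and is
	 * still "queued" until all of the bonds have been set up.
	 */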
	GEM_BUG_ON(nsibling >= ARRAY_SIZE(rq) - 1);

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	err = 0;
	rq[0] = ERR_PTR(-ENOMEM);
	for_each_engine(master, gt, id) {
		struct i915_sw_fence fence = {};
		struct intel_context *ce;

		if (master->class == class)
			continue;

		ce = intel_context_create(master);
		if (IS_ERR(ce)) {
			err = PTR_ERR(ce);
			goto out;
		}

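		/*
		 * Prime the request array with error pointers; the cleanup
		 * loops (here and at out:) walk rq[] until they hit the first
		 * error pointer, so only requests we actually created are put.
		 */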
		memset_p((void *)rq, ERR_PTR(-EINVAL), ARRAY_SIZE(rq));

		rq[0] = igt_spinner_create_request(&spin, ce, MI_NOOP);
		intel_context_put(ce);
		if (IS_ERR(rq[0])) {
			err = PTR_ERR(rq[0]);
			goto out;
		}
		i915_request_get(rq[0]);

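		/*
		 * For BOND_SCHEDULE, block the master's submission on an
		 * onstack fence so that it is still queued (not yet ready)
		 * while the bonded requests are created and submitted; the
		 * fence is only released once all the bonds are in place.
		 */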
		if (flags & BOND_SCHEDULE) {
			onstack_fence_init(&fence);
			err = i915_sw_fence_await_sw_fence_gfp(&rq[0]->submit,
							       &fence,
							       GFP_KERNEL);
		}

		i915_request_add(rq[0]);
		if (err < 0)
			goto out;

		if (!(flags & BOND_SCHEDULE) &&
		    !igt_wait_for_spinner(&spin, rq[0])) {
			err = -EIO;
			goto out;
		}

		for (n = 0; n < nsibling; n++) {
			struct intel_context *ve;

			ve = intel_execlists_create_virtual(siblings, nsibling);
			if (IS_ERR(ve)) {
				err = PTR_ERR(ve);
				onstack_fence_fini(&fence);
				goto out;
			}

			err = intel_virtual_engine_attach_bond(ve->engine,
							       master,
							       siblings[n]);
			if (err) {
				intel_context_put(ve);
				onstack_fence_fini(&fence);
				goto out;
			}

			err = intel_context_pin(ve);
			intel_context_put(ve);
			if (err) {
				onstack_fence_fini(&fence);
				goto out;
			}

			rq[n + 1] = i915_request_create(ve);
			intel_context_unpin(ve);
			if (IS_ERR(rq[n + 1])) {
				err = PTR_ERR(rq[n + 1]);
				onstack_fence_fini(&fence);
				goto out;
			}
			i915_request_get(rq[n + 1]);

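			/*
			 * The submit-fence await pairs with the bond attached
			 * above: when the master begins execution, the
			 * bond_execute hook narrows this virtual request to
			 * the sibling bonded to the master's physical engine.
			 */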
			err = i915_request_await_execution(rq[n + 1],
							   &rq[0]->fence,
							   ve->engine->bond_execute);
			i915_request_add(rq[n + 1]);
			if (err < 0) {
				onstack_fence_fini(&fence);
				goto out;
			}
		}
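		/*
		 * All bonds are set up; release the onstack fence so the
		 * master may be submitted (if BOND_SCHEDULE was holding it
		 * back), which in turn lets the bonded requests run.
		 */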
		onstack_fence_fini(&fence);
		intel_engine_flush_submission(master);
		igt_spinner_end(&spin);

		if (i915_request_wait(rq[0], 0, HZ / 10) < 0) {
			pr_err("Master request did not execute (on %s)!\n",
			       rq[0]->engine->name);
			err = -EIO;
			goto out;
		}

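		/*
		 * Every bonded request must have executed on the specific
		 * sibling it was bonded to; landing on any other sibling
		 * means the bond constraint was not honoured.
		 */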
		for (n = 0; n < nsibling; n++) {
			if (i915_request_wait(rq[n + 1], 0,
					      MAX_SCHEDULE_TIMEOUT) < 0) {
				err = -EIO;
				goto out;
			}

			if (rq[n + 1]->engine != siblings[n]) {
				pr_err("Bonded request did not execute on target engine: expected %s, used %s; master was %s\n",
				       siblings[n]->name,
				       rq[n + 1]->engine->name,
				       rq[0]->engine->name);
				err = -EINVAL;
				goto out;
			}
		}

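		/*
		 * Drop the references taken this iteration and mark rq[0] as
		 * an error pointer again so that the cleanup at out: stops
		 * immediately instead of putting stale requests.
		 */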
		for (n = 0; !IS_ERR(rq[n]); n++)
			i915_request_put(rq[n]);
		rq[0] = ERR_PTR(-ENOMEM);
	}

out:
	for (n = 0; !IS_ERR(rq[n]); n++)
		i915_request_put(rq[n]);
	if (igt_flush_test(gt->i915))
		err = -EIO;

	igt_spinner_fini(&spin);
	return err;
}

static int live_virtual_bond(void *arg)
{
	static const struct phase {
		const char *name;
		unsigned int flags;
	} phases[] = {
		{ "", 0 },
		{ "schedule", BOND_SCHEDULE },
		{ },
	};
	struct intel_gt *gt = arg;
	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
	unsigned int class;
	int err;

	if (intel_uc_uses_guc_submission(&gt->uc))
		return 0;

	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
		const struct phase *p;
		int nsibling;

		nsibling = select_siblings(gt, class, siblings);
		if (nsibling < 2)
			continue;

		for (p = phases; p->name; p++) {
			err = bond_virtual_engine(gt,
						  class, siblings, nsibling,
						  p->flags);
			if (err) {
				pr_err("%s(%s): failed class=%d, nsibling=%d, err=%d\n",
				       __func__, p->name, class, nsibling, err);
				return err;
			}
		}
	}

	return 0;
}

static int reset_virtual_engine(struct intel_gt *gt,
				struct intel_engine_cs **siblings,
				unsigned int nsibling)
{
	struct intel_engine_cs *engine;
	struct intel_context *ve;
	struct igt_spinner spin;
	struct i915_request *rq;
	unsigned int n;
	int err = 0;

	/*
	 * In order to support offline error capture for fast preempt reset,
	 * we need to decouple the guilty request and ensure that it and its
	 * descendants are not executed while the capture is in progress.
	 */

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	ve = intel_execlists_create_virtual(siblings, nsibling);
	if (IS_ERR(ve)) {
		err = PTR_ERR(ve);
		goto out_spin;
	}

	for (n = 0; n < nsibling; n++)
		st_engine_heartbeat_disable(siblings[n]);

	rq = igt_spinner_create_request(&spin, ve, MI_ARB_CHECK);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto out_heartbeat;
	}
	i915_request_add(rq);

	if (!igt_wait_for_spinner(&spin, rq)) {
		intel_gt_set_wedged(gt);
		err = -ETIME;
		goto out_heartbeat;
	}

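	/*
	 * The spinner is now running, so the virtual request has been
	 * assigned to one of the physical siblings; that engine, not the
	 * virtual frontend, is the one we reset.
	 */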
	engine = rq->engine;
	GEM_BUG_ON(engine == ve->engine);

	/* Take ownership of the reset and tasklet */
	local_bh_disable();
	if (test_and_set_bit(I915_RESET_ENGINE + engine->id,
			     &gt->reset.flags)) {
		local_bh_enable();
		intel_gt_set_wedged(gt);
		err = -EBUSY;
		goto out_heartbeat;
	}
	tasklet_disable(&engine->execlists.tasklet);

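	/*
	 * With the tasklet disabled, run the submission tasklet by hand to
	 * process any pending execlists events while we have exclusive
	 * control; the spinner must still be the active request.
	 */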
	engine->execlists.tasklet.callback(&engine->execlists.tasklet);
	GEM_BUG_ON(execlists_active(&engine->execlists) != rq);

	/* Fake a preemption event; failed of course */
	spin_lock_irq(&engine->active.lock);
	__unwind_incomplete_requests(engine);
	spin_unlock_irq(&engine->active.lock);
	GEM_BUG_ON(rq->engine != engine);

	/* Reset the engine while keeping our active request on hold */
	execlists_hold(engine, rq);
	GEM_BUG_ON(!i915_request_on_hold(rq));

	__intel_engine_reset_bh(engine, NULL);
	GEM_BUG_ON(rq->fence.error != -EIO);

	/* Release our grasp on the engine, letting CS flow again */
	tasklet_enable(&engine->execlists.tasklet);
	clear_and_wake_up_bit(I915_RESET_ENGINE + engine->id, &gt->reset.flags);
	local_bh_enable();

	/* Check that we do not resubmit the held request */
	i915_request_get(rq);
	if (!i915_request_wait(rq, 0, HZ / 5)) {
		pr_err("%s: on hold request completed!\n",
		       engine->name);
		intel_gt_set_wedged(gt);
		err = -EIO;
		goto out_rq;
	}
	GEM_BUG_ON(!i915_request_on_hold(rq));

	/* But is resubmitted on release */
	execlists_unhold(engine, rq);
	if (i915_request_wait(rq, 0, HZ / 5) < 0) {
		pr_err("%s: held request did not complete!\n",
		       engine->name);
		intel_gt_set_wedged(gt);
		err = -ETIME;
	}

out_rq:
	i915_request_put(rq);
out_heartbeat:
	for (n = 0; n < nsibling; n++)
		st_engine_heartbeat_enable(siblings[n]);

	intel_context_put(ve);
out_spin:
	igt_spinner_fini(&spin);
	return err;
}

static int live_virtual_reset(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
	unsigned int class;

	/*
	 * Check that we handle a reset event within a virtual engine.
	 * Only the physical engine is reset, but we have to check the flow
	 * of the virtual requests around the reset, and make sure it is not
	 * forgotten.
	 */

	if (intel_uc_uses_guc_submission(&gt->uc))
		return 0;

	if (!intel_has_reset_engine(gt))
		return 0;

	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
		int nsibling, err;

		nsibling = select_siblings(gt, class, siblings);
		if (nsibling < 2)
			continue;

		err = reset_virtual_engine(gt, siblings, nsibling);
		if (err)
			return err;
	}

	return 0;
}

int intel_execlists_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(live_sanitycheck),
		SUBTEST(live_unlite_switch),
		SUBTEST(live_unlite_preempt),
		SUBTEST(live_unlite_ring),
		SUBTEST(live_pin_rewind),
		SUBTEST(live_hold_reset),
		SUBTEST(live_error_interrupt),
		SUBTEST(live_timeslice_preempt),
		SUBTEST(live_timeslice_rewind),
		SUBTEST(live_timeslice_queue),
		SUBTEST(live_timeslice_nopreempt),
		SUBTEST(live_busywait_preempt),
		SUBTEST(live_preempt),
		SUBTEST(live_late_preempt),
		SUBTEST(live_nopreempt),
		SUBTEST(live_preempt_cancel),
		SUBTEST(live_suppress_self_preempt),
		SUBTEST(live_chain_preempt),
		SUBTEST(live_preempt_ring),
		SUBTEST(live_preempt_gang),
		SUBTEST(live_preempt_timeout),
		SUBTEST(live_preempt_user),
		SUBTEST(live_preempt_smoke),
		SUBTEST(live_virtual_engine),
		SUBTEST(live_virtual_mask),
		SUBTEST(live_virtual_preserved),
		SUBTEST(live_virtual_slice),
		SUBTEST(live_virtual_bond),
		SUBTEST(live_virtual_reset),
	};

	if (!HAS_EXECLISTS(i915))
		return 0;

	if (intel_gt_is_wedged(&i915->gt))
		return 0;

	return intel_gt_live_subtests(tests, &i915->gt);
}