1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2018 Intel Corporation
4  */
5 
6 #include <linux/prime_numbers.h>
7 
8 #include "gem/i915_gem_internal.h"
9 
10 #include "i915_selftest.h"
11 #include "intel_engine_heartbeat.h"
12 #include "intel_engine_pm.h"
13 #include "intel_reset.h"
14 #include "intel_ring.h"
15 #include "selftest_engine_heartbeat.h"
16 #include "selftests/i915_random.h"
17 #include "selftests/igt_flush_test.h"
18 #include "selftests/igt_live_test.h"
19 #include "selftests/igt_spinner.h"
20 #include "selftests/lib_sw_fence.h"
21 #include "shmem_utils.h"
22 
23 #include "gem/selftests/igt_gem_utils.h"
24 #include "gem/selftests/mock_context.h"
25 
26 #define CS_GPR(engine, n) ((engine)->mmio_base + 0x600 + (n) * 4)
27 #define NUM_GPR 16
28 #define NUM_GPR_DW (NUM_GPR * 2) /* each GPR is 2 dwords */
29 
30 #define LRI_HEADER MI_INSTR(0x22, 0)
31 #define LRI_LENGTH_MASK GENMASK(7, 0)
32 
33 static struct i915_vma *create_scratch(struct intel_gt *gt)
34 {
35 	return __vm_create_scratch_for_read_pinned(&gt->ggtt->vm, PAGE_SIZE);
36 }
37 
38 static bool is_active(struct i915_request *rq)
39 {
40 	if (i915_request_is_active(rq))
41 		return true;
42 
43 	if (i915_request_on_hold(rq))
44 		return true;
45 
46 	if (i915_request_has_initial_breadcrumb(rq) && i915_request_started(rq))
47 		return true;
48 
49 	return false;
50 }
51 
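/*
 * Poll until the HW has acknowledged submission of @rq (the request is
 * already complete, or it is active on the engine with no ELSP write still
 * pending), or until @timeout jiffies have elapsed.
 */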
52 static int wait_for_submit(struct intel_engine_cs *engine,
53 			   struct i915_request *rq,
54 			   unsigned long timeout)
55 {
56 	/* Ignore our own attempts to suppress excess tasklets */
57 	tasklet_hi_schedule(&engine->sched_engine->tasklet);
58 
59 	timeout += jiffies;
60 	do {
61 		bool done = time_after(jiffies, timeout);
62 
63 		if (i915_request_completed(rq)) /* that was quick! */
64 			return 0;
65 
66 		/* Wait until the HW has acknowledged the submission (or err) */
67 		intel_engine_flush_submission(engine);
68 		if (!READ_ONCE(engine->execlists.pending[0]) && is_active(rq))
69 			return 0;
70 
71 		if (done)
72 			return -ETIME;
73 
74 		cond_resched();
75 	} while (1);
76 }
77 
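/*
 * Submit a barrier-priority request on @ce that writes 1 into @slot of the
 * engine's status page, releasing any MI_SEMAPHORE_WAIT polling on that dword.
 */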
78 static int emit_semaphore_signal(struct intel_context *ce, void *slot)
79 {
80 	const u32 offset =
81 		i915_ggtt_offset(ce->engine->status_page.vma) +
82 		offset_in_page(slot);
83 	struct i915_request *rq;
84 	u32 *cs;
85 
86 	rq = intel_context_create_request(ce);
87 	if (IS_ERR(rq))
88 		return PTR_ERR(rq);
89 
90 	cs = intel_ring_begin(rq, 4);
91 	if (IS_ERR(cs)) {
92 		i915_request_add(rq);
93 		return PTR_ERR(cs);
94 	}
95 
96 	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
97 	*cs++ = offset;
98 	*cs++ = 0;
99 	*cs++ = 1;
100 
101 	intel_ring_advance(rq, cs);
102 
103 	rq->sched.attr.priority = I915_PRIORITY_BARRIER;
104 	i915_request_add(rq);
105 	return 0;
106 }
107 
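/*
 * Queue a kernel-context request behind the last request on @ce's timeline
 * and wait for it, so that @ce has been switched out and its context image
 * written back to memory before we inspect it.
 */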
108 static int context_flush(struct intel_context *ce, long timeout)
109 {
110 	struct i915_request *rq;
111 	struct dma_fence *fence;
112 	int err = 0;
113 
114 	rq = intel_engine_create_kernel_request(ce->engine);
115 	if (IS_ERR(rq))
116 		return PTR_ERR(rq);
117 
118 	fence = i915_active_fence_get(&ce->timeline->last_request);
119 	if (fence) {
120 		i915_request_await_dma_fence(rq, fence);
121 		dma_fence_put(fence);
122 	}
123 
124 	rq = i915_request_get(rq);
125 	i915_request_add(rq);
126 	if (i915_request_wait(rq, 0, timeout) < 0)
127 		err = -ETIME;
128 	i915_request_put(rq);
129 
130 	rmb(); /* We know the request is written, make sure all state is too! */
131 	return err;
132 }
133 
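/*
 * Return the mask of register-offset bits the HW compares for an LRI. With
 * MI_LRI_LRM_CS_MMIO set, offsets are interpreted relative to the engine's
 * mmio base and only the low bits are significant; the exact width depends
 * on the graphics version and engine class.
 */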
134 static int get_lri_mask(struct intel_engine_cs *engine, u32 lri)
135 {
136 	if ((lri & MI_LRI_LRM_CS_MMIO) == 0)
137 		return ~0u;
138 
139 	if (GRAPHICS_VER(engine->i915) < 12)
140 		return 0xfff;
141 
142 	switch (engine->class) {
143 	default:
144 	case RENDER_CLASS:
145 	case COMPUTE_CLASS:
146 		return 0x07ff;
147 	case COPY_ENGINE_CLASS:
148 		return 0x0fff;
149 	case VIDEO_DECODE_CLASS:
150 	case VIDEO_ENHANCEMENT_CLASS:
151 		return 0x3fff;
152 	}
153 }
154 
155 static int live_lrc_layout(void *arg)
156 {
157 	struct intel_gt *gt = arg;
158 	struct intel_engine_cs *engine;
159 	enum intel_engine_id id;
160 	u32 *lrc;
161 	int err;
162 
163 	/*
164 	 * Check that the register offsets we use to create the initial reg state
165 	 * match the layout saved by HW.
166 	 */
167 
168 	lrc = (u32 *)__get_free_page(GFP_KERNEL); /* requires page alignment */
169 	if (!lrc)
170 		return -ENOMEM;
171 	GEM_BUG_ON(offset_in_page(lrc));
172 
173 	err = 0;
174 	for_each_engine(engine, gt, id) {
175 		u32 *hw;
176 		int dw;
177 
178 		if (!engine->default_state)
179 			continue;
180 
181 		hw = shmem_pin_map(engine->default_state);
182 		if (!hw) {
183 			err = -ENOMEM;
184 			break;
185 		}
186 		hw += LRC_STATE_OFFSET / sizeof(*hw);
187 
188 		__lrc_init_regs(memset(lrc, POISON_INUSE, PAGE_SIZE),
189 				engine->kernel_context, engine, true);
190 
191 		dw = 0;
192 		do {
193 			u32 lri = READ_ONCE(hw[dw]);
194 			u32 lri_mask;
195 
196 			if (lri == 0) {
197 				dw++;
198 				continue;
199 			}
200 
201 			if (lrc[dw] == 0) {
202 				pr_debug("%s: skipped instruction %x at dword %d\n",
203 					 engine->name, lri, dw);
204 				dw++;
205 				continue;
206 			}
207 
208 			if ((lri & GENMASK(31, 23)) != LRI_HEADER) {
209 				pr_err("%s: Expected LRI command at dword %d, found %08x\n",
210 				       engine->name, dw, lri);
211 				err = -EINVAL;
212 				break;
213 			}
214 
215 			if (lrc[dw] != lri) {
216 				pr_err("%s: LRI command mismatch at dword %d, expected %08x found %08x\n",
217 				       engine->name, dw, lri, lrc[dw]);
218 				err = -EINVAL;
219 				break;
220 			}
221 
222 			/*
223 			 * When bit 19 of MI_LOAD_REGISTER_IMM instruction
224 			 * opcode is set on Gen12+ devices, HW does not
225 			 * care about certain register address offsets, and
226 			 * instead checks the following for valid address
227 			 * ranges on specific engines:
228 			 * RCS && CCS: BITS(0 - 10)
229 			 * BCS: BITS(0 - 11)
230 			 * VECS && VCS: BITS(0 - 13)
231 			 */
232 			lri_mask = get_lri_mask(engine, lri);
233 
234 			lri &= 0x7f;
235 			lri++;
236 			dw++;
237 
238 			while (lri) {
239 				u32 offset = READ_ONCE(hw[dw]);
240 
241 				if ((offset ^ lrc[dw]) & lri_mask) {
242 					pr_err("%s: Different registers found at dword %d, expected %x, found %x\n",
243 					       engine->name, dw, offset, lrc[dw]);
244 					err = -EINVAL;
245 					break;
246 				}
247 
248 				/*
249 				 * Skip over the actual register value as we
250 				 * expect that to differ.
251 				 */
252 				dw += 2;
253 				lri -= 2;
254 			}
255 		} while (!err && (lrc[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);
256 
257 		if (err) {
258 			pr_info("%s: HW register image:\n", engine->name);
259 			igt_hexdump(hw, PAGE_SIZE);
260 
261 			pr_info("%s: SW register image:\n", engine->name);
262 			igt_hexdump(lrc, PAGE_SIZE);
263 		}
264 
265 		shmem_unpin_map(engine->default_state, hw);
266 		if (err)
267 			break;
268 	}
269 
270 	free_page((unsigned long)lrc);
271 	return err;
272 }
273 
274 static int find_offset(const u32 *lri, u32 offset)
275 {
276 	int i;
277 
278 	for (i = 0; i < PAGE_SIZE / sizeof(u32); i++)
279 		if (lri[i] == offset)
280 			return i;
281 
282 	return -1;
283 }
284 
285 static int live_lrc_fixed(void *arg)
286 {
287 	struct intel_gt *gt = arg;
288 	struct intel_engine_cs *engine;
289 	enum intel_engine_id id;
290 	int err = 0;
291 
292 	/*
293 	 * Check the assumed register offsets match the actual locations in
294 	 * the context image.
295 	 */
296 
297 	for_each_engine(engine, gt, id) {
298 		const struct {
299 			u32 reg;
300 			u32 offset;
301 			const char *name;
302 		} tbl[] = {
303 			{
304 				i915_mmio_reg_offset(RING_START(engine->mmio_base)),
305 				CTX_RING_START - 1,
306 				"RING_START"
307 			},
308 			{
309 				i915_mmio_reg_offset(RING_CTL(engine->mmio_base)),
310 				CTX_RING_CTL - 1,
311 				"RING_CTL"
312 			},
313 			{
314 				i915_mmio_reg_offset(RING_HEAD(engine->mmio_base)),
315 				CTX_RING_HEAD - 1,
316 				"RING_HEAD"
317 			},
318 			{
319 				i915_mmio_reg_offset(RING_TAIL(engine->mmio_base)),
320 				CTX_RING_TAIL - 1,
321 				"RING_TAIL"
322 			},
323 			{
324 				i915_mmio_reg_offset(RING_MI_MODE(engine->mmio_base)),
325 				lrc_ring_mi_mode(engine),
326 				"RING_MI_MODE"
327 			},
328 			{
329 				i915_mmio_reg_offset(RING_BBSTATE(engine->mmio_base)),
330 				CTX_BB_STATE - 1,
331 				"BB_STATE"
332 			},
333 			{
334 				i915_mmio_reg_offset(RING_BB_PER_CTX_PTR(engine->mmio_base)),
335 				lrc_ring_wa_bb_per_ctx(engine),
336 				"RING_BB_PER_CTX_PTR"
337 			},
338 			{
339 				i915_mmio_reg_offset(RING_INDIRECT_CTX(engine->mmio_base)),
340 				lrc_ring_indirect_ptr(engine),
341 				"RING_INDIRECT_CTX_PTR"
342 			},
343 			{
344 				i915_mmio_reg_offset(RING_INDIRECT_CTX_OFFSET(engine->mmio_base)),
345 				lrc_ring_indirect_offset(engine),
346 				"RING_INDIRECT_CTX_OFFSET"
347 			},
348 			{
349 				i915_mmio_reg_offset(RING_CTX_TIMESTAMP(engine->mmio_base)),
350 				CTX_TIMESTAMP - 1,
351 				"RING_CTX_TIMESTAMP"
352 			},
353 			{
354 				i915_mmio_reg_offset(GEN8_RING_CS_GPR(engine->mmio_base, 0)),
355 				lrc_ring_gpr0(engine),
356 				"RING_CS_GPR0"
357 			},
358 			{
359 				i915_mmio_reg_offset(RING_CMD_BUF_CCTL(engine->mmio_base)),
360 				lrc_ring_cmd_buf_cctl(engine),
361 				"RING_CMD_BUF_CCTL"
362 			},
363 			{
364 				i915_mmio_reg_offset(RING_BB_OFFSET(engine->mmio_base)),
365 				lrc_ring_bb_offset(engine),
366 				"RING_BB_OFFSET"
367 			},
368 			{ },
369 		}, *t;
370 		u32 *hw;
371 
372 		if (!engine->default_state)
373 			continue;
374 
375 		hw = shmem_pin_map(engine->default_state);
376 		if (!hw) {
377 			err = -ENOMEM;
378 			break;
379 		}
380 		hw += LRC_STATE_OFFSET / sizeof(*hw);
381 
382 		for (t = tbl; t->name; t++) {
383 			int dw = find_offset(hw, t->reg);
384 
385 			if (dw != t->offset) {
386 				pr_err("%s: Offset for %s [0x%x] mismatch, found %x, expected %x\n",
387 				       engine->name,
388 				       t->name,
389 				       t->reg,
390 				       dw,
391 				       t->offset);
392 				err = -EINVAL;
393 			}
394 		}
395 
396 		shmem_unpin_map(engine->default_state, hw);
397 	}
398 
399 	return err;
400 }
401 
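/*
 * Use SRM from within the context to copy the live RING_START and RING_TAIL
 * values into @scratch, then compare them against the values we expect to
 * have programmed for this context.
 */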
402 static int __live_lrc_state(struct intel_engine_cs *engine,
403 			    struct i915_vma *scratch)
404 {
405 	struct intel_context *ce;
406 	struct i915_request *rq;
407 	struct i915_gem_ww_ctx ww;
408 	enum {
409 		RING_START_IDX = 0,
410 		RING_TAIL_IDX,
411 		MAX_IDX
412 	};
413 	u32 expected[MAX_IDX];
414 	u32 *cs;
415 	int err;
416 	int n;
417 
418 	ce = intel_context_create(engine);
419 	if (IS_ERR(ce))
420 		return PTR_ERR(ce);
421 
422 	i915_gem_ww_ctx_init(&ww, false);
423 retry:
424 	err = i915_gem_object_lock(scratch->obj, &ww);
425 	if (!err)
426 		err = intel_context_pin_ww(ce, &ww);
427 	if (err)
428 		goto err_put;
429 
430 	rq = i915_request_create(ce);
431 	if (IS_ERR(rq)) {
432 		err = PTR_ERR(rq);
433 		goto err_unpin;
434 	}
435 
436 	cs = intel_ring_begin(rq, 4 * MAX_IDX);
437 	if (IS_ERR(cs)) {
438 		err = PTR_ERR(cs);
439 		i915_request_add(rq);
440 		goto err_unpin;
441 	}
442 
443 	*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
444 	*cs++ = i915_mmio_reg_offset(RING_START(engine->mmio_base));
445 	*cs++ = i915_ggtt_offset(scratch) + RING_START_IDX * sizeof(u32);
446 	*cs++ = 0;
447 
448 	expected[RING_START_IDX] = i915_ggtt_offset(ce->ring->vma);
449 
450 	*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
451 	*cs++ = i915_mmio_reg_offset(RING_TAIL(engine->mmio_base));
452 	*cs++ = i915_ggtt_offset(scratch) + RING_TAIL_IDX * sizeof(u32);
453 	*cs++ = 0;
454 
455 	err = i915_vma_move_to_active(scratch, rq, EXEC_OBJECT_WRITE);
456 
457 	i915_request_get(rq);
458 	i915_request_add(rq);
459 	if (err)
460 		goto err_rq;
461 
462 	intel_engine_flush_submission(engine);
463 	expected[RING_TAIL_IDX] = ce->ring->tail;
464 
465 	if (i915_request_wait(rq, 0, HZ / 5) < 0) {
466 		err = -ETIME;
467 		goto err_rq;
468 	}
469 
470 	cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
471 	if (IS_ERR(cs)) {
472 		err = PTR_ERR(cs);
473 		goto err_rq;
474 	}
475 
476 	for (n = 0; n < MAX_IDX; n++) {
477 		if (cs[n] != expected[n]) {
478 			pr_err("%s: Stored register[%d] value[0x%x] did not match expected[0x%x]\n",
479 			       engine->name, n, cs[n], expected[n]);
480 			err = -EINVAL;
481 			break;
482 		}
483 	}
484 
485 	i915_gem_object_unpin_map(scratch->obj);
486 
487 err_rq:
488 	i915_request_put(rq);
489 err_unpin:
490 	intel_context_unpin(ce);
491 err_put:
492 	if (err == -EDEADLK) {
493 		err = i915_gem_ww_ctx_backoff(&ww);
494 		if (!err)
495 			goto retry;
496 	}
497 	i915_gem_ww_ctx_fini(&ww);
498 	intel_context_put(ce);
499 	return err;
500 }
501 
502 static int live_lrc_state(void *arg)
503 {
504 	struct intel_gt *gt = arg;
505 	struct intel_engine_cs *engine;
506 	struct i915_vma *scratch;
507 	enum intel_engine_id id;
508 	int err = 0;
509 
510 	/*
511 	 * Check the live register state matches what we expect for this
512 	 * intel_context.
513 	 */
514 
515 	scratch = create_scratch(gt);
516 	if (IS_ERR(scratch))
517 		return PTR_ERR(scratch);
518 
519 	for_each_engine(engine, gt, id) {
520 		err = __live_lrc_state(engine, scratch);
521 		if (err)
522 			break;
523 	}
524 
525 	if (igt_flush_test(gt->i915))
526 		err = -EIO;
527 
528 	i915_vma_unpin_and_release(&scratch, 0);
529 	return err;
530 }
531 
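/*
 * Fill every CS_GPR of @ce's engine with a non-zero value (STACK_MAGIC)
 * using a barrier-priority MI_LOAD_REGISTER_IMM.
 */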
532 static int gpr_make_dirty(struct intel_context *ce)
533 {
534 	struct i915_request *rq;
535 	u32 *cs;
536 	int n;
537 
538 	rq = intel_context_create_request(ce);
539 	if (IS_ERR(rq))
540 		return PTR_ERR(rq);
541 
542 	cs = intel_ring_begin(rq, 2 * NUM_GPR_DW + 2);
543 	if (IS_ERR(cs)) {
544 		i915_request_add(rq);
545 		return PTR_ERR(cs);
546 	}
547 
548 	*cs++ = MI_LOAD_REGISTER_IMM(NUM_GPR_DW);
549 	for (n = 0; n < NUM_GPR_DW; n++) {
550 		*cs++ = CS_GPR(ce->engine, n);
551 		*cs++ = STACK_MAGIC;
552 	}
553 	*cs++ = MI_NOOP;
554 
555 	intel_ring_advance(rq, cs);
556 
557 	rq->sched.attr.priority = I915_PRIORITY_BARRIER;
558 	i915_request_add(rq);
559 
560 	return 0;
561 }
562 
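/*
 * Build a request on @ce that waits on a semaphore in the status page and
 * then uses SRM to copy all CS_GPR registers into @scratch for inspection.
 */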
563 static struct i915_request *
564 __gpr_read(struct intel_context *ce, struct i915_vma *scratch, u32 *slot)
565 {
566 	const u32 offset =
567 		i915_ggtt_offset(ce->engine->status_page.vma) +
568 		offset_in_page(slot);
569 	struct i915_request *rq;
570 	u32 *cs;
571 	int err;
572 	int n;
573 
574 	rq = intel_context_create_request(ce);
575 	if (IS_ERR(rq))
576 		return rq;
577 
578 	cs = intel_ring_begin(rq, 6 + 4 * NUM_GPR_DW);
579 	if (IS_ERR(cs)) {
580 		i915_request_add(rq);
581 		return ERR_CAST(cs);
582 	}
583 
584 	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
585 	*cs++ = MI_NOOP;
586 
587 	*cs++ = MI_SEMAPHORE_WAIT |
588 		MI_SEMAPHORE_GLOBAL_GTT |
589 		MI_SEMAPHORE_POLL |
590 		MI_SEMAPHORE_SAD_NEQ_SDD;
591 	*cs++ = 0;
592 	*cs++ = offset;
593 	*cs++ = 0;
594 
595 	for (n = 0; n < NUM_GPR_DW; n++) {
596 		*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
597 		*cs++ = CS_GPR(ce->engine, n);
598 		*cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32);
599 		*cs++ = 0;
600 	}
601 
602 	err = igt_vma_move_to_active_unlocked(scratch, rq, EXEC_OBJECT_WRITE);
603 
604 	i915_request_get(rq);
605 	i915_request_add(rq);
606 	if (err) {
607 		i915_request_put(rq);
608 		rq = ERR_PTR(err);
609 	}
610 
611 	return rq;
612 }
613 
614 static int __live_lrc_gpr(struct intel_engine_cs *engine,
615 			  struct i915_vma *scratch,
616 			  bool preempt)
617 {
618 	u32 *slot = memset32(engine->status_page.addr + 1000, 0, 4);
619 	struct intel_context *ce;
620 	struct i915_request *rq;
621 	u32 *cs;
622 	int err;
623 	int n;
624 
625 	if (GRAPHICS_VER(engine->i915) < 9 && engine->class != RENDER_CLASS)
626 		return 0; /* GPR only on rcs0 for gen8 */
627 
628 	err = gpr_make_dirty(engine->kernel_context);
629 	if (err)
630 		return err;
631 
632 	ce = intel_context_create(engine);
633 	if (IS_ERR(ce))
634 		return PTR_ERR(ce);
635 
636 	rq = __gpr_read(ce, scratch, slot);
637 	if (IS_ERR(rq)) {
638 		err = PTR_ERR(rq);
639 		goto err_put;
640 	}
641 
642 	err = wait_for_submit(engine, rq, HZ / 2);
643 	if (err)
644 		goto err_rq;
645 
646 	if (preempt) {
647 		err = gpr_make_dirty(engine->kernel_context);
648 		if (err)
649 			goto err_rq;
650 
651 		err = emit_semaphore_signal(engine->kernel_context, slot);
652 		if (err)
653 			goto err_rq;
654 
655 		err = wait_for_submit(engine, rq, HZ / 2);
656 		if (err)
657 			goto err_rq;
658 	} else {
659 		slot[0] = 1;
660 		wmb();
661 	}
662 
663 	if (i915_request_wait(rq, 0, HZ / 5) < 0) {
664 		err = -ETIME;
665 		goto err_rq;
666 	}
667 
668 	cs = i915_gem_object_pin_map_unlocked(scratch->obj, I915_MAP_WB);
669 	if (IS_ERR(cs)) {
670 		err = PTR_ERR(cs);
671 		goto err_rq;
672 	}
673 
674 	for (n = 0; n < NUM_GPR_DW; n++) {
675 		if (cs[n]) {
676 			pr_err("%s: GPR[%d].%s was not zero, found 0x%08x!\n",
677 			       engine->name,
678 			       n / 2, n & 1 ? "udw" : "ldw",
679 			       cs[n]);
680 			err = -EINVAL;
681 			break;
682 		}
683 	}
684 
685 	i915_gem_object_unpin_map(scratch->obj);
686 
687 err_rq:
688 	memset32(&slot[0], -1, 4);
689 	wmb();
690 	i915_request_put(rq);
691 err_put:
692 	intel_context_put(ce);
693 	return err;
694 }
695 
696 static int live_lrc_gpr(void *arg)
697 {
698 	struct intel_gt *gt = arg;
699 	struct intel_engine_cs *engine;
700 	struct i915_vma *scratch;
701 	enum intel_engine_id id;
702 	int err = 0;
703 
704 	/*
705 	 * Check that GPR registers are cleared in new contexts as we need
706 	 * to avoid leaking any information from previous contexts.
707 	 */
708 
709 	scratch = create_scratch(gt);
710 	if (IS_ERR(scratch))
711 		return PTR_ERR(scratch);
712 
713 	for_each_engine(engine, gt, id) {
714 		st_engine_heartbeat_disable(engine);
715 
716 		err = __live_lrc_gpr(engine, scratch, false);
717 		if (err)
718 			goto err;
719 
720 		err = __live_lrc_gpr(engine, scratch, true);
721 		if (err)
722 			goto err;
723 
724 err:
725 		st_engine_heartbeat_enable(engine);
726 		if (igt_flush_test(gt->i915))
727 			err = -EIO;
728 		if (err)
729 			break;
730 	}
731 
732 	i915_vma_unpin_and_release(&scratch, 0);
733 	return err;
734 }
735 
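/*
 * Build a request on @ce that waits on a semaphore in the status page and
 * then stores the current RING_CTX_TIMESTAMP into @slot[@idx].
 */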
736 static struct i915_request *
737 create_timestamp(struct intel_context *ce, void *slot, int idx)
738 {
739 	const u32 offset =
740 		i915_ggtt_offset(ce->engine->status_page.vma) +
741 		offset_in_page(slot);
742 	struct i915_request *rq;
743 	u32 *cs;
744 	int err;
745 
746 	rq = intel_context_create_request(ce);
747 	if (IS_ERR(rq))
748 		return rq;
749 
750 	cs = intel_ring_begin(rq, 10);
751 	if (IS_ERR(cs)) {
752 		err = PTR_ERR(cs);
753 		goto err;
754 	}
755 
756 	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
757 	*cs++ = MI_NOOP;
758 
759 	*cs++ = MI_SEMAPHORE_WAIT |
760 		MI_SEMAPHORE_GLOBAL_GTT |
761 		MI_SEMAPHORE_POLL |
762 		MI_SEMAPHORE_SAD_NEQ_SDD;
763 	*cs++ = 0;
764 	*cs++ = offset;
765 	*cs++ = 0;
766 
767 	*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
768 	*cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(rq->engine->mmio_base));
769 	*cs++ = offset + idx * sizeof(u32);
770 	*cs++ = 0;
771 
772 	intel_ring_advance(rq, cs);
773 
774 	err = 0;
775 err:
776 	i915_request_get(rq);
777 	i915_request_add(rq);
778 	if (err) {
779 		i915_request_put(rq);
780 		return ERR_PTR(err);
781 	}
782 
783 	return rq;
784 }
785 
786 struct lrc_timestamp {
787 	struct intel_engine_cs *engine;
788 	struct intel_context *ce[2];
789 	u32 poison;
790 };
791 
792 static bool timestamp_advanced(u32 start, u32 end)
793 {
794 	return (s32)(end - start) > 0;
795 }
796 
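/*
 * Poison CTX_TIMESTAMP in ce[0]'s context image, sample RING_CTX_TIMESTAMP
 * after the context has been restored (into slot[1]), then force a switch
 * back to the kernel context and check that both the restored and the saved
 * timestamps advanced monotonically.
 */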
797 static int __lrc_timestamp(const struct lrc_timestamp *arg, bool preempt)
798 {
799 	u32 *slot = memset32(arg->engine->status_page.addr + 1000, 0, 4);
800 	struct i915_request *rq;
801 	u32 timestamp;
802 	int err = 0;
803 
804 	arg->ce[0]->lrc_reg_state[CTX_TIMESTAMP] = arg->poison;
805 	rq = create_timestamp(arg->ce[0], slot, 1);
806 	if (IS_ERR(rq))
807 		return PTR_ERR(rq);
808 
809 	err = wait_for_submit(rq->engine, rq, HZ / 2);
810 	if (err)
811 		goto err;
812 
813 	if (preempt) {
814 		arg->ce[1]->lrc_reg_state[CTX_TIMESTAMP] = 0xdeadbeef;
815 		err = emit_semaphore_signal(arg->ce[1], slot);
816 		if (err)
817 			goto err;
818 	} else {
819 		slot[0] = 1;
820 		wmb();
821 	}
822 
823 	/* And wait for switch to kernel (to save our context to memory) */
824 	err = context_flush(arg->ce[0], HZ / 2);
825 	if (err)
826 		goto err;
827 
828 	if (!timestamp_advanced(arg->poison, slot[1])) {
829 		pr_err("%s(%s): invalid timestamp on restore, context:%x, request:%x\n",
830 		       arg->engine->name, preempt ? "preempt" : "simple",
831 		       arg->poison, slot[1]);
832 		err = -EINVAL;
833 	}
834 
835 	timestamp = READ_ONCE(arg->ce[0]->lrc_reg_state[CTX_TIMESTAMP]);
836 	if (!timestamp_advanced(slot[1], timestamp)) {
837 		pr_err("%s(%s): invalid timestamp on save, request:%x, context:%x\n",
838 		       arg->engine->name, preempt ? "preempt" : "simple",
839 		       slot[1], timestamp);
840 		err = -EINVAL;
841 	}
842 
843 err:
844 	memset32(slot, -1, 4);
845 	i915_request_put(rq);
846 	return err;
847 }
848 
849 static int live_lrc_timestamp(void *arg)
850 {
851 	struct lrc_timestamp data = {};
852 	struct intel_gt *gt = arg;
853 	enum intel_engine_id id;
854 	const u32 poison[] = {
855 		0,
856 		S32_MAX,
857 		(u32)S32_MAX + 1,
858 		U32_MAX,
859 	};
860 
861 	/*
862 	 * We want to verify that the timestamp is saved and restored across
863 	 * context switches and is monotonic.
864 	 *
865 	 * So we do this with a little bit of LRC poisoning to check various
866 	 * boundary conditions, and see what happens if we preempt the context
867 	 * with a second request (carrying more poison into the timestamp).
868 	 */
869 
870 	for_each_engine(data.engine, gt, id) {
871 		int i, err = 0;
872 
873 		st_engine_heartbeat_disable(data.engine);
874 
875 		for (i = 0; i < ARRAY_SIZE(data.ce); i++) {
876 			struct intel_context *tmp;
877 
878 			tmp = intel_context_create(data.engine);
879 			if (IS_ERR(tmp)) {
880 				err = PTR_ERR(tmp);
881 				goto err;
882 			}
883 
884 			err = intel_context_pin(tmp);
885 			if (err) {
886 				intel_context_put(tmp);
887 				goto err;
888 			}
889 
890 			data.ce[i] = tmp;
891 		}
892 
893 		for (i = 0; i < ARRAY_SIZE(poison); i++) {
894 			data.poison = poison[i];
895 
896 			err = __lrc_timestamp(&data, false);
897 			if (err)
898 				break;
899 
900 			err = __lrc_timestamp(&data, true);
901 			if (err)
902 				break;
903 		}
904 
905 err:
906 		st_engine_heartbeat_enable(data.engine);
907 		for (i = 0; i < ARRAY_SIZE(data.ce); i++) {
908 			if (!data.ce[i])
909 				break;
910 
911 			intel_context_unpin(data.ce[i]);
912 			intel_context_put(data.ce[i]);
913 		}
914 
915 		if (igt_flush_test(gt->i915))
916 			err = -EIO;
917 		if (err)
918 			return err;
919 	}
920 
921 	return 0;
922 }
923 
924 static struct i915_vma *
925 create_user_vma(struct i915_address_space *vm, unsigned long size)
926 {
927 	struct drm_i915_gem_object *obj;
928 	struct i915_vma *vma;
929 	int err;
930 
931 	obj = i915_gem_object_create_internal(vm->i915, size);
932 	if (IS_ERR(obj))
933 		return ERR_CAST(obj);
934 
935 	vma = i915_vma_instance(obj, vm, NULL);
936 	if (IS_ERR(vma)) {
937 		i915_gem_object_put(obj);
938 		return vma;
939 	}
940 
941 	err = i915_vma_pin(vma, 0, 0, PIN_USER);
942 	if (err) {
943 		i915_gem_object_put(obj);
944 		return ERR_PTR(err);
945 	}
946 
947 	return vma;
948 }
949 
950 static u32 safe_poison(u32 offset, u32 poison)
951 {
952 	/*
953 	 * Do not enable predication as it will nop all subsequent commands,
954 	 * not only disabling the tests (by preventing all the other SRM) but
955 	 * also preventing the arbitration events at the end of the request.
956 	 */
957 	if (offset == i915_mmio_reg_offset(RING_PREDICATE_RESULT(0)))
958 		poison &= ~REG_BIT(0);
959 
960 	return poison;
961 }
962 
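/*
 * Build a user batch that walks the MI_LOAD_REGISTER_IMM lists found in the
 * default context image and emits an SRM for each listed register, dumping
 * its current value into @scratch.
 */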
963 static struct i915_vma *
964 store_context(struct intel_context *ce, struct i915_vma *scratch)
965 {
966 	struct i915_vma *batch;
967 	u32 dw, x, *cs, *hw;
968 	u32 *defaults;
969 
970 	batch = create_user_vma(ce->vm, SZ_64K);
971 	if (IS_ERR(batch))
972 		return batch;
973 
974 	cs = i915_gem_object_pin_map_unlocked(batch->obj, I915_MAP_WC);
975 	if (IS_ERR(cs)) {
976 		i915_vma_put(batch);
977 		return ERR_CAST(cs);
978 	}
979 
980 	defaults = shmem_pin_map(ce->engine->default_state);
981 	if (!defaults) {
982 		i915_gem_object_unpin_map(batch->obj);
983 		i915_vma_put(batch);
984 		return ERR_PTR(-ENOMEM);
985 	}
986 
987 	x = 0;
988 	dw = 0;
989 	hw = defaults;
990 	hw += LRC_STATE_OFFSET / sizeof(*hw);
991 	do {
992 		u32 len = hw[dw] & LRI_LENGTH_MASK;
993 
994 		/*
995 		 * Keep it simple, skip parsing complex commands
996 		 *
997 		 * At present, there are no more MI_LOAD_REGISTER_IMM
998 		 * commands after the first 3D state command. Rather
999 		 * than include a table (see i915_cmd_parser.c) of all
1000 		 * the possible commands and their instruction lengths
1001 		 * (or mask for variable length instructions), assume
1002 		 * we have gathered the complete list of registers and
1003 		 * bail out.
1004 		 */
1005 		if ((hw[dw] >> INSTR_CLIENT_SHIFT) != INSTR_MI_CLIENT)
1006 			break;
1007 
1008 		if (hw[dw] == 0) {
1009 			dw++;
1010 			continue;
1011 		}
1012 
1013 		if ((hw[dw] & GENMASK(31, 23)) != LRI_HEADER) {
1014 			/* Assume all other MI commands match LRI length mask */
1015 			dw += len + 2;
1016 			continue;
1017 		}
1018 
1019 		if (!len) {
1020 			pr_err("%s: invalid LRI found in context image\n",
1021 			       ce->engine->name);
1022 			igt_hexdump(defaults, PAGE_SIZE);
1023 			break;
1024 		}
1025 
1026 		dw++;
1027 		len = (len + 1) / 2;
1028 		while (len--) {
1029 			*cs++ = MI_STORE_REGISTER_MEM_GEN8;
1030 			*cs++ = hw[dw];
1031 			*cs++ = lower_32_bits(i915_vma_offset(scratch) + x);
1032 			*cs++ = upper_32_bits(i915_vma_offset(scratch) + x);
1033 
1034 			dw += 2;
1035 			x += 4;
1036 		}
1037 	} while (dw < PAGE_SIZE / sizeof(u32) &&
1038 		 (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);
1039 
1040 	*cs++ = MI_BATCH_BUFFER_END;
1041 
1042 	shmem_unpin_map(ce->engine->default_state, defaults);
1043 
1044 	i915_gem_object_flush_map(batch->obj);
1045 	i915_gem_object_unpin_map(batch->obj);
1046 
1047 	return batch;
1048 }
1049 
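/*
 * Submit a request on @ce that stores a snapshot of the context registers
 * into @before, waits on @sema, then stores a second snapshot into @after.
 * Arbitration is disabled around each snapshot batch so that it cannot be
 * preempted partway through.
 */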
1050 static struct i915_request *
1051 record_registers(struct intel_context *ce,
1052 		 struct i915_vma *before,
1053 		 struct i915_vma *after,
1054 		 u32 *sema)
1055 {
1056 	struct i915_vma *b_before, *b_after;
1057 	struct i915_request *rq;
1058 	u32 *cs;
1059 	int err;
1060 
1061 	b_before = store_context(ce, before);
1062 	if (IS_ERR(b_before))
1063 		return ERR_CAST(b_before);
1064 
1065 	b_after = store_context(ce, after);
1066 	if (IS_ERR(b_after)) {
1067 		rq = ERR_CAST(b_after);
1068 		goto err_before;
1069 	}
1070 
1071 	rq = intel_context_create_request(ce);
1072 	if (IS_ERR(rq))
1073 		goto err_after;
1074 
1075 	err = igt_vma_move_to_active_unlocked(before, rq, EXEC_OBJECT_WRITE);
1076 	if (err)
1077 		goto err_rq;
1078 
1079 	err = igt_vma_move_to_active_unlocked(b_before, rq, 0);
1080 	if (err)
1081 		goto err_rq;
1082 
1083 	err = igt_vma_move_to_active_unlocked(after, rq, EXEC_OBJECT_WRITE);
1084 	if (err)
1085 		goto err_rq;
1086 
1087 	err = igt_vma_move_to_active_unlocked(b_after, rq, 0);
1088 	if (err)
1089 		goto err_rq;
1090 
1091 	cs = intel_ring_begin(rq, 14);
1092 	if (IS_ERR(cs)) {
1093 		err = PTR_ERR(cs);
1094 		goto err_rq;
1095 	}
1096 
1097 	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
1098 	*cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8);
1099 	*cs++ = lower_32_bits(i915_vma_offset(b_before));
1100 	*cs++ = upper_32_bits(i915_vma_offset(b_before));
1101 
1102 	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
1103 	*cs++ = MI_SEMAPHORE_WAIT |
1104 		MI_SEMAPHORE_GLOBAL_GTT |
1105 		MI_SEMAPHORE_POLL |
1106 		MI_SEMAPHORE_SAD_NEQ_SDD;
1107 	*cs++ = 0;
1108 	*cs++ = i915_ggtt_offset(ce->engine->status_page.vma) +
1109 		offset_in_page(sema);
1110 	*cs++ = 0;
1111 	*cs++ = MI_NOOP;
1112 
1113 	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
1114 	*cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8);
1115 	*cs++ = lower_32_bits(i915_vma_offset(b_after));
1116 	*cs++ = upper_32_bits(i915_vma_offset(b_after));
1117 
1118 	intel_ring_advance(rq, cs);
1119 
1120 	WRITE_ONCE(*sema, 0);
1121 	i915_request_get(rq);
1122 	i915_request_add(rq);
1123 err_after:
1124 	i915_vma_put(b_after);
1125 err_before:
1126 	i915_vma_put(b_before);
1127 	return rq;
1128 
1129 err_rq:
1130 	i915_request_add(rq);
1131 	rq = ERR_PTR(err);
1132 	goto err_after;
1133 }
1134 
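/*
 * Build a user batch that replays the MI_LOAD_REGISTER_IMM lists from the
 * default context image, writing @poison (filtered by safe_poison()) into
 * every listed register.
 */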
1135 static struct i915_vma *load_context(struct intel_context *ce, u32 poison)
1136 {
1137 	struct i915_vma *batch;
1138 	u32 dw, *cs, *hw;
1139 	u32 *defaults;
1140 
1141 	batch = create_user_vma(ce->vm, SZ_64K);
1142 	if (IS_ERR(batch))
1143 		return batch;
1144 
1145 	cs = i915_gem_object_pin_map_unlocked(batch->obj, I915_MAP_WC);
1146 	if (IS_ERR(cs)) {
1147 		i915_vma_put(batch);
1148 		return ERR_CAST(cs);
1149 	}
1150 
1151 	defaults = shmem_pin_map(ce->engine->default_state);
1152 	if (!defaults) {
1153 		i915_gem_object_unpin_map(batch->obj);
1154 		i915_vma_put(batch);
1155 		return ERR_PTR(-ENOMEM);
1156 	}
1157 
1158 	dw = 0;
1159 	hw = defaults;
1160 	hw += LRC_STATE_OFFSET / sizeof(*hw);
1161 	do {
1162 		u32 len = hw[dw] & LRI_LENGTH_MASK;
1163 
1164 		/* For simplicity, break parsing at the first complex command */
1165 		if ((hw[dw] >> INSTR_CLIENT_SHIFT) != INSTR_MI_CLIENT)
1166 			break;
1167 
1168 		if (hw[dw] == 0) {
1169 			dw++;
1170 			continue;
1171 		}
1172 
1173 		if ((hw[dw] & GENMASK(31, 23)) != LRI_HEADER) {
1174 			dw += len + 2;
1175 			continue;
1176 		}
1177 
1178 		if (!len) {
1179 			pr_err("%s: invalid LRI found in context image\n",
1180 			       ce->engine->name);
1181 			igt_hexdump(defaults, PAGE_SIZE);
1182 			break;
1183 		}
1184 
1185 		dw++;
1186 		len = (len + 1) / 2;
1187 		*cs++ = MI_LOAD_REGISTER_IMM(len);
1188 		while (len--) {
1189 			*cs++ = hw[dw];
1190 			*cs++ = safe_poison(hw[dw] & get_lri_mask(ce->engine,
1191 								  MI_LRI_LRM_CS_MMIO),
1192 					    poison);
1193 			dw += 2;
1194 		}
1195 	} while (dw < PAGE_SIZE / sizeof(u32) &&
1196 		 (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);
1197 
1198 	*cs++ = MI_BATCH_BUFFER_END;
1199 
1200 	shmem_unpin_map(ce->engine->default_state, defaults);
1201 
1202 	i915_gem_object_flush_map(batch->obj);
1203 	i915_gem_object_unpin_map(batch->obj);
1204 
1205 	return batch;
1206 }
1207 
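/*
 * Run the poison batch on @ce with arbitration disabled, then signal @sema
 * from the ring so that the recording context can take its second snapshot.
 */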
1208 static int poison_registers(struct intel_context *ce, u32 poison, u32 *sema)
1209 {
1210 	struct i915_request *rq;
1211 	struct i915_vma *batch;
1212 	u32 *cs;
1213 	int err;
1214 
1215 	batch = load_context(ce, poison);
1216 	if (IS_ERR(batch))
1217 		return PTR_ERR(batch);
1218 
1219 	rq = intel_context_create_request(ce);
1220 	if (IS_ERR(rq)) {
1221 		err = PTR_ERR(rq);
1222 		goto err_batch;
1223 	}
1224 
1225 	err = igt_vma_move_to_active_unlocked(batch, rq, 0);
1226 	if (err)
1227 		goto err_rq;
1228 
1229 	cs = intel_ring_begin(rq, 8);
1230 	if (IS_ERR(cs)) {
1231 		err = PTR_ERR(cs);
1232 		goto err_rq;
1233 	}
1234 
1235 	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
1236 	*cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8);
1237 	*cs++ = lower_32_bits(i915_vma_offset(batch));
1238 	*cs++ = upper_32_bits(i915_vma_offset(batch));
1239 
1240 	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
1241 	*cs++ = i915_ggtt_offset(ce->engine->status_page.vma) +
1242 		offset_in_page(sema);
1243 	*cs++ = 0;
1244 	*cs++ = 1;
1245 
1246 	intel_ring_advance(rq, cs);
1247 
1248 	rq->sched.attr.priority = I915_PRIORITY_BARRIER;
1249 err_rq:
1250 	i915_request_add(rq);
1251 err_batch:
1252 	i915_vma_put(batch);
1253 	return err;
1254 }
1255 
1256 static bool is_moving(u32 a, u32 b)
1257 {
1258 	return a != b;
1259 }
1260 
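/*
 * Compare the reference snapshots against the result snapshots: any register
 * that was stable across the reference run must also be unchanged after the
 * poisoning context ran, with the exception of RING_HEAD and RING_TAIL which
 * legitimately move.
 */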
1261 static int compare_isolation(struct intel_engine_cs *engine,
1262 			     struct i915_vma *ref[2],
1263 			     struct i915_vma *result[2],
1264 			     struct intel_context *ce,
1265 			     u32 poison)
1266 {
1267 	u32 x, dw, *hw, *lrc;
1268 	u32 *A[2], *B[2];
1269 	u32 *defaults;
1270 	int err = 0;
1271 
1272 	A[0] = i915_gem_object_pin_map_unlocked(ref[0]->obj, I915_MAP_WC);
1273 	if (IS_ERR(A[0]))
1274 		return PTR_ERR(A[0]);
1275 
1276 	A[1] = i915_gem_object_pin_map_unlocked(ref[1]->obj, I915_MAP_WC);
1277 	if (IS_ERR(A[1])) {
1278 		err = PTR_ERR(A[1]);
1279 		goto err_A0;
1280 	}
1281 
1282 	B[0] = i915_gem_object_pin_map_unlocked(result[0]->obj, I915_MAP_WC);
1283 	if (IS_ERR(B[0])) {
1284 		err = PTR_ERR(B[0]);
1285 		goto err_A1;
1286 	}
1287 
1288 	B[1] = i915_gem_object_pin_map_unlocked(result[1]->obj, I915_MAP_WC);
1289 	if (IS_ERR(B[1])) {
1290 		err = PTR_ERR(B[1]);
1291 		goto err_B0;
1292 	}
1293 
1294 	lrc = i915_gem_object_pin_map_unlocked(ce->state->obj,
1295 					       intel_gt_coherent_map_type(engine->gt,
1296 									  ce->state->obj,
1297 									  false));
1298 	if (IS_ERR(lrc)) {
1299 		err = PTR_ERR(lrc);
1300 		goto err_B1;
1301 	}
1302 	lrc += LRC_STATE_OFFSET / sizeof(*hw);
1303 
1304 	defaults = shmem_pin_map(ce->engine->default_state);
1305 	if (!defaults) {
1306 		err = -ENOMEM;
1307 		goto err_lrc;
1308 	}
1309 
1310 	x = 0;
1311 	dw = 0;
1312 	hw = defaults;
1313 	hw += LRC_STATE_OFFSET / sizeof(*hw);
1314 	do {
1315 		u32 len = hw[dw] & LRI_LENGTH_MASK;
1316 
1317 		/* For simplicity, break parsing at the first complex command */
1318 		if ((hw[dw] >> INSTR_CLIENT_SHIFT) != INSTR_MI_CLIENT)
1319 			break;
1320 
1321 		if (hw[dw] == 0) {
1322 			dw++;
1323 			continue;
1324 		}
1325 
1326 		if ((hw[dw] & GENMASK(31, 23)) != LRI_HEADER) {
1327 			dw += len + 2;
1328 			continue;
1329 		}
1330 
1331 		if (!len) {
1332 			pr_err("%s: invalid LRI found in context image\n",
1333 			       engine->name);
1334 			igt_hexdump(defaults, PAGE_SIZE);
1335 			break;
1336 		}
1337 
1338 		dw++;
1339 		len = (len + 1) / 2;
1340 		while (len--) {
1341 			if (!is_moving(A[0][x], A[1][x]) &&
1342 			    (A[0][x] != B[0][x] || A[1][x] != B[1][x])) {
1343 				switch (hw[dw] & 4095) {
1344 				case 0x30: /* RING_HEAD */
1345 				case 0x34: /* RING_TAIL */
1346 					break;
1347 
1348 				default:
1349 					pr_err("%s[%d]: Mismatch for register %4x, default %08x, reference %08x, result (%08x, %08x), poison %08x, context %08x\n",
1350 					       engine->name, dw,
1351 					       hw[dw], hw[dw + 1],
1352 					       A[0][x], B[0][x], B[1][x],
1353 					       poison, lrc[dw + 1]);
1354 					err = -EINVAL;
1355 				}
1356 			}
1357 			dw += 2;
1358 			x++;
1359 		}
1360 	} while (dw < PAGE_SIZE / sizeof(u32) &&
1361 		 (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);
1362 
1363 	shmem_unpin_map(ce->engine->default_state, defaults);
1364 err_lrc:
1365 	i915_gem_object_unpin_map(ce->state->obj);
1366 err_B1:
1367 	i915_gem_object_unpin_map(result[1]->obj);
1368 err_B0:
1369 	i915_gem_object_unpin_map(result[0]->obj);
1370 err_A1:
1371 	i915_gem_object_unpin_map(ref[1]->obj);
1372 err_A0:
1373 	i915_gem_object_unpin_map(ref[0]->obj);
1374 	return err;
1375 }
1376 
1377 static struct i915_vma *
1378 create_result_vma(struct i915_address_space *vm, unsigned long sz)
1379 {
1380 	struct i915_vma *vma;
1381 	void *ptr;
1382 
1383 	vma = create_user_vma(vm, sz);
1384 	if (IS_ERR(vma))
1385 		return vma;
1386 
1387 	/* Set the results to a known value distinct from the poison */
1388 	ptr = i915_gem_object_pin_map_unlocked(vma->obj, I915_MAP_WC);
1389 	if (IS_ERR(ptr)) {
1390 		i915_vma_put(vma);
1391 		return ERR_CAST(ptr);
1392 	}
1393 
1394 	memset(ptr, POISON_INUSE, vma->size);
1395 	i915_gem_object_flush_map(vma->obj);
1396 	i915_gem_object_unpin_map(vma->obj);
1397 
1398 	return vma;
1399 }
1400 
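/*
 * Record context A's registers before and after context B poisons its own
 * register state; if any of A's stable registers change, the per-context
 * isolation is broken.
 */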
1401 static int __lrc_isolation(struct intel_engine_cs *engine, u32 poison)
1402 {
1403 	u32 *sema = memset32(engine->status_page.addr + 1000, 0, 1);
1404 	struct i915_vma *ref[2], *result[2];
1405 	struct intel_context *A, *B;
1406 	struct i915_request *rq;
1407 	int err;
1408 
1409 	A = intel_context_create(engine);
1410 	if (IS_ERR(A))
1411 		return PTR_ERR(A);
1412 
1413 	B = intel_context_create(engine);
1414 	if (IS_ERR(B)) {
1415 		err = PTR_ERR(B);
1416 		goto err_A;
1417 	}
1418 
1419 	ref[0] = create_result_vma(A->vm, SZ_64K);
1420 	if (IS_ERR(ref[0])) {
1421 		err = PTR_ERR(ref[0]);
1422 		goto err_B;
1423 	}
1424 
1425 	ref[1] = create_result_vma(A->vm, SZ_64K);
1426 	if (IS_ERR(ref[1])) {
1427 		err = PTR_ERR(ref[1]);
1428 		goto err_ref0;
1429 	}
1430 
1431 	rq = record_registers(A, ref[0], ref[1], sema);
1432 	if (IS_ERR(rq)) {
1433 		err = PTR_ERR(rq);
1434 		goto err_ref1;
1435 	}
1436 
1437 	WRITE_ONCE(*sema, 1);
1438 	wmb();
1439 
1440 	if (i915_request_wait(rq, 0, HZ / 2) < 0) {
1441 		i915_request_put(rq);
1442 		err = -ETIME;
1443 		goto err_ref1;
1444 	}
1445 	i915_request_put(rq);
1446 
1447 	result[0] = create_result_vma(A->vm, SZ_64K);
1448 	if (IS_ERR(result[0])) {
1449 		err = PTR_ERR(result[0]);
1450 		goto err_ref1;
1451 	}
1452 
1453 	result[1] = create_result_vma(A->vm, SZ_64K);
1454 	if (IS_ERR(result[1])) {
1455 		err = PTR_ERR(result[1]);
1456 		goto err_result0;
1457 	}
1458 
1459 	rq = record_registers(A, result[0], result[1], sema);
1460 	if (IS_ERR(rq)) {
1461 		err = PTR_ERR(rq);
1462 		goto err_result1;
1463 	}
1464 
1465 	err = poison_registers(B, poison, sema);
1466 	if (err == 0 && i915_request_wait(rq, 0, HZ / 2) < 0) {
1467 		pr_err("%s(%s): wait for results timed out\n",
1468 		       __func__, engine->name);
1469 		err = -ETIME;
1470 	}
1471 
1472 	/* Always cancel the semaphore wait, just in case the GPU gets stuck */
1473 	WRITE_ONCE(*sema, -1);
1474 	i915_request_put(rq);
1475 	if (err)
1476 		goto err_result1;
1477 
1478 	err = compare_isolation(engine, ref, result, A, poison);
1479 
1480 err_result1:
1481 	i915_vma_put(result[1]);
1482 err_result0:
1483 	i915_vma_put(result[0]);
1484 err_ref1:
1485 	i915_vma_put(ref[1]);
1486 err_ref0:
1487 	i915_vma_put(ref[0]);
1488 err_B:
1489 	intel_context_put(B);
1490 err_A:
1491 	intel_context_put(A);
1492 	return err;
1493 }
1494 
1495 static bool skip_isolation(const struct intel_engine_cs *engine)
1496 {
1497 	if (engine->class == COPY_ENGINE_CLASS && GRAPHICS_VER(engine->i915) == 9)
1498 		return true;
1499 
1500 	if (engine->class == RENDER_CLASS && GRAPHICS_VER(engine->i915) == 11)
1501 		return true;
1502 
1503 	return false;
1504 }
1505 
1506 static int live_lrc_isolation(void *arg)
1507 {
1508 	struct intel_gt *gt = arg;
1509 	struct intel_engine_cs *engine;
1510 	enum intel_engine_id id;
1511 	const u32 poison[] = {
1512 		STACK_MAGIC,
1513 		0x3a3a3a3a,
1514 		0x5c5c5c5c,
1515 		0xffffffff,
1516 		0xffff0000,
1517 	};
1518 	int err = 0;
1519 
1520 	/*
1521 	 * Our goal is to try and verify that per-context state cannot be
1522 	 * tampered with by another non-privileged client.
1523 	 *
1524 	 * We take the list of context registers from the LRI in the default
1525 	 * context image and attempt to modify that list from a remote context.
1526 	 */
1527 
1528 	for_each_engine(engine, gt, id) {
1529 		int i;
1530 
1531 		/* Just don't even ask */
1532 		if (!IS_ENABLED(CONFIG_DRM_I915_SELFTEST_BROKEN) &&
1533 		    skip_isolation(engine))
1534 			continue;
1535 
1536 		intel_engine_pm_get(engine);
1537 		for (i = 0; i < ARRAY_SIZE(poison); i++) {
1538 			int result;
1539 
1540 			result = __lrc_isolation(engine, poison[i]);
1541 			if (result && !err)
1542 				err = result;
1543 
1544 			result = __lrc_isolation(engine, ~poison[i]);
1545 			if (result && !err)
1546 				err = result;
1547 		}
1548 		intel_engine_pm_put(engine);
1549 		if (igt_flush_test(gt->i915)) {
1550 			err = -EIO;
1551 			break;
1552 		}
1553 	}
1554 
1555 	return err;
1556 }
1557 
1558 static int wabb_ctx_submit_req(struct intel_context *ce)
1559 {
1560 	struct i915_request *rq;
1561 	int err = 0;
1562 
1563 	rq = intel_context_create_request(ce);
1564 	if (IS_ERR(rq))
1565 		return PTR_ERR(rq);
1566 
1567 	i915_request_get(rq);
1568 	i915_request_add(rq);
1569 
1570 	if (i915_request_wait(rq, 0, HZ / 5) < 0)
1571 		err = -ETIME;
1572 
1573 	i915_request_put(rq);
1574 
1575 	return err;
1576 }
1577 
1578 #define CTX_BB_CANARY_OFFSET (3 * 1024)
1579 #define CTX_BB_CANARY_INDEX  (CTX_BB_CANARY_OFFSET / sizeof(u32))
1580 
1581 static u32 *
1582 emit_wabb_ctx_canary(const struct intel_context *ce,
1583 		     u32 *cs, bool per_ctx)
1584 {
1585 	*cs++ = MI_STORE_REGISTER_MEM_GEN8 |
1586 		MI_SRM_LRM_GLOBAL_GTT |
1587 		MI_LRI_LRM_CS_MMIO;
1588 	*cs++ = i915_mmio_reg_offset(RING_START(0));
1589 	*cs++ = i915_ggtt_offset(ce->state) +
1590 		context_wa_bb_offset(ce) +
1591 		CTX_BB_CANARY_OFFSET +
1592 		(per_ctx ? PAGE_SIZE : 0);
1593 	*cs++ = 0;
1594 
1595 	return cs;
1596 }
1597 
1598 static u32 *
1599 emit_indirect_ctx_bb_canary(const struct intel_context *ce, u32 *cs)
1600 {
1601 	return emit_wabb_ctx_canary(ce, cs, false);
1602 }
1603 
1604 static u32 *
1605 emit_per_ctx_bb_canary(const struct intel_context *ce, u32 *cs)
1606 {
1607 	return emit_wabb_ctx_canary(ce, cs, true);
1608 }
1609 
1610 static void
1611 wabb_ctx_setup(struct intel_context *ce, bool per_ctx)
1612 {
1613 	u32 *cs = context_wabb(ce, per_ctx);
1614 
1615 	cs[CTX_BB_CANARY_INDEX] = 0xdeadf00d;
1616 
1617 	if (per_ctx)
1618 		setup_per_ctx_bb(ce, ce->engine, emit_per_ctx_bb_canary);
1619 	else
1620 		setup_indirect_ctx_bb(ce, ce->engine, emit_indirect_ctx_bb_canary);
1621 }
1622 
1623 static bool check_ring_start(struct intel_context *ce, bool per_ctx)
1624 {
1625 	const u32 * const ctx_bb = (void *)(ce->lrc_reg_state) -
1626 		LRC_STATE_OFFSET + context_wa_bb_offset(ce) +
1627 		(per_ctx ? PAGE_SIZE : 0);
1628 
1629 	if (ctx_bb[CTX_BB_CANARY_INDEX] == ce->lrc_reg_state[CTX_RING_START])
1630 		return true;
1631 
1632 	pr_err("ring start mismatch: canary 0x%08x vs state 0x%08x\n",
1633 	       ctx_bb[CTX_BB_CANARY_INDEX],
1634 	       ce->lrc_reg_state[CTX_RING_START]);
1635 
1636 	return false;
1637 }
1638 
1639 static int wabb_ctx_check(struct intel_context *ce, bool per_ctx)
1640 {
1641 	int err;
1642 
1643 	err = wabb_ctx_submit_req(ce);
1644 	if (err)
1645 		return err;
1646 
1647 	if (!check_ring_start(ce, per_ctx))
1648 		return -EINVAL;
1649 
1650 	return 0;
1651 }
1652 
1653 static int __lrc_wabb_ctx(struct intel_engine_cs *engine, bool per_ctx)
1654 {
1655 	struct intel_context *a, *b;
1656 	int err;
1657 
1658 	a = intel_context_create(engine);
1659 	if (IS_ERR(a))
1660 		return PTR_ERR(a);
1661 	err = intel_context_pin(a);
1662 	if (err)
1663 		goto put_a;
1664 
1665 	b = intel_context_create(engine);
1666 	if (IS_ERR(b)) {
1667 		err = PTR_ERR(b);
1668 		goto unpin_a;
1669 	}
1670 	err = intel_context_pin(b);
1671 	if (err)
1672 		goto put_b;
1673 
1674 	/* We use the already reserved extra page in context state */
1675 	if (!a->wa_bb_page) {
1676 		GEM_BUG_ON(b->wa_bb_page);
1677 		GEM_BUG_ON(GRAPHICS_VER(engine->i915) == 12);
1678 		goto unpin_b;
1679 	}
1680 
1681 	/*
1682 	 * In order to test that our per-context bb is truly per context, and
1683 	 * executes at the intended spot in the context restore process, make
1684 	 * the batch store the ring start value to memory. As the ring start is
1685 	 * restored prior to starting the indirect ctx bb, and as it will be
1686 	 * different for each context, it fits this purpose.
1687 	 */
1688 	wabb_ctx_setup(a, per_ctx);
1689 	wabb_ctx_setup(b, per_ctx);
1690 
1691 	err = wabb_ctx_check(a, per_ctx);
1692 	if (err)
1693 		goto unpin_b;
1694 
1695 	err = wabb_ctx_check(b, per_ctx);
1696 
1697 unpin_b:
1698 	intel_context_unpin(b);
1699 put_b:
1700 	intel_context_put(b);
1701 unpin_a:
1702 	intel_context_unpin(a);
1703 put_a:
1704 	intel_context_put(a);
1705 
1706 	return err;
1707 }
1708 
1709 static int lrc_wabb_ctx(void *arg, bool per_ctx)
1710 {
1711 	struct intel_gt *gt = arg;
1712 	struct intel_engine_cs *engine;
1713 	enum intel_engine_id id;
1714 	int err = 0;
1715 
1716 	for_each_engine(engine, gt, id) {
1717 		intel_engine_pm_get(engine);
1718 		err = __lrc_wabb_ctx(engine, per_ctx);
1719 		intel_engine_pm_put(engine);
1720 
1721 		if (igt_flush_test(gt->i915))
1722 			err = -EIO;
1723 
1724 		if (err)
1725 			break;
1726 	}
1727 
1728 	return err;
1729 }
1730 
1731 static int live_lrc_indirect_ctx_bb(void *arg)
1732 {
1733 	return lrc_wabb_ctx(arg, false);
1734 }
1735 
1736 static int live_lrc_per_ctx_bb(void *arg)
1737 {
1738 	return lrc_wabb_ctx(arg, true);
1739 }
1740 
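/*
 * Perform an engine reset from bh-disabled context with the submission
 * tasklet parked, provided the hanging request has not already been flagged
 * with an error.
 */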
1741 static void garbage_reset(struct intel_engine_cs *engine,
1742 			  struct i915_request *rq)
1743 {
1744 	const unsigned int bit = I915_RESET_ENGINE + engine->id;
1745 	unsigned long *lock = &engine->gt->reset.flags;
1746 
1747 	local_bh_disable();
1748 	if (!test_and_set_bit(bit, lock)) {
1749 		tasklet_disable(&engine->sched_engine->tasklet);
1750 
1751 		if (!rq->fence.error)
1752 			__intel_engine_reset_bh(engine, NULL);
1753 
1754 		tasklet_enable(&engine->sched_engine->tasklet);
1755 		clear_and_wake_up_bit(bit, lock);
1756 	}
1757 	local_bh_enable();
1758 }
1759 
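/*
 * Scribble random bytes over the pinned register state of @ce and submit a
 * request on it, giving the reset test a thoroughly corrupted context image
 * to recover from.
 */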
1760 static struct i915_request *garbage(struct intel_context *ce,
1761 				    struct rnd_state *prng)
1762 {
1763 	struct i915_request *rq;
1764 	int err;
1765 
1766 	err = intel_context_pin(ce);
1767 	if (err)
1768 		return ERR_PTR(err);
1769 
1770 	prandom_bytes_state(prng,
1771 			    ce->lrc_reg_state,
1772 			    ce->engine->context_size -
1773 			    LRC_STATE_OFFSET);
1774 
1775 	rq = intel_context_create_request(ce);
1776 	if (IS_ERR(rq)) {
1777 		err = PTR_ERR(rq);
1778 		goto err_unpin;
1779 	}
1780 
1781 	i915_request_get(rq);
1782 	i915_request_add(rq);
1783 	return rq;
1784 
1785 err_unpin:
1786 	intel_context_unpin(ce);
1787 	return ERR_PTR(err);
1788 }
1789 
1790 static int __lrc_garbage(struct intel_engine_cs *engine, struct rnd_state *prng)
1791 {
1792 	struct intel_context *ce;
1793 	struct i915_request *hang;
1794 	int err = 0;
1795 
1796 	ce = intel_context_create(engine);
1797 	if (IS_ERR(ce))
1798 		return PTR_ERR(ce);
1799 
1800 	hang = garbage(ce, prng);
1801 	if (IS_ERR(hang)) {
1802 		err = PTR_ERR(hang);
1803 		goto err_ce;
1804 	}
1805 
1806 	if (wait_for_submit(engine, hang, HZ / 2)) {
1807 		i915_request_put(hang);
1808 		err = -ETIME;
1809 		goto err_ce;
1810 	}
1811 
1812 	intel_context_set_banned(ce);
1813 	garbage_reset(engine, hang);
1814 
1815 	intel_engine_flush_submission(engine);
1816 	if (!hang->fence.error) {
1817 		i915_request_put(hang);
1818 		pr_err("%s: corrupted context was not reset\n",
1819 		       engine->name);
1820 		err = -EINVAL;
1821 		goto err_ce;
1822 	}
1823 
1824 	if (i915_request_wait(hang, 0, HZ / 2) < 0) {
1825 		pr_err("%s: corrupted context did not recover\n",
1826 		       engine->name);
1827 		i915_request_put(hang);
1828 		err = -EIO;
1829 		goto err_ce;
1830 	}
1831 	i915_request_put(hang);
1832 
1833 err_ce:
1834 	intel_context_put(ce);
1835 	return err;
1836 }
1837 
1838 static int live_lrc_garbage(void *arg)
1839 {
1840 	struct intel_gt *gt = arg;
1841 	struct intel_engine_cs *engine;
1842 	enum intel_engine_id id;
1843 
1844 	/*
1845 	 * Verify that we can recover if a context's state is completely
1846 	 * corrupted.
1847 	 */
1848 
1849 	if (!IS_ENABLED(CONFIG_DRM_I915_SELFTEST_BROKEN))
1850 		return 0;
1851 
1852 	for_each_engine(engine, gt, id) {
1853 		I915_RND_STATE(prng);
1854 		int err = 0, i;
1855 
1856 		if (!intel_has_reset_engine(engine->gt))
1857 			continue;
1858 
1859 		intel_engine_pm_get(engine);
1860 		for (i = 0; i < 3; i++) {
1861 			err = __lrc_garbage(engine, &prng);
1862 			if (err)
1863 				break;
1864 		}
1865 		intel_engine_pm_put(engine);
1866 
1867 		if (igt_flush_test(gt->i915))
1868 			err = -EIO;
1869 		if (err)
1870 			return err;
1871 	}
1872 
1873 	return 0;
1874 }
1875 
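/*
 * Flood @engine with trivial requests on a single context and check that the
 * accumulated pphwsp runtime never underflows (i.e. appears to run backwards).
 */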
1876 static int __live_pphwsp_runtime(struct intel_engine_cs *engine)
1877 {
1878 	struct intel_context *ce;
1879 	struct i915_request *rq;
1880 	IGT_TIMEOUT(end_time);
1881 	int err;
1882 
1883 	ce = intel_context_create(engine);
1884 	if (IS_ERR(ce))
1885 		return PTR_ERR(ce);
1886 
1887 	ce->stats.runtime.num_underflow = 0;
1888 	ce->stats.runtime.max_underflow = 0;
1889 
1890 	do {
1891 		unsigned int loop = 1024;
1892 
1893 		while (loop) {
1894 			rq = intel_context_create_request(ce);
1895 			if (IS_ERR(rq)) {
1896 				err = PTR_ERR(rq);
1897 				goto err_rq;
1898 			}
1899 
1900 			if (--loop == 0)
1901 				i915_request_get(rq);
1902 
1903 			i915_request_add(rq);
1904 		}
1905 
1906 		if (__igt_timeout(end_time, NULL))
1907 			break;
1908 
1909 		i915_request_put(rq);
1910 	} while (1);
1911 
1912 	err = i915_request_wait(rq, 0, HZ / 5);
1913 	if (err < 0) {
1914 		pr_err("%s: request not completed!\n", engine->name);
1915 		goto err_wait;
1916 	}
1917 
1918 	igt_flush_test(engine->i915);
1919 
1920 	pr_info("%s: pphwsp runtime %lluns, average %lluns\n",
1921 		engine->name,
1922 		intel_context_get_total_runtime_ns(ce),
1923 		intel_context_get_avg_runtime_ns(ce));
1924 
1925 	err = 0;
1926 	if (ce->stats.runtime.num_underflow) {
1927 		pr_err("%s: pphwsp underflow %u time(s), max %u cycles!\n",
1928 		       engine->name,
1929 		       ce->stats.runtime.num_underflow,
1930 		       ce->stats.runtime.max_underflow);
1931 		GEM_TRACE_DUMP();
1932 		err = -EOVERFLOW;
1933 	}
1934 
1935 err_wait:
1936 	i915_request_put(rq);
1937 err_rq:
1938 	intel_context_put(ce);
1939 	return err;
1940 }
1941 
1942 static int live_pphwsp_runtime(void *arg)
1943 {
1944 	struct intel_gt *gt = arg;
1945 	struct intel_engine_cs *engine;
1946 	enum intel_engine_id id;
1947 	int err = 0;
1948 
1949 	/*
1950 	 * Check that cumulative context runtime as stored in the pphwsp[16]
1951 	 * is monotonic.
1952 	 */
1953 
1954 	for_each_engine(engine, gt, id) {
1955 		err = __live_pphwsp_runtime(engine);
1956 		if (err)
1957 			break;
1958 	}
1959 
1960 	if (igt_flush_test(gt->i915))
1961 		err = -EIO;
1962 
1963 	return err;
1964 }
1965 
1966 int intel_lrc_live_selftests(struct drm_i915_private *i915)
1967 {
1968 	static const struct i915_subtest tests[] = {
1969 		SUBTEST(live_lrc_layout),
1970 		SUBTEST(live_lrc_fixed),
1971 		SUBTEST(live_lrc_state),
1972 		SUBTEST(live_lrc_gpr),
1973 		SUBTEST(live_lrc_isolation),
1974 		SUBTEST(live_lrc_timestamp),
1975 		SUBTEST(live_lrc_garbage),
1976 		SUBTEST(live_pphwsp_runtime),
1977 		SUBTEST(live_lrc_indirect_ctx_bb),
1978 		SUBTEST(live_lrc_per_ctx_bb),
1979 	};
1980 
1981 	if (!HAS_LOGICAL_RING_CONTEXTS(i915))
1982 		return 0;
1983 
1984 	return intel_gt_live_subtests(tests, to_gt(i915));
1985 }
1986