1 /*
2  * Copyright © 2016 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  */
24 
25 #include <linux/prime_numbers.h>
26 
27 #include "gem/i915_gem_pm.h"
28 #include "gem/selftests/mock_context.h"
29 
30 #include "gt/intel_gt.h"
31 
32 #include "i915_random.h"
33 #include "i915_selftest.h"
34 #include "igt_live_test.h"
35 #include "lib_sw_fence.h"
36 
37 #include "mock_drm.h"
38 #include "mock_gem_device.h"
39 
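/* Count the engines exposed to userspace via the uabi engine list. */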
40 static unsigned int num_uabi_engines(struct drm_i915_private *i915)
41 {
42 	struct intel_engine_cs *engine;
43 	unsigned int count;
44 
45 	count = 0;
46 	for_each_uabi_engine(engine, i915)
47 		count++;
48 
49 	return count;
50 }
51 
52 static int igt_add_request(void *arg)
53 {
54 	struct drm_i915_private *i915 = arg;
55 	struct i915_request *request;
56 
57 	/* Basic preliminary test to create a request and let it loose! */
58 
59 	request = mock_request(i915->engine[RCS0]->kernel_context, HZ / 10);
60 	if (!request)
61 		return -ENOMEM;
62 
63 	i915_request_add(request);
64 
65 	return 0;
66 }
67 
68 static int igt_wait_request(void *arg)
69 {
70 	const long T = HZ / 4;
71 	struct drm_i915_private *i915 = arg;
72 	struct i915_request *request;
73 	int err = -EINVAL;
74 
75 	/* Submit a request, then wait upon it */
76 
77 	request = mock_request(i915->engine[RCS0]->kernel_context, T);
78 	if (!request)
79 		return -ENOMEM;
80 
81 	i915_request_get(request);
82 
83 	if (i915_request_wait(request, 0, 0) != -ETIME) {
84 		pr_err("request wait (busy query) succeeded (expected timeout before submit!)\n");
85 		goto out_request;
86 	}
87 
88 	if (i915_request_wait(request, 0, T) != -ETIME) {
89 		pr_err("request wait succeeded (expected timeout before submit!)\n");
90 		goto out_request;
91 	}
92 
93 	if (i915_request_completed(request)) {
94 		pr_err("request completed before submit!!\n");
95 		goto out_request;
96 	}
97 
98 	i915_request_add(request);
99 
100 	if (i915_request_wait(request, 0, 0) != -ETIME) {
101 		pr_err("request wait (busy query) succeeded (expected timeout after submit!)\n");
102 		goto out_request;
103 	}
104 
105 	if (i915_request_completed(request)) {
106 		pr_err("request completed immediately!\n");
107 		goto out_request;
108 	}
109 
110 	if (i915_request_wait(request, 0, T / 2) != -ETIME) {
111 		pr_err("request wait succeeded (expected timeout!)\n");
112 		goto out_request;
113 	}
114 
115 	if (i915_request_wait(request, 0, T) == -ETIME) {
116 		pr_err("request wait timed out!\n");
117 		goto out_request;
118 	}
119 
120 	if (!i915_request_completed(request)) {
121 		pr_err("request not complete after waiting!\n");
122 		goto out_request;
123 	}
124 
125 	if (i915_request_wait(request, 0, T) == -ETIME) {
126 		pr_err("request wait timed out when already complete!\n");
127 		goto out_request;
128 	}
129 
130 	err = 0;
131 out_request:
132 	i915_request_put(request);
133 	mock_device_flush(i915);
134 	return err;
135 }
136 
137 static int igt_fence_wait(void *arg)
138 {
139 	const long T = HZ / 4;
140 	struct drm_i915_private *i915 = arg;
141 	struct i915_request *request;
142 	int err = -EINVAL;
143 
144 	/* Submit a request, treat it as a fence and wait upon it */
145 
146 	request = mock_request(i915->engine[RCS0]->kernel_context, T);
147 	if (!request)
148 		return -ENOMEM;
149 
150 	if (dma_fence_wait_timeout(&request->fence, false, T) != -ETIME) {
151 		pr_err("fence wait success before submit (expected timeout)!\n");
152 		goto out;
153 	}
154 
155 	i915_request_add(request);
156 
157 	if (dma_fence_is_signaled(&request->fence)) {
158 		pr_err("fence signaled immediately!\n");
159 		goto out;
160 	}
161 
162 	if (dma_fence_wait_timeout(&request->fence, false, T / 2) != -ETIME) {
163 		pr_err("fence wait success after submit (expected timeout)!\n");
164 		goto out;
165 	}
166 
167 	if (dma_fence_wait_timeout(&request->fence, false, T) <= 0) {
168 		pr_err("fence wait timed out (expected success)!\n");
169 		goto out;
170 	}
171 
172 	if (!dma_fence_is_signaled(&request->fence)) {
173 		pr_err("fence unsignaled after waiting!\n");
174 		goto out;
175 	}
176 
177 	if (dma_fence_wait_timeout(&request->fence, false, T) <= 0) {
178 		pr_err("fence wait timed out when complete (expected success)!\n");
179 		goto out;
180 	}
181 
182 	err = 0;
183 out:
184 	mock_device_flush(i915);
185 	return err;
186 }
187 
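/*
 * Queue a slow request on context A, then cancel it and submit a no-delay
 * "vip" request from context B ahead of it before resubmitting the original.
 * The vip request must complete within a second while the rewound request is
 * still pending, mimicking preemption by manual reordering of the queue.
 */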
188 static int igt_request_rewind(void *arg)
189 {
190 	struct drm_i915_private *i915 = arg;
191 	struct i915_request *request, *vip;
192 	struct i915_gem_context *ctx[2];
193 	struct intel_context *ce;
194 	int err = -EINVAL;
195 
196 	ctx[0] = mock_context(i915, "A");
197 
198 	ce = i915_gem_context_get_engine(ctx[0], RCS0);
199 	GEM_BUG_ON(IS_ERR(ce));
200 	request = mock_request(ce, 2 * HZ);
201 	intel_context_put(ce);
202 	if (!request) {
203 		err = -ENOMEM;
204 		goto err_context_0;
205 	}
206 
207 	i915_request_get(request);
208 	i915_request_add(request);
209 
210 	ctx[1] = mock_context(i915, "B");
211 
212 	ce = i915_gem_context_get_engine(ctx[1], RCS0);
213 	GEM_BUG_ON(IS_ERR(ce));
214 	vip = mock_request(ce, 0);
215 	intel_context_put(ce);
216 	if (!vip) {
217 		err = -ENOMEM;
218 		goto err_context_1;
219 	}
220 
221 	/* Simulate preemption by manual reordering */
222 	if (!mock_cancel_request(request)) {
223 		pr_err("failed to cancel request (already executed)!\n");
224 		i915_request_add(vip);
225 		goto err_context_1;
226 	}
227 	i915_request_get(vip);
228 	i915_request_add(vip);
229 	rcu_read_lock();
230 	request->engine->submit_request(request);
231 	rcu_read_unlock();
232 
234 	if (i915_request_wait(vip, 0, HZ) == -ETIME) {
235 		pr_err("timed out waiting for high priority request\n");
236 		goto err;
237 	}
238 
239 	if (i915_request_completed(request)) {
240 		pr_err("low priority request already completed\n");
241 		goto err;
242 	}
243 
244 	err = 0;
245 err:
246 	i915_request_put(vip);
247 err_context_1:
248 	mock_context_close(ctx[1]);
249 	i915_request_put(request);
250 err_context_0:
251 	mock_context_close(ctx[0]);
252 	mock_device_flush(i915);
253 	return err;
254 }
255 
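/*
 * Parameter block shared by the breadcrumb smoketest threads: the engine and
 * contexts to submit against, per-thread result counters, and the request
 * allocator (mock or live) to use.
 */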
256 struct smoketest {
257 	struct intel_engine_cs *engine;
258 	struct i915_gem_context **contexts;
259 	atomic_long_t num_waits, num_fences;
260 	int ncontexts, max_batch;
261 	struct i915_request *(*request_alloc)(struct intel_context *ce);
262 };
263 
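/*
 * The smoketest runs both against the mock device and against real hardware;
 * these helpers provide the matching request allocators.
 */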
264 static struct i915_request *
265 __mock_request_alloc(struct intel_context *ce)
266 {
267 	return mock_request(ce, 0);
268 }
269 
270 static struct i915_request *
271 __live_request_alloc(struct intel_context *ce)
272 {
273 	return intel_context_create_request(ce);
274 }
275 
276 static int __igt_breadcrumbs_smoketest(void *arg)
277 {
278 	struct smoketest *t = arg;
279 	const unsigned int max_batch = min(t->ncontexts, t->max_batch) - 1;
280 	const unsigned int total = 4 * t->ncontexts + 1;
281 	unsigned int num_waits = 0, num_fences = 0;
282 	struct i915_request **requests;
283 	I915_RND_STATE(prng);
284 	unsigned int *order;
285 	int err = 0;
286 
287 	/*
288 	 * A very simple test to catch the most egregious of list handling bugs.
289 	 *
290 	 * At its heart, we simply create oodles of requests running across
291 	 * multiple kthreads and enable signaling on them, for the sole purpose
292 	 * of stressing our breadcrumb handling. The only inspection we do is
293 	 * that the fences were marked as signaled.
294 	 */
295 
296 	requests = kcalloc(total, sizeof(*requests), GFP_KERNEL);
297 	if (!requests)
298 		return -ENOMEM;
299 
300 	order = i915_random_order(total, &prng);
301 	if (!order) {
302 		err = -ENOMEM;
303 		goto out_requests;
304 	}
305 
306 	while (!kthread_should_stop()) {
307 		struct i915_sw_fence *submit, *wait;
308 		unsigned int n, count;
309 
310 		submit = heap_fence_create(GFP_KERNEL);
311 		if (!submit) {
312 			err = -ENOMEM;
313 			break;
314 		}
315 
316 		wait = heap_fence_create(GFP_KERNEL);
317 		if (!wait) {
318 			i915_sw_fence_commit(submit);
319 			heap_fence_put(submit);
320 			err = -ENOMEM;
321 			break;
322 		}
323 
324 		i915_random_reorder(order, total, &prng);
325 		count = 1 + i915_prandom_u32_max_state(max_batch, &prng);
326 
327 		for (n = 0; n < count; n++) {
328 			struct i915_gem_context *ctx =
329 				t->contexts[order[n] % t->ncontexts];
330 			struct i915_request *rq;
331 			struct intel_context *ce;
332 
333 			ce = i915_gem_context_get_engine(ctx, t->engine->legacy_idx);
334 			GEM_BUG_ON(IS_ERR(ce));
335 			rq = t->request_alloc(ce);
336 			intel_context_put(ce);
337 			if (IS_ERR(rq)) {
338 				err = PTR_ERR(rq);
339 				count = n;
340 				break;
341 			}
342 
343 			err = i915_sw_fence_await_sw_fence_gfp(&rq->submit,
344 							       submit,
345 							       GFP_KERNEL);
346 
347 			requests[n] = i915_request_get(rq);
348 			i915_request_add(rq);
349 
350 			if (err >= 0)
351 				err = i915_sw_fence_await_dma_fence(wait,
352 								    &rq->fence,
353 								    0,
354 								    GFP_KERNEL);
355 
356 			if (err < 0) {
357 				i915_request_put(rq);
358 				count = n;
359 				break;
360 			}
361 		}
362 
363 		i915_sw_fence_commit(submit);
364 		i915_sw_fence_commit(wait);
365 
366 		if (!wait_event_timeout(wait->wait,
367 					i915_sw_fence_done(wait),
368 					5 * HZ)) {
369 			struct i915_request *rq = requests[count - 1];
370 
371 			pr_err("waiting for %d/%d fences (last %llx:%lld) on %s timed out!\n",
372 			       atomic_read(&wait->pending), count,
373 			       rq->fence.context, rq->fence.seqno,
374 			       t->engine->name);
375 			GEM_TRACE_DUMP();
376 
377 			intel_gt_set_wedged(t->engine->gt);
378 			GEM_BUG_ON(!i915_request_completed(rq));
379 			i915_sw_fence_wait(wait);
380 			err = -EIO;
381 		}
382 
383 		for (n = 0; n < count; n++) {
384 			struct i915_request *rq = requests[n];
385 
386 			if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
387 				      &rq->fence.flags)) {
388 				pr_err("%llu:%llu was not signaled!\n",
389 				       rq->fence.context, rq->fence.seqno);
390 				err = -EINVAL;
391 			}
392 
393 			i915_request_put(rq);
394 		}
395 
396 		heap_fence_put(wait);
397 		heap_fence_put(submit);
398 
399 		if (err < 0)
400 			break;
401 
402 		num_fences += count;
403 		num_waits++;
404 
405 		cond_resched();
406 	}
407 
408 	atomic_long_add(num_fences, &t->num_fences);
409 	atomic_long_add(num_waits, &t->num_waits);
410 
411 	kfree(order);
412 out_requests:
413 	kfree(requests);
414 	return err;
415 }
416 
417 static int mock_breadcrumbs_smoketest(void *arg)
418 {
419 	struct drm_i915_private *i915 = arg;
420 	struct smoketest t = {
421 		.engine = i915->engine[RCS0],
422 		.ncontexts = 1024,
423 		.max_batch = 1024,
424 		.request_alloc = __mock_request_alloc
425 	};
426 	unsigned int ncpus = num_online_cpus();
427 	struct task_struct **threads;
428 	unsigned int n;
429 	int ret = 0;
430 
431 	/*
432 	 * Smoketest our breadcrumb/signal handling for requests across multiple
433 	 * threads. A very simple test to only catch the most egregious of bugs.
434 	 * See __igt_breadcrumbs_smoketest();
435 	 */
436 
437 	threads = kcalloc(ncpus, sizeof(*threads), GFP_KERNEL);
438 	if (!threads)
439 		return -ENOMEM;
440 
441 	t.contexts = kcalloc(t.ncontexts, sizeof(*t.contexts), GFP_KERNEL);
442 	if (!t.contexts) {
443 		ret = -ENOMEM;
444 		goto out_threads;
445 	}
446 
447 	for (n = 0; n < t.ncontexts; n++) {
448 		t.contexts[n] = mock_context(t.engine->i915, "mock");
449 		if (!t.contexts[n]) {
450 			ret = -ENOMEM;
451 			goto out_contexts;
452 		}
453 	}
454 
455 	for (n = 0; n < ncpus; n++) {
456 		threads[n] = kthread_run(__igt_breadcrumbs_smoketest,
457 					 &t, "igt/%d", n);
458 		if (IS_ERR(threads[n])) {
459 			ret = PTR_ERR(threads[n]);
460 			ncpus = n;
461 			break;
462 		}
463 
464 		get_task_struct(threads[n]);
465 	}
466 
467 	yield(); /* start all threads before we begin */
468 	msleep(jiffies_to_msecs(i915_selftest.timeout_jiffies));
469 
470 	for (n = 0; n < ncpus; n++) {
471 		int err;
472 
473 		err = kthread_stop(threads[n]);
474 		if (err < 0 && !ret)
475 			ret = err;
476 
477 		put_task_struct(threads[n]);
478 	}
479 	pr_info("Completed %lu waits for %lu fences across %d cpus\n",
480 		atomic_long_read(&t.num_waits),
481 		atomic_long_read(&t.num_fences),
482 		ncpus);
483 
484 out_contexts:
485 	for (n = 0; n < t.ncontexts; n++) {
486 		if (!t.contexts[n])
487 			break;
488 		mock_context_close(t.contexts[n]);
489 	}
490 	kfree(t.contexts);
491 out_threads:
492 	kfree(threads);
493 	return ret;
494 }
495 
496 int i915_request_mock_selftests(void)
497 {
498 	static const struct i915_subtest tests[] = {
499 		SUBTEST(igt_add_request),
500 		SUBTEST(igt_wait_request),
501 		SUBTEST(igt_fence_wait),
502 		SUBTEST(igt_request_rewind),
503 		SUBTEST(mock_breadcrumbs_smoketest),
504 	};
505 	struct drm_i915_private *i915;
506 	intel_wakeref_t wakeref;
507 	int err = 0;
508 
509 	i915 = mock_gem_device();
510 	if (!i915)
511 		return -ENOMEM;
512 
513 	with_intel_runtime_pm(&i915->runtime_pm, wakeref)
514 		err = i915_subtests(tests, i915);
515 
516 	drm_dev_put(&i915->drm);
517 
518 	return err;
519 }
520 
521 static int live_nop_request(void *arg)
522 {
523 	struct drm_i915_private *i915 = arg;
524 	struct intel_engine_cs *engine;
525 	struct igt_live_test t;
526 	int err = -ENODEV;
527 
528 	/*
529 	 * Submit various sized streams of nop requests (with no payload at
530 	 * all) to each engine individually, and wait for them to complete.
531 	 * This measures the baseline overhead of request submission.
532 	 */
533 
534 	for_each_uabi_engine(engine, i915) {
535 		unsigned long n, prime;
536 		IGT_TIMEOUT(end_time);
537 		ktime_t times[2] = {};
538 
539 		err = igt_live_test_begin(&t, i915, __func__, engine->name);
540 		if (err)
541 			return err;
542 
543 		for_each_prime_number_from(prime, 1, 8192) {
544 			struct i915_request *request = NULL;
545 
546 			times[1] = ktime_get_raw();
547 
548 			for (n = 0; n < prime; n++) {
549 				i915_request_put(request);
550 				request = i915_request_create(engine->kernel_context);
551 				if (IS_ERR(request))
552 					return PTR_ERR(request);
553 
554 				/*
555 				 * This space is left intentionally blank.
556 				 *
557 				 * We do not actually want to perform any
558 				 * action with this request, we just want
559 				 * to measure the latency in allocation
560 				 * and submission of our breadcrumbs -
561 				 * ensuring that the bare request is sufficient
562 				 * for the system to work (i.e. proper HEAD
563 				 * tracking of the rings, interrupt handling,
564 				 * etc). It also gives us the lowest bounds
565 				 * for latency.
566 				 */
567 
568 				i915_request_get(request);
569 				i915_request_add(request);
570 			}
571 			i915_request_wait(request, 0, MAX_SCHEDULE_TIMEOUT);
572 			i915_request_put(request);
573 
574 			times[1] = ktime_sub(ktime_get_raw(), times[1]);
575 			if (prime == 1)
576 				times[0] = times[1];
577 
578 			if (__igt_timeout(end_time, NULL))
579 				break;
580 		}
581 
582 		err = igt_live_test_end(&t);
583 		if (err)
584 			return err;
585 
586 		pr_info("Request latencies on %s: 1 = %lluns, %lu = %lluns\n",
587 			engine->name,
588 			ktime_to_ns(times[0]),
589 			prime, div64_u64(ktime_to_ns(times[1]), prime));
590 	}
591 
592 	return err;
593 }
594 
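/*
 * Build a single-page batch buffer containing only MI_BATCH_BUFFER_END,
 * pinned into the global GTT so it can be executed from any engine.
 */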
595 static struct i915_vma *empty_batch(struct drm_i915_private *i915)
596 {
597 	struct drm_i915_gem_object *obj;
598 	struct i915_vma *vma;
599 	u32 *cmd;
600 	int err;
601 
602 	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
603 	if (IS_ERR(obj))
604 		return ERR_CAST(obj);
605 
606 	cmd = i915_gem_object_pin_map(obj, I915_MAP_WB);
607 	if (IS_ERR(cmd)) {
608 		err = PTR_ERR(cmd);
609 		goto err;
610 	}
611 
612 	*cmd = MI_BATCH_BUFFER_END;
613 
614 	__i915_gem_object_flush_map(obj, 0, 64);
615 	i915_gem_object_unpin_map(obj);
616 
617 	intel_gt_chipset_flush(&i915->gt);
618 
619 	vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL);
620 	if (IS_ERR(vma)) {
621 		err = PTR_ERR(vma);
622 		goto err;
623 	}
624 
625 	err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_GLOBAL);
626 	if (err)
627 		goto err;
628 
629 	/* Force the wait now to avoid including it in the benchmark */
630 	err = i915_vma_sync(vma);
631 	if (err)
632 		goto err_pin;
633 
634 	return vma;
635 
636 err_pin:
637 	i915_vma_unpin(vma);
638 err:
639 	i915_gem_object_put(obj);
640 	return ERR_PTR(err);
641 }
642 
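/*
 * Submit the empty batch on the engine's kernel context and return the
 * request with an extra reference held for the caller.
 */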
643 static struct i915_request *
644 empty_request(struct intel_engine_cs *engine,
645 	      struct i915_vma *batch)
646 {
647 	struct i915_request *request;
648 	int err;
649 
650 	request = i915_request_create(engine->kernel_context);
651 	if (IS_ERR(request))
652 		return request;
653 
654 	err = engine->emit_bb_start(request,
655 				    batch->node.start,
656 				    batch->node.size,
657 				    I915_DISPATCH_SECURE);
658 	if (err)
659 		goto out_request;
660 
661 	i915_request_get(request);
662 out_request:
663 	i915_request_add(request);
664 	return err ? ERR_PTR(err) : request;
665 }
666 
667 static int live_empty_request(void *arg)
668 {
669 	struct drm_i915_private *i915 = arg;
670 	struct intel_engine_cs *engine;
671 	struct igt_live_test t;
672 	struct i915_vma *batch;
673 	int err = 0;
674 
675 	/*
676 	 * Submit various sized batches of empty requests, to each engine
677 	 * (individually), and wait for the batch to complete. We can check
678 	 * the overhead of submitting requests to the hardware.
679 	 */
680 
681 	batch = empty_batch(i915);
682 	if (IS_ERR(batch))
683 		return PTR_ERR(batch);
684 
685 	for_each_uabi_engine(engine, i915) {
686 		IGT_TIMEOUT(end_time);
687 		struct i915_request *request;
688 		unsigned long n, prime;
689 		ktime_t times[2] = {};
690 
691 		err = igt_live_test_begin(&t, i915, __func__, engine->name);
692 		if (err)
693 			goto out_batch;
694 
695 		/* Warmup / preload */
696 		request = empty_request(engine, batch);
697 		if (IS_ERR(request)) {
698 			err = PTR_ERR(request);
699 			goto out_batch;
700 		}
701 		i915_request_wait(request, 0, MAX_SCHEDULE_TIMEOUT);
702 
703 		for_each_prime_number_from(prime, 1, 8192) {
704 			times[1] = ktime_get_raw();
705 
706 			for (n = 0; n < prime; n++) {
707 				i915_request_put(request);
708 				request = empty_request(engine, batch);
709 				if (IS_ERR(request)) {
710 					err = PTR_ERR(request);
711 					goto out_batch;
712 				}
713 			}
714 			i915_request_wait(request, 0, MAX_SCHEDULE_TIMEOUT);
715 
716 			times[1] = ktime_sub(ktime_get_raw(), times[1]);
717 			if (prime == 1)
718 				times[0] = times[1];
719 
720 			if (__igt_timeout(end_time, NULL))
721 				break;
722 		}
723 		i915_request_put(request);
724 
725 		err = igt_live_test_end(&t);
726 		if (err)
727 			goto out_batch;
728 
729 		pr_info("Batch latencies on %s: 1 = %lluns, %lu = %lluns\n",
730 			engine->name,
731 			ktime_to_ns(times[0]),
732 			prime, div64_u64(ktime_to_ns(times[1]), prime));
733 	}
734 
735 out_batch:
736 	i915_vma_unpin(batch);
737 	i915_vma_put(batch);
738 	return err;
739 }
740 
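/*
 * Build a batch buffer whose first instruction is a MI_BATCH_BUFFER_START
 * pointing back at its own start, so the GPU spins inside the batch until
 * recursive_batch_resolve() overwrites that instruction with
 * MI_BATCH_BUFFER_END. This lets the tests hold requests busy for as long
 * as they need.
 */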
741 static struct i915_vma *recursive_batch(struct drm_i915_private *i915)
742 {
743 	struct i915_gem_context *ctx = i915->kernel_context;
744 	struct drm_i915_gem_object *obj;
745 	const int gen = INTEL_GEN(i915);
746 	struct i915_address_space *vm;
747 	struct i915_vma *vma;
748 	u32 *cmd;
749 	int err;
750 
751 	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
752 	if (IS_ERR(obj))
753 		return ERR_CAST(obj);
754 
755 	vm = i915_gem_context_get_vm_rcu(ctx);
756 	vma = i915_vma_instance(obj, vm, NULL);
757 	i915_vm_put(vm);
758 	if (IS_ERR(vma)) {
759 		err = PTR_ERR(vma);
760 		goto err;
761 	}
762 
763 	err = i915_vma_pin(vma, 0, 0, PIN_USER);
764 	if (err)
765 		goto err;
766 
767 	cmd = i915_gem_object_pin_map(obj, I915_MAP_WC);
768 	if (IS_ERR(cmd)) {
769 		err = PTR_ERR(cmd);
770 		goto err;
771 	}
772 
773 	if (gen >= 8) {
774 		*cmd++ = MI_BATCH_BUFFER_START | 1 << 8 | 1;
775 		*cmd++ = lower_32_bits(vma->node.start);
776 		*cmd++ = upper_32_bits(vma->node.start);
777 	} else if (gen >= 6) {
778 		*cmd++ = MI_BATCH_BUFFER_START | 1 << 8;
779 		*cmd++ = lower_32_bits(vma->node.start);
780 	} else {
781 		*cmd++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT;
782 		*cmd++ = lower_32_bits(vma->node.start);
783 	}
784 	*cmd++ = MI_BATCH_BUFFER_END; /* terminate early in case of error */
785 
786 	__i915_gem_object_flush_map(obj, 0, 64);
787 	i915_gem_object_unpin_map(obj);
788 
789 	intel_gt_chipset_flush(&i915->gt);
790 
791 	return vma;
792 
793 err:
794 	i915_gem_object_put(obj);
795 	return ERR_PTR(err);
796 }
797 
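/*
 * Terminate a recursive batch by rewriting its first dword to
 * MI_BATCH_BUFFER_END and flushing the write to the GPU.
 */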
798 static int recursive_batch_resolve(struct i915_vma *batch)
799 {
800 	u32 *cmd;
801 
802 	cmd = i915_gem_object_pin_map(batch->obj, I915_MAP_WC);
803 	if (IS_ERR(cmd))
804 		return PTR_ERR(cmd);
805 
806 	*cmd = MI_BATCH_BUFFER_END;
807 	intel_gt_chipset_flush(batch->vm->gt);
808 
809 	i915_gem_object_unpin_map(batch->obj);
810 
811 	return 0;
812 }
813 
814 static int live_all_engines(void *arg)
815 {
816 	struct drm_i915_private *i915 = arg;
817 	const unsigned int nengines = num_uabi_engines(i915);
818 	struct intel_engine_cs *engine;
819 	struct i915_request **request;
820 	struct igt_live_test t;
821 	struct i915_vma *batch;
822 	unsigned int idx;
823 	int err;
824 
825 	/*
826 	 * Check we can submit requests to all engines simultaneously. We
827 	 * send a recursive batch to each engine - checking that we don't
828 	 * block doing so, and that they don't complete too soon.
829 	 */
830 
831 	request = kcalloc(nengines, sizeof(*request), GFP_KERNEL);
832 	if (!request)
833 		return -ENOMEM;
834 
835 	err = igt_live_test_begin(&t, i915, __func__, "");
836 	if (err)
837 		goto out_free;
838 
839 	batch = recursive_batch(i915);
840 	if (IS_ERR(batch)) {
841 		err = PTR_ERR(batch);
842 		pr_err("%s: Unable to create batch, err=%d\n", __func__, err);
843 		goto out_free;
844 	}
845 
846 	idx = 0;
847 	for_each_uabi_engine(engine, i915) {
848 		request[idx] = i915_request_create(engine->kernel_context);
849 		if (IS_ERR(request[idx])) {
850 			err = PTR_ERR(request[idx]);
851 			pr_err("%s: Request allocation failed with err=%d\n",
852 			       __func__, err);
853 			goto out_request;
854 		}
855 
856 		err = engine->emit_bb_start(request[idx],
857 					    batch->node.start,
858 					    batch->node.size,
859 					    0);
860 		GEM_BUG_ON(err);
861 		request[idx]->batch = batch;
862 
863 		i915_vma_lock(batch);
864 		err = i915_request_await_object(request[idx], batch->obj, 0);
865 		if (err == 0)
866 			err = i915_vma_move_to_active(batch, request[idx], 0);
867 		i915_vma_unlock(batch);
868 		GEM_BUG_ON(err);
869 
870 		i915_request_get(request[idx]);
871 		i915_request_add(request[idx]);
872 		idx++;
873 	}
874 
875 	idx = 0;
876 	for_each_uabi_engine(engine, i915) {
877 		if (i915_request_completed(request[idx])) {
878 			pr_err("%s(%s): request completed too early!\n",
879 			       __func__, engine->name);
880 			err = -EINVAL;
881 			goto out_request;
882 		}
883 		idx++;
884 	}
885 
886 	err = recursive_batch_resolve(batch);
887 	if (err) {
888 		pr_err("%s: failed to resolve batch, err=%d\n", __func__, err);
889 		goto out_request;
890 	}
891 
892 	idx = 0;
893 	for_each_uabi_engine(engine, i915) {
894 		long timeout;
895 
896 		timeout = i915_request_wait(request[idx], 0,
897 					    MAX_SCHEDULE_TIMEOUT);
898 		if (timeout < 0) {
899 			err = timeout;
900 			pr_err("%s: error waiting for request on %s, err=%d\n",
901 			       __func__, engine->name, err);
902 			goto out_request;
903 		}
904 
905 		GEM_BUG_ON(!i915_request_completed(request[idx]));
906 		i915_request_put(request[idx]);
907 		request[idx] = NULL;
908 		idx++;
909 	}
910 
911 	err = igt_live_test_end(&t);
912 
913 out_request:
914 	idx = 0;
915 	for_each_uabi_engine(engine, i915) {
916 		if (request[idx])
917 			i915_request_put(request[idx]);
918 		idx++;
919 	}
920 	i915_vma_unpin(batch);
921 	i915_vma_put(batch);
922 out_free:
923 	kfree(request);
924 	return err;
925 }
926 
927 static int live_sequential_engines(void *arg)
928 {
929 	struct drm_i915_private *i915 = arg;
930 	const unsigned int nengines = num_uabi_engines(i915);
931 	struct i915_request **request;
932 	struct i915_request *prev = NULL;
933 	struct intel_engine_cs *engine;
934 	struct igt_live_test t;
935 	unsigned int idx;
936 	int err;
937 
938 	/*
939 	 * Check we can submit requests to all engines sequentially, such
940 	 * that each successive request waits for the earlier ones. This
941 	 * tests that we don't execute requests out of order, even though
942 	 * they are running on independent engines.
943 	 */
944 
945 	request = kcalloc(nengines, sizeof(*request), GFP_KERNEL);
946 	if (!request)
947 		return -ENOMEM;
948 
949 	err = igt_live_test_begin(&t, i915, __func__, "");
950 	if (err)
951 		goto out_free;
952 
953 	idx = 0;
954 	for_each_uabi_engine(engine, i915) {
955 		struct i915_vma *batch;
956 
957 		batch = recursive_batch(i915);
958 		if (IS_ERR(batch)) {
959 			err = PTR_ERR(batch);
960 			pr_err("%s: Unable to create batch for %s, err=%d\n",
961 			       __func__, engine->name, err);
962 			goto out_free;
963 		}
964 
965 		request[idx] = i915_request_create(engine->kernel_context);
966 		if (IS_ERR(request[idx])) {
967 			err = PTR_ERR(request[idx]);
968 			pr_err("%s: Request allocation failed for %s with err=%d\n",
969 			       __func__, engine->name, err);
970 			goto out_request;
971 		}
972 
973 		if (prev) {
974 			err = i915_request_await_dma_fence(request[idx],
975 							   &prev->fence);
976 			if (err) {
977 				i915_request_add(request[idx]);
978 				pr_err("%s: Request await failed for %s with err=%d\n",
979 				       __func__, engine->name, err);
980 				goto out_request;
981 			}
982 		}
983 
984 		err = engine->emit_bb_start(request[idx],
985 					    batch->node.start,
986 					    batch->node.size,
987 					    0);
988 		GEM_BUG_ON(err);
989 		request[idx]->batch = batch;
990 
991 		i915_vma_lock(batch);
992 		err = i915_request_await_object(request[idx],
993 						batch->obj, false);
994 		if (err == 0)
995 			err = i915_vma_move_to_active(batch, request[idx], 0);
996 		i915_vma_unlock(batch);
997 		GEM_BUG_ON(err);
998 
999 		i915_request_get(request[idx]);
1000 		i915_request_add(request[idx]);
1001 
1002 		prev = request[idx];
1003 		idx++;
1004 	}
1005 
1006 	idx = 0;
1007 	for_each_uabi_engine(engine, i915) {
1008 		long timeout;
1009 
1010 		if (i915_request_completed(request[idx])) {
1011 			pr_err("%s(%s): request completed too early!\n",
1012 			       __func__, engine->name);
1013 			err = -EINVAL;
1014 			goto out_request;
1015 		}
1016 
1017 		err = recursive_batch_resolve(request[idx]->batch);
1018 		if (err) {
1019 			pr_err("%s: failed to resolve batch, err=%d\n",
1020 			       __func__, err);
1021 			goto out_request;
1022 		}
1023 
1024 		timeout = i915_request_wait(request[idx], 0,
1025 					    MAX_SCHEDULE_TIMEOUT);
1026 		if (timeout < 0) {
1027 			err = timeout;
1028 			pr_err("%s: error waiting for request on %s, err=%d\n",
1029 			       __func__, engine->name, err);
1030 			goto out_request;
1031 		}
1032 
1033 		GEM_BUG_ON(!i915_request_completed(request[idx]));
1034 		idx++;
1035 	}
1036 
1037 	err = igt_live_test_end(&t);
1038 
1039 out_request:
1040 	idx = 0;
1041 	for_each_uabi_engine(engine, i915) {
1042 		u32 *cmd;
1043 
1044 		if (!request[idx])
1045 			break;
1046 
1047 		cmd = i915_gem_object_pin_map(request[idx]->batch->obj,
1048 					      I915_MAP_WC);
1049 		if (!IS_ERR(cmd)) {
1050 			*cmd = MI_BATCH_BUFFER_END;
1051 			intel_gt_chipset_flush(engine->gt);
1052 
1053 			i915_gem_object_unpin_map(request[idx]->batch->obj);
1054 		}
1055 
1056 		i915_vma_put(request[idx]->batch);
1057 		i915_request_put(request[idx]);
1058 		idx++;
1059 	}
1060 out_free:
1061 	kfree(request);
1062 	return err;
1063 }
1064 
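/*
 * Per-engine thread: submit a request on the kernel context and
 * synchronously wait for it, repeating until the selftest timeout, to
 * exercise the submit/wait path under concurrent load from all engines.
 */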
1065 static int __live_parallel_engine1(void *arg)
1066 {
1067 	struct intel_engine_cs *engine = arg;
1068 	IGT_TIMEOUT(end_time);
1069 	unsigned long count;
1070 
1071 	count = 0;
1072 	do {
1073 		struct i915_request *rq;
1074 		int err;
1075 
1076 		rq = i915_request_create(engine->kernel_context);
1077 		if (IS_ERR(rq))
1078 			return PTR_ERR(rq);
1079 
1080 		i915_request_get(rq);
1081 		i915_request_add(rq);
1082 
1083 		err = 0;
1084 		if (i915_request_wait(rq, 0, HZ / 5) < 0)
1085 			err = -ETIME;
1086 		i915_request_put(rq);
1087 		if (err)
1088 			return err;
1089 
1090 		count++;
1091 	} while (!__igt_timeout(end_time, NULL));
1092 
1093 	pr_info("%s: %lu request + sync\n", engine->name, count);
1094 	return 0;
1095 }
1096 
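/*
 * Per-engine thread: submit requests back-to-back without waiting, counting
 * how many we can queue before the selftest timeout expires.
 */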
1097 static int __live_parallel_engineN(void *arg)
1098 {
1099 	struct intel_engine_cs *engine = arg;
1100 	IGT_TIMEOUT(end_time);
1101 	unsigned long count;
1102 
1103 	count = 0;
1104 	do {
1105 		struct i915_request *rq;
1106 
1107 		rq = i915_request_create(engine->kernel_context);
1108 		if (IS_ERR(rq))
1109 			return PTR_ERR(rq);
1110 
1111 		i915_request_add(rq);
1112 		count++;
1113 	} while (!__igt_timeout(end_time, NULL));
1114 
1115 	pr_info("%s: %lu requests\n", engine->name, count);
1116 	return 0;
1117 }
1118 
1119 static int live_parallel_engines(void *arg)
1120 {
1121 	struct drm_i915_private *i915 = arg;
1122 	static int (* const func[])(void *arg) = {
1123 		__live_parallel_engine1,
1124 		__live_parallel_engineN,
1125 		NULL,
1126 	};
1127 	const unsigned int nengines = num_uabi_engines(i915);
1128 	struct intel_engine_cs *engine;
1129 	int (* const *fn)(void *arg);
1130 	struct task_struct **tsk;
1131 	int err = 0;
1132 
1133 	/*
1134 	 * Check we can submit requests to all engines concurrently. This
1135 	 * tests that we load up the system maximally.
1136 	 */
1137 
1138 	tsk = kcalloc(nengines, sizeof(*tsk), GFP_KERNEL);
1139 	if (!tsk)
1140 		return -ENOMEM;
1141 
1142 	for (fn = func; !err && *fn; fn++) {
1143 		struct igt_live_test t;
1144 		unsigned int idx;
1145 
1146 		err = igt_live_test_begin(&t, i915, __func__, "");
1147 		if (err)
1148 			break;
1149 
1150 		idx = 0;
1151 		for_each_uabi_engine(engine, i915) {
1152 			tsk[idx] = kthread_run(*fn, engine,
1153 					       "igt/parallel:%s",
1154 					       engine->name);
1155 			if (IS_ERR(tsk[idx])) {
1156 				err = PTR_ERR(tsk[idx]);
1157 				break;
1158 			}
1159 			get_task_struct(tsk[idx++]);
1160 		}
1161 
1162 		yield(); /* start all threads before we kthread_stop() */
1163 
1164 		idx = 0;
1165 		for_each_uabi_engine(engine, i915) {
1166 			int status;
1167 
1168 			if (IS_ERR(tsk[idx]))
1169 				break;
1170 
1171 			status = kthread_stop(tsk[idx]);
1172 			if (status && !err)
1173 				err = status;
1174 
1175 			put_task_struct(tsk[idx++]);
1176 		}
1177 
1178 		if (igt_live_test_end(&t))
1179 			err = -EIO;
1180 	}
1181 
1182 	kfree(tsk);
1183 	return err;
1184 }
1185 
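/*
 * Estimate how many requests we can queue on this context/engine while
 * holding their submission back, returning the estimate or a negative
 * error code.
 */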
1186 static int
1187 max_batches(struct i915_gem_context *ctx, struct intel_engine_cs *engine)
1188 {
1189 	struct i915_request *rq;
1190 	int ret;
1191 
1192 	/*
1193 	 * Before execlists, all contexts share the same ringbuffer. With
1194 	 * execlists, each context/engine has a separate ringbuffer and
1195 	 * for the purposes of this test, inexhaustible.
1196 	 *
1197 	 * For the global ringbuffer though, we have to be very careful
1198 	 * that we do not wrap while preventing the execution of requests
1199 	 * with an unsignaled fence.
1200 	 */
1201 	if (HAS_EXECLISTS(ctx->i915))
1202 		return INT_MAX;
1203 
1204 	rq = igt_request_alloc(ctx, engine);
1205 	if (IS_ERR(rq)) {
1206 		ret = PTR_ERR(rq);
1207 	} else {
1208 		int sz;
1209 
1210 		ret = rq->ring->size - rq->reserved_space;
1211 		i915_request_add(rq);
1212 
1213 		sz = rq->ring->emit - rq->head;
1214 		if (sz < 0)
1215 			sz += rq->ring->size;
1216 		ret /= sz;
1217 		ret /= 2; /* leave half spare, in case of emergency! */
1218 	}
1219 
1220 	return ret;
1221 }
1222 
1223 static int live_breadcrumbs_smoketest(void *arg)
1224 {
1225 	struct drm_i915_private *i915 = arg;
1226 	const unsigned int nengines = num_uabi_engines(i915);
1227 	const unsigned int ncpus = num_online_cpus();
1228 	unsigned long num_waits, num_fences;
1229 	struct intel_engine_cs *engine;
1230 	struct task_struct **threads;
1231 	struct igt_live_test live;
1232 	intel_wakeref_t wakeref;
1233 	struct drm_file *file;
1234 	struct smoketest *smoke;
1235 	unsigned int n, idx;
1236 	int ret = 0;
1237 
1238 	/*
1239 	 * Smoketest our breadcrumb/signal handling for requests across multiple
1240 	 * threads. A very simple test to only catch the most egregious of bugs.
1241 	 * See __igt_breadcrumbs_smoketest();
1242 	 *
1243 	 * On real hardware this time.
1244 	 */
1245 
1246 	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
1247 
1248 	file = mock_file(i915);
1249 	if (IS_ERR(file)) {
1250 		ret = PTR_ERR(file);
1251 		goto out_rpm;
1252 	}
1253 
1254 	smoke = kcalloc(nengines, sizeof(*smoke), GFP_KERNEL);
1255 	if (!smoke) {
1256 		ret = -ENOMEM;
1257 		goto out_file;
1258 	}
1259 
1260 	threads = kcalloc(ncpus * nengines, sizeof(*threads), GFP_KERNEL);
1261 	if (!threads) {
1262 		ret = -ENOMEM;
1263 		goto out_smoke;
1264 	}
1265 
1266 	smoke[0].request_alloc = __live_request_alloc;
1267 	smoke[0].ncontexts = 64;
1268 	smoke[0].contexts = kcalloc(smoke[0].ncontexts,
1269 				    sizeof(*smoke[0].contexts),
1270 				    GFP_KERNEL);
1271 	if (!smoke[0].contexts) {
1272 		ret = -ENOMEM;
1273 		goto out_threads;
1274 	}
1275 
1276 	for (n = 0; n < smoke[0].ncontexts; n++) {
1277 		smoke[0].contexts[n] = live_context(i915, file);
1278 		if (!smoke[0].contexts[n]) {
1279 			ret = -ENOMEM;
1280 			goto out_contexts;
1281 		}
1282 	}
1283 
1284 	ret = igt_live_test_begin(&live, i915, __func__, "");
1285 	if (ret)
1286 		goto out_contexts;
1287 
1288 	idx = 0;
1289 	for_each_uabi_engine(engine, i915) {
1290 		smoke[idx] = smoke[0];
1291 		smoke[idx].engine = engine;
1292 		smoke[idx].max_batch =
1293 			max_batches(smoke[0].contexts[0], engine);
1294 		if (smoke[idx].max_batch < 0) {
1295 			ret = smoke[idx].max_batch;
1296 			goto out_flush;
1297 		}
1298 		/* One ring interleaved between requests from all cpus */
1299 		smoke[idx].max_batch /= num_online_cpus() + 1;
1300 		pr_debug("Limiting batches to %d requests on %s\n",
1301 			 smoke[idx].max_batch, engine->name);
1302 
1303 		for (n = 0; n < ncpus; n++) {
1304 			struct task_struct *tsk;
1305 
1306 			tsk = kthread_run(__igt_breadcrumbs_smoketest,
1307 					  &smoke[idx], "igt/%d.%d", idx, n);
1308 			if (IS_ERR(tsk)) {
1309 				ret = PTR_ERR(tsk);
1310 				goto out_flush;
1311 			}
1312 
1313 			get_task_struct(tsk);
1314 			threads[idx * ncpus + n] = tsk;
1315 		}
1316 
1317 		idx++;
1318 	}
1319 
1320 	yield(); /* start all threads before we begin */
1321 	msleep(jiffies_to_msecs(i915_selftest.timeout_jiffies));
1322 
1323 out_flush:
1324 	idx = 0;
1325 	num_waits = 0;
1326 	num_fences = 0;
1327 	for_each_uabi_engine(engine, i915) {
1328 		for (n = 0; n < ncpus; n++) {
1329 			struct task_struct *tsk = threads[idx * ncpus + n];
1330 			int err;
1331 
1332 			if (!tsk)
1333 				continue;
1334 
1335 			err = kthread_stop(tsk);
1336 			if (err < 0 && !ret)
1337 				ret = err;
1338 
1339 			put_task_struct(tsk);
1340 		}
1341 
1342 		num_waits += atomic_long_read(&smoke[idx].num_waits);
1343 		num_fences += atomic_long_read(&smoke[idx].num_fences);
1344 		idx++;
1345 	}
1346 	pr_info("Completed %lu waits for %lu fences across %d engines and %d cpus\n",
1347 		num_waits, num_fences, RUNTIME_INFO(i915)->num_engines, ncpus);
1348 
1349 	ret = igt_live_test_end(&live) ?: ret;
1350 out_contexts:
1351 	kfree(smoke[0].contexts);
1352 out_threads:
1353 	kfree(threads);
1354 out_smoke:
1355 	kfree(smoke);
1356 out_file:
1357 	mock_file_free(i915, file);
1358 out_rpm:
1359 	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
1360 
1361 	return ret;
1362 }
1363 
1364 int i915_request_live_selftests(struct drm_i915_private *i915)
1365 {
1366 	static const struct i915_subtest tests[] = {
1367 		SUBTEST(live_nop_request),
1368 		SUBTEST(live_all_engines),
1369 		SUBTEST(live_sequential_engines),
1370 		SUBTEST(live_parallel_engines),
1371 		SUBTEST(live_empty_request),
1372 		SUBTEST(live_breadcrumbs_smoketest),
1373 	};
1374 
1375 	if (intel_gt_is_wedged(&i915->gt))
1376 		return 0;
1377 
1378 	return i915_subtests(tests, i915);
1379 }
1380