xref: /openbsd/sys/dev/pci/drm/i915/gt/uc/selftest_guc.c (revision f005ef32)
11bb76ff1Sjsg // SPDX-License-Identifier: MIT
21bb76ff1Sjsg /*
31bb76ff1Sjsg  * Copyright © 2021 Intel Corporation
41bb76ff1Sjsg  */
51bb76ff1Sjsg 
6*f005ef32Sjsg #include "gt/intel_gt_print.h"
7*f005ef32Sjsg #include "intel_guc_print.h"
81bb76ff1Sjsg #include "selftests/igt_spinner.h"
91bb76ff1Sjsg #include "selftests/intel_scheduler_helpers.h"
101bb76ff1Sjsg 
request_add_spin(struct i915_request * rq,struct igt_spinner * spin)111bb76ff1Sjsg static int request_add_spin(struct i915_request *rq, struct igt_spinner *spin)
121bb76ff1Sjsg {
131bb76ff1Sjsg 	int err = 0;
141bb76ff1Sjsg 
151bb76ff1Sjsg 	i915_request_get(rq);
161bb76ff1Sjsg 	i915_request_add(rq);
171bb76ff1Sjsg 	if (spin && !igt_wait_for_spinner(spin, rq))
181bb76ff1Sjsg 		err = -ETIMEDOUT;
191bb76ff1Sjsg 
201bb76ff1Sjsg 	return err;
211bb76ff1Sjsg }
221bb76ff1Sjsg 
nop_user_request(struct intel_context * ce,struct i915_request * from)231bb76ff1Sjsg static struct i915_request *nop_user_request(struct intel_context *ce,
241bb76ff1Sjsg 					     struct i915_request *from)
251bb76ff1Sjsg {
261bb76ff1Sjsg 	struct i915_request *rq;
271bb76ff1Sjsg 	int ret;
281bb76ff1Sjsg 
291bb76ff1Sjsg 	rq = intel_context_create_request(ce);
301bb76ff1Sjsg 	if (IS_ERR(rq))
311bb76ff1Sjsg 		return rq;
321bb76ff1Sjsg 
331bb76ff1Sjsg 	if (from) {
341bb76ff1Sjsg 		ret = i915_sw_fence_await_dma_fence(&rq->submit,
351bb76ff1Sjsg 						    &from->fence, 0,
361bb76ff1Sjsg 						    I915_FENCE_GFP);
371bb76ff1Sjsg 		if (ret < 0) {
381bb76ff1Sjsg 			i915_request_put(rq);
391bb76ff1Sjsg 			return ERR_PTR(ret);
401bb76ff1Sjsg 		}
411bb76ff1Sjsg 	}
421bb76ff1Sjsg 
431bb76ff1Sjsg 	i915_request_get(rq);
441bb76ff1Sjsg 	i915_request_add(rq);
451bb76ff1Sjsg 
461bb76ff1Sjsg 	return rq;
471bb76ff1Sjsg }
481bb76ff1Sjsg 
/*
 * intel_guc_scrub_ctbs - verify a GT reset scrubs deliberately lost G2H.
 *
 * Submits three requests, each on a context armed (via a drop_* debug
 * flag) to discard a different GuC-to-host (G2H) response:
 * schedule-enable, schedule-disable and deregister. Once the requests
 * complete, a full GT reset is triggered; the GT can only reach idle
 * afterwards if the missing G2H state was scrubbed correctly.
 *
 * Returns 0 on success (or skip), negative errno on failure.
 */
static int intel_guc_scrub_ctbs(void *arg)
{
	struct intel_gt *gt = arg;
	int ret = 0;
	int i;
	struct i915_request *last[3] = {NULL, NULL, NULL}, *rq;
	intel_wakeref_t wakeref;
	struct intel_engine_cs *engine;
	struct intel_context *ce;

	/* The test depends on forcing a full GT reset below */
	if (!intel_has_gpu_reset(gt))
		return 0;

	wakeref = intel_runtime_pm_get(gt->uncore->rpm);
	engine = intel_selftest_find_any_engine(gt);

	/* Submit requests and inject errors forcing G2H to be dropped */
	for (i = 0; i < 3; ++i) {
		ce = intel_context_create(engine);
		if (IS_ERR(ce)) {
			ret = PTR_ERR(ce);
			gt_err(gt, "Failed to create context %d: %pe\n", i, ce);
			goto err;
		}

		/* Each context drops a different G2H message type */
		switch (i) {
		case 0:
			ce->drop_schedule_enable = true;
			break;
		case 1:
			ce->drop_schedule_disable = true;
			break;
		case 2:
			ce->drop_deregister = true;
			break;
		}

		rq = nop_user_request(ce, NULL);
		/* nop_user_request() holds its own ref; drop ours now */
		intel_context_put(ce);

		if (IS_ERR(rq)) {
			ret = PTR_ERR(rq);
			gt_err(gt, "Failed to create request %d: %pe\n", i, rq);
			goto err;
		}

		last[i] = rq;
	}

	/* Requests must still complete despite the dropped G2H */
	for (i = 0; i < 3; ++i) {
		ret = i915_request_wait(last[i], 0, HZ);
		if (ret < 0) {
			gt_err(gt, "Last request failed to complete: %pe\n", ERR_PTR(ret));
			goto err;
		}
		i915_request_put(last[i]);
		last[i] = NULL;
	}

	/* Force all H2G / G2H to be submitted / processed */
	intel_gt_retire_requests(gt);
	drm_msleep(500);

	/* Scrub missing G2H */
	intel_gt_handle_error(engine->gt, -1, 0, "selftest reset");

	/* GT will not idle if G2H are lost */
	ret = intel_gt_wait_for_idle(gt, HZ);
	if (ret < 0) {
		gt_err(gt, "GT failed to idle: %pe\n", ERR_PTR(ret));
		goto err;
	}

err:
	/* Release any requests still referenced after an early exit */
	for (i = 0; i < 3; ++i)
		if (last[i])
			i915_request_put(last[i]);
	intel_runtime_pm_put(gt->uncore->rpm, wakeref);

	return ret;
}
1301bb76ff1Sjsg 
1311bb76ff1Sjsg /*
1321bb76ff1Sjsg  * intel_guc_steal_guc_ids - Test to exhaust all guc_ids and then steal one
1331bb76ff1Sjsg  *
1341bb76ff1Sjsg  * This test creates a spinner which is used to block all subsequent submissions
1351bb76ff1Sjsg  * until it completes. Next, a loop creates a context and a NOP request each
1361bb76ff1Sjsg  * iteration until the guc_ids are exhausted (request creation returns -EAGAIN).
1371bb76ff1Sjsg  * The spinner is ended, unblocking all requests created in the loop. At this
1381bb76ff1Sjsg  * point all guc_ids are exhausted but are available to steal. Try to create
1391bb76ff1Sjsg  * another request which should successfully steal a guc_id. Wait on last
1401bb76ff1Sjsg  * request to complete, idle GPU, verify a guc_id was stolen via a counter, and
1411bb76ff1Sjsg  * exit the test. Test also artificially reduces the number of guc_ids so the
1421bb76ff1Sjsg  * test runs in a timely manner.
1431bb76ff1Sjsg  */
/* See the block comment above for the full test description. */
static int intel_guc_steal_guc_ids(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_guc *guc = &gt->uc.guc;
	int ret, sv, context_index = 0;
	intel_wakeref_t wakeref;
	struct intel_engine_cs *engine;
	struct intel_context **ce;
	struct igt_spinner spin;
	struct i915_request *spin_rq = NULL, *rq, *last = NULL;
	/* Snapshot the steal counter so a new steal can be detected below */
	int number_guc_id_stolen = guc->number_guc_id_stolen;

	ce = kcalloc(GUC_MAX_CONTEXT_ID, sizeof(*ce), GFP_KERNEL);
	if (!ce) {
		guc_err(guc, "Context array allocation failed\n");
		return -ENOMEM;
	}

	wakeref = intel_runtime_pm_get(gt->uncore->rpm);
	engine = intel_selftest_find_any_engine(gt);
	/* Shrink the guc_id space so exhaustion happens quickly */
	sv = guc->submission_state.num_guc_ids;
	guc->submission_state.num_guc_ids = 512;

	/* Create spinner to block requests in below loop */
	ce[context_index] = intel_context_create(engine);
	if (IS_ERR(ce[context_index])) {
		ret = PTR_ERR(ce[context_index]);
		guc_err(guc, "Failed to create context: %pe\n", ce[context_index]);
		ce[context_index] = NULL;
		goto err_wakeref;
	}
	ret = igt_spinner_init(&spin, engine->gt);
	if (ret) {
		guc_err(guc, "Failed to create spinner: %pe\n", ERR_PTR(ret));
		goto err_contexts;
	}
	spin_rq = igt_spinner_create_request(&spin, ce[context_index],
					     MI_ARB_CHECK);
	if (IS_ERR(spin_rq)) {
		ret = PTR_ERR(spin_rq);
		guc_err(guc, "Failed to create spinner request: %pe\n", spin_rq);
		goto err_contexts;
	}
	ret = request_add_spin(spin_rq, &spin);
	if (ret) {
		guc_err(guc, "Failed to add Spinner request: %pe\n", ERR_PTR(ret));
		goto err_spin_rq;
	}

	/* Use all guc_ids */
	while (ret != -EAGAIN) {
		ce[++context_index] = intel_context_create(engine);
		if (IS_ERR(ce[context_index])) {
			ret = PTR_ERR(ce[context_index]);
			guc_err(guc, "Failed to create context: %pe\n", ce[context_index]);
			/* Step back so cleanup skips the failed slot */
			ce[context_index--] = NULL;
			goto err_spin_rq;
		}

		rq = nop_user_request(ce[context_index], spin_rq);
		if (IS_ERR(rq)) {
			ret = PTR_ERR(rq);
			rq = NULL;
			/* -EAGAIN after at least one success == exhaustion */
			if ((ret != -EAGAIN) || !last) {
				guc_err(guc, "Failed to create %srequest %d: %pe\n",
					last ? "" : "first ", context_index, ERR_PTR(ret));
				goto err_spin_rq;
			}
		} else {
			/* Only keep a reference on the most recent request */
			if (last)
				i915_request_put(last);
			last = rq;
		}
	}

	/* Release blocked requests */
	igt_spinner_end(&spin);
	ret = intel_selftest_wait_for_rq(spin_rq);
	if (ret) {
		guc_err(guc, "Spin request failed to complete: %pe\n", ERR_PTR(ret));
		i915_request_put(last);
		goto err_spin_rq;
	}
	i915_request_put(spin_rq);
	igt_spinner_fini(&spin);
	/* NULL so the err_spin_rq path skips the spinner teardown */
	spin_rq = NULL;

	/* Wait for last request */
	ret = i915_request_wait(last, 0, HZ * 30);
	i915_request_put(last);
	if (ret < 0) {
		guc_err(guc, "Last request failed to complete: %pe\n", ERR_PTR(ret));
		goto err_spin_rq;
	}

	/* Try to steal guc_id */
	rq = nop_user_request(ce[context_index], NULL);
	if (IS_ERR(rq)) {
		ret = PTR_ERR(rq);
		guc_err(guc, "Failed to steal guc_id %d: %pe\n", context_index, rq);
		goto err_spin_rq;
	}

	/* Wait for request with stolen guc_id */
	ret = i915_request_wait(rq, 0, HZ);
	i915_request_put(rq);
	if (ret < 0) {
		guc_err(guc, "Request with stolen guc_id failed to complete: %pe\n", ERR_PTR(ret));
		goto err_spin_rq;
	}

	/* Wait for idle */
	ret = intel_gt_wait_for_idle(gt, HZ * 30);
	if (ret < 0) {
		guc_err(guc, "GT failed to idle: %pe\n", ERR_PTR(ret));
		goto err_spin_rq;
	}

	/* Verify a guc_id was stolen via the counter snapshot taken above */
	if (guc->number_guc_id_stolen == number_guc_id_stolen) {
		guc_err(guc, "No guc_id was stolen");
		ret = -EINVAL;
	} else {
		ret = 0;
	}

err_spin_rq:
	if (spin_rq) {
		igt_spinner_end(&spin);
		intel_selftest_wait_for_rq(spin_rq);
		i915_request_put(spin_rq);
		igt_spinner_fini(&spin);
		intel_gt_wait_for_idle(gt, HZ * 30);
	}
err_contexts:
	for (; context_index >= 0 && ce[context_index]; --context_index)
		intel_context_put(ce[context_index]);
err_wakeref:
	intel_runtime_pm_put(gt->uncore->rpm, wakeref);
	kfree(ce);
	/* Restore the saved guc_id limit */
	guc->submission_state.num_guc_ids = sv;

	return ret;
}
2881bb76ff1Sjsg 
intel_guc_live_selftests(struct drm_i915_private * i915)2891bb76ff1Sjsg int intel_guc_live_selftests(struct drm_i915_private *i915)
2901bb76ff1Sjsg {
2911bb76ff1Sjsg 	static const struct i915_subtest tests[] = {
2921bb76ff1Sjsg 		SUBTEST(intel_guc_scrub_ctbs),
2931bb76ff1Sjsg 		SUBTEST(intel_guc_steal_guc_ids),
2941bb76ff1Sjsg 	};
2951bb76ff1Sjsg 	struct intel_gt *gt = to_gt(i915);
2961bb76ff1Sjsg 
2971bb76ff1Sjsg 	if (intel_gt_is_wedged(gt))
2981bb76ff1Sjsg 		return 0;
2991bb76ff1Sjsg 
3001bb76ff1Sjsg 	if (!intel_uc_uses_guc_submission(&gt->uc))
3011bb76ff1Sjsg 		return 0;
3021bb76ff1Sjsg 
3031bb76ff1Sjsg 	return intel_gt_live_subtests(tests, gt);
3041bb76ff1Sjsg }
305