// SPDX-License-Identifier: MIT
/*
 * Copyright © 2021 Intel Corporation
 */

#include "gt/intel_gt_print.h"
#include "intel_guc_print.h"
#include "selftests/igt_spinner.h"
#include "selftests/intel_scheduler_helpers.h"

request_add_spin(struct i915_request * rq,struct igt_spinner * spin)111bb76ff1Sjsg static int request_add_spin(struct i915_request *rq, struct igt_spinner *spin)
121bb76ff1Sjsg {
131bb76ff1Sjsg int err = 0;
141bb76ff1Sjsg
151bb76ff1Sjsg i915_request_get(rq);
161bb76ff1Sjsg i915_request_add(rq);
171bb76ff1Sjsg if (spin && !igt_wait_for_spinner(spin, rq))
181bb76ff1Sjsg err = -ETIMEDOUT;
191bb76ff1Sjsg
201bb76ff1Sjsg return err;
211bb76ff1Sjsg }
221bb76ff1Sjsg
nop_user_request(struct intel_context * ce,struct i915_request * from)231bb76ff1Sjsg static struct i915_request *nop_user_request(struct intel_context *ce,
241bb76ff1Sjsg struct i915_request *from)
251bb76ff1Sjsg {
261bb76ff1Sjsg struct i915_request *rq;
271bb76ff1Sjsg int ret;
281bb76ff1Sjsg
291bb76ff1Sjsg rq = intel_context_create_request(ce);
301bb76ff1Sjsg if (IS_ERR(rq))
311bb76ff1Sjsg return rq;
321bb76ff1Sjsg
331bb76ff1Sjsg if (from) {
341bb76ff1Sjsg ret = i915_sw_fence_await_dma_fence(&rq->submit,
351bb76ff1Sjsg &from->fence, 0,
361bb76ff1Sjsg I915_FENCE_GFP);
371bb76ff1Sjsg if (ret < 0) {
381bb76ff1Sjsg i915_request_put(rq);
391bb76ff1Sjsg return ERR_PTR(ret);
401bb76ff1Sjsg }
411bb76ff1Sjsg }
421bb76ff1Sjsg
431bb76ff1Sjsg i915_request_get(rq);
441bb76ff1Sjsg i915_request_add(rq);
451bb76ff1Sjsg
461bb76ff1Sjsg return rq;
471bb76ff1Sjsg }
481bb76ff1Sjsg
intel_guc_scrub_ctbs(void * arg)491bb76ff1Sjsg static int intel_guc_scrub_ctbs(void *arg)
501bb76ff1Sjsg {
511bb76ff1Sjsg struct intel_gt *gt = arg;
521bb76ff1Sjsg int ret = 0;
531bb76ff1Sjsg int i;
541bb76ff1Sjsg struct i915_request *last[3] = {NULL, NULL, NULL}, *rq;
551bb76ff1Sjsg intel_wakeref_t wakeref;
561bb76ff1Sjsg struct intel_engine_cs *engine;
571bb76ff1Sjsg struct intel_context *ce;
581bb76ff1Sjsg
591bb76ff1Sjsg if (!intel_has_gpu_reset(gt))
601bb76ff1Sjsg return 0;
611bb76ff1Sjsg
621bb76ff1Sjsg wakeref = intel_runtime_pm_get(gt->uncore->rpm);
631bb76ff1Sjsg engine = intel_selftest_find_any_engine(gt);
641bb76ff1Sjsg
651bb76ff1Sjsg /* Submit requests and inject errors forcing G2H to be dropped */
661bb76ff1Sjsg for (i = 0; i < 3; ++i) {
671bb76ff1Sjsg ce = intel_context_create(engine);
681bb76ff1Sjsg if (IS_ERR(ce)) {
691bb76ff1Sjsg ret = PTR_ERR(ce);
70*f005ef32Sjsg gt_err(gt, "Failed to create context %d: %pe\n", i, ce);
711bb76ff1Sjsg goto err;
721bb76ff1Sjsg }
731bb76ff1Sjsg
741bb76ff1Sjsg switch (i) {
751bb76ff1Sjsg case 0:
761bb76ff1Sjsg ce->drop_schedule_enable = true;
771bb76ff1Sjsg break;
781bb76ff1Sjsg case 1:
791bb76ff1Sjsg ce->drop_schedule_disable = true;
801bb76ff1Sjsg break;
811bb76ff1Sjsg case 2:
821bb76ff1Sjsg ce->drop_deregister = true;
831bb76ff1Sjsg break;
841bb76ff1Sjsg }
851bb76ff1Sjsg
861bb76ff1Sjsg rq = nop_user_request(ce, NULL);
871bb76ff1Sjsg intel_context_put(ce);
881bb76ff1Sjsg
891bb76ff1Sjsg if (IS_ERR(rq)) {
901bb76ff1Sjsg ret = PTR_ERR(rq);
91*f005ef32Sjsg gt_err(gt, "Failed to create request %d: %pe\n", i, rq);
921bb76ff1Sjsg goto err;
931bb76ff1Sjsg }
941bb76ff1Sjsg
951bb76ff1Sjsg last[i] = rq;
961bb76ff1Sjsg }
971bb76ff1Sjsg
981bb76ff1Sjsg for (i = 0; i < 3; ++i) {
991bb76ff1Sjsg ret = i915_request_wait(last[i], 0, HZ);
1001bb76ff1Sjsg if (ret < 0) {
101*f005ef32Sjsg gt_err(gt, "Last request failed to complete: %pe\n", ERR_PTR(ret));
1021bb76ff1Sjsg goto err;
1031bb76ff1Sjsg }
1041bb76ff1Sjsg i915_request_put(last[i]);
1051bb76ff1Sjsg last[i] = NULL;
1061bb76ff1Sjsg }
1071bb76ff1Sjsg
1081bb76ff1Sjsg /* Force all H2G / G2H to be submitted / processed */
1091bb76ff1Sjsg intel_gt_retire_requests(gt);
110*f005ef32Sjsg drm_msleep(500);
1111bb76ff1Sjsg
1121bb76ff1Sjsg /* Scrub missing G2H */
1131bb76ff1Sjsg intel_gt_handle_error(engine->gt, -1, 0, "selftest reset");
1141bb76ff1Sjsg
1151bb76ff1Sjsg /* GT will not idle if G2H are lost */
1161bb76ff1Sjsg ret = intel_gt_wait_for_idle(gt, HZ);
1171bb76ff1Sjsg if (ret < 0) {
118*f005ef32Sjsg gt_err(gt, "GT failed to idle: %pe\n", ERR_PTR(ret));
1191bb76ff1Sjsg goto err;
1201bb76ff1Sjsg }
1211bb76ff1Sjsg
1221bb76ff1Sjsg err:
1231bb76ff1Sjsg for (i = 0; i < 3; ++i)
1241bb76ff1Sjsg if (last[i])
1251bb76ff1Sjsg i915_request_put(last[i]);
1261bb76ff1Sjsg intel_runtime_pm_put(gt->uncore->rpm, wakeref);
1271bb76ff1Sjsg
1281bb76ff1Sjsg return ret;
1291bb76ff1Sjsg }
1301bb76ff1Sjsg
1311bb76ff1Sjsg /*
1321bb76ff1Sjsg * intel_guc_steal_guc_ids - Test to exhaust all guc_ids and then steal one
1331bb76ff1Sjsg *
1341bb76ff1Sjsg * This test creates a spinner which is used to block all subsequent submissions
1351bb76ff1Sjsg * until it completes. Next, a loop creates a context and a NOP request each
1361bb76ff1Sjsg * iteration until the guc_ids are exhausted (request creation returns -EAGAIN).
1371bb76ff1Sjsg * The spinner is ended, unblocking all requests created in the loop. At this
1381bb76ff1Sjsg * point all guc_ids are exhausted but are available to steal. Try to create
1391bb76ff1Sjsg * another request which should successfully steal a guc_id. Wait on last
1401bb76ff1Sjsg * request to complete, idle GPU, verify a guc_id was stolen via a counter, and
1411bb76ff1Sjsg * exit the test. Test also artificially reduces the number of guc_ids so the
1421bb76ff1Sjsg * test runs in a timely manner.
1431bb76ff1Sjsg */
intel_guc_steal_guc_ids(void * arg)1441bb76ff1Sjsg static int intel_guc_steal_guc_ids(void *arg)
1451bb76ff1Sjsg {
1461bb76ff1Sjsg struct intel_gt *gt = arg;
1471bb76ff1Sjsg struct intel_guc *guc = >->uc.guc;
1481bb76ff1Sjsg int ret, sv, context_index = 0;
1491bb76ff1Sjsg intel_wakeref_t wakeref;
1501bb76ff1Sjsg struct intel_engine_cs *engine;
1511bb76ff1Sjsg struct intel_context **ce;
1521bb76ff1Sjsg struct igt_spinner spin;
1531bb76ff1Sjsg struct i915_request *spin_rq = NULL, *rq, *last = NULL;
1541bb76ff1Sjsg int number_guc_id_stolen = guc->number_guc_id_stolen;
1551bb76ff1Sjsg
1561bb76ff1Sjsg ce = kcalloc(GUC_MAX_CONTEXT_ID, sizeof(*ce), GFP_KERNEL);
1571bb76ff1Sjsg if (!ce) {
158*f005ef32Sjsg guc_err(guc, "Context array allocation failed\n");
1591bb76ff1Sjsg return -ENOMEM;
1601bb76ff1Sjsg }
1611bb76ff1Sjsg
1621bb76ff1Sjsg wakeref = intel_runtime_pm_get(gt->uncore->rpm);
1631bb76ff1Sjsg engine = intel_selftest_find_any_engine(gt);
1641bb76ff1Sjsg sv = guc->submission_state.num_guc_ids;
1651bb76ff1Sjsg guc->submission_state.num_guc_ids = 512;
1661bb76ff1Sjsg
1671bb76ff1Sjsg /* Create spinner to block requests in below loop */
1681bb76ff1Sjsg ce[context_index] = intel_context_create(engine);
1691bb76ff1Sjsg if (IS_ERR(ce[context_index])) {
1701bb76ff1Sjsg ret = PTR_ERR(ce[context_index]);
171*f005ef32Sjsg guc_err(guc, "Failed to create context: %pe\n", ce[context_index]);
1721bb76ff1Sjsg ce[context_index] = NULL;
1731bb76ff1Sjsg goto err_wakeref;
1741bb76ff1Sjsg }
1751bb76ff1Sjsg ret = igt_spinner_init(&spin, engine->gt);
1761bb76ff1Sjsg if (ret) {
177*f005ef32Sjsg guc_err(guc, "Failed to create spinner: %pe\n", ERR_PTR(ret));
1781bb76ff1Sjsg goto err_contexts;
1791bb76ff1Sjsg }
1801bb76ff1Sjsg spin_rq = igt_spinner_create_request(&spin, ce[context_index],
1811bb76ff1Sjsg MI_ARB_CHECK);
1821bb76ff1Sjsg if (IS_ERR(spin_rq)) {
1831bb76ff1Sjsg ret = PTR_ERR(spin_rq);
184*f005ef32Sjsg guc_err(guc, "Failed to create spinner request: %pe\n", spin_rq);
1851bb76ff1Sjsg goto err_contexts;
1861bb76ff1Sjsg }
1871bb76ff1Sjsg ret = request_add_spin(spin_rq, &spin);
1881bb76ff1Sjsg if (ret) {
189*f005ef32Sjsg guc_err(guc, "Failed to add Spinner request: %pe\n", ERR_PTR(ret));
1901bb76ff1Sjsg goto err_spin_rq;
1911bb76ff1Sjsg }
1921bb76ff1Sjsg
1931bb76ff1Sjsg /* Use all guc_ids */
1941bb76ff1Sjsg while (ret != -EAGAIN) {
1951bb76ff1Sjsg ce[++context_index] = intel_context_create(engine);
1961bb76ff1Sjsg if (IS_ERR(ce[context_index])) {
197*f005ef32Sjsg ret = PTR_ERR(ce[context_index]);
198*f005ef32Sjsg guc_err(guc, "Failed to create context: %pe\n", ce[context_index]);
199*f005ef32Sjsg ce[context_index--] = NULL;
2001bb76ff1Sjsg goto err_spin_rq;
2011bb76ff1Sjsg }
2021bb76ff1Sjsg
2031bb76ff1Sjsg rq = nop_user_request(ce[context_index], spin_rq);
2041bb76ff1Sjsg if (IS_ERR(rq)) {
2051bb76ff1Sjsg ret = PTR_ERR(rq);
2061bb76ff1Sjsg rq = NULL;
207*f005ef32Sjsg if ((ret != -EAGAIN) || !last) {
208*f005ef32Sjsg guc_err(guc, "Failed to create %srequest %d: %pe\n",
209*f005ef32Sjsg last ? "" : "first ", context_index, ERR_PTR(ret));
2101bb76ff1Sjsg goto err_spin_rq;
2111bb76ff1Sjsg }
2121bb76ff1Sjsg } else {
2131bb76ff1Sjsg if (last)
2141bb76ff1Sjsg i915_request_put(last);
2151bb76ff1Sjsg last = rq;
2161bb76ff1Sjsg }
2171bb76ff1Sjsg }
2181bb76ff1Sjsg
2191bb76ff1Sjsg /* Release blocked requests */
2201bb76ff1Sjsg igt_spinner_end(&spin);
2211bb76ff1Sjsg ret = intel_selftest_wait_for_rq(spin_rq);
2221bb76ff1Sjsg if (ret) {
223*f005ef32Sjsg guc_err(guc, "Spin request failed to complete: %pe\n", ERR_PTR(ret));
2241bb76ff1Sjsg i915_request_put(last);
2251bb76ff1Sjsg goto err_spin_rq;
2261bb76ff1Sjsg }
2271bb76ff1Sjsg i915_request_put(spin_rq);
2281bb76ff1Sjsg igt_spinner_fini(&spin);
2291bb76ff1Sjsg spin_rq = NULL;
2301bb76ff1Sjsg
2311bb76ff1Sjsg /* Wait for last request */
2321bb76ff1Sjsg ret = i915_request_wait(last, 0, HZ * 30);
2331bb76ff1Sjsg i915_request_put(last);
2341bb76ff1Sjsg if (ret < 0) {
235*f005ef32Sjsg guc_err(guc, "Last request failed to complete: %pe\n", ERR_PTR(ret));
2361bb76ff1Sjsg goto err_spin_rq;
2371bb76ff1Sjsg }
2381bb76ff1Sjsg
2391bb76ff1Sjsg /* Try to steal guc_id */
2401bb76ff1Sjsg rq = nop_user_request(ce[context_index], NULL);
2411bb76ff1Sjsg if (IS_ERR(rq)) {
2421bb76ff1Sjsg ret = PTR_ERR(rq);
243*f005ef32Sjsg guc_err(guc, "Failed to steal guc_id %d: %pe\n", context_index, rq);
2441bb76ff1Sjsg goto err_spin_rq;
2451bb76ff1Sjsg }
2461bb76ff1Sjsg
2471bb76ff1Sjsg /* Wait for request with stolen guc_id */
2481bb76ff1Sjsg ret = i915_request_wait(rq, 0, HZ);
2491bb76ff1Sjsg i915_request_put(rq);
2501bb76ff1Sjsg if (ret < 0) {
251*f005ef32Sjsg guc_err(guc, "Request with stolen guc_id failed to complete: %pe\n", ERR_PTR(ret));
2521bb76ff1Sjsg goto err_spin_rq;
2531bb76ff1Sjsg }
2541bb76ff1Sjsg
2551bb76ff1Sjsg /* Wait for idle */
2561bb76ff1Sjsg ret = intel_gt_wait_for_idle(gt, HZ * 30);
2571bb76ff1Sjsg if (ret < 0) {
258*f005ef32Sjsg guc_err(guc, "GT failed to idle: %pe\n", ERR_PTR(ret));
2591bb76ff1Sjsg goto err_spin_rq;
2601bb76ff1Sjsg }
2611bb76ff1Sjsg
2621bb76ff1Sjsg /* Verify a guc_id was stolen */
2631bb76ff1Sjsg if (guc->number_guc_id_stolen == number_guc_id_stolen) {
264*f005ef32Sjsg guc_err(guc, "No guc_id was stolen");
2651bb76ff1Sjsg ret = -EINVAL;
2661bb76ff1Sjsg } else {
2671bb76ff1Sjsg ret = 0;
2681bb76ff1Sjsg }
2691bb76ff1Sjsg
2701bb76ff1Sjsg err_spin_rq:
2711bb76ff1Sjsg if (spin_rq) {
2721bb76ff1Sjsg igt_spinner_end(&spin);
2731bb76ff1Sjsg intel_selftest_wait_for_rq(spin_rq);
2741bb76ff1Sjsg i915_request_put(spin_rq);
2751bb76ff1Sjsg igt_spinner_fini(&spin);
2761bb76ff1Sjsg intel_gt_wait_for_idle(gt, HZ * 30);
2771bb76ff1Sjsg }
2781bb76ff1Sjsg err_contexts:
2791bb76ff1Sjsg for (; context_index >= 0 && ce[context_index]; --context_index)
2801bb76ff1Sjsg intel_context_put(ce[context_index]);
2811bb76ff1Sjsg err_wakeref:
2821bb76ff1Sjsg intel_runtime_pm_put(gt->uncore->rpm, wakeref);
2831bb76ff1Sjsg kfree(ce);
2841bb76ff1Sjsg guc->submission_state.num_guc_ids = sv;
2851bb76ff1Sjsg
2861bb76ff1Sjsg return ret;
2871bb76ff1Sjsg }
2881bb76ff1Sjsg
intel_guc_live_selftests(struct drm_i915_private * i915)2891bb76ff1Sjsg int intel_guc_live_selftests(struct drm_i915_private *i915)
2901bb76ff1Sjsg {
2911bb76ff1Sjsg static const struct i915_subtest tests[] = {
2921bb76ff1Sjsg SUBTEST(intel_guc_scrub_ctbs),
2931bb76ff1Sjsg SUBTEST(intel_guc_steal_guc_ids),
2941bb76ff1Sjsg };
2951bb76ff1Sjsg struct intel_gt *gt = to_gt(i915);
2961bb76ff1Sjsg
2971bb76ff1Sjsg if (intel_gt_is_wedged(gt))
2981bb76ff1Sjsg return 0;
2991bb76ff1Sjsg
3001bb76ff1Sjsg if (!intel_uc_uses_guc_submission(>->uc))
3011bb76ff1Sjsg return 0;
3021bb76ff1Sjsg
3031bb76ff1Sjsg return intel_gt_live_subtests(tests, gt);
3041bb76ff1Sjsg }
305