xref: /linux/drivers/gpu/drm/i915/gt/selftest_context.c (revision 52338415)
/*
 * SPDX-License-Identifier: GPL-2.0
 *
 * Copyright © 2019 Intel Corporation
 */

#include "i915_selftest.h"
#include "intel_engine_pm.h"
#include "intel_gt.h"

#include "gem/selftests/mock_context.h"
#include "selftests/igt_flush_test.h"
#include "selftests/mock_drm.h"

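/*
 * Submit @rq and wait (for up to HZ / 10) for it to complete, retiring all
 * requests on its timeline up to and including @rq on success.
 */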
static int request_sync(struct i915_request *rq)
{
	long timeout;
	int err = 0;

	i915_request_get(rq);

	i915_request_add(rq);
	timeout = i915_request_wait(rq, 0, HZ / 10);
	if (timeout < 0) {
		err = timeout;
	} else {
		mutex_lock(&rq->timeline->mutex);
		i915_request_retire_upto(rq);
		mutex_unlock(&rq->timeline->mutex);
	}

	i915_request_put(rq);

	return err;
}

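/*
 * Flush @ce's timeline: wait on (and retire) the last submitted request,
 * repeating until the timeline is idle or an error occurs.
 */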
static int context_sync(struct intel_context *ce)
{
	struct intel_timeline *tl = ce->timeline;
	int err = 0;

	mutex_lock(&tl->mutex);
	do {
		struct i915_request *rq;
		long timeout;

		rcu_read_lock();
		rq = rcu_dereference(tl->last_request.request);
		if (rq)
			rq = i915_request_get_rcu(rq);
		rcu_read_unlock();
		if (!rq)
			break;

		timeout = i915_request_wait(rq, 0, HZ / 10);
		if (timeout < 0)
			err = timeout;
		else
			i915_request_retire_upto(rq);

		i915_request_put(rq);
	} while (!err);
	mutex_unlock(&tl->mutex);

	return err;
}

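/*
 * Poison the page just beyond the reported context size (the caller bumps
 * engine->context_size by one page to make room), submit a request and force
 * a context switch, then check that the poison is intact, i.e. that the HW
 * did not write past the end of its context image.
 */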
static int __live_context_size(struct intel_engine_cs *engine,
			       struct i915_gem_context *fixme)
{
	struct intel_context *ce;
	struct i915_request *rq;
	void *vaddr;
	int err;

	ce = intel_context_create(fixme, engine);
	if (IS_ERR(ce))
		return PTR_ERR(ce);

	err = intel_context_pin(ce);
	if (err)
		goto err;

	vaddr = i915_gem_object_pin_map(ce->state->obj,
					i915_coherent_map_type(engine->i915));
	if (IS_ERR(vaddr)) {
		err = PTR_ERR(vaddr);
		intel_context_unpin(ce);
		goto err;
	}

	/*
	 * Note that execlists also applies a redzone which it checks on
	 * context unpin when debugging. We are using the same location
	 * and same poison value so that our checks overlap. Despite the
	 * redundancy, we want to keep this little selftest so that we
	 * get coverage of any and all submission backends, and we can
	 * always extend this test to ensure we trick the HW into a
	 * compromising position wrt the various sections that need
	 * to be written into the context state.
	 *
	 * TL;DR: this overlaps with the execlists redzone.
	 */
	if (HAS_EXECLISTS(engine->i915))
		vaddr += LRC_HEADER_PAGES * PAGE_SIZE;

	vaddr += engine->context_size - I915_GTT_PAGE_SIZE;
	memset(vaddr, POISON_INUSE, I915_GTT_PAGE_SIZE);

	rq = intel_context_create_request(ce);
	intel_context_unpin(ce);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_unpin;
	}

	err = request_sync(rq);
	if (err)
		goto err_unpin;

	/* Force the context switch */
	rq = i915_request_create(engine->kernel_context);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_unpin;
	}
	err = request_sync(rq);
	if (err)
		goto err_unpin;

	if (memchr_inv(vaddr, POISON_INUSE, I915_GTT_PAGE_SIZE)) {
		pr_err("%s context overwrote trailing red-zone!\n", engine->name);
		err = -EINVAL;
	}

err_unpin:
	i915_gem_object_unpin_map(ce->state->obj);
err:
	intel_context_put(ce);
	return err;
}

static int live_context_size(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	struct i915_gem_context *fixme;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * Check that our context sizes are correct by seeing if the
	 * HW tries to write past the end of one.
	 */

	mutex_lock(&gt->i915->drm.struct_mutex);

	fixme = kernel_context(gt->i915);
	if (IS_ERR(fixme)) {
		err = PTR_ERR(fixme);
		goto unlock;
	}

	for_each_engine(engine, gt->i915, id) {
		struct {
			struct drm_i915_gem_object *state;
			void *pinned;
		} saved;

		if (!engine->context_size)
			continue;

		intel_engine_pm_get(engine);

		/*
		 * Hide the old default state -- we lie about the context size
		 * and get confused when the default state is smaller than
		 * expected. For our do-nothing request, inheriting the
		 * active state is sufficient; we are only checking that we
		 * don't use more than we planned.
		 */
		saved.state = fetch_and_zero(&engine->default_state);
		saved.pinned = fetch_and_zero(&engine->pinned_default_state);

		/* Overlaps with the execlists redzone */
		engine->context_size += I915_GTT_PAGE_SIZE;

		err = __live_context_size(engine, fixme);

		engine->context_size -= I915_GTT_PAGE_SIZE;

		engine->pinned_default_state = saved.pinned;
		engine->default_state = saved.state;

		intel_engine_pm_put(engine);

		if (err)
			break;
	}

	kernel_context_close(fixme);
unlock:
	mutex_unlock(&gt->i915->drm.struct_mutex);
	return err;
}

static int __live_active_context(struct intel_engine_cs *engine,
				 struct i915_gem_context *fixme)
{
	struct intel_context *ce;
	int pass;
	int err;

	/*
	 * We keep active contexts alive until after a subsequent context
	 * switch as the final write from the context-save will be after
	 * we retire the final request. We track when we unpin the context,
	 * under the presumption that the final pin is from the last request,
	 * and instead of immediately unpinning the context, we add a task
	 * to unpin the context from the next idle-barrier.
	 *
	 * This test makes sure that the context is kept alive until a
	 * subsequent idle-barrier (emitted when the engine wakeref hits 0
	 * with no more outstanding requests).
	 */

	if (intel_engine_pm_is_awake(engine)) {
		pr_err("%s is awake before starting %s!\n",
		       engine->name, __func__);
		return -EINVAL;
	}

	ce = intel_context_create(fixme, engine);
	if (IS_ERR(ce))
		return PTR_ERR(ce);

	for (pass = 0; pass <= 2; pass++) {
		struct i915_request *rq;

		rq = intel_context_create_request(ce);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err;
		}

		err = request_sync(rq);
		if (err)
			goto err;

		/* Context will be kept active until after an idle-barrier. */
		if (i915_active_is_idle(&ce->active)) {
			pr_err("context is not active; expected idle-barrier (%s pass %d)\n",
			       engine->name, pass);
			err = -EINVAL;
			goto err;
		}

		if (!intel_engine_pm_is_awake(engine)) {
			pr_err("%s is asleep before idle-barrier\n",
			       engine->name);
			err = -EINVAL;
			goto err;
		}
	}

	/* Now make sure our idle-barriers are flushed */
	err = context_sync(engine->kernel_context);
	if (err)
		goto err;

	if (!i915_active_is_idle(&ce->active)) {
		pr_err("context is still active!\n");
		err = -EINVAL;
	}

	if (intel_engine_pm_is_awake(engine)) {
		struct drm_printer p = drm_debug_printer(__func__);

		intel_engine_dump(engine, &p,
				  "%s is still awake after idle-barriers\n",
				  engine->name);
		GEM_TRACE_DUMP();

		err = -EINVAL;
		goto err;
	}

err:
	intel_context_put(ce);
	return err;
}

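/*
 * Run __live_active_context() on each engine, using a live GEM context and
 * flushing outstanding work between engines.
 */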
static int live_active_context(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	struct i915_gem_context *fixme;
	enum intel_engine_id id;
	struct drm_file *file;
	int err = 0;

	file = mock_file(gt->i915);
	if (IS_ERR(file))
		return PTR_ERR(file);

	mutex_lock(&gt->i915->drm.struct_mutex);

	fixme = live_context(gt->i915, file);
	if (IS_ERR(fixme)) {
		err = PTR_ERR(fixme);
		goto unlock;
	}

	for_each_engine(engine, gt->i915, id) {
		err = __live_active_context(engine, fixme);
		if (err)
			break;

		err = igt_flush_test(gt->i915, I915_WAIT_LOCKED);
		if (err)
			break;
	}

unlock:
	mutex_unlock(&gt->i915->drm.struct_mutex);
	mock_file_free(gt->i915, file);
	return err;
}

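/*
 * Submit a request on @ce that also operates on @remote's context image via
 * intel_context_prepare_remote_request(), then wait for it to complete.
 */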
static int __remote_sync(struct intel_context *ce, struct intel_context *remote)
{
	struct i915_request *rq;
	int err;

	err = intel_context_pin(remote);
	if (err)
		return err;

	rq = intel_context_create_request(ce);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto unpin;
	}

	err = intel_context_prepare_remote_request(remote, rq);
	if (err) {
		i915_request_add(rq);
		goto unpin;
	}

	err = request_sync(rq);

unpin:
	intel_context_unpin(remote);
	return err;
}

static int __live_remote_context(struct intel_engine_cs *engine,
				 struct i915_gem_context *fixme)
{
	struct intel_context *local, *remote;
	int pass;
	int err;

	/*
	 * Check that our idle barriers do not interfere with normal
	 * activity tracking. In particular, check that operating
	 * on the context image remotely (intel_context_prepare_remote_request),
	 * which inserts foreign fences into intel_context.active, does not
	 * clobber the idle-barrier.
	 */

	remote = intel_context_create(fixme, engine);
	if (IS_ERR(remote))
		return PTR_ERR(remote);

	local = intel_context_create(fixme, engine);
	if (IS_ERR(local)) {
		err = PTR_ERR(local);
		goto err_remote;
	}

	for (pass = 0; pass <= 2; pass++) {
		err = __remote_sync(local, remote);
		if (err)
			break;

		err = __remote_sync(engine->kernel_context, remote);
		if (err)
			break;

		if (i915_active_is_idle(&remote->active)) {
			pr_err("remote context is not active; expected idle-barrier (%s pass %d)\n",
			       engine->name, pass);
			err = -EINVAL;
			break;
		}
	}

	intel_context_put(local);
err_remote:
	intel_context_put(remote);
	return err;
}

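/*
 * Run __live_remote_context() on each engine, using a live GEM context and
 * flushing outstanding work between engines.
 */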
static int live_remote_context(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	struct i915_gem_context *fixme;
	enum intel_engine_id id;
	struct drm_file *file;
	int err = 0;

	file = mock_file(gt->i915);
	if (IS_ERR(file))
		return PTR_ERR(file);

	mutex_lock(&gt->i915->drm.struct_mutex);

	fixme = live_context(gt->i915, file);
	if (IS_ERR(fixme)) {
		err = PTR_ERR(fixme);
		goto unlock;
	}

	for_each_engine(engine, gt->i915, id) {
		err = __live_remote_context(engine, fixme);
		if (err)
			break;

		err = igt_flush_test(gt->i915, I915_WAIT_LOCKED);
		if (err)
			break;
	}

unlock:
	mutex_unlock(&gt->i915->drm.struct_mutex);
	mock_file_free(gt->i915, file);
	return err;
}

int intel_context_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(live_context_size),
		SUBTEST(live_active_context),
		SUBTEST(live_remote_context),
	};
	struct intel_gt *gt = &i915->gt;

	if (intel_gt_is_wedged(gt))
		return 0;

	return intel_gt_live_subtests(tests, gt);
}