xref: /openbsd/sys/dev/pci/drm/i915/i915_scheduler.c (revision 09467b48)
/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2018 Intel Corporation
 */

#include <linux/mutex.h>

#include "i915_drv.h"
#include "i915_globals.h"
#include "i915_request.h"
#include "i915_scheduler.h"

static struct i915_global_scheduler {
	struct i915_global base;
#ifdef __linux__
	struct kmem_cache *slab_dependencies;
	struct kmem_cache *slab_priorities;
#else
	struct pool slab_dependencies;
	struct pool slab_priorities;
#endif
} global;

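/*
 * schedule_lock serialises all priority adjustments: it protects the
 * temporary dfs_link used by __i915_schedule() to flatten the dependency
 * graph and guards insertion/removal on the signalers/waiters lists.
 */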
static DEFINE_SPINLOCK(schedule_lock);

static const struct i915_request *
node_to_request(const struct i915_sched_node *node)
{
	return container_of(node, const struct i915_request, sched);
}

static inline bool node_started(const struct i915_sched_node *node)
{
	return i915_request_started(node_to_request(node));
}

static inline bool node_signaled(const struct i915_sched_node *node)
{
	return i915_request_completed(node_to_request(node));
}

static inline struct i915_priolist *to_priolist(struct rb_node *rb)
{
	return rb_entry(rb, struct i915_priolist, node);
}

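/*
 * Debug-only consistency check (CONFIG_DRM_I915_DEBUG_GEM): the priolist
 * rbtree must be ordered with the highest priority leftmost, and every
 * non-empty request bucket must have its corresponding bit set in p->used.
 */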
static void assert_priolists(struct intel_engine_execlists * const execlists)
{
	struct rb_node *rb;
	long last_prio, i;

	if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
		return;

	GEM_BUG_ON(rb_first_cached(&execlists->queue) !=
		   rb_first(&execlists->queue.rb_root));

	last_prio = (INT_MAX >> I915_USER_PRIORITY_SHIFT) + 1;
	for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) {
		const struct i915_priolist *p = to_priolist(rb);

		GEM_BUG_ON(p->priority >= last_prio);
		last_prio = p->priority;

		GEM_BUG_ON(!p->used);
		for (i = 0; i < ARRAY_SIZE(p->requests); i++) {
			if (list_empty(&p->requests[i]))
				continue;

			GEM_BUG_ON(!(p->used & BIT(i)));
		}
	}
}

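/*
 * i915_sched_lookup_priolist() returns the request list for the given
 * effective priority, creating the i915_priolist node on demand. The low
 * I915_USER_PRIORITY_SHIFT bits of @prio select a FIFO bucket within the
 * node (bucket 0 holds the highest sub-priority), while the remaining high
 * bits form the rbtree key. Must be called with engine->active.lock held,
 * as the allocation is atomic and the tree is only stable under that lock.
 */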
struct list_head *
i915_sched_lookup_priolist(struct intel_engine_cs *engine, int prio)
{
	struct intel_engine_execlists * const execlists = &engine->execlists;
	struct i915_priolist *p;
	struct rb_node **parent, *rb;
	bool first = true;
	int idx, i;

	lockdep_assert_held(&engine->active.lock);
	assert_priolists(execlists);

	/* buckets sorted from highest [in slot 0] to lowest priority */
	idx = I915_PRIORITY_COUNT - (prio & I915_PRIORITY_MASK) - 1;
	prio >>= I915_USER_PRIORITY_SHIFT;
	if (unlikely(execlists->no_priolist))
		prio = I915_PRIORITY_NORMAL;

find_priolist:
	/* most positive priority is scheduled first, equal priorities fifo */
	rb = NULL;
	parent = &execlists->queue.rb_root.rb_node;
	while (*parent) {
		rb = *parent;
		p = to_priolist(rb);
		if (prio > p->priority) {
			parent = &rb->rb_left;
		} else if (prio < p->priority) {
			parent = &rb->rb_right;
			first = false;
		} else {
			goto out;
		}
	}

	if (prio == I915_PRIORITY_NORMAL) {
		p = &execlists->default_priolist;
	} else {
#ifdef __linux__
		p = kmem_cache_alloc(global.slab_priorities, GFP_ATOMIC);
#else
		p = pool_get(&global.slab_priorities, PR_NOWAIT);
#endif
		/* Convert an allocation failure to a priority bump */
		if (unlikely(!p)) {
			prio = I915_PRIORITY_NORMAL; /* recurses just once */

			/* To maintain ordering with all rendering, after an
			 * allocation failure we have to disable all scheduling.
			 * Requests will then be executed in fifo, and schedule
			 * will ensure that dependencies are emitted in fifo.
			 * There will still be some reordering with existing
			 * requests, so if userspace lied about their
			 * dependencies that reordering may be visible.
			 */
			execlists->no_priolist = true;
			goto find_priolist;
		}
	}

	p->priority = prio;
	for (i = 0; i < ARRAY_SIZE(p->requests); i++)
		INIT_LIST_HEAD(&p->requests[i]);
	rb_link_node(&p->node, rb, parent);
	rb_insert_color_cached(&p->node, &execlists->queue, first);
	p->used = 0;

out:
	p->used |= BIT(idx);
	return &p->requests[idx];
}

void __i915_priolist_free(struct i915_priolist *p)
{
#ifdef __linux__
	kmem_cache_free(global.slab_priorities, p);
#else
	pool_put(&global.slab_priorities, p);
#endif
}

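/*
 * Scratch state for the __i915_schedule() walk: cache.priolist memoises the
 * last priolist looked up so repeated requeues on the same engine avoid the
 * rbtree search. sched_lock_engine() resets it whenever the walk has to drop
 * one engine lock and take another.
 */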
struct sched_cache {
	struct list_head *priolist;
};

static struct intel_engine_cs *
sched_lock_engine(const struct i915_sched_node *node,
		  struct intel_engine_cs *locked,
		  struct sched_cache *cache)
{
	const struct i915_request *rq = node_to_request(node);
	struct intel_engine_cs *engine;

	GEM_BUG_ON(!locked);

	/*
	 * Virtual engines complicate acquiring the engine timeline lock,
	 * as their rq->engine pointer is not stable until under that
	 * engine lock. The simple ploy we use is to take the lock then
	 * check that the rq still belongs to the newly locked engine.
	 */
	while (locked != (engine = READ_ONCE(rq->engine))) {
		spin_unlock(&locked->active.lock);
		memset(cache, 0, sizeof(*cache));
		spin_lock(&engine->active.lock);
		locked = engine;
	}

	GEM_BUG_ON(locked != engine);
	return locked;
}

static inline int rq_prio(const struct i915_request *rq)
{
	return rq->sched.attr.priority | __NO_PREEMPTION;
}

static inline bool need_preempt(int prio, int active)
{
	/*
	 * Allow preemption of low -> normal -> high, but we do
	 * not allow low priority tasks to preempt other low priority
	 * tasks under the impression that latency for low priority
	 * tasks does not matter (as much as background throughput),
	 * so kiss.
	 */
	return prio >= max(I915_PRIORITY_NORMAL, active);
}

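/*
 * Called after boosting a queued request: when the new priority exceeds the
 * engine's queue_priority_hint and there is an inflight request, update the
 * hint; if the inflight context is a different one that would now lose a
 * preemption check, kick the execlists tasklet to re-evaluate the queue.
 */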
static void kick_submission(struct intel_engine_cs *engine,
			    const struct i915_request *rq,
			    int prio)
{
	const struct i915_request *inflight;

	/*
	 * We only need to kick the tasklet once for the high priority
	 * new context we add into the queue.
	 */
	if (prio <= engine->execlists.queue_priority_hint)
		return;

	rcu_read_lock();

	/* Nothing currently active? We're overdue for a submission! */
	inflight = execlists_active(&engine->execlists);
	if (!inflight)
		goto unlock;

	engine->execlists.queue_priority_hint = prio;

	/*
	 * If we are already the currently executing context, don't
	 * bother evaluating if we should preempt ourselves.
	 */
	if (inflight->context == rq->context)
		goto unlock;

	if (need_preempt(prio, rq_prio(inflight)))
		tasklet_hi_schedule(&engine->execlists.tasklet);

unlock:
	rcu_read_unlock();
}

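/*
 * __i915_schedule() applies priority inheritance: the new priority is
 * propagated to every signaler the node (transitively) depends upon, so
 * that no request is left queued behind a lower priority ancestor. The
 * dependency graph is flattened iteratively into @dfs (see below) rather
 * than walked recursively. Caller must hold schedule_lock.
 */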
static void __i915_schedule(struct i915_sched_node *node,
			    const struct i915_sched_attr *attr)
{
	const int prio = max(attr->priority, node->attr.priority);
	struct intel_engine_cs *engine;
	struct i915_dependency *dep, *p;
	struct i915_dependency stack;
	struct sched_cache cache;
	DRM_LIST_HEAD(dfs);

	/* Needed in order to use the temporary link inside i915_dependency */
	lockdep_assert_held(&schedule_lock);
	GEM_BUG_ON(prio == I915_PRIORITY_INVALID);

	if (node_signaled(node))
		return;

	stack.signaler = node;
	list_add(&stack.dfs_link, &dfs);

	/*
	 * Recursively bump all dependent priorities to match the new request.
	 *
	 * A naive approach would be to use recursion:
	 * static void update_priorities(struct i915_sched_node *node, prio) {
	 *	list_for_each_entry(dep, &node->signalers_list, signal_link)
	 *		update_priorities(dep->signal, prio)
	 *	queue_request(node);
	 * }
	 * but that may have unlimited recursion depth and so runs a very
	 * real risk of overrunning the kernel stack. Instead, we build a flat
	 * list of all dependencies starting with the current request. As we
	 * walk the list, we append each element's own dependencies to the end
	 * of the list (this may include an already visited request) and
	 * continue to walk onwards onto the new dependencies. The end result
	 * is a topological list of requests in reverse order; the last
	 * element in the list is the request we must execute first.
	 */
	list_for_each_entry(dep, &dfs, dfs_link) {
		struct i915_sched_node *node = dep->signaler;

		/* If we are already flying, we know we have no signalers */
		if (node_started(node))
			continue;

		/*
		 * Within an engine, there can be no cycle, but we may
		 * refer to the same dependency chain multiple times
		 * (redundant dependencies are not eliminated) and across
		 * engines.
		 */
		list_for_each_entry(p, &node->signalers_list, signal_link) {
			GEM_BUG_ON(p == dep); /* no cycles! */

			if (node_signaled(p->signaler))
				continue;

			if (prio > READ_ONCE(p->signaler->attr.priority))
				list_move_tail(&p->dfs_link, &dfs);
		}
	}

	/*
	 * If we didn't need to bump any existing priorities, and we haven't
	 * yet submitted this request (i.e. there is no potential race with
	 * execlists_submit_request()), we can set our own priority and skip
	 * acquiring the engine locks.
	 */
	if (node->attr.priority == I915_PRIORITY_INVALID) {
		GEM_BUG_ON(!list_empty(&node->link));
		node->attr = *attr;

		if (stack.dfs_link.next == stack.dfs_link.prev)
			return;

		__list_del_entry(&stack.dfs_link);
	}

	memset(&cache, 0, sizeof(cache));
	engine = node_to_request(node)->engine;
	spin_lock(&engine->active.lock);

	/* Fifo and depth-first replacement ensure our deps execute before us */
	engine = sched_lock_engine(node, engine, &cache);
	list_for_each_entry_safe_reverse(dep, p, &dfs, dfs_link) {
		INIT_LIST_HEAD(&dep->dfs_link);

		node = dep->signaler;
		engine = sched_lock_engine(node, engine, &cache);
		lockdep_assert_held(&engine->active.lock);

		/* Recheck after acquiring the engine->active.lock */
		if (prio <= node->attr.priority || node_signaled(node))
			continue;

		GEM_BUG_ON(node_to_request(node)->engine != engine);

		WRITE_ONCE(node->attr.priority, prio);

		/*
		 * Once the request is ready, it will be placed into the
		 * priority lists and then onto the HW runlist. Before the
		 * request is ready, it does not contribute to our preemption
		 * decisions and we can safely ignore it, as it, and any
		 * preemption required, will be dealt with upon submission.
		 * See engine->submit_request()
		 */
		if (list_empty(&node->link))
			continue;

		if (i915_request_in_priority_queue(node_to_request(node))) {
			if (!cache.priolist)
				cache.priolist =
					i915_sched_lookup_priolist(engine,
								   prio);
			list_move_tail(&node->link, cache.priolist);
		}

		/* Defer (tasklet) submission until after all of our updates. */
		kick_submission(engine, node_to_request(node), prio);
	}

	spin_unlock(&engine->active.lock);
}

void i915_schedule(struct i915_request *rq, const struct i915_sched_attr *attr)
{
	spin_lock_irq(&schedule_lock);
	__i915_schedule(&rq->sched, attr);
	spin_unlock_irq(&schedule_lock);
}

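/*
 * Bumps are individual flag bits within I915_PRIORITY_MASK, i.e. below the
 * user priority field, so OR-ing one in can only raise the effective
 * priority. Skip the walk entirely if the node already carries the bit.
 */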
static void __bump_priority(struct i915_sched_node *node, unsigned int bump)
{
	struct i915_sched_attr attr = node->attr;

	if (attr.priority & bump)
		return;

	attr.priority |= bump;
	__i915_schedule(node, &attr);
}

void i915_schedule_bump_priority(struct i915_request *rq, unsigned int bump)
{
	unsigned long flags;

	GEM_BUG_ON(bump & ~I915_PRIORITY_MASK);
	if (READ_ONCE(rq->sched.attr.priority) & bump)
		return;

	spin_lock_irqsave(&schedule_lock, flags);
	__bump_priority(&rq->sched, bump);
	spin_unlock_irqrestore(&schedule_lock, flags);
}

void i915_sched_node_init(struct i915_sched_node *node)
{
	INIT_LIST_HEAD(&node->signalers_list);
	INIT_LIST_HEAD(&node->waiters_list);
	INIT_LIST_HEAD(&node->link);

	i915_sched_node_reinit(node);
}

void i915_sched_node_reinit(struct i915_sched_node *node)
{
	node->attr.priority = I915_PRIORITY_INVALID;
	node->semaphores = 0;
	node->flags = 0;

	GEM_BUG_ON(!list_empty(&node->signalers_list));
	GEM_BUG_ON(!list_empty(&node->waiters_list));
	GEM_BUG_ON(!list_empty(&node->link));
}

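/*
 * Dependencies are allocated from process context where sleeping is allowed
 * (GFP_KERNEL / PR_WAITOK), unlike the priolist allocation above which must
 * be atomic because it runs under engine->active.lock.
 */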
static struct i915_dependency *
i915_dependency_alloc(void)
{
#ifdef __linux__
	return kmem_cache_alloc(global.slab_dependencies, GFP_KERNEL);
#else
	return pool_get(&global.slab_dependencies, PR_WAITOK);
#endif
}

static void
i915_dependency_free(struct i915_dependency *dep)
{
#ifdef __linux__
	kmem_cache_free(global.slab_dependencies, dep);
#else
	pool_put(&global.slab_dependencies, dep);
#endif
}

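/*
 * Record that @node must wait upon @signal, using the caller-supplied @dep.
 * Returns true if the dependency was tracked, false if @signal has already
 * completed (in which case the caller retains ownership of @dep and may
 * free it).
 */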
bool __i915_sched_node_add_dependency(struct i915_sched_node *node,
				      struct i915_sched_node *signal,
				      struct i915_dependency *dep,
				      unsigned long flags)
{
	bool ret = false;

	spin_lock_irq(&schedule_lock);

	if (!node_signaled(signal)) {
		INIT_LIST_HEAD(&dep->dfs_link);
		dep->signaler = signal;
		dep->waiter = node;
		dep->flags = flags;

		/* Keep track of whether anyone on this chain has a semaphore */
		if (signal->flags & I915_SCHED_HAS_SEMAPHORE_CHAIN &&
		    !node_started(signal))
			node->flags |= I915_SCHED_HAS_SEMAPHORE_CHAIN;

		/* All set, now publish. Beware the lockless walkers. */
		list_add_rcu(&dep->signal_link, &node->signalers_list);
		list_add_rcu(&dep->wait_link, &signal->waiters_list);

		/*
		 * As we do not allow WAIT to preempt inflight requests,
		 * once we have executed a request, along with triggering
		 * any execution callbacks, we must preserve its ordering
		 * within the non-preemptible FIFO.
		 */
		BUILD_BUG_ON(__NO_PREEMPTION & ~I915_PRIORITY_MASK);
		if (flags & I915_DEPENDENCY_EXTERNAL)
			__bump_priority(signal, __NO_PREEMPTION);

		ret = true;
	}

	spin_unlock_irq(&schedule_lock);

	return ret;
}

int i915_sched_node_add_dependency(struct i915_sched_node *node,
				   struct i915_sched_node *signal,
				   unsigned long flags)
{
	struct i915_dependency *dep;

	dep = i915_dependency_alloc();
	if (!dep)
		return -ENOMEM;

	if (!__i915_sched_node_add_dependency(node, signal, dep,
					      flags | I915_DEPENDENCY_ALLOC))
		i915_dependency_free(dep);

	return 0;
}

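/*
 * Tear down both dependency lists of a retiring node, releasing any
 * i915_dependency we allocated on its behalf (I915_DEPENDENCY_ALLOC).
 * Entries added by callers without that flag are unlinked but not freed.
 */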
void i915_sched_node_fini(struct i915_sched_node *node)
{
	struct i915_dependency *dep, *tmp;

	spin_lock_irq(&schedule_lock);

	/*
	 * Everyone we depended upon (the fences we wait to be signaled)
	 * should retire before us and remove themselves from our list.
	 * However, retirement is run independently on each timeline and
	 * so we may be called out-of-order.
	 */
	list_for_each_entry_safe(dep, tmp, &node->signalers_list, signal_link) {
		GEM_BUG_ON(!list_empty(&dep->dfs_link));

		list_del_rcu(&dep->wait_link);
		if (dep->flags & I915_DEPENDENCY_ALLOC)
			i915_dependency_free(dep);
	}
	INIT_LIST_HEAD(&node->signalers_list);

	/* Remove ourselves from everyone who depends upon us */
	list_for_each_entry_safe(dep, tmp, &node->waiters_list, wait_link) {
		GEM_BUG_ON(dep->signaler != node);
		GEM_BUG_ON(!list_empty(&dep->dfs_link));

		list_del_rcu(&dep->signal_link);
		if (dep->flags & I915_DEPENDENCY_ALLOC)
			i915_dependency_free(dep);
	}
	INIT_LIST_HEAD(&node->waiters_list);

	spin_unlock_irq(&schedule_lock);
}

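/*
 * The shrink callback is compiled out in this port ("notyet"); the Linux
 * kmem_cache_shrink() calls are retained for reference only.
 */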
static void i915_global_scheduler_shrink(void)
{
#ifdef notyet
	kmem_cache_shrink(global.slab_dependencies);
	kmem_cache_shrink(global.slab_priorities);
#endif
}

static void i915_global_scheduler_exit(void)
{
#ifdef __linux__
	kmem_cache_destroy(global.slab_dependencies);
	kmem_cache_destroy(global.slab_priorities);
#else
	pool_destroy(&global.slab_dependencies);
	pool_destroy(&global.slab_priorities);
#endif
}

static struct i915_global_scheduler global = { {
	.shrink = i915_global_scheduler_shrink,
	.exit = i915_global_scheduler_exit,
} };

int __init i915_global_scheduler_init(void)
{
#ifdef __linux__
	global.slab_dependencies = KMEM_CACHE(i915_dependency,
					      SLAB_HWCACHE_ALIGN |
					      SLAB_TYPESAFE_BY_RCU);
	if (!global.slab_dependencies)
		return -ENOMEM;

	global.slab_priorities = KMEM_CACHE(i915_priolist,
					    SLAB_HWCACHE_ALIGN);
	if (!global.slab_priorities)
		goto err_priorities;

	i915_global_register(&global.base);
	return 0;

err_priorities:
	/* slab_priorities failed to allocate; release the dependencies cache */
	kmem_cache_destroy(global.slab_dependencies);
	return -ENOMEM;
#else
	pool_init(&global.slab_dependencies, sizeof(struct i915_dependency),
	    0, IPL_TTY, 0, "gsdep", NULL);
	pool_init(&global.slab_priorities, sizeof(struct i915_priolist),
	    0, IPL_TTY, 0, "gspri", NULL);

	i915_global_register(&global.base);
	return 0;
#endif
}