xref: /linux/drivers/gpu/drm/scheduler/sched_main.c (revision 35a4279d)
1 /*
2  * Copyright 2015 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 
24 /**
25  * DOC: Overview
26  *
27  * The GPU scheduler provides entities which allow userspace to push jobs
28  * into software queues which are then scheduled on a hardware run queue.
29  * The software queues have a priority among them. The scheduler selects the entities
30  * from the run queue using a FIFO. The scheduler provides dependency handling
31  * features among jobs. The driver is supposed to provide callback functions
32  * for backend operations to the scheduler, such as submitting a job to the
33  * hardware run queue or returning the dependencies of a job.
34  *
35  * The organisation of the scheduler is the following:
36  *
37  * 1. Each hw run queue has one scheduler
38  * 2. Each scheduler has multiple run queues with different priorities
39  *    (e.g., HIGH_HW, HIGH_SW, KERNEL, NORMAL)
40  * 3. Each scheduler run queue has a queue of entities to schedule
41  * 4. Entities themselves maintain a queue of jobs that will be scheduled on
42  *    the hardware.
43  *
44  * The jobs in an entity are always scheduled in the order in which they were pushed.
45  *
46  * Note that once a job has been taken from the entity's queue and pushed to
47  * the hardware, i.e. the pending queue, the entity must not be referenced
48  * anymore through the job's entity pointer.
49  */
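
/*
 * A minimal sketch of the usual driver-side submission flow built on top of
 * this API (hypothetical driver code, assuming a driver job structure that
 * embeds a struct drm_sched_job as "base", and a GEM object "bo" whose
 * reservation object is already locked):
 *
 *	ret = drm_sched_job_init(&job->base, entity, owner);
 *	if (ret)
 *		return ret;
 *
 *	ret = drm_sched_job_add_resv_dependencies(&job->base, bo->resv,
 *						   DMA_RESV_USAGE_WRITE);
 *	if (ret)
 *		goto err_cleanup;
 *
 *	drm_sched_job_arm(&job->base);
 *	drm_sched_entity_push_job(&job->base);
 *
 * The err_cleanup label is expected to call drm_sched_job_cleanup(); see
 * drm_sched_job_init() and drm_sched_job_cleanup() below. Once pushed, the
 * job is owned by the scheduler and will be handed to
 * &drm_sched_backend_ops.run_job when all of its dependencies have signaled.
 */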
50 
51 #include <linux/wait.h>
52 #include <linux/sched.h>
53 #include <linux/completion.h>
54 #include <linux/dma-resv.h>
55 #include <uapi/linux/sched/types.h>
56 
57 #include <drm/drm_print.h>
58 #include <drm/drm_gem.h>
59 #include <drm/drm_syncobj.h>
60 #include <drm/gpu_scheduler.h>
61 #include <drm/spsc_queue.h>
62 
63 #define CREATE_TRACE_POINTS
64 #include "gpu_scheduler_trace.h"
65 
66 #define to_drm_sched_job(sched_job)		\
67 		container_of((sched_job), struct drm_sched_job, queue_node)
68 
69 int drm_sched_policy = DRM_SCHED_POLICY_FIFO;
70 
71 /**
72  * DOC: sched_policy (int)
73  * Used to override the default entity scheduling policy in a run queue.
74  */
75 MODULE_PARM_DESC(sched_policy, "Specify the scheduling policy for entities on a run-queue, " __stringify(DRM_SCHED_POLICY_RR) " = Round Robin, " __stringify(DRM_SCHED_POLICY_FIFO) " = FIFO (default).");
76 module_param_named(sched_policy, drm_sched_policy, int, 0444);
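
/*
 * Since the parameter is read-only at runtime (0444), the policy is chosen at
 * boot or module load time, e.g. on the kernel command line (assuming the
 * scheduler is built as the gpu_sched module):
 *
 *	gpu_sched.sched_policy=0
 *
 * which selects Round Robin (DRM_SCHED_POLICY_RR) instead of the default FIFO.
 */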
77 
78 static __always_inline bool drm_sched_entity_compare_before(struct rb_node *a,
79 							    const struct rb_node *b)
80 {
81 	struct drm_sched_entity *ent_a =  rb_entry((a), struct drm_sched_entity, rb_tree_node);
82 	struct drm_sched_entity *ent_b =  rb_entry((b), struct drm_sched_entity, rb_tree_node);
83 
84 	return ktime_before(ent_a->oldest_job_waiting, ent_b->oldest_job_waiting);
85 }
86 
87 static inline void drm_sched_rq_remove_fifo_locked(struct drm_sched_entity *entity)
88 {
89 	struct drm_sched_rq *rq = entity->rq;
90 
91 	if (!RB_EMPTY_NODE(&entity->rb_tree_node)) {
92 		rb_erase_cached(&entity->rb_tree_node, &rq->rb_tree_root);
93 		RB_CLEAR_NODE(&entity->rb_tree_node);
94 	}
95 }
96 
97 void drm_sched_rq_update_fifo(struct drm_sched_entity *entity, ktime_t ts)
98 {
99 	/*
100 	 * Both locks need to be grabbed, one to protect against a concurrent
101 	 * entity->rq change from within drm_sched_entity_select_rq() and the
102 	 * other to update the rb tree structure.
103 	 */
104 	spin_lock(&entity->rq_lock);
105 	spin_lock(&entity->rq->lock);
106 
107 	drm_sched_rq_remove_fifo_locked(entity);
108 
109 	entity->oldest_job_waiting = ts;
110 
111 	rb_add_cached(&entity->rb_tree_node, &entity->rq->rb_tree_root,
112 		      drm_sched_entity_compare_before);
113 
114 	spin_unlock(&entity->rq->lock);
115 	spin_unlock(&entity->rq_lock);
116 }
117 
118 /**
119  * drm_sched_rq_init - initialize a given run queue struct
120  *
121  * @sched: scheduler instance to associate with this run queue
122  * @rq: scheduler run queue
123  *
124  * Initializes a scheduler runqueue.
125  */
126 static void drm_sched_rq_init(struct drm_gpu_scheduler *sched,
127 			      struct drm_sched_rq *rq)
128 {
129 	spin_lock_init(&rq->lock);
130 	INIT_LIST_HEAD(&rq->entities);
131 	rq->rb_tree_root = RB_ROOT_CACHED;
132 	rq->current_entity = NULL;
133 	rq->sched = sched;
134 }
135 
136 /**
137  * drm_sched_rq_add_entity - add an entity
138  *
139  * @rq: scheduler run queue
140  * @entity: scheduler entity
141  *
142  * Adds a scheduler entity to the run queue.
143  */
144 void drm_sched_rq_add_entity(struct drm_sched_rq *rq,
145 			     struct drm_sched_entity *entity)
146 {
147 	if (!list_empty(&entity->list))
148 		return;
149 
150 	spin_lock(&rq->lock);
151 
152 	atomic_inc(rq->sched->score);
153 	list_add_tail(&entity->list, &rq->entities);
154 
155 	spin_unlock(&rq->lock);
156 }
157 
158 /**
159  * drm_sched_rq_remove_entity - remove an entity
160  *
161  * @rq: scheduler run queue
162  * @entity: scheduler entity
163  *
164  * Removes a scheduler entity from the run queue.
165  */
166 void drm_sched_rq_remove_entity(struct drm_sched_rq *rq,
167 				struct drm_sched_entity *entity)
168 {
169 	if (list_empty(&entity->list))
170 		return;
171 
172 	spin_lock(&rq->lock);
173 
174 	atomic_dec(rq->sched->score);
175 	list_del_init(&entity->list);
176 
177 	if (rq->current_entity == entity)
178 		rq->current_entity = NULL;
179 
180 	if (drm_sched_policy == DRM_SCHED_POLICY_FIFO)
181 		drm_sched_rq_remove_fifo_locked(entity);
182 
183 	spin_unlock(&rq->lock);
184 }
185 
186 /**
187  * drm_sched_rq_select_entity_rr - Select an entity which could provide a job to run
188  *
189  * @rq: scheduler run queue to check.
190  *
191  * Try to find a ready entity, returns NULL if none found.
192  */
193 static struct drm_sched_entity *
194 drm_sched_rq_select_entity_rr(struct drm_sched_rq *rq)
195 {
196 	struct drm_sched_entity *entity;
197 
198 	spin_lock(&rq->lock);
199 
200 	entity = rq->current_entity;
201 	if (entity) {
202 		list_for_each_entry_continue(entity, &rq->entities, list) {
203 			if (drm_sched_entity_is_ready(entity)) {
204 				rq->current_entity = entity;
205 				reinit_completion(&entity->entity_idle);
206 				spin_unlock(&rq->lock);
207 				return entity;
208 			}
209 		}
210 	}
211 
212 	list_for_each_entry(entity, &rq->entities, list) {
213 
214 		if (drm_sched_entity_is_ready(entity)) {
215 			rq->current_entity = entity;
216 			reinit_completion(&entity->entity_idle);
217 			spin_unlock(&rq->lock);
218 			return entity;
219 		}
220 
221 		if (entity == rq->current_entity)
222 			break;
223 	}
224 
225 	spin_unlock(&rq->lock);
226 
227 	return NULL;
228 }
229 
230 /**
231  * drm_sched_rq_select_entity_fifo - Select an entity which provides a job to run
232  *
233  * @rq: scheduler run queue to check.
234  *
235  * Find oldest waiting ready entity, returns NULL if none found.
236  */
237 static struct drm_sched_entity *
238 drm_sched_rq_select_entity_fifo(struct drm_sched_rq *rq)
239 {
240 	struct rb_node *rb;
241 
242 	spin_lock(&rq->lock);
243 	for (rb = rb_first_cached(&rq->rb_tree_root); rb; rb = rb_next(rb)) {
244 		struct drm_sched_entity *entity;
245 
246 		entity = rb_entry(rb, struct drm_sched_entity, rb_tree_node);
247 		if (drm_sched_entity_is_ready(entity)) {
248 			rq->current_entity = entity;
249 			reinit_completion(&entity->entity_idle);
250 			break;
251 		}
252 	}
253 	spin_unlock(&rq->lock);
254 
255 	return rb ? rb_entry(rb, struct drm_sched_entity, rb_tree_node) : NULL;
256 }
257 
258 /**
259  * __drm_sched_run_job_queue - enqueue run-job work
260  * @sched: scheduler instance
261  */
262 static void __drm_sched_run_job_queue(struct drm_gpu_scheduler *sched)
263 {
264 	if (!READ_ONCE(sched->pause_submit))
265 		queue_work(sched->submit_wq, &sched->work_run_job);
266 }
267 
268 /**
269  * __drm_sched_run_free_queue - enqueue free-job work
270  * @sched: scheduler instance
271  */
272 static void __drm_sched_run_free_queue(struct drm_gpu_scheduler *sched)
273 {
274 	if (!READ_ONCE(sched->pause_submit))
275 		queue_work(sched->submit_wq, &sched->work_free_job);
276 }
277 
278 /**
279  * drm_sched_run_free_queue - enqueue free-job work if ready
280  * @sched: scheduler instance
281  */
282 static void drm_sched_run_free_queue(struct drm_gpu_scheduler *sched)
283 {
284 	struct drm_sched_job *job;
285 
286 	spin_lock(&sched->job_list_lock);
287 	job = list_first_entry_or_null(&sched->pending_list,
288 				       struct drm_sched_job, list);
289 	if (job && dma_fence_is_signaled(&job->s_fence->finished))
290 		__drm_sched_run_free_queue(sched);
291 	spin_unlock(&sched->job_list_lock);
292 }
293 
294 /**
295  * drm_sched_job_done - complete a job
296  * @s_job: pointer to the job which is done
 * @result: the job's result; an error code here is forwarded to the finished fence
297  *
298  * Finish the job's fence and wake up the worker thread.
299  */
300 static void drm_sched_job_done(struct drm_sched_job *s_job, int result)
301 {
302 	struct drm_sched_fence *s_fence = s_job->s_fence;
303 	struct drm_gpu_scheduler *sched = s_fence->sched;
304 
305 	atomic_dec(&sched->hw_rq_count);
306 	atomic_dec(sched->score);
307 
308 	trace_drm_sched_process_job(s_fence);
309 
310 	dma_fence_get(&s_fence->finished);
311 	drm_sched_fence_finished(s_fence, result);
312 	dma_fence_put(&s_fence->finished);
313 	__drm_sched_run_free_queue(sched);
314 }
315 
316 /**
317  * drm_sched_job_done_cb - the callback for a done job
318  * @f: fence
319  * @cb: fence callbacks
320  */
321 static void drm_sched_job_done_cb(struct dma_fence *f, struct dma_fence_cb *cb)
322 {
323 	struct drm_sched_job *s_job = container_of(cb, struct drm_sched_job, cb);
324 
325 	drm_sched_job_done(s_job, f->error);
326 }
327 
328 /**
329  * drm_sched_start_timeout - start timeout for reset worker
330  *
331  * @sched: scheduler instance to start the worker for
332  *
333  * Start the timeout for the given scheduler.
334  */
335 static void drm_sched_start_timeout(struct drm_gpu_scheduler *sched)
336 {
337 	lockdep_assert_held(&sched->job_list_lock);
338 
339 	if (sched->timeout != MAX_SCHEDULE_TIMEOUT &&
340 	    !list_empty(&sched->pending_list))
341 		mod_delayed_work(sched->timeout_wq, &sched->work_tdr, sched->timeout);
342 }
343 
344 static void drm_sched_start_timeout_unlocked(struct drm_gpu_scheduler *sched)
345 {
346 	spin_lock(&sched->job_list_lock);
347 	drm_sched_start_timeout(sched);
348 	spin_unlock(&sched->job_list_lock);
349 }
350 
351 /**
352  * drm_sched_tdr_queue_imm - immediately start job timeout handler
353  *
354  * @sched: scheduler for which the timeout handling should be started.
355  *
356  * Start timeout handling immediately for the named scheduler.
357  */
358 void drm_sched_tdr_queue_imm(struct drm_gpu_scheduler *sched)
359 {
360 	spin_lock(&sched->job_list_lock);
361 	sched->timeout = 0;
362 	drm_sched_start_timeout(sched);
363 	spin_unlock(&sched->job_list_lock);
364 }
365 EXPORT_SYMBOL(drm_sched_tdr_queue_imm);
366 
367 /**
368  * drm_sched_fault - immediately start timeout handler
369  *
370  * @sched: scheduler where the timeout handling should be started.
371  *
372  * Start timeout handling immediately when the driver detects a hardware fault.
373  */
374 void drm_sched_fault(struct drm_gpu_scheduler *sched)
375 {
376 	if (sched->timeout_wq)
377 		mod_delayed_work(sched->timeout_wq, &sched->work_tdr, 0);
378 }
379 EXPORT_SYMBOL(drm_sched_fault);
380 
381 /**
382  * drm_sched_suspend_timeout - Suspend scheduler job timeout
383  *
384  * @sched: scheduler instance for which to suspend the timeout
385  *
386  * Suspend the delayed work timeout for the scheduler. This is done by
387  * modifying the delayed work timeout to an arbitrarily large value,
388  * MAX_SCHEDULE_TIMEOUT in this case.
389  *
390  * Returns the remaining timeout in jiffies.
391  *
392  */
393 unsigned long drm_sched_suspend_timeout(struct drm_gpu_scheduler *sched)
394 {
395 	unsigned long sched_timeout, now = jiffies;
396 
397 	sched_timeout = sched->work_tdr.timer.expires;
398 
399 	/*
400 	 * Modify the timeout to an arbitrarily large value. This also prevents
401 	 * the timeout from being restarted when new submissions arrive.
402 	 */
403 	if (mod_delayed_work(sched->timeout_wq, &sched->work_tdr, MAX_SCHEDULE_TIMEOUT)
404 			&& time_after(sched_timeout, now))
405 		return sched_timeout - now;
406 	else
407 		return sched->timeout;
408 }
409 EXPORT_SYMBOL(drm_sched_suspend_timeout);
410 
411 /**
412  * drm_sched_resume_timeout - Resume scheduler job timeout
413  *
414  * @sched: scheduler instance for which to resume the timeout
415  * @remaining: remaining timeout
416  *
417  * Resume the delayed work timeout for the scheduler.
418  */
419 void drm_sched_resume_timeout(struct drm_gpu_scheduler *sched,
420 		unsigned long remaining)
421 {
422 	spin_lock(&sched->job_list_lock);
423 
424 	if (list_empty(&sched->pending_list))
425 		cancel_delayed_work(&sched->work_tdr);
426 	else
427 		mod_delayed_work(sched->timeout_wq, &sched->work_tdr, remaining);
428 
429 	spin_unlock(&sched->job_list_lock);
430 }
431 EXPORT_SYMBOL(drm_sched_resume_timeout);
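
/*
 * A minimal usage sketch for the suspend/resume pair (hypothetical driver
 * code, "ring" being a driver structure that embeds the scheduler):
 *
 *	unsigned long remaining;
 *
 *	remaining = drm_sched_suspend_timeout(&ring->sched);
 *	... perform the operation that must not trigger a job timeout ...
 *	drm_sched_resume_timeout(&ring->sched, remaining);
 */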
432 
433 static void drm_sched_job_begin(struct drm_sched_job *s_job)
434 {
435 	struct drm_gpu_scheduler *sched = s_job->sched;
436 
437 	spin_lock(&sched->job_list_lock);
438 	list_add_tail(&s_job->list, &sched->pending_list);
439 	drm_sched_start_timeout(sched);
440 	spin_unlock(&sched->job_list_lock);
441 }
442 
443 static void drm_sched_job_timedout(struct work_struct *work)
444 {
445 	struct drm_gpu_scheduler *sched;
446 	struct drm_sched_job *job;
447 	enum drm_gpu_sched_stat status = DRM_GPU_SCHED_STAT_NOMINAL;
448 
449 	sched = container_of(work, struct drm_gpu_scheduler, work_tdr.work);
450 
451 	/* Protects against concurrent deletion in drm_sched_get_finished_job */
452 	spin_lock(&sched->job_list_lock);
453 	job = list_first_entry_or_null(&sched->pending_list,
454 				       struct drm_sched_job, list);
455 
456 	if (job) {
457 		/*
458 		 * Remove the bad job so it cannot be freed by a concurrent
459 		 * drm_sched_get_finished_job(). It will be reinserted in
460 		 * drm_sched_stop() once submission is stopped, at which point it's safe.
461 		 */
462 		list_del_init(&job->list);
463 		spin_unlock(&sched->job_list_lock);
464 
465 		status = job->sched->ops->timedout_job(job);
466 
467 		/*
468 		 * Guilty job did complete and hence needs to be manually removed
469 		 * See drm_sched_stop doc.
470 		 */
471 		if (sched->free_guilty) {
472 			job->sched->ops->free_job(job);
473 			sched->free_guilty = false;
474 		}
475 	} else {
476 		spin_unlock(&sched->job_list_lock);
477 	}
478 
479 	if (status != DRM_GPU_SCHED_STAT_ENODEV)
480 		drm_sched_start_timeout_unlocked(sched);
481 }
482 
483 /**
484  * drm_sched_stop - stop the scheduler
485  *
486  * @sched: scheduler instance
487  * @bad: job which caused the time out
488  *
489  * Stop the scheduler and also removes and frees all completed jobs.
490  * Note: the bad job will not be freed as it might be used later and so it is
491  * the caller's responsibility to release it manually if it is no longer part
492  * of the pending list.
493  *
494  */
495 void drm_sched_stop(struct drm_gpu_scheduler *sched, struct drm_sched_job *bad)
496 {
497 	struct drm_sched_job *s_job, *tmp;
498 
499 	drm_sched_wqueue_stop(sched);
500 
501 	/*
502 	 * Reinsert the bad job here - now it's safe as
503 	 * drm_sched_get_finished_job() cannot race against us and release the
504 	 * bad job at this point - we waited for any in progress
505 	 * (earlier) cleanups and drm_sched_get_finished_job() will not be
506 	 * called again until submission is restarted.
507 	 */
508 	if (bad && bad->sched == sched)
509 		/*
510 		 * Add at the head of the queue to reflect it was the earliest
511 		 * job extracted.
512 		 */
513 		list_add(&bad->list, &sched->pending_list);
514 
515 	/*
516 	 * Iterate the job list from the latest to the earliest one and either
517 	 * deactivate their HW callbacks or remove them from the pending list if
518 	 * they have already signaled.
519 	 * This iteration is thread safe as submission to the scheduler is stopped.
520 	 */
521 	list_for_each_entry_safe_reverse(s_job, tmp, &sched->pending_list,
522 					 list) {
523 		if (s_job->s_fence->parent &&
524 		    dma_fence_remove_callback(s_job->s_fence->parent,
525 					      &s_job->cb)) {
526 			dma_fence_put(s_job->s_fence->parent);
527 			s_job->s_fence->parent = NULL;
528 			atomic_dec(&sched->hw_rq_count);
529 		} else {
530 			/*
531 			 * remove job from pending_list.
532 			 * Locking here is for concurrent resume timeout
533 			 */
534 			spin_lock(&sched->job_list_lock);
535 			list_del_init(&s_job->list);
536 			spin_unlock(&sched->job_list_lock);
537 
538 			/*
539 			 * Wait for job's HW fence callback to finish using s_job
540 			 * before releasing it.
541 			 *
542 			 * Job is still alive so fence refcount at least 1
543 			 */
544 			dma_fence_wait(&s_job->s_fence->finished, false);
545 
546 			/*
547 			 * We must keep bad job alive for later use during
548 			 * recovery by some of the drivers but leave a hint
549 			 * that the guilty job must be released.
550 			 */
551 			if (bad != s_job)
552 				sched->ops->free_job(s_job);
553 			else
554 				sched->free_guilty = true;
555 		}
556 	}
557 
558 	/*
559 	 * Stop the pending timer in flight as we rearm it in drm_sched_start().
560 	 * This prevents the in-progress timeout work from firing right away
561 	 * after this TDR has finished and before the newly restarted jobs have
562 	 * had a chance to complete.
563 	 */
564 	cancel_delayed_work(&sched->work_tdr);
565 }
566 
567 EXPORT_SYMBOL(drm_sched_stop);
568 
569 /**
570  * drm_sched_start - recover jobs after a reset
571  *
572  * @sched: scheduler instance
573  * @full_recovery: proceed with complete sched restart
574  *
575  */
576 void drm_sched_start(struct drm_gpu_scheduler *sched, bool full_recovery)
577 {
578 	struct drm_sched_job *s_job, *tmp;
579 	int r;
580 
581 	/*
582 	 * Locking the list is not required here as submission is stopped,
583 	 * so no new jobs are being inserted or removed. Also concurrent
584 	 * GPU recoveries can't run in parallel.
585 	 */
586 	list_for_each_entry_safe(s_job, tmp, &sched->pending_list, list) {
587 		struct dma_fence *fence = s_job->s_fence->parent;
588 
589 		atomic_inc(&sched->hw_rq_count);
590 
591 		if (!full_recovery)
592 			continue;
593 
594 		if (fence) {
595 			r = dma_fence_add_callback(fence, &s_job->cb,
596 						   drm_sched_job_done_cb);
597 			if (r == -ENOENT)
598 				drm_sched_job_done(s_job, fence->error);
599 			else if (r)
600 				DRM_DEV_ERROR(sched->dev, "fence add callback failed (%d)\n",
601 					  r);
602 		} else
603 			drm_sched_job_done(s_job, -ECANCELED);
604 	}
605 
606 	if (full_recovery)
607 		drm_sched_start_timeout_unlocked(sched);
608 
609 	drm_sched_wqueue_start(sched);
610 }
611 EXPORT_SYMBOL(drm_sched_start);
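
/*
 * drm_sched_stop() and drm_sched_start() are typically used together from a
 * driver's &drm_sched_backend_ops.timedout_job callback. A rough sketch of the
 * classic recovery sequence (details differ per driver, and the resubmit step
 * is deprecated, see drm_sched_resubmit_jobs() below):
 *
 *	drm_sched_stop(sched, bad_job);
 *	drm_sched_increase_karma(bad_job);
 *	... reset the hardware ...
 *	drm_sched_resubmit_jobs(sched);
 *	drm_sched_start(sched, true);
 *	return DRM_GPU_SCHED_STAT_NOMINAL;
 */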
612 
613 /**
614  * drm_sched_resubmit_jobs - Deprecated, don't use in new code!
615  *
616  * @sched: scheduler instance
617  *
618  * Re-submitting jobs was a concept AMD came up with as a cheap way to
619  * implement recovery after a job timeout.
620  *
621  * This turned out to not work very well. First of all, there are many
622  * problems with the dma_fence implementation and requirements. Either the
623  * implementation is risking deadlocks with core memory management or violating
624  * documented implementation details of the dma_fence object.
625  *
626  * Drivers can still save and restore their state for recovery operations, but
627  * we shouldn't make this a general scheduler feature around the dma_fence
628  * interface.
629  */
630 void drm_sched_resubmit_jobs(struct drm_gpu_scheduler *sched)
631 {
632 	struct drm_sched_job *s_job, *tmp;
633 	uint64_t guilty_context;
634 	bool found_guilty = false;
635 	struct dma_fence *fence;
636 
637 	list_for_each_entry_safe(s_job, tmp, &sched->pending_list, list) {
638 		struct drm_sched_fence *s_fence = s_job->s_fence;
639 
640 		if (!found_guilty && atomic_read(&s_job->karma) > sched->hang_limit) {
641 			found_guilty = true;
642 			guilty_context = s_job->s_fence->scheduled.context;
643 		}
644 
645 		if (found_guilty && s_job->s_fence->scheduled.context == guilty_context)
646 			dma_fence_set_error(&s_fence->finished, -ECANCELED);
647 
648 		fence = sched->ops->run_job(s_job);
649 
650 		if (IS_ERR_OR_NULL(fence)) {
651 			if (IS_ERR(fence))
652 				dma_fence_set_error(&s_fence->finished, PTR_ERR(fence));
653 
654 			s_job->s_fence->parent = NULL;
655 		} else {
656 
657 			s_job->s_fence->parent = dma_fence_get(fence);
658 
659 			/* Drop for original kref_init */
660 			dma_fence_put(fence);
661 		}
662 	}
663 }
664 EXPORT_SYMBOL(drm_sched_resubmit_jobs);
665 
666 /**
667  * drm_sched_job_init - init a scheduler job
668  * @job: scheduler job to init
669  * @entity: scheduler entity to use
670  * @owner: job owner for debugging
671  *
672  * Refer to drm_sched_entity_push_job() documentation
673  * for locking considerations.
674  *
675  * Drivers must make sure to call drm_sched_job_cleanup() if this function returns
676  * successfully, even when @job is aborted before drm_sched_job_arm() is called.
677  *
678  * WARNING: amdgpu abuses &drm_gpu_scheduler.ready to signal when the hardware
679  * has died, which can mean that there's no valid runqueue for an @entity.
680  * This function returns -ENOENT in this case (which probably should be -EIO as
681  * a more meaningful return value).
682  *
683  * Returns 0 for success, negative error code otherwise.
684  */
685 int drm_sched_job_init(struct drm_sched_job *job,
686 		       struct drm_sched_entity *entity,
687 		       void *owner)
688 {
689 	if (!entity->rq) {
690 		/* This will most likely be followed by missing frames
691 		 * or worse--a blank screen--so leave a trail in the
692 		 * logs to make this easier to debug.
693 		 */
694 		drm_err(job->sched, "%s: entity has no rq!\n", __func__);
695 		return -ENOENT;
696 	}
697 
698 	job->entity = entity;
699 	job->s_fence = drm_sched_fence_alloc(entity, owner);
700 	if (!job->s_fence)
701 		return -ENOMEM;
702 
703 	INIT_LIST_HEAD(&job->list);
704 
705 	xa_init_flags(&job->dependencies, XA_FLAGS_ALLOC);
706 
707 	return 0;
708 }
709 EXPORT_SYMBOL(drm_sched_job_init);
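
/*
 * Sketch of the expected error handling (hypothetical driver code): once
 * drm_sched_job_init() has succeeded, every abort path taken before
 * drm_sched_entity_push_job() must call drm_sched_job_cleanup():
 *
 *	ret = drm_sched_job_init(&job->base, entity, owner);
 *	if (ret)
 *		return ret;
 *
 *	ret = my_driver_prepare_job(job);	(hypothetical helper)
 *	if (ret) {
 *		drm_sched_job_cleanup(&job->base);
 *		return ret;
 *	}
 */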
710 
711 /**
712  * drm_sched_job_arm - arm a scheduler job for execution
713  * @job: scheduler job to arm
714  *
715  * This arms a scheduler job for execution. Specifically it initializes the
716  * &drm_sched_job.s_fence of @job, so that it can be attached to struct dma_resv
717  * or other places that need to track the completion of this job.
718  *
719  * Refer to drm_sched_entity_push_job() documentation for locking
720  * considerations.
721  *
722  * This can only be called if drm_sched_job_init() succeeded.
723  */
724 void drm_sched_job_arm(struct drm_sched_job *job)
725 {
726 	struct drm_gpu_scheduler *sched;
727 	struct drm_sched_entity *entity = job->entity;
728 
729 	BUG_ON(!entity);
730 	drm_sched_entity_select_rq(entity);
731 	sched = entity->rq->sched;
732 
733 	job->sched = sched;
734 	job->s_priority = entity->priority;
735 	job->id = atomic64_inc_return(&sched->job_id_count);
736 
737 	drm_sched_fence_init(job->s_fence, job->entity);
738 }
739 EXPORT_SYMBOL(drm_sched_job_arm);
740 
741 /**
742  * drm_sched_job_add_dependency - adds the fence as a job dependency
743  * @job: scheduler job to add the dependencies to
744  * @fence: the dma_fence to add to the list of dependencies.
745  *
746  * Note that @fence is consumed in both the success and error cases.
747  *
748  * Returns:
749  * 0 on success, or an error on failing to expand the array.
750  */
751 int drm_sched_job_add_dependency(struct drm_sched_job *job,
752 				 struct dma_fence *fence)
753 {
754 	struct dma_fence *entry;
755 	unsigned long index;
756 	u32 id = 0;
757 	int ret;
758 
759 	if (!fence)
760 		return 0;
761 
762 	/* Deduplicate if we already depend on a fence from the same context.
763 	 * This lets the size of the array of deps scale with the number of
764 	 * engines involved, rather than the number of BOs.
765 	 */
766 	xa_for_each(&job->dependencies, index, entry) {
767 		if (entry->context != fence->context)
768 			continue;
769 
770 		if (dma_fence_is_later(fence, entry)) {
771 			dma_fence_put(entry);
772 			xa_store(&job->dependencies, index, fence, GFP_KERNEL);
773 		} else {
774 			dma_fence_put(fence);
775 		}
776 		return 0;
777 	}
778 
779 	ret = xa_alloc(&job->dependencies, &id, fence, xa_limit_32b, GFP_KERNEL);
780 	if (ret != 0)
781 		dma_fence_put(fence);
782 
783 	return ret;
784 }
785 EXPORT_SYMBOL(drm_sched_job_add_dependency);
786 
787 /**
788  * drm_sched_job_add_syncobj_dependency - adds a syncobj's fence as a job dependency
789  * @job: scheduler job to add the dependencies to
790  * @file: drm file private pointer
791  * @handle: syncobj handle to lookup
792  * @point: timeline point
793  *
794  * This adds the fence matching the given syncobj to @job.
795  *
796  * Returns:
797  * 0 on success, or an error on failing to expand the array.
798  */
799 int drm_sched_job_add_syncobj_dependency(struct drm_sched_job *job,
800 					 struct drm_file *file,
801 					 u32 handle,
802 					 u32 point)
803 {
804 	struct dma_fence *fence;
805 	int ret;
806 
807 	ret = drm_syncobj_find_fence(file, handle, point, 0, &fence);
808 	if (ret)
809 		return ret;
810 
811 	return drm_sched_job_add_dependency(job, fence);
812 }
813 EXPORT_SYMBOL(drm_sched_job_add_syncobj_dependency);
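
/*
 * Typically called while handling a driver's submit ioctl, once for each
 * in-fence syncobj handle passed by userspace (hypothetical driver code,
 * "args" being the ioctl argument structure):
 *
 *	for (i = 0; i < args->num_in_syncobjs; i++) {
 *		ret = drm_sched_job_add_syncobj_dependency(&job->base, file,
 *							   args->in_syncobjs[i],
 *							   0);
 *		if (ret)
 *			goto err_cleanup;
 *	}
 */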
814 
815 /**
816  * drm_sched_job_add_resv_dependencies - add all fences from the resv to the job
817  * @job: scheduler job to add the dependencies to
818  * @resv: the dma_resv object to get the fences from
819  * @usage: the dma_resv_usage to use to filter the fences
820  *
821  * This adds all fences matching the given usage from @resv to @job.
822  * Must be called with the @resv lock held.
823  *
824  * Returns:
825  * 0 on success, or an error on failing to expand the array.
826  */
827 int drm_sched_job_add_resv_dependencies(struct drm_sched_job *job,
828 					struct dma_resv *resv,
829 					enum dma_resv_usage usage)
830 {
831 	struct dma_resv_iter cursor;
832 	struct dma_fence *fence;
833 	int ret;
834 
835 	dma_resv_assert_held(resv);
836 
837 	dma_resv_for_each_fence(&cursor, resv, usage, fence) {
838 		/* Make sure to grab an additional ref on the added fence */
839 		dma_fence_get(fence);
840 		ret = drm_sched_job_add_dependency(job, fence);
841 		if (ret) {
842 			dma_fence_put(fence);
843 			return ret;
844 		}
845 	}
846 	return 0;
847 }
848 EXPORT_SYMBOL(drm_sched_job_add_resv_dependencies);
849 
850 /**
851  * drm_sched_job_add_implicit_dependencies - adds implicit dependencies as job
852  *   dependencies
853  * @job: scheduler job to add the dependencies to
854  * @obj: the gem object to add new dependencies from.
855  * @write: whether the job might write the object (so we need to depend on
856  * shared fences in the reservation object).
857  *
858  * This should be called after drm_gem_lock_reservations() on your array of
859  * GEM objects used in the job but before updating the reservations with your
860  * own fences.
861  *
862  * Returns:
863  * 0 on success, or an error on failing to expand the array.
864  */
865 int drm_sched_job_add_implicit_dependencies(struct drm_sched_job *job,
866 					    struct drm_gem_object *obj,
867 					    bool write)
868 {
869 	return drm_sched_job_add_resv_dependencies(job, obj->resv,
870 						   dma_resv_usage_rw(write));
871 }
872 EXPORT_SYMBOL(drm_sched_job_add_implicit_dependencies);
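
/*
 * A minimal sketch of the intended calling sequence (hypothetical driver
 * code, error handling omitted):
 *
 *	ret = drm_gem_lock_reservations(objs, num_objs, &ticket);
 *
 *	for (i = 0; i < num_objs; i++)
 *		ret = drm_sched_job_add_implicit_dependencies(&job->base,
 *							      objs[i], write);
 *
 *	... install the job's finished fence into the reservation objects ...
 *	drm_gem_unlock_reservations(objs, num_objs, &ticket);
 */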
873 
874 /**
875  * drm_sched_job_cleanup - clean up scheduler job resources
876  * @job: scheduler job to clean up
877  *
878  * Cleans up the resources allocated with drm_sched_job_init().
879  *
880  * Drivers should call this from their error unwind code if @job is aborted
881  * before drm_sched_job_arm() is called.
882  *
883  * After that point of no return @job is committed to be executed by the
884  * scheduler, and this function should be called from the
885  * &drm_sched_backend_ops.free_job callback.
886  */
887 void drm_sched_job_cleanup(struct drm_sched_job *job)
888 {
889 	struct dma_fence *fence;
890 	unsigned long index;
891 
892 	if (kref_read(&job->s_fence->finished.refcount)) {
893 		/* drm_sched_job_arm() has been called */
894 		dma_fence_put(&job->s_fence->finished);
895 	} else {
896 		/* aborted job before committing to run it */
897 		drm_sched_fence_free(job->s_fence);
898 	}
899 
900 	job->s_fence = NULL;
901 
902 	xa_for_each(&job->dependencies, index, fence) {
903 		dma_fence_put(fence);
904 	}
905 	xa_destroy(&job->dependencies);
906 
907 }
908 EXPORT_SYMBOL(drm_sched_job_cleanup);
909 
910 /**
911  * drm_sched_can_queue - Can we queue more to the hardware?
912  * @sched: scheduler instance
913  *
914  * Return true if we can push more jobs to the hw, otherwise false.
915  */
916 static bool drm_sched_can_queue(struct drm_gpu_scheduler *sched)
917 {
918 	return atomic_read(&sched->hw_rq_count) <
919 		sched->hw_submission_limit;
920 }
921 
922 /**
923  * drm_sched_wakeup_if_can_queue - Wake up the scheduler
924  * @sched: scheduler instance
925  *
926  * Wake up the scheduler if we can queue jobs.
927  */
928 void drm_sched_wakeup_if_can_queue(struct drm_gpu_scheduler *sched)
929 {
930 	if (drm_sched_can_queue(sched))
931 		__drm_sched_run_job_queue(sched);
932 }
933 
934 /**
935  * drm_sched_select_entity - Select next entity to process
936  *
937  * @sched: scheduler instance
938  *
939  * Returns the entity to process or NULL if none are found.
940  */
941 static struct drm_sched_entity *
942 drm_sched_select_entity(struct drm_gpu_scheduler *sched)
943 {
944 	struct drm_sched_entity *entity;
945 	int i;
946 
947 	if (!drm_sched_can_queue(sched))
948 		return NULL;
949 
950 	/* Kernel run queue has higher priority than normal run queue */
951 	for (i = sched->num_rqs - 1; i >= DRM_SCHED_PRIORITY_MIN; i--) {
952 		entity = drm_sched_policy == DRM_SCHED_POLICY_FIFO ?
953 			drm_sched_rq_select_entity_fifo(sched->sched_rq[i]) :
954 			drm_sched_rq_select_entity_rr(sched->sched_rq[i]);
955 		if (entity)
956 			break;
957 	}
958 
959 	return entity;
960 }
961 
962 /**
963  * drm_sched_get_finished_job - fetch the next finished job to be destroyed
964  *
965  * @sched: scheduler instance
966  *
967  * Returns the next finished job from the pending list (if there is one)
968  * ready to be destroyed.
969  */
970 static struct drm_sched_job *
971 drm_sched_get_finished_job(struct drm_gpu_scheduler *sched)
972 {
973 	struct drm_sched_job *job, *next;
974 
975 	spin_lock(&sched->job_list_lock);
976 
977 	job = list_first_entry_or_null(&sched->pending_list,
978 				       struct drm_sched_job, list);
979 
980 	if (job && dma_fence_is_signaled(&job->s_fence->finished)) {
981 		/* remove job from pending_list */
982 		list_del_init(&job->list);
983 
984 		/* cancel this job's TO timer */
985 		cancel_delayed_work(&sched->work_tdr);
986 		/* make the scheduled timestamp more accurate */
987 		next = list_first_entry_or_null(&sched->pending_list,
988 						typeof(*next), list);
989 
990 		if (next) {
991 			if (test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT,
992 				     &next->s_fence->scheduled.flags))
993 				next->s_fence->scheduled.timestamp =
994 					dma_fence_timestamp(&job->s_fence->finished);
995 			/* start TO timer for next job */
996 			drm_sched_start_timeout(sched);
997 		}
998 	} else {
999 		job = NULL;
1000 	}
1001 
1002 	spin_unlock(&sched->job_list_lock);
1003 
1004 	return job;
1005 }
1006 
1007 /**
1008  * drm_sched_pick_best - Get a drm sched from a sched_list with the least load
1009  * @sched_list: list of drm_gpu_schedulers
1010  * @num_sched_list: number of drm_gpu_schedulers in the sched_list
1011  *
1012  * Returns a pointer to the sched with the least load, or NULL if none of the
1013  * drm_gpu_schedulers are ready.
1014  */
1015 struct drm_gpu_scheduler *
1016 drm_sched_pick_best(struct drm_gpu_scheduler **sched_list,
1017 		     unsigned int num_sched_list)
1018 {
1019 	struct drm_gpu_scheduler *sched, *picked_sched = NULL;
1020 	int i;
1021 	unsigned int min_score = UINT_MAX, num_score;
1022 
1023 	for (i = 0; i < num_sched_list; ++i) {
1024 		sched = sched_list[i];
1025 
1026 		if (!sched->ready) {
1027 			DRM_WARN("scheduler %s is not ready, skipping",
1028 				 sched->name);
1029 			continue;
1030 		}
1031 
1032 		num_score = atomic_read(sched->score);
1033 		if (num_score < min_score) {
1034 			min_score = num_score;
1035 			picked_sched = sched;
1036 		}
1037 	}
1038 
1039 	return picked_sched;
1040 }
1041 EXPORT_SYMBOL(drm_sched_pick_best);
1042 
1043 /**
1044  * drm_sched_run_job_queue - enqueue run-job work if there are ready entities
1045  * @sched: scheduler instance
1046  */
1047 static void drm_sched_run_job_queue(struct drm_gpu_scheduler *sched)
1048 {
1049 	if (drm_sched_select_entity(sched))
1050 		__drm_sched_run_job_queue(sched);
1051 }
1052 
1053 /**
1054  * drm_sched_free_job_work - worker to call free_job
1055  *
1056  * @w: free job work
1057  */
1058 static void drm_sched_free_job_work(struct work_struct *w)
1059 {
1060 	struct drm_gpu_scheduler *sched =
1061 		container_of(w, struct drm_gpu_scheduler, work_free_job);
1062 	struct drm_sched_job *job;
1063 
1064 	if (READ_ONCE(sched->pause_submit))
1065 		return;
1066 
1067 	job = drm_sched_get_finished_job(sched);
1068 	if (job)
1069 		sched->ops->free_job(job);
1070 
1071 	drm_sched_run_free_queue(sched);
1072 	drm_sched_run_job_queue(sched);
1073 }
1074 
1075 /**
1076  * drm_sched_run_job_work - worker to call run_job
1077  *
1078  * @w: run job work
1079  */
1080 static void drm_sched_run_job_work(struct work_struct *w)
1081 {
1082 	struct drm_gpu_scheduler *sched =
1083 		container_of(w, struct drm_gpu_scheduler, work_run_job);
1084 	struct drm_sched_entity *entity;
1085 	struct dma_fence *fence;
1086 	struct drm_sched_fence *s_fence;
1087 	struct drm_sched_job *sched_job;
1088 	int r;
1089 
1090 	if (READ_ONCE(sched->pause_submit))
1091 		return;
1092 
1093 	entity = drm_sched_select_entity(sched);
1094 	if (!entity)
1095 		return;
1096 
1097 	sched_job = drm_sched_entity_pop_job(entity);
1098 	if (!sched_job) {
1099 		complete_all(&entity->entity_idle);
1100 		return;	/* No more work */
1101 	}
1102 
1103 	s_fence = sched_job->s_fence;
1104 
1105 	atomic_inc(&sched->hw_rq_count);
1106 	drm_sched_job_begin(sched_job);
1107 
1108 	trace_drm_run_job(sched_job, entity);
1109 	fence = sched->ops->run_job(sched_job);
1110 	complete_all(&entity->entity_idle);
1111 	drm_sched_fence_scheduled(s_fence, fence);
1112 
1113 	if (!IS_ERR_OR_NULL(fence)) {
1114 		/* Drop for original kref_init of the fence */
1115 		dma_fence_put(fence);
1116 
1117 		r = dma_fence_add_callback(fence, &sched_job->cb,
1118 					   drm_sched_job_done_cb);
1119 		if (r == -ENOENT)
1120 			drm_sched_job_done(sched_job, fence->error);
1121 		else if (r)
1122 			DRM_DEV_ERROR(sched->dev, "fence add callback failed (%d)\n", r);
1123 	} else {
1124 		drm_sched_job_done(sched_job, IS_ERR(fence) ?
1125 				   PTR_ERR(fence) : 0);
1126 	}
1127 
1128 	wake_up(&sched->job_scheduled);
1129 	drm_sched_run_job_queue(sched);
1130 }
1131 
1132 /**
1133  * drm_sched_init - Init a gpu scheduler instance
1134  *
1135  * @sched: scheduler instance
1136  * @ops: backend operations for this scheduler
1137  * @submit_wq: workqueue to use for submission. If NULL, an ordered wq is
1138  *	       allocated and used
1139  * @num_rqs: number of runqueues, one for each priority, up to DRM_SCHED_PRIORITY_COUNT
1140  * @hw_submission: number of hw submissions that can be in flight
1141  * @hang_limit: number of times to allow a job to hang before dropping it
1142  * @timeout: timeout value in jiffies for the scheduler
1143  * @timeout_wq: workqueue to use for timeout work. If NULL, the system_wq is
1144  *		used
1145  * @score: optional score atomic shared with other schedulers
1146  * @name: name used for debugging
1147  * @dev: target &struct device
1148  *
1149  * Return 0 on success, otherwise error code.
1150  */
1151 int drm_sched_init(struct drm_gpu_scheduler *sched,
1152 		   const struct drm_sched_backend_ops *ops,
1153 		   struct workqueue_struct *submit_wq,
1154 		   u32 num_rqs, uint32_t hw_submission, unsigned int hang_limit,
1155 		   long timeout, struct workqueue_struct *timeout_wq,
1156 		   atomic_t *score, const char *name, struct device *dev)
1157 {
1158 	int i, ret;
1159 
1160 	sched->ops = ops;
1161 	sched->hw_submission_limit = hw_submission;
1162 	sched->name = name;
1163 	sched->timeout = timeout;
1164 	sched->timeout_wq = timeout_wq ? : system_wq;
1165 	sched->hang_limit = hang_limit;
1166 	sched->score = score ? score : &sched->_score;
1167 	sched->dev = dev;
1168 
1169 	if (num_rqs > DRM_SCHED_PRIORITY_COUNT) {
1170 		/* This is a gross violation--tell drivers what the  problem is.
1171 		 */
1172 		drm_err(sched, "%s: num_rqs cannot be greater than DRM_SCHED_PRIORITY_COUNT\n",
1173 			__func__);
1174 		return -EINVAL;
1175 	} else if (sched->sched_rq) {
1176 		/* Not an error, but warn anyway so drivers can
1177 		 * fine-tune their DRM calling order, and return 0 because
1178 		 * all is good.
1179 		 */
1180 		drm_warn(sched, "%s: scheduler already initialized!\n", __func__);
1181 		return 0;
1182 	}
1183 
1184 	if (submit_wq) {
1185 		sched->submit_wq = submit_wq;
1186 		sched->own_submit_wq = false;
1187 	} else {
1188 		sched->submit_wq = alloc_ordered_workqueue(name, 0);
1189 		if (!sched->submit_wq)
1190 			return -ENOMEM;
1191 
1192 		sched->own_submit_wq = true;
1193 	}
1194 	ret = -ENOMEM;
1195 	sched->sched_rq = kmalloc_array(num_rqs, sizeof(*sched->sched_rq),
1196 					GFP_KERNEL | __GFP_ZERO);
1197 	if (!sched->sched_rq)
1198 		goto Out_free;
1199 	sched->num_rqs = num_rqs;
1200 	for (i = DRM_SCHED_PRIORITY_MIN; i < sched->num_rqs; i++) {
1201 		sched->sched_rq[i] = kzalloc(sizeof(*sched->sched_rq[i]), GFP_KERNEL);
1202 		if (!sched->sched_rq[i])
1203 			goto Out_unroll;
1204 		drm_sched_rq_init(sched, sched->sched_rq[i]);
1205 	}
1206 
1207 	init_waitqueue_head(&sched->job_scheduled);
1208 	INIT_LIST_HEAD(&sched->pending_list);
1209 	spin_lock_init(&sched->job_list_lock);
1210 	atomic_set(&sched->hw_rq_count, 0);
1211 	INIT_DELAYED_WORK(&sched->work_tdr, drm_sched_job_timedout);
1212 	INIT_WORK(&sched->work_run_job, drm_sched_run_job_work);
1213 	INIT_WORK(&sched->work_free_job, drm_sched_free_job_work);
1214 	atomic_set(&sched->_score, 0);
1215 	atomic64_set(&sched->job_id_count, 0);
1216 	sched->pause_submit = false;
1217 
1218 	sched->ready = true;
1219 	return 0;
1220 Out_unroll:
1221 	for (--i ; i >= DRM_SCHED_PRIORITY_MIN; i--)
1222 		kfree(sched->sched_rq[i]);
1223 Out_free:
1224 	kfree(sched->sched_rq);
1225 	sched->sched_rq = NULL;
1226 	if (sched->own_submit_wq)
1227 		destroy_workqueue(sched->submit_wq);
1228 	drm_err(sched, "%s: Failed to setup GPU scheduler--out of memory\n", __func__);
1229 	return ret;
1230 }
1231 EXPORT_SYMBOL(drm_sched_init);
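
/*
 * A minimal initialization sketch (hypothetical driver code and values):
 *
 *	ret = drm_sched_init(&ring->sched, &my_sched_ops, NULL,
 *			     DRM_SCHED_PRIORITY_COUNT, 64, 2,
 *			     msecs_to_jiffies(500), NULL, NULL,
 *			     "my-ring", dev);
 *
 * Passing NULL for @submit_wq and @timeout_wq makes the scheduler allocate an
 * ordered submission workqueue of its own and fall back to system_wq for the
 * timeout work.
 */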
1232 
1233 /**
1234  * drm_sched_fini - Destroy a gpu scheduler
1235  *
1236  * @sched: scheduler instance
1237  *
1238  * Tears down and cleans up the scheduler.
1239  */
1240 void drm_sched_fini(struct drm_gpu_scheduler *sched)
1241 {
1242 	struct drm_sched_entity *s_entity;
1243 	int i;
1244 
1245 	drm_sched_wqueue_stop(sched);
1246 
1247 	for (i = sched->num_rqs - 1; i >= DRM_SCHED_PRIORITY_MIN; i--) {
1248 		struct drm_sched_rq *rq = sched->sched_rq[i];
1249 
1250 		spin_lock(&rq->lock);
1251 		list_for_each_entry(s_entity, &rq->entities, list)
1252 			/*
1253 			 * Prevents reinsertion and marks job_queue as idle,
1254 			 * it will be removed from the rq in
1255 			 * drm_sched_entity_fini() eventually
1256 			 */
1257 			s_entity->stopped = true;
1258 		spin_unlock(&rq->lock);
1259 		kfree(sched->sched_rq[i]);
1260 	}
1261 
1262 	/* Wakeup everyone stuck in drm_sched_entity_flush for this scheduler */
1263 	wake_up_all(&sched->job_scheduled);
1264 
1265 	/* Confirm no work left behind accessing device structures */
1266 	cancel_delayed_work_sync(&sched->work_tdr);
1267 
1268 	if (sched->own_submit_wq)
1269 		destroy_workqueue(sched->submit_wq);
1270 	sched->ready = false;
1271 	kfree(sched->sched_rq);
1272 	sched->sched_rq = NULL;
1273 }
1274 EXPORT_SYMBOL(drm_sched_fini);
1275 
1276 /**
1277  * drm_sched_increase_karma - Update sched_entity guilty flag
1278  *
1279  * @bad: The job guilty of time out
1280  *
1281  * Increment on every hang caused by the 'bad' job. If this exceeds the hang
1282  * limit of the scheduler then the respective sched entity is marked guilty and
1283  * jobs from it will not be scheduled further
1284  */
1285 void drm_sched_increase_karma(struct drm_sched_job *bad)
1286 {
1287 	int i;
1288 	struct drm_sched_entity *tmp;
1289 	struct drm_sched_entity *entity;
1290 	struct drm_gpu_scheduler *sched = bad->sched;
1291 
1292 	/* don't change @bad's karma if it's from the KERNEL RQ, because a GPU
1293 	 * hang can sometimes leave kernel jobs (like VM updating jobs) corrupted,
1294 	 * but keep in mind that kernel jobs are always considered good.
1295 	 */
1296 	if (bad->s_priority != DRM_SCHED_PRIORITY_KERNEL) {
1297 		atomic_inc(&bad->karma);
1298 
1299 		for (i = DRM_SCHED_PRIORITY_MIN;
1300 		     i < min_t(typeof(sched->num_rqs), sched->num_rqs, DRM_SCHED_PRIORITY_KERNEL);
1301 		     i++) {
1302 			struct drm_sched_rq *rq = sched->sched_rq[i];
1303 
1304 			spin_lock(&rq->lock);
1305 			list_for_each_entry_safe(entity, tmp, &rq->entities, list) {
1306 				if (bad->s_fence->scheduled.context ==
1307 				    entity->fence_context) {
1308 					if (entity->guilty)
1309 						atomic_set(entity->guilty, 1);
1310 					break;
1311 				}
1312 			}
1313 			spin_unlock(&rq->lock);
1314 			if (&entity->list != &rq->entities)
1315 				break;
1316 		}
1317 	}
1318 }
1319 EXPORT_SYMBOL(drm_sched_increase_karma);
1320 
1321 /**
1322  * drm_sched_wqueue_ready - Is the scheduler ready for submission
1323  *
1324  * @sched: scheduler instance
1325  *
1326  * Returns true if submission is ready
1327  */
1328 bool drm_sched_wqueue_ready(struct drm_gpu_scheduler *sched)
1329 {
1330 	return sched->ready;
1331 }
1332 EXPORT_SYMBOL(drm_sched_wqueue_ready);
1333 
1334 /**
1335  * drm_sched_wqueue_stop - stop scheduler submission
1336  *
1337  * @sched: scheduler instance
1338  */
1339 void drm_sched_wqueue_stop(struct drm_gpu_scheduler *sched)
1340 {
1341 	WRITE_ONCE(sched->pause_submit, true);
1342 	cancel_work_sync(&sched->work_run_job);
1343 	cancel_work_sync(&sched->work_free_job);
1344 }
1345 EXPORT_SYMBOL(drm_sched_wqueue_stop);
1346 
1347 /**
1348  * drm_sched_wqueue_start - start scheduler submission
1349  *
1350  * @sched: scheduler instance
1351  */
1352 void drm_sched_wqueue_start(struct drm_gpu_scheduler *sched)
1353 {
1354 	WRITE_ONCE(sched->pause_submit, false);
1355 	queue_work(sched->submit_wq, &sched->work_run_job);
1356 	queue_work(sched->submit_wq, &sched->work_free_job);
1357 }
1358 EXPORT_SYMBOL(drm_sched_wqueue_start);
1359