1 : /*
2 : * Copyright 2015 Advanced Micro Devices, Inc.
3 : *
4 : * Permission is hereby granted, free of charge, to any person obtaining a
5 : * copy of this software and associated documentation files (the "Software"),
6 : * to deal in the Software without restriction, including without limitation
7 : * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 : * and/or sell copies of the Software, and to permit persons to whom the
9 : * Software is furnished to do so, subject to the following conditions:
10 : *
11 : * The above copyright notice and this permission notice shall be included in
12 : * all copies or substantial portions of the Software.
13 : *
14 : * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 : * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 : * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 : * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 : * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 : * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 : * OTHER DEALINGS IN THE SOFTWARE.
21 : *
22 : */
23 :
24 : /**
25 : * DOC: Overview
26 : *
27 : * The GPU scheduler provides entities which allow userspace to push jobs
28 : * into software queues which are then scheduled on a hardware run queue.
29 : * The software queues have a priority among them. The scheduler selects the entities
30 : * from the run queue using a FIFO. The scheduler provides dependency handling
31 : * features among jobs. The driver is supposed to provide callback functions for
32 : * backend operations to the scheduler, such as submitting a job to the hardware run queue,
33 : * returning the dependencies of a job, etc.
34 : *
35 : * The organisation of the scheduler is the following:
36 : *
37 : * 1. Each hw run queue has one scheduler
38 : * 2. Each scheduler has multiple run queues with different priorities
39 : * (e.g., HIGH_HW, HIGH_SW, KERNEL, NORMAL)
40 : * 3. Each scheduler run queue has a queue of entities to schedule
41 : * 4. Entities themselves maintain a queue of jobs that will be scheduled on
42 : * the hardware.
43 : *
44 : * The jobs in an entity are always scheduled in the order in which they were pushed.
45 : */
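/*
 * Editorial example (not part of the original file): a minimal sketch of the
 * driver-side submission flow described above. The foo_* names are
 * hypothetical; the drm_sched_*() calls are the real API implemented in this
 * file and in drm_sched_entity.c.
 *
 *	struct foo_job {
 *		struct drm_sched_job base;
 *		// ... driver-private state ...
 *	};
 *
 *	static int foo_submit(struct foo_job *fjob,
 *			      struct drm_sched_entity *entity, void *owner)
 *	{
 *		int ret;
 *
 *		// Attach the job to a software queue (entity).
 *		ret = drm_sched_job_init(&fjob->base, entity, owner);
 *		if (ret)
 *			return ret;
 *
 *		// Point of no return: initialize the scheduled/finished fences.
 *		drm_sched_job_arm(&fjob->base);
 *
 *		// Queue the job; ops->run_job() is called once it is selected.
 *		drm_sched_entity_push_job(&fjob->base);
 *		return 0;
 *	}
 */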
46 :
47 : #include <linux/kthread.h>
48 : #include <linux/wait.h>
49 : #include <linux/sched.h>
50 : #include <linux/completion.h>
51 : #include <linux/dma-resv.h>
52 : #include <uapi/linux/sched/types.h>
53 :
54 : #include <drm/drm_print.h>
55 : #include <drm/drm_gem.h>
56 : #include <drm/gpu_scheduler.h>
57 : #include <drm/spsc_queue.h>
58 :
59 : #define CREATE_TRACE_POINTS
60 : #include "gpu_scheduler_trace.h"
61 :
62 : #define to_drm_sched_job(sched_job) \
63 : container_of((sched_job), struct drm_sched_job, queue_node)
64 :
65 : /**
66 : * drm_sched_rq_init - initialize a given run queue struct
67 : *
68 : * @sched: scheduler instance to associate with this run queue
69 : * @rq: scheduler run queue
70 : *
71 : * Initializes a scheduler runqueue.
72 : */
73 : static void drm_sched_rq_init(struct drm_gpu_scheduler *sched,
74 : struct drm_sched_rq *rq)
75 : {
76 0 : spin_lock_init(&rq->lock);
77 0 : INIT_LIST_HEAD(&rq->entities);
78 0 : rq->current_entity = NULL;
79 0 : rq->sched = sched;
80 : }
81 :
82 : /**
83 : * drm_sched_rq_add_entity - add an entity
84 : *
85 : * @rq: scheduler run queue
86 : * @entity: scheduler entity
87 : *
88 : * Adds a scheduler entity to the run queue.
89 : */
90 0 : void drm_sched_rq_add_entity(struct drm_sched_rq *rq,
91 : struct drm_sched_entity *entity)
92 : {
93 0 : if (!list_empty(&entity->list))
94 : return;
95 0 : spin_lock(&rq->lock);
96 0 : atomic_inc(rq->sched->score);
97 0 : list_add_tail(&entity->list, &rq->entities);
98 0 : spin_unlock(&rq->lock);
99 : }
100 :
101 : /**
102 : * drm_sched_rq_remove_entity - remove an entity
103 : *
104 : * @rq: scheduler run queue
105 : * @entity: scheduler entity
106 : *
107 : * Removes a scheduler entity from the run queue.
108 : */
109 0 : void drm_sched_rq_remove_entity(struct drm_sched_rq *rq,
110 : struct drm_sched_entity *entity)
111 : {
112 0 : if (list_empty(&entity->list))
113 : return;
114 0 : spin_lock(&rq->lock);
115 0 : atomic_dec(rq->sched->score);
116 0 : list_del_init(&entity->list);
117 0 : if (rq->current_entity == entity)
118 0 : rq->current_entity = NULL;
119 0 : spin_unlock(&rq->lock);
120 : }
121 :
122 : /**
123 : * drm_sched_rq_select_entity - Select an entity which could provide a job to run
124 : *
125 : * @rq: scheduler run queue to check.
126 : *
127 : * Try to find a ready entity, returns NULL if none found.
128 : */
129 : static struct drm_sched_entity *
130 0 : drm_sched_rq_select_entity(struct drm_sched_rq *rq)
131 : {
132 : struct drm_sched_entity *entity;
133 :
134 0 : spin_lock(&rq->lock);
135 :
136 0 : entity = rq->current_entity;
137 0 : if (entity) {
138 0 : list_for_each_entry_continue(entity, &rq->entities, list) {
139 0 : if (drm_sched_entity_is_ready(entity)) {
140 0 : rq->current_entity = entity;
141 0 : reinit_completion(&entity->entity_idle);
142 0 : spin_unlock(&rq->lock);
143 0 : return entity;
144 : }
145 : }
146 : }
147 :
148 0 : list_for_each_entry(entity, &rq->entities, list) {
149 :
150 0 : if (drm_sched_entity_is_ready(entity)) {
151 0 : rq->current_entity = entity;
152 0 : reinit_completion(&entity->entity_idle);
153 0 : spin_unlock(&rq->lock);
154 0 : return entity;
155 : }
156 :
157 0 : if (entity == rq->current_entity)
158 : break;
159 : }
160 :
161 0 : spin_unlock(&rq->lock);
162 :
163 0 : return NULL;
164 : }
165 :
166 : /**
167 : * drm_sched_job_done - complete a job
168 : * @s_job: pointer to the job which is done
169 : *
170 : * Finish the job's fence and wake up the worker thread.
171 : */
172 0 : static void drm_sched_job_done(struct drm_sched_job *s_job)
173 : {
174 0 : struct drm_sched_fence *s_fence = s_job->s_fence;
175 0 : struct drm_gpu_scheduler *sched = s_fence->sched;
176 :
177 0 : atomic_dec(&sched->hw_rq_count);
178 0 : atomic_dec(sched->score);
179 :
180 0 : trace_drm_sched_process_job(s_fence);
181 :
182 0 : dma_fence_get(&s_fence->finished);
183 0 : drm_sched_fence_finished(s_fence);
184 0 : dma_fence_put(&s_fence->finished);
185 0 : wake_up_interruptible(&sched->wake_up_worker);
186 0 : }
187 :
188 : /**
189 : * drm_sched_job_done_cb - the callback for a done job
190 : * @f: fence
191 : * @cb: fence callbacks
192 : */
193 0 : static void drm_sched_job_done_cb(struct dma_fence *f, struct dma_fence_cb *cb)
194 : {
195 0 : struct drm_sched_job *s_job = container_of(cb, struct drm_sched_job, cb);
196 :
197 0 : drm_sched_job_done(s_job);
198 0 : }
199 :
200 : /**
201 : * drm_sched_dependency_optimized - test if the dependency can be optimized
202 : *
203 : * @fence: the dependency fence
204 : * @entity: the entity which depends on the above fence
205 : *
206 : * Returns true if the dependency can be optimized and false otherwise
207 : */
208 0 : bool drm_sched_dependency_optimized(struct dma_fence *fence,
209 : struct drm_sched_entity *entity)
210 : {
211 0 : struct drm_gpu_scheduler *sched = entity->rq->sched;
212 : struct drm_sched_fence *s_fence;
213 :
214 0 : if (!fence || dma_fence_is_signaled(fence))
215 : return false;
216 0 : if (fence->context == entity->fence_context)
217 : return true;
218 0 : s_fence = to_drm_sched_fence(fence);
219 0 : if (s_fence && s_fence->sched == sched)
220 : return true;
221 :
222 0 : return false;
223 : }
224 : EXPORT_SYMBOL(drm_sched_dependency_optimized);
225 :
226 : /**
227 : * drm_sched_start_timeout - start timeout for reset worker
228 : *
229 : * @sched: scheduler instance to start the worker for
230 : *
231 : * Start the timeout for the given scheduler.
232 : */
233 0 : static void drm_sched_start_timeout(struct drm_gpu_scheduler *sched)
234 : {
235 0 : if (sched->timeout != MAX_SCHEDULE_TIMEOUT &&
236 0 : !list_empty(&sched->pending_list))
237 0 : queue_delayed_work(sched->timeout_wq, &sched->work_tdr, sched->timeout);
238 0 : }
239 :
240 : /**
241 : * drm_sched_fault - immediately start timeout handler
242 : *
243 : * @sched: scheduler where the timeout handling should be started.
244 : *
245 : * Start timeout handling immediately when the driver detects a hardware fault.
246 : */
247 0 : void drm_sched_fault(struct drm_gpu_scheduler *sched)
248 : {
249 0 : mod_delayed_work(sched->timeout_wq, &sched->work_tdr, 0);
250 0 : }
251 : EXPORT_SYMBOL(drm_sched_fault);
252 :
253 : /**
254 : * drm_sched_suspend_timeout - Suspend scheduler job timeout
255 : *
256 : * @sched: scheduler instance for which to suspend the timeout
257 : *
258 : * Suspend the delayed work timeout for the scheduler. This is done by
259 : * modifying the delayed work timeout to an arbitrarily large value,
260 : * MAX_SCHEDULE_TIMEOUT in this case.
261 : *
262 : * Returns the timeout remaining
263 : *
264 : */
265 0 : unsigned long drm_sched_suspend_timeout(struct drm_gpu_scheduler *sched)
266 : {
267 0 : unsigned long sched_timeout, now = jiffies;
268 :
269 0 : sched_timeout = sched->work_tdr.timer.expires;
270 :
271 : /*
272 : * Modify the timeout to an arbitrarily large value. This also prevents
273 : * the timeout from being restarted when new submissions arrive.
274 : */
275 0 : if (mod_delayed_work(sched->timeout_wq, &sched->work_tdr, MAX_SCHEDULE_TIMEOUT)
276 0 : && time_after(sched_timeout, now))
277 0 : return sched_timeout - now;
278 : else
279 0 : return sched->timeout;
280 : }
281 : EXPORT_SYMBOL(drm_sched_suspend_timeout);
282 :
283 : /**
284 : * drm_sched_resume_timeout - Resume scheduler job timeout
285 : *
286 : * @sched: scheduler instance for which to resume the timeout
287 : * @remaining: remaining timeout
288 : *
289 : * Resume the delayed work timeout for the scheduler.
290 : */
291 0 : void drm_sched_resume_timeout(struct drm_gpu_scheduler *sched,
292 : unsigned long remaining)
293 : {
294 0 : spin_lock(&sched->job_list_lock);
295 :
296 0 : if (list_empty(&sched->pending_list))
297 0 : cancel_delayed_work(&sched->work_tdr);
298 : else
299 0 : mod_delayed_work(sched->timeout_wq, &sched->work_tdr, remaining);
300 :
301 0 : spin_unlock(&sched->job_list_lock);
302 0 : }
303 : EXPORT_SYMBOL(drm_sched_resume_timeout);
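/*
 * Editorial example (not part of the original file): the suspend/resume
 * helpers above are intended to be used as a pair around an operation during
 * which job timeouts must not fire. foo_do_maintenance() is hypothetical.
 *
 *	unsigned long remaining;
 *
 *	remaining = drm_sched_suspend_timeout(sched);
 *	foo_do_maintenance(sched);
 *	drm_sched_resume_timeout(sched, remaining);
 */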
304 :
305 : static void drm_sched_job_begin(struct drm_sched_job *s_job)
306 : {
307 0 : struct drm_gpu_scheduler *sched = s_job->sched;
308 :
309 0 : spin_lock(&sched->job_list_lock);
310 0 : list_add_tail(&s_job->list, &sched->pending_list);
311 0 : drm_sched_start_timeout(sched);
312 0 : spin_unlock(&sched->job_list_lock);
313 : }
314 :
315 0 : static void drm_sched_job_timedout(struct work_struct *work)
316 : {
317 : struct drm_gpu_scheduler *sched;
318 : struct drm_sched_job *job;
319 0 : enum drm_gpu_sched_stat status = DRM_GPU_SCHED_STAT_NOMINAL;
320 :
321 0 : sched = container_of(work, struct drm_gpu_scheduler, work_tdr.work);
322 :
323 : /* Protects against concurrent deletion in drm_sched_get_cleanup_job */
324 0 : spin_lock(&sched->job_list_lock);
325 0 : job = list_first_entry_or_null(&sched->pending_list,
326 : struct drm_sched_job, list);
327 :
328 0 : if (job) {
329 : /*
330 : * Remove the bad job so it cannot be freed by a concurrent
331 : * drm_sched_get_cleanup_job(). It will be reinserted after sched->thread
332 : * is parked, at which point this is safe.
333 : */
334 0 : list_del_init(&job->list);
335 0 : spin_unlock(&sched->job_list_lock);
336 :
337 0 : status = job->sched->ops->timedout_job(job);
338 :
339 : /*
340 : * The guilty job did complete and hence needs to be manually removed;
341 : * see the drm_sched_stop() documentation.
342 : */
343 0 : if (sched->free_guilty) {
344 0 : job->sched->ops->free_job(job);
345 0 : sched->free_guilty = false;
346 : }
347 : } else {
348 0 : spin_unlock(&sched->job_list_lock);
349 : }
350 :
351 0 : if (status != DRM_GPU_SCHED_STAT_ENODEV) {
352 0 : spin_lock(&sched->job_list_lock);
353 0 : drm_sched_start_timeout(sched);
354 0 : spin_unlock(&sched->job_list_lock);
355 : }
356 0 : }
357 :
358 : /**
359 : * drm_sched_increase_karma - Update sched_entity guilty flag
360 : *
361 : * @bad: The job guilty of time out
362 : *
363 : * Increments the karma of the 'bad' job on every hang it causes. If this
364 : * exceeds the hang limit of the scheduler then the respective sched entity is
365 : * marked guilty and jobs from it will not be scheduled further.
366 : */
367 0 : void drm_sched_increase_karma(struct drm_sched_job *bad)
368 : {
369 0 : drm_sched_increase_karma_ext(bad, 1);
370 0 : }
371 : EXPORT_SYMBOL(drm_sched_increase_karma);
372 :
373 0 : void drm_sched_reset_karma(struct drm_sched_job *bad)
374 : {
375 0 : drm_sched_increase_karma_ext(bad, 0);
376 0 : }
377 : EXPORT_SYMBOL(drm_sched_reset_karma);
378 :
379 : /**
380 : * drm_sched_stop - stop the scheduler
381 : *
382 : * @sched: scheduler instance
383 : * @bad: job which caused the time out
384 : *
385 : * Stop the scheduler and also remove and free all completed jobs.
386 : * Note: the bad job will not be freed as it might be used later, so it is the
387 : * caller's responsibility to release it manually if it is no longer part of
388 : * the pending list.
389 : *
390 : */
391 0 : void drm_sched_stop(struct drm_gpu_scheduler *sched, struct drm_sched_job *bad)
392 : {
393 : struct drm_sched_job *s_job, *tmp;
394 :
395 0 : kthread_park(sched->thread);
396 :
397 : /*
398 : * Reinsert back the bad job here - now it's safe as
399 : * drm_sched_get_cleanup_job cannot race against us and release the
400 : * bad job at this point - we parked (waited for) any in progress
401 : * (earlier) cleanups and drm_sched_get_cleanup_job will not be called
402 : * now until the scheduler thread is unparked.
403 : */
404 0 : if (bad && bad->sched == sched)
405 : /*
406 : * Add at the head of the queue to reflect it was the earliest
407 : * job extracted.
408 : */
409 0 : list_add(&bad->list, &sched->pending_list);
410 :
411 : /*
412 : * Iterate the job list from later to earlier and either deactivate the jobs'
413 : * HW callbacks or remove them from the pending list if they have already
414 : * signaled.
415 : * This iteration is thread safe as the sched thread is parked.
416 : */
417 0 : list_for_each_entry_safe_reverse(s_job, tmp, &sched->pending_list,
418 : list) {
419 0 : if (s_job->s_fence->parent &&
420 0 : dma_fence_remove_callback(s_job->s_fence->parent,
421 : &s_job->cb)) {
422 0 : dma_fence_put(s_job->s_fence->parent);
423 0 : s_job->s_fence->parent = NULL;
424 0 : atomic_dec(&sched->hw_rq_count);
425 : } else {
426 : /*
427 : * remove job from pending_list.
428 : * Locking here is for concurrent resume timeout
429 : */
430 0 : spin_lock(&sched->job_list_lock);
431 0 : list_del_init(&s_job->list);
432 0 : spin_unlock(&sched->job_list_lock);
433 :
434 : /*
435 : * Wait for job's HW fence callback to finish using s_job
436 : * before releasing it.
437 : *
438 : * The job is still alive, so the fence refcount is at least 1.
439 : */
440 0 : dma_fence_wait(&s_job->s_fence->finished, false);
441 :
442 : /*
443 : * We must keep the bad job alive for later use during
444 : * recovery by some of the drivers, but leave a hint
445 : * that the guilty job must be released.
446 : */
447 0 : if (bad != s_job)
448 0 : sched->ops->free_job(s_job);
449 : else
450 0 : sched->free_guilty = true;
451 : }
452 : }
453 :
454 : /*
455 : * Stop the pending timer in flight as we rearm it in drm_sched_start. This
456 : * prevents timeout work already in progress from firing right after this
457 : * TDR finishes and before the newly restarted jobs have had a
458 : * chance to complete.
459 : */
460 0 : cancel_delayed_work(&sched->work_tdr);
461 0 : }
462 :
463 : EXPORT_SYMBOL(drm_sched_stop);
464 :
465 : /**
466 : * drm_sched_start - recover jobs after a reset
467 : *
468 : * @sched: scheduler instance
469 : * @full_recovery: proceed with complete sched restart
470 : *
471 : */
472 0 : void drm_sched_start(struct drm_gpu_scheduler *sched, bool full_recovery)
473 : {
474 : struct drm_sched_job *s_job, *tmp;
475 : int r;
476 :
477 : /*
478 : * Locking the list is not required here as the sched thread is parked
479 : * so no new jobs are being inserted or removed. Also, concurrent
480 : * GPU recoveries can't run in parallel.
481 : */
482 0 : list_for_each_entry_safe(s_job, tmp, &sched->pending_list, list) {
483 0 : struct dma_fence *fence = s_job->s_fence->parent;
484 :
485 0 : atomic_inc(&sched->hw_rq_count);
486 :
487 0 : if (!full_recovery)
488 0 : continue;
489 :
490 0 : if (fence) {
491 0 : r = dma_fence_add_callback(fence, &s_job->cb,
492 : drm_sched_job_done_cb);
493 0 : if (r == -ENOENT)
494 0 : drm_sched_job_done(s_job);
495 0 : else if (r)
496 0 : DRM_DEV_ERROR(sched->dev, "fence add callback failed (%d)\n",
497 : r);
498 : } else
499 0 : drm_sched_job_done(s_job);
500 : }
501 :
502 0 : if (full_recovery) {
503 0 : spin_lock(&sched->job_list_lock);
504 0 : drm_sched_start_timeout(sched);
505 0 : spin_unlock(&sched->job_list_lock);
506 : }
507 :
508 0 : kthread_unpark(sched->thread);
509 0 : }
510 : EXPORT_SYMBOL(drm_sched_start);
511 :
512 : /**
513 : * drm_sched_resubmit_jobs - helper to relaunch jobs from the pending list
514 : *
515 : * @sched: scheduler instance
516 : *
517 : */
518 0 : void drm_sched_resubmit_jobs(struct drm_gpu_scheduler *sched)
519 : {
520 0 : drm_sched_resubmit_jobs_ext(sched, INT_MAX);
521 0 : }
522 : EXPORT_SYMBOL(drm_sched_resubmit_jobs);
523 :
524 : /**
525 : * drm_sched_resubmit_jobs_ext - helper to relaunch a certain number of jobs from the pending list
526 : *
527 : * @sched: scheduler instance
528 : * @max: maximum number of jobs to relaunch
529 : *
530 : */
531 0 : void drm_sched_resubmit_jobs_ext(struct drm_gpu_scheduler *sched, int max)
532 : {
533 : struct drm_sched_job *s_job, *tmp;
534 : uint64_t guilty_context;
535 0 : bool found_guilty = false;
536 : struct dma_fence *fence;
537 0 : int i = 0;
538 :
539 0 : list_for_each_entry_safe(s_job, tmp, &sched->pending_list, list) {
540 0 : struct drm_sched_fence *s_fence = s_job->s_fence;
541 :
542 0 : if (i >= max)
543 : break;
544 :
545 0 : if (!found_guilty && atomic_read(&s_job->karma) > sched->hang_limit) {
546 0 : found_guilty = true;
547 0 : guilty_context = s_job->s_fence->scheduled.context;
548 : }
549 :
550 0 : if (found_guilty && s_job->s_fence->scheduled.context == guilty_context)
551 0 : dma_fence_set_error(&s_fence->finished, -ECANCELED);
552 :
553 0 : fence = sched->ops->run_job(s_job);
554 0 : i++;
555 :
556 0 : if (IS_ERR_OR_NULL(fence)) {
557 0 : if (IS_ERR(fence))
558 0 : dma_fence_set_error(&s_fence->finished, PTR_ERR(fence));
559 :
560 0 : s_job->s_fence->parent = NULL;
561 : } else {
562 :
563 0 : s_job->s_fence->parent = dma_fence_get(fence);
564 :
565 : /* Drop for original kref_init of the fence */
566 : dma_fence_put(fence);
567 : }
568 : }
569 0 : }
570 : EXPORT_SYMBOL(drm_sched_resubmit_jobs_ext);
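/*
 * Editorial example (not part of the original file): the usual recovery
 * sequence a &drm_sched_backend_ops.timedout_job callback builds out of
 * drm_sched_stop(), drm_sched_increase_karma(), drm_sched_resubmit_jobs() and
 * drm_sched_start(). foo_reset_hw() is hypothetical.
 *
 *	static enum drm_gpu_sched_stat foo_timedout_job(struct drm_sched_job *bad)
 *	{
 *		struct drm_gpu_scheduler *sched = bad->sched;
 *
 *		// Park the scheduler thread and detach the HW fence callbacks.
 *		drm_sched_stop(sched, bad);
 *
 *		// Mark the offending entity if it keeps hanging the hardware.
 *		drm_sched_increase_karma(bad);
 *
 *		foo_reset_hw(sched);
 *
 *		// Re-run the jobs that were taken off the hardware ...
 *		drm_sched_resubmit_jobs(sched);
 *
 *		// ... and unpark the thread, rearming the timeout handler.
 *		drm_sched_start(sched, true);
 *
 *		return DRM_GPU_SCHED_STAT_NOMINAL;
 *	}
 */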
571 :
572 : /**
573 : * drm_sched_job_init - init a scheduler job
574 : * @job: scheduler job to init
575 : * @entity: scheduler entity to use
576 : * @owner: job owner for debugging
577 : *
578 : * Refer to drm_sched_entity_push_job() documentation
579 : * for locking considerations.
580 : *
581 : * Drivers must make sure to call drm_sched_job_cleanup() if this function returns
582 : * successfully, even when @job is aborted before drm_sched_job_arm() is called.
583 : *
584 : * WARNING: amdgpu abuses &drm_sched.ready to signal when the hardware
585 : * has died, which can mean that there's no valid runqueue for @entity.
586 : * This function returns -ENOENT in this case (which probably should be -EIO as
587 : * a more meaningful return value).
588 : *
589 : * Returns 0 for success, negative error code otherwise.
590 : */
591 0 : int drm_sched_job_init(struct drm_sched_job *job,
592 : struct drm_sched_entity *entity,
593 : void *owner)
594 : {
595 0 : drm_sched_entity_select_rq(entity);
596 0 : if (!entity->rq)
597 : return -ENOENT;
598 :
599 0 : job->entity = entity;
600 0 : job->s_fence = drm_sched_fence_alloc(entity, owner);
601 0 : if (!job->s_fence)
602 : return -ENOMEM;
603 :
604 0 : INIT_LIST_HEAD(&job->list);
605 :
606 0 : xa_init_flags(&job->dependencies, XA_FLAGS_ALLOC);
607 :
608 0 : return 0;
609 : }
610 : EXPORT_SYMBOL(drm_sched_job_init);
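/*
 * Editorial example (not part of the original file): error unwinding after a
 * successful drm_sched_job_init() but before drm_sched_job_arm(), as required
 * above. foo_lock_objects() and the fjob/fdev names are hypothetical.
 *
 *	ret = drm_sched_job_init(&fjob->base, entity, fdev);
 *	if (ret)
 *		return ret;
 *
 *	ret = foo_lock_objects(fjob);
 *	if (ret) {
 *		// The job was never armed, so only init-time resources exist.
 *		drm_sched_job_cleanup(&fjob->base);
 *		return ret;
 *	}
 */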
611 :
612 : /**
613 : * drm_sched_job_arm - arm a scheduler job for execution
614 : * @job: scheduler job to arm
615 : *
616 : * This arms a scheduler job for execution. Specifically it initializes the
617 : * &drm_sched_job.s_fence of @job, so that it can be attached to struct dma_resv
618 : * or other places that need to track the completion of this job.
619 : *
620 : * Refer to drm_sched_entity_push_job() documentation for locking
621 : * considerations.
622 : *
623 : * This can only be called if drm_sched_job_init() succeeded.
624 : */
625 0 : void drm_sched_job_arm(struct drm_sched_job *job)
626 : {
627 : struct drm_gpu_scheduler *sched;
628 0 : struct drm_sched_entity *entity = job->entity;
629 :
630 0 : BUG_ON(!entity);
631 :
632 0 : sched = entity->rq->sched;
633 :
634 0 : job->sched = sched;
635 0 : job->s_priority = entity->rq - sched->sched_rq;
636 0 : job->id = atomic64_inc_return(&sched->job_id_count);
637 :
638 0 : drm_sched_fence_init(job->s_fence, job->entity);
639 0 : }
640 : EXPORT_SYMBOL(drm_sched_job_arm);
641 :
642 : /**
643 : * drm_sched_job_add_dependency - adds the fence as a job dependency
644 : * @job: scheduler job to add the dependencies to
645 : * @fence: the dma_fence to add to the list of dependencies.
646 : *
647 : * Note that @fence is consumed in both the success and error cases.
648 : *
649 : * Returns:
650 : * 0 on success, or an error on failing to expand the array.
651 : */
652 0 : int drm_sched_job_add_dependency(struct drm_sched_job *job,
653 : struct dma_fence *fence)
654 : {
655 : struct dma_fence *entry;
656 : unsigned long index;
657 0 : u32 id = 0;
658 : int ret;
659 :
660 0 : if (!fence)
661 : return 0;
662 :
663 : /* Deduplicate if we already depend on a fence from the same context.
664 : * This lets the size of the array of deps scale with the number of
665 : * engines involved, rather than the number of BOs.
666 : */
667 0 : xa_for_each(&job->dependencies, index, entry) {
668 0 : if (entry->context != fence->context)
669 0 : continue;
670 :
671 0 : if (dma_fence_is_later(fence, entry)) {
672 0 : dma_fence_put(entry);
673 0 : xa_store(&job->dependencies, index, fence, GFP_KERNEL);
674 : } else {
675 : dma_fence_put(fence);
676 : }
677 : return 0;
678 : }
679 :
680 0 : ret = xa_alloc(&job->dependencies, &id, fence, xa_limit_32b, GFP_KERNEL);
681 0 : if (ret != 0)
682 : dma_fence_put(fence);
683 :
684 : return ret;
685 : }
686 : EXPORT_SYMBOL(drm_sched_job_add_dependency);
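/*
 * Editorial example (not part of the original file): because the fence
 * reference is consumed in both the success and error cases, callers hand
 * over their reference. Pulling the fence from a sync_file fd (via
 * sync_file_get_fence() from <linux/sync_file.h>) is just one possible source.
 *
 *	struct dma_fence *in_fence = sync_file_get_fence(in_fence_fd);
 *
 *	if (!in_fence)
 *		return -EINVAL;
 *
 *	// The reference is handed over to the job, even on failure.
 *	ret = drm_sched_job_add_dependency(job, in_fence);
 *	if (ret)
 *		return ret;
 */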
687 :
688 : /**
689 : * drm_sched_job_add_implicit_dependencies - adds implicit dependencies as job
690 : * dependencies
691 : * @job: scheduler job to add the dependencies to
692 : * @obj: the gem object to add new dependencies from.
693 : * @write: whether the job might write the object (so we need to depend on
694 : * shared fences in the reservation object).
695 : *
696 : * This should be called after drm_gem_lock_reservations() on your array of
697 : * GEM objects used in the job but before updating the reservations with your
698 : * own fences.
699 : *
700 : * Returns:
701 : * 0 on success, or an error on failing to expand the array.
702 : */
703 0 : int drm_sched_job_add_implicit_dependencies(struct drm_sched_job *job,
704 : struct drm_gem_object *obj,
705 : bool write)
706 : {
707 : struct dma_resv_iter cursor;
708 : struct dma_fence *fence;
709 : int ret;
710 :
711 0 : dma_resv_assert_held(obj->resv);
712 :
713 0 : dma_resv_for_each_fence(&cursor, obj->resv, dma_resv_usage_rw(write),
714 : fence) {
715 : /* Make sure to grab an additional ref on the added fence */
716 0 : dma_fence_get(fence);
717 0 : ret = drm_sched_job_add_dependency(job, fence);
718 0 : if (ret) {
719 0 : dma_fence_put(fence);
720 0 : return ret;
721 : }
722 : }
723 : return 0;
724 : }
725 : EXPORT_SYMBOL(drm_sched_job_add_implicit_dependencies);
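/*
 * Editorial example (not part of the original file): gathering implicit
 * dependencies for each GEM object while the reservations are held and before
 * the driver installs its own fences. The objs/nr_objs/is_write names are
 * hypothetical.
 *
 *	ret = drm_gem_lock_reservations(objs, nr_objs, &ticket);
 *	if (ret)
 *		return ret;
 *
 *	for (i = 0; i < nr_objs; i++) {
 *		ret = drm_sched_job_add_implicit_dependencies(job, objs[i],
 *							      is_write);
 *		if (ret)
 *			goto out_unlock;
 *	}
 */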
726 :
727 :
728 : /**
729 : * drm_sched_job_cleanup - clean up scheduler job resources
730 : * @job: scheduler job to clean up
731 : *
732 : * Cleans up the resources allocated with drm_sched_job_init().
733 : *
734 : * Drivers should call this from their error unwind code if @job is aborted
735 : * before drm_sched_job_arm() is called.
736 : *
737 : * After that point of no return @job is committed to be executed by the
738 : * scheduler, and this function should be called from the
739 : * &drm_sched_backend_ops.free_job callback.
740 : */
741 0 : void drm_sched_job_cleanup(struct drm_sched_job *job)
742 : {
743 : struct dma_fence *fence;
744 : unsigned long index;
745 :
746 0 : if (kref_read(&job->s_fence->finished.refcount)) {
747 : /* drm_sched_job_arm() has been called */
748 0 : dma_fence_put(&job->s_fence->finished);
749 : } else {
750 : /* aborted job before committing to run it */
751 0 : drm_sched_fence_free(job->s_fence);
752 : }
753 :
754 0 : job->s_fence = NULL;
755 :
756 0 : xa_for_each(&job->dependencies, index, fence) {
757 0 : dma_fence_put(fence);
758 : }
759 0 : xa_destroy(&job->dependencies);
760 :
761 0 : }
762 : EXPORT_SYMBOL(drm_sched_job_cleanup);
763 :
764 : /**
765 : * drm_sched_ready - is the scheduler ready
766 : *
767 : * @sched: scheduler instance
768 : *
769 : * Return true if we can push more jobs to the hw, otherwise false.
770 : */
771 : static bool drm_sched_ready(struct drm_gpu_scheduler *sched)
772 : {
773 0 : return atomic_read(&sched->hw_rq_count) <
774 0 : sched->hw_submission_limit;
775 : }
776 :
777 : /**
778 : * drm_sched_wakeup - Wake up the scheduler when it is ready
779 : *
780 : * @sched: scheduler instance
781 : *
782 : */
783 0 : void drm_sched_wakeup(struct drm_gpu_scheduler *sched)
784 : {
785 0 : if (drm_sched_ready(sched))
786 0 : wake_up_interruptible(&sched->wake_up_worker);
787 0 : }
788 :
789 : /**
790 : * drm_sched_select_entity - Select next entity to process
791 : *
792 : * @sched: scheduler instance
793 : *
794 : * Returns the entity to process or NULL if none are found.
795 : */
796 : static struct drm_sched_entity *
797 0 : drm_sched_select_entity(struct drm_gpu_scheduler *sched)
798 : {
799 : struct drm_sched_entity *entity;
800 : int i;
801 :
802 0 : if (!drm_sched_ready(sched))
803 : return NULL;
804 :
805 : /* Kernel run queue has higher priority than normal run queue */
806 0 : for (i = DRM_SCHED_PRIORITY_COUNT - 1; i >= DRM_SCHED_PRIORITY_MIN; i--) {
807 0 : entity = drm_sched_rq_select_entity(&sched->sched_rq[i]);
808 0 : if (entity)
809 : break;
810 : }
811 :
812 : return entity;
813 : }
814 :
815 : /**
816 : * drm_sched_get_cleanup_job - fetch the next finished job to be destroyed
817 : *
818 : * @sched: scheduler instance
819 : *
820 : * Returns the next finished job from the pending list (if there is one),
821 : * ready to be destroyed.
822 : */
823 : static struct drm_sched_job *
824 0 : drm_sched_get_cleanup_job(struct drm_gpu_scheduler *sched)
825 : {
826 : struct drm_sched_job *job, *next;
827 :
828 0 : spin_lock(&sched->job_list_lock);
829 :
830 0 : job = list_first_entry_or_null(&sched->pending_list,
831 : struct drm_sched_job, list);
832 :
833 0 : if (job && dma_fence_is_signaled(&job->s_fence->finished)) {
834 : /* remove job from pending_list */
835 0 : list_del_init(&job->list);
836 :
837 : /* cancel this job's TO timer */
838 0 : cancel_delayed_work(&sched->work_tdr);
839 : /* make the scheduled timestamp more accurate */
840 0 : next = list_first_entry_or_null(&sched->pending_list,
841 : typeof(*next), list);
842 :
843 0 : if (next) {
844 0 : next->s_fence->scheduled.timestamp =
845 0 : job->s_fence->finished.timestamp;
846 : /* start TO timer for next job */
847 0 : drm_sched_start_timeout(sched);
848 : }
849 : } else {
850 : job = NULL;
851 : }
852 :
853 0 : spin_unlock(&sched->job_list_lock);
854 :
855 0 : return job;
856 : }
857 :
858 : /**
859 : * drm_sched_pick_best - Get a drm sched from a sched_list with the least load
860 : * @sched_list: list of drm_gpu_schedulers
861 : * @num_sched_list: number of drm_gpu_schedulers in the sched_list
862 : *
863 : * Returns a pointer to the sched with the least load, or NULL if none of the
864 : * drm_gpu_schedulers are ready.
865 : */
866 : struct drm_gpu_scheduler *
867 0 : drm_sched_pick_best(struct drm_gpu_scheduler **sched_list,
868 : unsigned int num_sched_list)
869 : {
870 0 : struct drm_gpu_scheduler *sched, *picked_sched = NULL;
871 : int i;
872 0 : unsigned int min_score = UINT_MAX, num_score;
873 :
874 0 : for (i = 0; i < num_sched_list; ++i) {
875 0 : sched = sched_list[i];
876 :
877 0 : if (!sched->ready) {
878 0 : DRM_WARN("scheduler %s is not ready, skipping",
879 : sched->name);
880 0 : continue;
881 : }
882 :
883 0 : num_score = atomic_read(sched->score);
884 0 : if (num_score < min_score) {
885 0 : min_score = num_score;
886 0 : picked_sched = sched;
887 : }
888 : }
889 :
890 0 : return picked_sched;
891 : }
892 : EXPORT_SYMBOL(drm_sched_pick_best);
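/*
 * Editorial example (not part of the original file): drivers enable this load
 * balancing by passing several schedulers (e.g. one per hardware ring of the
 * same type) to drm_sched_entity_init(); the entity then uses
 * drm_sched_pick_best() when selecting its run queue. FOO_NUM_RINGS and the
 * ring_scheds array are hypothetical.
 *
 *	struct drm_gpu_scheduler *ring_scheds[FOO_NUM_RINGS];
 *
 *	// ... fill ring_scheds[] with the per-ring scheduler instances ...
 *
 *	ret = drm_sched_entity_init(&ctx->entity, DRM_SCHED_PRIORITY_NORMAL,
 *				    ring_scheds, FOO_NUM_RINGS, NULL);
 */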
893 :
894 : /**
895 : * drm_sched_blocked - check if the scheduler is blocked
896 : *
897 : * @sched: scheduler instance
898 : *
899 : * Returns true if blocked, otherwise false.
900 : */
901 : static bool drm_sched_blocked(struct drm_gpu_scheduler *sched)
902 : {
903 0 : if (kthread_should_park()) {
904 0 : kthread_parkme();
905 : return true;
906 : }
907 :
908 : return false;
909 : }
910 :
911 : /**
912 : * drm_sched_main - main scheduler thread
913 : *
914 : * @param: scheduler instance
915 : *
916 : * Returns 0.
917 : */
918 0 : static int drm_sched_main(void *param)
919 : {
920 0 : struct drm_gpu_scheduler *sched = (struct drm_gpu_scheduler *)param;
921 : int r;
922 :
923 0 : sched_set_fifo_low(current);
924 :
925 0 : while (!kthread_should_stop()) {
926 0 : struct drm_sched_entity *entity = NULL;
927 : struct drm_sched_fence *s_fence;
928 : struct drm_sched_job *sched_job;
929 : struct dma_fence *fence;
930 0 : struct drm_sched_job *cleanup_job = NULL;
931 :
932 0 : wait_event_interruptible(sched->wake_up_worker,
933 : (cleanup_job = drm_sched_get_cleanup_job(sched)) ||
934 : (!drm_sched_blocked(sched) &&
935 : (entity = drm_sched_select_entity(sched))) ||
936 : kthread_should_stop());
937 :
938 0 : if (cleanup_job)
939 0 : sched->ops->free_job(cleanup_job);
940 :
941 0 : if (!entity)
942 0 : continue;
943 :
944 0 : sched_job = drm_sched_entity_pop_job(entity);
945 :
946 0 : if (!sched_job) {
947 0 : complete(&entity->entity_idle);
948 0 : continue;
949 : }
950 :
951 0 : s_fence = sched_job->s_fence;
952 :
953 0 : atomic_inc(&sched->hw_rq_count);
954 0 : drm_sched_job_begin(sched_job);
955 :
956 0 : trace_drm_run_job(sched_job, entity);
957 0 : fence = sched->ops->run_job(sched_job);
958 0 : complete(&entity->entity_idle);
959 0 : drm_sched_fence_scheduled(s_fence);
960 :
961 0 : if (!IS_ERR_OR_NULL(fence)) {
962 0 : s_fence->parent = dma_fence_get(fence);
963 : /* Drop for original kref_init of the fence */
964 0 : dma_fence_put(fence);
965 :
966 0 : r = dma_fence_add_callback(fence, &sched_job->cb,
967 : drm_sched_job_done_cb);
968 0 : if (r == -ENOENT)
969 0 : drm_sched_job_done(sched_job);
970 0 : else if (r)
971 0 : DRM_DEV_ERROR(sched->dev, "fence add callback failed (%d)\n",
972 : r);
973 : } else {
974 0 : if (IS_ERR(fence))
975 0 : dma_fence_set_error(&s_fence->finished, PTR_ERR(fence));
976 :
977 0 : drm_sched_job_done(sched_job);
978 : }
979 :
980 0 : wake_up(&sched->job_scheduled);
981 : }
982 0 : return 0;
983 : }
984 :
985 : /**
986 : * drm_sched_init - Init a gpu scheduler instance
987 : *
988 : * @sched: scheduler instance
989 : * @ops: backend operations for this scheduler
990 : * @hw_submission: number of hw submissions that can be in flight
991 : * @hang_limit: number of times to allow a job to hang before dropping it
992 : * @timeout: timeout value in jiffies for the scheduler
993 : * @timeout_wq: workqueue to use for timeout work. If NULL, the system_wq is
994 : * used
995 : * @score: optional score atomic shared with other schedulers
996 : * @name: name used for debugging
* @dev: target &struct device
997 : *
998 : * Return 0 on success, otherwise error code.
999 : */
1000 0 : int drm_sched_init(struct drm_gpu_scheduler *sched,
1001 : const struct drm_sched_backend_ops *ops,
1002 : unsigned hw_submission, unsigned hang_limit,
1003 : long timeout, struct workqueue_struct *timeout_wq,
1004 : atomic_t *score, const char *name, struct device *dev)
1005 : {
1006 : int i, ret;
1007 0 : sched->ops = ops;
1008 0 : sched->hw_submission_limit = hw_submission;
1009 0 : sched->name = name;
1010 0 : sched->timeout = timeout;
1011 0 : sched->timeout_wq = timeout_wq ? : system_wq;
1012 0 : sched->hang_limit = hang_limit;
1013 0 : sched->score = score ? score : &sched->_score;
1014 0 : sched->dev = dev;
1015 0 : for (i = DRM_SCHED_PRIORITY_MIN; i < DRM_SCHED_PRIORITY_COUNT; i++)
1016 0 : drm_sched_rq_init(sched, &sched->sched_rq[i]);
1017 :
1018 0 : init_waitqueue_head(&sched->wake_up_worker);
1019 0 : init_waitqueue_head(&sched->job_scheduled);
1020 0 : INIT_LIST_HEAD(&sched->pending_list);
1021 0 : spin_lock_init(&sched->job_list_lock);
1022 0 : atomic_set(&sched->hw_rq_count, 0);
1023 0 : INIT_DELAYED_WORK(&sched->work_tdr, drm_sched_job_timedout);
1024 0 : atomic_set(&sched->_score, 0);
1025 0 : atomic64_set(&sched->job_id_count, 0);
1026 :
1027 : /* Each scheduler will run on a separate kernel thread */
1028 0 : sched->thread = kthread_run(drm_sched_main, sched, sched->name);
1029 0 : if (IS_ERR(sched->thread)) {
1030 0 : ret = PTR_ERR(sched->thread);
1031 0 : sched->thread = NULL;
1032 0 : DRM_DEV_ERROR(sched->dev, "Failed to create scheduler for %s.\n", name);
1033 0 : return ret;
1034 : }
1035 :
1036 0 : sched->ready = true;
1037 0 : return 0;
1038 : }
1039 : EXPORT_SYMBOL(drm_sched_init);
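/*
 * Editorial example (not part of the original file): a typical
 * drm_sched_init() call for one hardware ring, using a 500 ms job timeout and
 * the default (system_wq) timeout workqueue. The foo_sched_ops and ring/fdev
 * names are hypothetical.
 *
 *	ret = drm_sched_init(&ring->sched, &foo_sched_ops,
 *			     ring->queue_depth, 1,
 *			     msecs_to_jiffies(500), NULL,
 *			     NULL, ring->name, fdev->dev);
 *	if (ret)
 *		return ret;
 */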
1040 :
1041 : /**
1042 : * drm_sched_fini - Destroy a gpu scheduler
1043 : *
1044 : * @sched: scheduler instance
1045 : *
1046 : * Tears down and cleans up the scheduler.
1047 : */
1048 0 : void drm_sched_fini(struct drm_gpu_scheduler *sched)
1049 : {
1050 : struct drm_sched_entity *s_entity;
1051 : int i;
1052 :
1053 0 : if (sched->thread)
1054 0 : kthread_stop(sched->thread);
1055 :
1056 0 : for (i = DRM_SCHED_PRIORITY_COUNT - 1; i >= DRM_SCHED_PRIORITY_MIN; i--) {
1057 0 : struct drm_sched_rq *rq = &sched->sched_rq[i];
1058 :
1059 0 : if (!rq)
1060 0 : continue;
1061 :
1062 0 : spin_lock(&rq->lock);
1063 0 : list_for_each_entry(s_entity, &rq->entities, list)
1064 : /*
1065 : * Prevents reinsertion and marks job_queue as idle,
1066 : * it will be removed from the rq in drm_sched_entity_fini()
1067 : * eventually
1068 : */
1069 0 : s_entity->stopped = true;
1070 0 : spin_unlock(&rq->lock);
1071 :
1072 : }
1073 :
1074 : /* Wakeup everyone stuck in drm_sched_entity_flush for this scheduler */
1075 0 : wake_up_all(&sched->job_scheduled);
1076 :
1077 : /* Confirm no work left behind accessing device structures */
1078 0 : cancel_delayed_work_sync(&sched->work_tdr);
1079 :
1080 0 : sched->ready = false;
1081 0 : }
1082 : EXPORT_SYMBOL(drm_sched_fini);
1083 :
1084 : /**
1085 : * drm_sched_increase_karma_ext - Update sched_entity guilty flag
1086 : *
1087 : * @bad: The job guilty of time out
1088 : * @type: 1 to increase karma, 0 to reset it
1089 : *
1090 : */
1091 0 : void drm_sched_increase_karma_ext(struct drm_sched_job *bad, int type)
1092 : {
1093 : int i;
1094 : struct drm_sched_entity *tmp;
1095 : struct drm_sched_entity *entity;
1096 0 : struct drm_gpu_scheduler *sched = bad->sched;
1097 :
1098 : /* don't change @bad's karma if it's from the KERNEL RQ,
1099 : * because sometimes a GPU hang can corrupt kernel jobs (like VM updating jobs),
1100 : * but keep in mind that kernel jobs are always considered good.
1101 : */
1102 0 : if (bad->s_priority != DRM_SCHED_PRIORITY_KERNEL) {
1103 0 : if (type == 0)
1104 0 : atomic_set(&bad->karma, 0);
1105 0 : else if (type == 1)
1106 0 : atomic_inc(&bad->karma);
1107 :
1108 0 : for (i = DRM_SCHED_PRIORITY_MIN; i < DRM_SCHED_PRIORITY_KERNEL;
1109 0 : i++) {
1110 0 : struct drm_sched_rq *rq = &sched->sched_rq[i];
1111 :
1112 0 : spin_lock(&rq->lock);
1113 0 : list_for_each_entry_safe(entity, tmp, &rq->entities, list) {
1114 0 : if (bad->s_fence->scheduled.context ==
1115 0 : entity->fence_context) {
1116 0 : if (entity->guilty)
1117 0 : atomic_set(entity->guilty, type);
1118 : break;
1119 : }
1120 : }
1121 0 : spin_unlock(&rq->lock);
1122 0 : if (&entity->list != &rq->entities)
1123 : break;
1124 : }
1125 : }
1126 0 : }
1127 : EXPORT_SYMBOL(drm_sched_increase_karma_ext);