Line data Source code
1 : /*
2 : * Copyright 2015 Advanced Micro Devices, Inc.
3 : *
4 : * Permission is hereby granted, free of charge, to any person obtaining a
5 : * copy of this software and associated documentation files (the "Software"),
6 : * to deal in the Software without restriction, including without limitation
7 : * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 : * and/or sell copies of the Software, and to permit persons to whom the
9 : * Software is furnished to do so, subject to the following conditions:
10 : *
11 : * The above copyright notice and this permission notice shall be included in
12 : * all copies or substantial portions of the Software.
13 : *
14 : * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 : * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 : * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 : * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 : * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 : * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 : * OTHER DEALINGS IN THE SOFTWARE.
21 : *
22 : */
23 :
24 : #include <linux/kthread.h>
25 : #include <linux/slab.h>
26 : #include <linux/completion.h>
27 :
28 : #include <drm/drm_print.h>
29 : #include <drm/gpu_scheduler.h>
30 :
31 : #include "gpu_scheduler_trace.h"
32 :
33 : #define to_drm_sched_job(sched_job) \
34 : container_of((sched_job), struct drm_sched_job, queue_node)
35 :
36 : /**
37 : * drm_sched_entity_init - Init a context entity used by the scheduler when
38 : * submitting to the HW ring.
39 : *
40 : * @entity: scheduler entity to init
41 : * @priority: priority of the entity
42 : * @sched_list: the list of drm scheds on which jobs from this
43 : * entity can be submitted
44 : * @num_sched_list: number of drm scheds in sched_list
45 : * @guilty: atomic_t set to 1 when a job on this queue
46 : * is found to be guilty causing a timeout
47 : *
48 : * Note that the &sched_list must have at least one element to schedule the entity.
49 : *
50 : * For changing @priority later on at runtime see
51 : * drm_sched_entity_set_priority(). For changing the set of schedulers
52 : * @sched_list at runtime see drm_sched_entity_modify_sched().
53 : *
54 : * An entity is cleaned up by calling drm_sched_entity_fini(). See also
55 : * drm_sched_entity_destroy().
56 : *
57 : * Returns 0 on success or a negative error code on failure.
58 : */
59 0 : int drm_sched_entity_init(struct drm_sched_entity *entity,
60 : enum drm_sched_priority priority,
61 : struct drm_gpu_scheduler **sched_list,
62 : unsigned int num_sched_list,
63 : atomic_t *guilty)
64 : {
65 0 : if (!(entity && sched_list && (num_sched_list == 0 || sched_list[0])))
66 : return -EINVAL;
67 :
68 0 : memset(entity, 0, sizeof(struct drm_sched_entity));
69 0 : INIT_LIST_HEAD(&entity->list);
70 0 : entity->rq = NULL;
71 0 : entity->guilty = guilty;
72 0 : entity->num_sched_list = num_sched_list;
73 0 : entity->priority = priority;
74 0 : entity->sched_list = num_sched_list > 1 ? sched_list : NULL;
75 0 : entity->last_scheduled = NULL;
76 :
77 0 : if (num_sched_list)
78 0 : entity->rq = &sched_list[0]->sched_rq[entity->priority];
79 :
80 0 : init_completion(&entity->entity_idle);
81 :
82 : /* We start in an idle state. */
83 0 : complete(&entity->entity_idle);
84 :
85 0 : spin_lock_init(&entity->rq_lock);
86 0 : spsc_queue_init(&entity->job_queue);
87 :
88 0 : atomic_set(&entity->fence_seq, 0);
89 0 : entity->fence_context = dma_fence_context_alloc(2);
90 :
91 0 : return 0;
92 : }
93 : EXPORT_SYMBOL(drm_sched_entity_init);
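A minimal usage sketch for the init API documented above; "my_dev", "my_ctx"
and the single "gfx" scheduler are illustrative assumptions, not part of this
file:

	static int my_ctx_init(struct my_dev *mdev, struct my_ctx *ctx)
	{
		/* A single scheduler and no guilty flag; with only one entry
		 * the entity does not keep the sched_list pointer, so a local
		 * variable is fine here. The priority can still be changed
		 * later with drm_sched_entity_set_priority().
		 */
		struct drm_gpu_scheduler *sched = &mdev->gfx_sched;

		return drm_sched_entity_init(&ctx->entity,
					     DRM_SCHED_PRIORITY_NORMAL,
					     &sched, 1, NULL);
	}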
94 :
95 : /**
96 : * drm_sched_entity_modify_sched - Modify sched of an entity
97 : * @entity: scheduler entity to modify
98 : * @sched_list: the list of new drm scheds which will replace
99 : * existing entity->sched_list
100 : * @num_sched_list: number of drm sched in sched_list
101 : *
102 : * Note that this must be called under the same common lock for @entity as
103 : * drm_sched_job_arm() and drm_sched_entity_push_job(), or the driver needs to
104 : * guarantee through some other means that this is never called while new jobs
105 : * can be pushed to @entity.
106 : */
107 0 : void drm_sched_entity_modify_sched(struct drm_sched_entity *entity,
108 : struct drm_gpu_scheduler **sched_list,
109 : unsigned int num_sched_list)
110 : {
111 0 : WARN_ON(!num_sched_list || !sched_list);
112 :
113 0 : entity->sched_list = sched_list;
114 0 : entity->num_sched_list = num_sched_list;
115 0 : }
116 : EXPORT_SYMBOL(drm_sched_entity_modify_sched);
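A sketch of the locking rule from the comment above; the "my_*" names and the
ctx->lock mutex (the same lock taken around drm_sched_job_arm() and
drm_sched_entity_push_job()) are assumptions:

	static void my_ctx_change_scheds(struct my_ctx *ctx,
					 struct drm_gpu_scheduler **scheds,
					 unsigned int num_scheds)
	{
		/* Holding the submission lock guarantees no new job can be
		 * pushed while the scheduler list is swapped.
		 */
		mutex_lock(&ctx->lock);
		drm_sched_entity_modify_sched(&ctx->entity, scheds, num_scheds);
		mutex_unlock(&ctx->lock);
	}

Note that the new sched_list is only stored, not copied, so the array passed
in must stay valid for the remaining lifetime of the entity.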
117 :
118 : static bool drm_sched_entity_is_idle(struct drm_sched_entity *entity)
119 : {
120 0 : rmb(); /* for list_empty to work without lock */
121 :
122 0 : if (list_empty(&entity->list) ||
123 0 : spsc_queue_count(&entity->job_queue) == 0 ||
124 0 : entity->stopped)
125 : return true;
126 :
127 : return false;
128 : }
129 :
130 : /* Return true if entity could provide a job. */
131 0 : bool drm_sched_entity_is_ready(struct drm_sched_entity *entity)
132 : {
133 0 : if (spsc_queue_peek(&entity->job_queue) == NULL)
134 : return false;
135 :
136 0 : if (READ_ONCE(entity->dependency))
137 : return false;
138 :
139 0 : return true;
140 : }
141 :
142 : /**
143 : * drm_sched_entity_flush - Flush a context entity
144 : *
145 : * @entity: scheduler entity
146 : * @timeout: time to wait, in jiffies, for the queue to become empty.
147 : *
148 : * drm_sched_entity_fini() is split into two functions; this first one does the
149 : * waiting, removes the entity from the runqueue and returns an error when the
150 : * process was killed.
151 : *
152 : * Returns the remaining time in jiffies left from the input timeout
153 : */
154 0 : long drm_sched_entity_flush(struct drm_sched_entity *entity, long timeout)
155 : {
156 : struct drm_gpu_scheduler *sched;
157 : struct task_struct *last_user;
158 0 : long ret = timeout;
159 :
160 0 : if (!entity->rq)
161 : return 0;
162 :
163 0 : sched = entity->rq->sched;
164 : /*
165 : * The client will not queue more IBs during this fini: consume the existing
166 : * queued IBs or discard them on SIGKILL.
167 : */
168 0 : if (current->flags & PF_EXITING) {
169 0 : if (timeout)
170 0 : ret = wait_event_timeout(
171 : sched->job_scheduled,
172 : drm_sched_entity_is_idle(entity),
173 : timeout);
174 : } else {
175 0 : wait_event_killable(sched->job_scheduled,
176 : drm_sched_entity_is_idle(entity));
177 : }
178 :
179 : /* For a killed process, disable enqueueing of any more IBs right now */
180 0 : last_user = cmpxchg(&entity->last_user, current->group_leader, NULL);
181 0 : if ((!last_user || last_user == current->group_leader) &&
182 0 : (current->flags & PF_EXITING) && (current->exit_code == SIGKILL)) {
183 0 : spin_lock(&entity->rq_lock);
184 0 : entity->stopped = true;
185 0 : drm_sched_rq_remove_entity(entity->rq, entity);
186 0 : spin_unlock(&entity->rq_lock);
187 : }
188 :
189 : return ret;
190 : }
191 : EXPORT_SYMBOL(drm_sched_entity_flush);
192 :
193 0 : static void drm_sched_entity_kill_jobs_irq_work(struct irq_work *wrk)
194 : {
195 0 : struct drm_sched_job *job = container_of(wrk, typeof(*job), work);
196 :
197 0 : drm_sched_fence_finished(job->s_fence);
198 0 : WARN_ON(job->s_fence->parent);
199 0 : job->sched->ops->free_job(job);
200 0 : }
201 :
202 :
203 : /* Signal the scheduler finished fence when the entity in question is killed. */
204 0 : static void drm_sched_entity_kill_jobs_cb(struct dma_fence *f,
205 : struct dma_fence_cb *cb)
206 : {
207 0 : struct drm_sched_job *job = container_of(cb, struct drm_sched_job,
208 : finish_cb);
209 :
210 0 : init_irq_work(&job->work, drm_sched_entity_kill_jobs_irq_work);
211 0 : irq_work_queue(&job->work);
212 0 : }
213 :
214 : static struct dma_fence *
215 0 : drm_sched_job_dependency(struct drm_sched_job *job,
216 : struct drm_sched_entity *entity)
217 : {
218 0 : if (!xa_empty(&job->dependencies))
219 0 : return xa_erase(&job->dependencies, job->last_dependency++);
220 :
221 0 : if (job->sched->ops->dependency)
222 0 : return job->sched->ops->dependency(job, entity);
223 :
224 : return NULL;
225 : }
226 :
227 0 : static void drm_sched_entity_kill_jobs(struct drm_sched_entity *entity)
228 : {
229 : struct drm_sched_job *job;
230 : struct dma_fence *f;
231 : int r;
232 :
233 0 : while ((job = to_drm_sched_job(spsc_queue_pop(&entity->job_queue)))) {
234 0 : struct drm_sched_fence *s_fence = job->s_fence;
235 :
236 : /* Wait for all dependencies to avoid data corruptions */
237 0 : while ((f = drm_sched_job_dependency(job, entity)))
238 : dma_fence_wait(f, false);
239 :
240 0 : drm_sched_fence_scheduled(s_fence);
241 0 : dma_fence_set_error(&s_fence->finished, -ESRCH);
242 :
243 : /*
244 : * When the pipeline is hung by an older entity, the new entity might
245 : * not even have had a chance to submit its first job to the HW,
246 : * so entity->last_scheduled will remain NULL.
247 : */
248 0 : if (!entity->last_scheduled) {
249 0 : drm_sched_entity_kill_jobs_cb(NULL, &job->finish_cb);
250 0 : continue;
251 : }
252 :
253 0 : r = dma_fence_add_callback(entity->last_scheduled,
254 : &job->finish_cb,
255 : drm_sched_entity_kill_jobs_cb);
256 0 : if (r == -ENOENT)
257 0 : drm_sched_entity_kill_jobs_cb(NULL, &job->finish_cb);
258 0 : else if (r)
259 0 : DRM_ERROR("fence add callback failed (%d)\n", r);
260 : }
261 0 : }
262 :
263 : /**
264 : * drm_sched_entity_fini - Destroy a context entity
265 : *
266 : * @entity: scheduler entity
267 : *
268 : * Cleans up @entity, which has been initialized by drm_sched_entity_init().
269 : *
270 : * If there are potentially jobs still in flight or getting newly queued,
271 : * drm_sched_entity_flush() must be called first. This function then goes over
272 : * the entity and signals all jobs with an error code if the process was killed.
273 : */
274 0 : void drm_sched_entity_fini(struct drm_sched_entity *entity)
275 : {
276 0 : struct drm_gpu_scheduler *sched = NULL;
277 :
278 0 : if (entity->rq) {
279 0 : sched = entity->rq->sched;
280 0 : drm_sched_rq_remove_entity(entity->rq, entity);
281 : }
282 :
283 : /* Consumption of existing IBs wasn't completed. Forcefully
284 : * remove them here.
285 : */
286 0 : if (spsc_queue_count(&entity->job_queue)) {
287 0 : if (sched) {
288 : /*
289 : * Wait for the scheduler thread to go idle to make sure it is not
290 : * processing this entity.
291 : */
292 0 : wait_for_completion(&entity->entity_idle);
293 :
294 : }
295 0 : if (entity->dependency) {
296 0 : dma_fence_remove_callback(entity->dependency,
297 : &entity->cb);
298 0 : dma_fence_put(entity->dependency);
299 0 : entity->dependency = NULL;
300 : }
301 :
302 0 : drm_sched_entity_kill_jobs(entity);
303 : }
304 :
305 0 : dma_fence_put(entity->last_scheduled);
306 0 : entity->last_scheduled = NULL;
307 0 : }
308 : EXPORT_SYMBOL(drm_sched_entity_fini);
309 :
310 : /**
311 : * drm_sched_entity_destroy - Destroy a context entity
312 : * @entity: scheduler entity
313 : *
314 : * Calls drm_sched_entity_flush() and drm_sched_entity_fini() as a
315 : * convenience wrapper.
316 : */
317 0 : void drm_sched_entity_destroy(struct drm_sched_entity *entity)
318 : {
319 0 : drm_sched_entity_flush(entity, MAX_WAIT_SCHED_ENTITY_Q_EMPTY);
320 0 : drm_sched_entity_fini(entity);
321 0 : }
322 : EXPORT_SYMBOL(drm_sched_entity_destroy);
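A sketch of the two teardown paths with a hypothetical my_ctx: most drivers
can use the drm_sched_entity_destroy() wrapper, and only need the explicit
split when driver-specific cleanup has to happen between flushing and
finalizing:

	static void my_ctx_fini(struct my_ctx *ctx)
	{
		/* Flush with the default timeout, then tear down. */
		drm_sched_entity_destroy(&ctx->entity);
	}

	static void my_ctx_fini_split(struct my_ctx *ctx)
	{
		/* Equivalent split form; other cleanup could run between
		 * the two calls.
		 */
		drm_sched_entity_flush(&ctx->entity, MAX_WAIT_SCHED_ENTITY_Q_EMPTY);
		drm_sched_entity_fini(&ctx->entity);
	}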
323 :
324 : /* drm_sched_entity_clear_dep - callback to clear the entity's dependency */
325 0 : static void drm_sched_entity_clear_dep(struct dma_fence *f,
326 : struct dma_fence_cb *cb)
327 : {
328 0 : struct drm_sched_entity *entity =
329 0 : container_of(cb, struct drm_sched_entity, cb);
330 :
331 0 : entity->dependency = NULL;
332 0 : dma_fence_put(f);
333 0 : }
334 :
335 : /*
336 : * drm_sched_entity_wakeup - callback to clear the entity's dependency and
337 : * wake up the scheduler
338 : */
339 0 : static void drm_sched_entity_wakeup(struct dma_fence *f,
340 : struct dma_fence_cb *cb)
341 : {
342 0 : struct drm_sched_entity *entity =
343 0 : container_of(cb, struct drm_sched_entity, cb);
344 :
345 0 : drm_sched_entity_clear_dep(f, cb);
346 0 : drm_sched_wakeup(entity->rq->sched);
347 0 : }
348 :
349 : /**
350 : * drm_sched_entity_set_priority - Sets priority of the entity
351 : *
352 : * @entity: scheduler entity
353 : * @priority: scheduler priority
354 : *
355 : * Update the priority of the runqueues used for the entity.
356 : */
357 0 : void drm_sched_entity_set_priority(struct drm_sched_entity *entity,
358 : enum drm_sched_priority priority)
359 : {
360 0 : spin_lock(&entity->rq_lock);
361 0 : entity->priority = priority;
362 0 : spin_unlock(&entity->rq_lock);
363 0 : }
364 : EXPORT_SYMBOL(drm_sched_entity_set_priority);
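For example, a hypothetical driver ioctl could raise the priority of an
existing context at runtime (my_ctx and the CAP_SYS_NICE policy are
assumptions):

	static int my_ctx_set_high_priority(struct my_ctx *ctx)
	{
		if (!capable(CAP_SYS_NICE))
			return -EPERM;

		drm_sched_entity_set_priority(&ctx->entity,
					      DRM_SCHED_PRIORITY_HIGH);
		return 0;
	}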
365 :
366 : /*
367 : * Add a callback to the current dependency of the entity to wake up the
368 : * scheduler when the entity becomes available.
369 : */
370 0 : static bool drm_sched_entity_add_dependency_cb(struct drm_sched_entity *entity)
371 : {
372 0 : struct drm_gpu_scheduler *sched = entity->rq->sched;
373 0 : struct dma_fence *fence = entity->dependency;
374 : struct drm_sched_fence *s_fence;
375 :
376 0 : if (fence->context == entity->fence_context ||
377 0 : fence->context == entity->fence_context + 1) {
378 : /*
379 : * Fence is a scheduled/finished fence from a job
380 : * which belongs to the same entity; we can ignore
381 : * fences from ourselves.
382 : */
383 0 : dma_fence_put(entity->dependency);
384 : return false;
385 : }
386 :
387 0 : s_fence = to_drm_sched_fence(fence);
388 0 : if (s_fence && s_fence->sched == sched) {
389 :
390 : /*
391 : * Fence is from the same scheduler, only need to wait for
392 : * it to be scheduled
393 : */
394 0 : fence = dma_fence_get(&s_fence->scheduled);
395 0 : dma_fence_put(entity->dependency);
396 0 : entity->dependency = fence;
397 0 : if (!dma_fence_add_callback(fence, &entity->cb,
398 : drm_sched_entity_clear_dep))
399 : return true;
400 :
401 : /* Ignore it when it is already scheduled */
402 : dma_fence_put(fence);
403 : return false;
404 : }
405 :
406 0 : if (!dma_fence_add_callback(entity->dependency, &entity->cb,
407 : drm_sched_entity_wakeup))
408 : return true;
409 :
410 0 : dma_fence_put(entity->dependency);
411 : return false;
412 : }
413 :
414 0 : struct drm_sched_job *drm_sched_entity_pop_job(struct drm_sched_entity *entity)
415 : {
416 : struct drm_sched_job *sched_job;
417 :
418 0 : sched_job = to_drm_sched_job(spsc_queue_peek(&entity->job_queue));
419 0 : if (!sched_job)
420 : return NULL;
421 :
422 0 : while ((entity->dependency =
423 0 : drm_sched_job_dependency(sched_job, entity))) {
424 0 : trace_drm_sched_job_wait_dep(sched_job, entity->dependency);
425 :
426 0 : if (drm_sched_entity_add_dependency_cb(entity))
427 : return NULL;
428 : }
429 :
430 : /* Skip jobs from an entity that was marked guilty */
431 0 : if (entity->guilty && atomic_read(entity->guilty))
432 0 : dma_fence_set_error(&sched_job->s_fence->finished, -ECANCELED);
433 :
434 0 : dma_fence_put(entity->last_scheduled);
435 :
436 0 : entity->last_scheduled = dma_fence_get(&sched_job->s_fence->finished);
437 :
438 : /*
439 : * If the queue is empty we allow drm_sched_entity_select_rq() to
440 : * locklessly access ->last_scheduled. This only works if we set the
441 : * pointer before we dequeue and if we add a write barrier here.
442 : */
443 0 : smp_wmb();
444 :
445 0 : spsc_queue_pop(&entity->job_queue);
446 : return sched_job;
447 : }
448 :
449 0 : void drm_sched_entity_select_rq(struct drm_sched_entity *entity)
450 : {
451 : struct dma_fence *fence;
452 : struct drm_gpu_scheduler *sched;
453 : struct drm_sched_rq *rq;
454 :
455 : /* single possible engine and already selected */
456 0 : if (!entity->sched_list)
457 : return;
458 :
459 : /* queue non-empty, stay on the same engine */
460 0 : if (spsc_queue_count(&entity->job_queue))
461 : return;
462 :
463 : /*
464 : * Only when the queue is empty are we guaranteed that the scheduler
465 : * thread cannot change ->last_scheduled. To enforce ordering we need
466 : * a read barrier here. See drm_sched_entity_pop_job() for the other
467 : * side.
468 : */
469 0 : smp_rmb();
470 :
471 0 : fence = entity->last_scheduled;
472 :
473 : /* stay on the same engine if the previous job hasn't finished */
474 0 : if (fence && !dma_fence_is_signaled(fence))
475 : return;
476 :
477 0 : spin_lock(&entity->rq_lock);
478 0 : sched = drm_sched_pick_best(entity->sched_list, entity->num_sched_list);
479 0 : rq = sched ? &sched->sched_rq[entity->priority] : NULL;
480 0 : if (rq != entity->rq) {
481 0 : drm_sched_rq_remove_entity(entity->rq, entity);
482 0 : entity->rq = rq;
483 : }
484 0 : spin_unlock(&entity->rq_lock);
485 :
486 0 : if (entity->num_sched_list == 1)
487 0 : entity->sched_list = NULL;
488 : }
489 :
490 : /**
491 : * drm_sched_entity_push_job - Submit a job to the entity's job queue
492 : * @sched_job: job to submit
493 : *
494 : * Note: To guarantee that the order of insertion to the queue matches the job's
495 : * fence sequence number, this function should be called with drm_sched_job_arm()
496 : * under the common lock for the struct drm_sched_entity that was set up for
497 : * @sched_job in drm_sched_job_init().
500 : */
501 0 : void drm_sched_entity_push_job(struct drm_sched_job *sched_job)
502 : {
503 0 : struct drm_sched_entity *entity = sched_job->entity;
504 : bool first;
505 :
506 0 : trace_drm_sched_job(sched_job, entity);
507 0 : atomic_inc(entity->rq->sched->score);
508 0 : WRITE_ONCE(entity->last_user, current->group_leader);
509 0 : first = spsc_queue_push(&entity->job_queue, &sched_job->queue_node);
510 :
511 : /* first job wakes up scheduler */
512 0 : if (first) {
513 : /* Add the entity to the run queue */
514 0 : spin_lock(&entity->rq_lock);
515 0 : if (entity->stopped) {
516 0 : spin_unlock(&entity->rq_lock);
517 :
518 0 : DRM_ERROR("Trying to push to a killed entity\n");
519 0 : return;
520 : }
521 0 : drm_sched_rq_add_entity(entity->rq, entity);
522 0 : spin_unlock(&entity->rq_lock);
523 0 : drm_sched_wakeup(entity->rq->sched);
524 : }
525 : }
526 : EXPORT_SYMBOL(drm_sched_entity_push_job);
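A sketch of the submission sequence the note above requires; my_job is a
hypothetical driver job embedding a struct drm_sched_job as "base", and
ctx->lock is the common per-entity submission lock:

	static int my_submit(struct my_ctx *ctx, struct my_job *job)
	{
		int ret;

		ret = drm_sched_job_init(&job->base, &ctx->entity, ctx);
		if (ret)
			return ret;

		/* Dependencies would be added here, e.g. with
		 * drm_sched_job_add_dependency().
		 */

		mutex_lock(&ctx->lock);
		drm_sched_job_arm(&job->base);		/* allocates the fences */
		drm_sched_entity_push_job(&job->base);
		mutex_unlock(&ctx->lock);

		return 0;
	}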
|