LCOV - code coverage report
Current view: top level - drivers/gpu/drm/amd/amdgpu - amdgpu_job.c (source / functions)
Test: coverage.info
Date: 2022-12-09 01:23:36

                    Hit   Total   Coverage
       Lines:         0     137      0.0 %
       Functions:     0      11      0.0 %

          Line data    Source code
       1             : /*
       2             :  * Copyright 2015 Advanced Micro Devices, Inc.
       3             :  *
       4             :  * Permission is hereby granted, free of charge, to any person obtaining a
       5             :  * copy of this software and associated documentation files (the "Software"),
       6             :  * to deal in the Software without restriction, including without limitation
       7             :  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
       8             :  * and/or sell copies of the Software, and to permit persons to whom the
       9             :  * Software is furnished to do so, subject to the following conditions:
      10             :  *
      11             :  * The above copyright notice and this permission notice shall be included in
      12             :  * all copies or substantial portions of the Software.
      13             :  *
      14             :  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
      15             :  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
      16             :  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
      17             :  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
      18             :  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
      19             :  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
      20             :  * OTHER DEALINGS IN THE SOFTWARE.
      21             :  *
      22             :  *
      23             :  */
      24             : #include <linux/kthread.h>
      25             : #include <linux/wait.h>
      26             : #include <linux/sched.h>
      27             : 
      28             : #include <drm/drm_drv.h>
      29             : 
      30             : #include "amdgpu.h"
      31             : #include "amdgpu_trace.h"
      32             : #include "amdgpu_reset.h"
      33             : 
      34           0 : static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job)
      35             : {
      36           0 :         struct amdgpu_ring *ring = to_amdgpu_ring(s_job->sched);
      37           0 :         struct amdgpu_job *job = to_amdgpu_job(s_job);
      38             :         struct amdgpu_task_info ti;
      39           0 :         struct amdgpu_device *adev = ring->adev;
      40             :         int idx;
      41             :         int r;
      42             : 
      43           0 :         if (!drm_dev_enter(adev_to_drm(adev), &idx)) {
      44           0 :                 DRM_INFO("%s - device unplugged skipping recovery on scheduler:%s",
      45             :                          __func__, s_job->sched->name);
      46             : 
      47             :                 /* Effectively the job is aborted as the device is gone */
      48           0 :                 return DRM_GPU_SCHED_STAT_ENODEV;
      49             :         }
      50             : 
      51           0 :         memset(&ti, 0, sizeof(struct amdgpu_task_info));
      52           0 :         adev->job_hang = true;
      53             : 
      54           0 :         if (amdgpu_gpu_recovery &&
      55           0 :             amdgpu_ring_soft_recovery(ring, job->vmid, s_job->s_fence->parent)) {
      56           0 :                 DRM_ERROR("ring %s timeout, but soft recovered\n",
      57             :                           s_job->sched->name);
      58           0 :                 goto exit;
      59             :         }
      60             : 
      61           0 :         amdgpu_vm_get_task_info(ring->adev, job->pasid, &ti);
      62           0 :         DRM_ERROR("ring %s timeout, signaled seq=%u, emitted seq=%u\n",
      63             :                   job->base.sched->name, atomic_read(&ring->fence_drv.last_seq),
      64             :                   ring->fence_drv.sync_seq);
      65           0 :         DRM_ERROR("Process information: process %s pid %d thread %s pid %d\n",
      66             :                   ti.process_name, ti.tgid, ti.task_name, ti.pid);
      67             : 
      68           0 :         if (amdgpu_device_should_recover_gpu(ring->adev)) {
      69             :                 struct amdgpu_reset_context reset_context;
      70           0 :                 memset(&reset_context, 0, sizeof(reset_context));
      71             : 
      72           0 :                 reset_context.method = AMD_RESET_METHOD_NONE;
      73           0 :                 reset_context.reset_req_dev = adev;
      74           0 :                 clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
      75           0 :                 clear_bit(AMDGPU_SKIP_MODE2_RESET, &reset_context.flags);
      76             : 
      77           0 :                 r = amdgpu_device_gpu_recover(ring->adev, job, &reset_context);
      78           0 :                 if (r)
      79           0 :                         DRM_ERROR("GPU Recovery Failed: %d\n", r);
      80             :         } else {
      81           0 :                 drm_sched_suspend_timeout(&ring->sched);
      82           0 :                 if (amdgpu_sriov_vf(adev))
      83           0 :                         adev->virt.tdr_debug = true;
      84             :         }
      85             : 
      86             : exit:
      87           0 :         adev->job_hang = false;
      88           0 :         drm_dev_exit(idx);
      89           0 :         return DRM_GPU_SCHED_STAT_NOMINAL;
      90             : }
      91             : 
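The handler above brackets every hardware access with drm_dev_enter()/drm_dev_exit() so that recovery is skipped once the device has been hot-unplugged. A minimal sketch of that guard pattern, with a hypothetical helper name and the actual recovery work elided:

static enum drm_gpu_sched_stat unplug_guard_sketch(struct drm_device *drm)
{
        int idx;

        /* drm_dev_enter() fails once the device is gone; the job is then
         * reported as aborted instead of being recovered. */
        if (!drm_dev_enter(drm, &idx))
                return DRM_GPU_SCHED_STAT_ENODEV;

        /* ... soft ring recovery or a full GPU reset would run here ... */

        drm_dev_exit(idx);
        return DRM_GPU_SCHED_STAT_NOMINAL;
}
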
      92           0 : int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
      93             :                      struct amdgpu_job **job, struct amdgpu_vm *vm)
      94             : {
      95           0 :         if (num_ibs == 0)
      96             :                 return -EINVAL;
      97             : 
      98           0 :         *job = kzalloc(struct_size(*job, ibs, num_ibs), GFP_KERNEL);
      99           0 :         if (!*job)
     100             :                 return -ENOMEM;
     101             : 
     102             :         /*
     103             :          * Initialize the scheduler to at least some ring so that we always
     104             :          * have a pointer to adev.
     105             :          */
     106           0 :         (*job)->base.sched = &adev->rings[0]->sched;
     107           0 :         (*job)->vm = vm;
     108           0 :         (*job)->num_ibs = num_ibs;
     109             : 
     110           0 :         amdgpu_sync_create(&(*job)->sync);
     111           0 :         amdgpu_sync_create(&(*job)->sched_sync);
     112           0 :         (*job)->vram_lost_counter = atomic_read(&adev->vram_lost_counter);
     113           0 :         (*job)->vm_pd_addr = AMDGPU_BO_INVALID_OFFSET;
     114             : 
     115           0 :         return 0;
     116             : }
     117             : 
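A hypothetical caller of amdgpu_job_alloc() (not part of this file) might look like the sketch below; adev and vm are assumed to be initialized elsewhere, and the IBs would still have to be set up with amdgpu_ib_get() before the job could be submitted:

static int alloc_two_ib_job_sketch(struct amdgpu_device *adev,
                                   struct amdgpu_vm *vm)
{
        struct amdgpu_job *job;
        int r;

        r = amdgpu_job_alloc(adev, 2, &job, vm);
        if (r)
                return r;

        /* ... set up job->ibs[0] and job->ibs[1], then submit ... */

        amdgpu_job_free(job);   /* drop the job again if nothing was submitted */
        return 0;
}
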
     118           0 : int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, unsigned size,
     119             :                 enum amdgpu_ib_pool_type pool_type,
     120             :                 struct amdgpu_job **job)
     121             : {
     122             :         int r;
     123             : 
     124           0 :         r = amdgpu_job_alloc(adev, 1, job, NULL);
     125           0 :         if (r)
     126             :                 return r;
     127             : 
     128           0 :         r = amdgpu_ib_get(adev, NULL, size, pool_type, &(*job)->ibs[0]);
     129           0 :         if (r)
     130           0 :                 kfree(*job);
     131             : 
     132             :         return r;
     133             : }
     134             : 
     135           0 : void amdgpu_job_free_resources(struct amdgpu_job *job)
     136             : {
     137           0 :         struct amdgpu_ring *ring = to_amdgpu_ring(job->base.sched);
     138             :         struct dma_fence *f;
     139             :         unsigned i;
     140             : 
     141             :         /* use sched fence if available */
     142           0 :         f = job->base.s_fence ? &job->base.s_fence->finished :  &job->hw_fence;
     143           0 :         for (i = 0; i < job->num_ibs; ++i)
     144           0 :                 amdgpu_ib_free(ring->adev, &job->ibs[i], f);
     145           0 : }
     146             : 
     147           0 : static void amdgpu_job_free_cb(struct drm_sched_job *s_job)
     148             : {
     149           0 :         struct amdgpu_job *job = to_amdgpu_job(s_job);
     150             : 
     151           0 :         drm_sched_job_cleanup(s_job);
     152             : 
     153           0 :         amdgpu_sync_free(&job->sync);
     154           0 :         amdgpu_sync_free(&job->sched_sync);
     155             : 
     156           0 :         dma_fence_put(&job->hw_fence);
     157           0 : }
     158             : 
     159           0 : void amdgpu_job_free(struct amdgpu_job *job)
     160             : {
     161           0 :         amdgpu_job_free_resources(job);
     162           0 :         amdgpu_sync_free(&job->sync);
     163           0 :         amdgpu_sync_free(&job->sched_sync);
     164             : 
     165           0 :         if (!job->hw_fence.ops)
     166           0 :                 kfree(job);
     167             :         else
     168           0 :                 dma_fence_put(&job->hw_fence);
     169           0 : }
     170             : 
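The two release paths above exist because the hardware fence is embedded in the job: once job->hw_fence has been initialized (its ops pointer is set), the job's memory is owned by the fence refcount and must be released through dma_fence_put(); before that, a plain kfree() is correct. Illustratively, and only as a sketch of that ownership model rather than the driver's actual fence ops, the release callback of such an embedded fence frees the job that contains it:

static void embedded_fence_release_sketch(struct dma_fence *f)
{
        /* The fence is embedded, so the containing job is the allocation. */
        struct amdgpu_job *job = container_of(f, struct amdgpu_job, hw_fence);

        kfree(job);
}
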
     171           0 : int amdgpu_job_submit(struct amdgpu_job *job, struct drm_sched_entity *entity,
     172             :                       void *owner, struct dma_fence **f)
     173             : {
     174             :         int r;
     175             : 
     176           0 :         if (!f)
     177             :                 return -EINVAL;
     178             : 
     179           0 :         r = drm_sched_job_init(&job->base, entity, owner);
     180           0 :         if (r)
     181             :                 return r;
     182             : 
     183           0 :         drm_sched_job_arm(&job->base);
     184             : 
     185           0 :         *f = dma_fence_get(&job->base.s_fence->finished);
     186           0 :         amdgpu_job_free_resources(job);
     187           0 :         drm_sched_entity_push_job(&job->base);
     188             : 
     189           0 :         return 0;
     190             : }
     191             : 
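A hedged usage sketch of the scheduled path, with hypothetical parameter sources: after drm_sched_entity_push_job() the scheduler owns the job, so the finished fence returned in *f is the caller's only remaining handle and must eventually be dropped with dma_fence_put():

static int submit_and_wait_sketch(struct amdgpu_job *job,
                                  struct drm_sched_entity *entity, void *owner)
{
        struct dma_fence *f;
        int r;

        r = amdgpu_job_submit(job, entity, owner, &f);
        if (r)
                return r;       /* nothing was pushed; the caller still owns the job */

        dma_fence_wait(f, false);       /* uninterruptible wait for completion */
        dma_fence_put(f);
        return 0;
}
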
     192           0 : int amdgpu_job_submit_direct(struct amdgpu_job *job, struct amdgpu_ring *ring,
     193             :                              struct dma_fence **fence)
     194             : {
     195             :         int r;
     196             : 
     197           0 :         job->base.sched = &ring->sched;
     198           0 :         r = amdgpu_ib_schedule(ring, job->num_ibs, job->ibs, job, fence);
     199             : 
     200           0 :         if (r)
     201             :                 return r;
     202             : 
     203           0 :         amdgpu_job_free(job);
     204           0 :         return 0;
     205             : }
     206             : 
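The direct path bypasses the scheduler and is meant for kernel-internal submissions. A sketch of how it is typically paired with amdgpu_job_alloc_with_ib(), assuming adev and ring are valid and 64 bytes of IB space suffice for the packets being emitted (the helper name is hypothetical):

static int direct_submit_sketch(struct amdgpu_device *adev,
                                struct amdgpu_ring *ring)
{
        struct amdgpu_job *job;
        struct dma_fence *fence;
        int r;

        r = amdgpu_job_alloc_with_ib(adev, 64, AMDGPU_IB_POOL_DIRECT, &job);
        if (r)
                return r;

        /* ... emit packets into job->ibs[0] and set its length_dw ... */

        r = amdgpu_job_submit_direct(job, ring, &fence);
        if (r) {
                amdgpu_job_free(job);   /* submission failed, the job is still ours */
                return r;
        }

        dma_fence_put(fence);   /* drop our reference to the hardware fence */
        return 0;
}
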
     207           0 : static struct dma_fence *amdgpu_job_dependency(struct drm_sched_job *sched_job,
     208             :                                                struct drm_sched_entity *s_entity)
     209             : {
     210           0 :         struct amdgpu_ring *ring = to_amdgpu_ring(s_entity->rq->sched);
     211           0 :         struct amdgpu_job *job = to_amdgpu_job(sched_job);
     212           0 :         struct amdgpu_vm *vm = job->vm;
     213             :         struct dma_fence *fence;
     214             :         int r;
     215             : 
     216           0 :         fence = amdgpu_sync_get_fence(&job->sync);
     217           0 :         if (fence && drm_sched_dependency_optimized(fence, s_entity)) {
     218           0 :                 r = amdgpu_sync_fence(&job->sched_sync, fence);
     219           0 :                 if (r)
     220           0 :                         DRM_ERROR("Error adding fence (%d)\n", r);
     221             :         }
     222             : 
     223           0 :         while (fence == NULL && vm && !job->vmid) {
     224           0 :                 r = amdgpu_vmid_grab(vm, ring, &job->sync,
     225           0 :                                      &job->base.s_fence->finished,
     226             :                                      job);
     227           0 :                 if (r)
     228           0 :                         DRM_ERROR("Error getting VM ID (%d)\n", r);
     229             : 
     230           0 :                 fence = amdgpu_sync_get_fence(&job->sync);
     231             :         }
     232             : 
     233           0 :         return fence;
     234             : }
     235             : 
     236           0 : static struct dma_fence *amdgpu_job_run(struct drm_sched_job *sched_job)
     237             : {
     238           0 :         struct amdgpu_ring *ring = to_amdgpu_ring(sched_job->sched);
     239           0 :         struct dma_fence *fence = NULL, *finished;
     240             :         struct amdgpu_job *job;
     241           0 :         int r = 0;
     242             : 
     243           0 :         job = to_amdgpu_job(sched_job);
     244           0 :         finished = &job->base.s_fence->finished;
     245             : 
     246           0 :         BUG_ON(amdgpu_sync_peek_fence(&job->sync, NULL));
     247             : 
     248           0 :         trace_amdgpu_sched_run_job(job);
     249             : 
     250           0 :         if (job->vram_lost_counter != atomic_read(&ring->adev->vram_lost_counter))
     251           0 :                 dma_fence_set_error(finished, -ECANCELED);/* skip IB as well if VRAM lost */
     252             : 
     253           0 :         if (finished->error < 0) {
     254           0 :                 DRM_INFO("Skip scheduling IBs!\n");
     255             :         } else {
     256           0 :                 r = amdgpu_ib_schedule(ring, job->num_ibs, job->ibs, job,
     257             :                                        &fence);
     258           0 :                 if (r)
     259           0 :                         DRM_ERROR("Error scheduling IBs (%d)\n", r);
     260             :         }
     261             : 
     262           0 :         job->job_run_counter++;
     263           0 :         amdgpu_job_free_resources(job);
     264             : 
     265           0 :         fence = r ? ERR_PTR(r) : fence;
     266           0 :         return fence;
     267             : }
     268             : 
     269             : #define to_drm_sched_job(sched_job)             \
     270             :                 container_of((sched_job), struct drm_sched_job, queue_node)
     271             : 
     272           0 : void amdgpu_job_stop_all_jobs_on_sched(struct drm_gpu_scheduler *sched)
     273             : {
     274             :         struct drm_sched_job *s_job;
     275           0 :         struct drm_sched_entity *s_entity = NULL;
     276             :         int i;
     277             : 
     278             :         /* Signal all jobs not yet scheduled */
     279           0 :         for (i = DRM_SCHED_PRIORITY_COUNT - 1; i >= DRM_SCHED_PRIORITY_MIN; i--) {
     280           0 :                 struct drm_sched_rq *rq = &sched->sched_rq[i];
     281           0 :                 spin_lock(&rq->lock);
     282           0 :                 list_for_each_entry(s_entity, &rq->entities, list) {
     283           0 :                         while ((s_job = to_drm_sched_job(spsc_queue_pop(&s_entity->job_queue)))) {
     284           0 :                                 struct drm_sched_fence *s_fence = s_job->s_fence;
     285             : 
     286           0 :                                 dma_fence_signal(&s_fence->scheduled);
     287           0 :                                 dma_fence_set_error(&s_fence->finished, -EHWPOISON);
     288           0 :                                 dma_fence_signal(&s_fence->finished);
     289             :                         }
     290             :                 }
     291           0 :                 spin_unlock(&rq->lock);
     292             :         }
     293             : 
     294             :         /* Signal all jobs already scheduled to HW */
     295           0 :         list_for_each_entry(s_job, &sched->pending_list, list) {
     296           0 :                 struct drm_sched_fence *s_fence = s_job->s_fence;
     297             : 
     298           0 :                 dma_fence_set_error(&s_fence->finished, -EHWPOISON);
     299           0 :                 dma_fence_signal(&s_fence->finished);
     300             :         }
     301           0 : }
     302             : 
     303             : const struct drm_sched_backend_ops amdgpu_sched_ops = {
     304             :         .dependency = amdgpu_job_dependency,
     305             :         .run_job = amdgpu_job_run,
     306             :         .timedout_job = amdgpu_job_timedout,
     307             :         .free_job = amdgpu_job_free_cb
     308             : };

Generated by: LCOV version 1.14