LCOV - code coverage report
Current view: top level - drivers/gpu/drm/amd/amdgpu - amdgpu_mes.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 0 704 0.0 %
Date: 2022-12-09 01:23:36 Functions: 0 35 0.0 %

          Line data    Source code
       1             : /*
       2             :  * Copyright 2019 Advanced Micro Devices, Inc.
       3             :  *
       4             :  * Permission is hereby granted, free of charge, to any person obtaining a
       5             :  * copy of this software and associated documentation files (the "Software"),
       6             :  * to deal in the Software without restriction, including without limitation
       7             :  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
       8             :  * and/or sell copies of the Software, and to permit persons to whom the
       9             :  * Software is furnished to do so, subject to the following conditions:
      10             :  *
      11             :  * The above copyright notice and this permission notice shall be included in
      12             :  * all copies or substantial portions of the Software.
      13             :  *
      14             :  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
      15             :  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
      16             :  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
      17             :  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
      18             :  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
      19             :  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
      20             :  * OTHER DEALINGS IN THE SOFTWARE.
      21             :  *
      22             :  */
      23             : 
      24             : #include "amdgpu_mes.h"
      25             : #include "amdgpu.h"
      26             : #include "soc15_common.h"
      27             : #include "amdgpu_mes_ctx.h"
      28             : 
      29             : #define AMDGPU_MES_MAX_NUM_OF_QUEUES_PER_PROCESS 1024
      30             : #define AMDGPU_ONE_DOORBELL_SIZE 8
      31             : 
      32           0 : int amdgpu_mes_doorbell_process_slice(struct amdgpu_device *adev)
      33             : {
      34           0 :         return roundup(AMDGPU_ONE_DOORBELL_SIZE *
      35             :                        AMDGPU_MES_MAX_NUM_OF_QUEUES_PER_PROCESS,
      36             :                        PAGE_SIZE);
      37             : }
      38             : 
      39           0 : int amdgpu_mes_alloc_process_doorbells(struct amdgpu_device *adev,
      40             :                                       unsigned int *doorbell_index)
      41             : {
      42           0 :         int r = ida_simple_get(&adev->mes.doorbell_ida, 2,
      43             :                                adev->mes.max_doorbell_slices,
      44             :                                GFP_KERNEL);
      45           0 :         if (r > 0)
      46           0 :                 *doorbell_index = r;
      47             : 
      48           0 :         return r;
      49             : }
      50             : 
      51           0 : void amdgpu_mes_free_process_doorbells(struct amdgpu_device *adev,
      52             :                                       unsigned int doorbell_index)
      53             : {
      54           0 :         if (doorbell_index)
      55           0 :                 ida_simple_remove(&adev->mes.doorbell_ida, doorbell_index);
      56           0 : }
      57             : 
      58           0 : unsigned int amdgpu_mes_get_doorbell_dw_offset_in_bar(
      59             :                                         struct amdgpu_device *adev,
      60             :                                         uint32_t doorbell_index,
      61             :                                         unsigned int doorbell_id)
      62             : {
      63           0 :         return ((doorbell_index *
      64           0 :                 amdgpu_mes_doorbell_process_slice(adev)) / sizeof(u32) +
      65           0 :                 doorbell_id * 2);
      66             : }
      67             : 
      68           0 : static int amdgpu_mes_queue_doorbell_get(struct amdgpu_device *adev,
      69             :                                          struct amdgpu_mes_process *process,
      70             :                                          int ip_type, uint64_t *doorbell_index)
      71             : {
      72             :         unsigned int offset, found;
      73             : 
      74           0 :         if (ip_type == AMDGPU_RING_TYPE_SDMA) {
      75           0 :                 offset = adev->doorbell_index.sdma_engine[0];
      76           0 :                 found = find_next_zero_bit(process->doorbell_bitmap,
      77             :                                            AMDGPU_MES_MAX_NUM_OF_QUEUES_PER_PROCESS,
      78             :                                            offset);
      79             :         } else {
      80           0 :                 found = find_first_zero_bit(process->doorbell_bitmap,
      81             :                                             AMDGPU_MES_MAX_NUM_OF_QUEUES_PER_PROCESS);
      82             :         }
      83             : 
      84           0 :         if (found >= AMDGPU_MES_MAX_NUM_OF_QUEUES_PER_PROCESS) {
      85           0 :                 DRM_WARN("No doorbell available\n");
      86             :                 return -ENOSPC;
      87             :         }
      88             : 
      89           0 :         set_bit(found, process->doorbell_bitmap);
      90             : 
      91           0 :         *doorbell_index = amdgpu_mes_get_doorbell_dw_offset_in_bar(adev,
      92             :                                 process->doorbell_index, found);
      93             : 
      94             :         return 0;
      95             : }
      96             : 
      97           0 : static void amdgpu_mes_queue_doorbell_free(struct amdgpu_device *adev,
      98             :                                            struct amdgpu_mes_process *process,
      99             :                                            uint32_t doorbell_index)
     100             : {
     101             :         unsigned int old, doorbell_id;
     102             : 
     103           0 :         doorbell_id = doorbell_index -
     104           0 :                 (process->doorbell_index *
     105           0 :                  amdgpu_mes_doorbell_process_slice(adev)) / sizeof(u32);
     106           0 :         doorbell_id /= 2;
     107             : 
     108           0 :         old = test_and_clear_bit(doorbell_id, process->doorbell_bitmap);
     109           0 :         WARN_ON(!old);
     110           0 : }
     111             : 
/*
 * Lay out the MES portion of the doorbell aperture:
 *  - a page-aligned region right after the statically assigned doorbells
 *    (adev->doorbell_index.max_assignment) holding one aggregated qword
 *    doorbell per priority level;
 *  - the rest of the aperture split into per-process slices, handed out
 *    later by amdgpu_mes_alloc_process_doorbells().
 *
 * Returns 0 on success, or -ENOSPC when the aperture is too small to
 * hold even one process slice past the reserved area.
 */
static int amdgpu_mes_doorbell_init(struct amdgpu_device *adev)
{
	size_t doorbell_start_offset;
	size_t doorbell_aperture_size;
	size_t doorbell_process_limit;
	size_t aggregated_doorbell_start;
	int i;

	/* first free byte after the already-assigned doorbells, page aligned */
	aggregated_doorbell_start = (adev->doorbell_index.max_assignment + 1) * sizeof(u32);
	aggregated_doorbell_start =
		roundup(aggregated_doorbell_start, PAGE_SIZE);

	/* skip one page for the aggregated doorbells, then align the start
	 * of the per-process area to a whole process slice */
	doorbell_start_offset = aggregated_doorbell_start + PAGE_SIZE;
	doorbell_start_offset =
		roundup(doorbell_start_offset,
			amdgpu_mes_doorbell_process_slice(adev));

	/* only whole slices fit, so round the aperture down */
	doorbell_aperture_size = adev->doorbell.size;
	doorbell_aperture_size =
			rounddown(doorbell_aperture_size,
				  amdgpu_mes_doorbell_process_slice(adev));

	if (doorbell_aperture_size > doorbell_start_offset)
		doorbell_process_limit =
			(doorbell_aperture_size - doorbell_start_offset) /
			amdgpu_mes_doorbell_process_slice(adev);
	else
		return -ENOSPC;

	adev->mes.doorbell_id_offset = doorbell_start_offset / sizeof(u32);
	adev->mes.max_doorbell_slices = doorbell_process_limit;

	/* allocate Qword range for aggregated doorbell */
	for (i = 0; i < AMDGPU_MES_PRIORITY_NUM_LEVELS; i++)
		adev->mes.aggregated_doorbells[i] =
			aggregated_doorbell_start / sizeof(u32) + i * 2;

	DRM_INFO("max_doorbell_slices=%zu\n", doorbell_process_limit);
	return 0;
}
     152             : 
     153           0 : int amdgpu_mes_init(struct amdgpu_device *adev)
     154             : {
     155             :         int i, r;
     156             : 
     157           0 :         adev->mes.adev = adev;
     158             : 
     159           0 :         idr_init(&adev->mes.pasid_idr);
     160           0 :         idr_init(&adev->mes.gang_id_idr);
     161           0 :         idr_init(&adev->mes.queue_id_idr);
     162           0 :         ida_init(&adev->mes.doorbell_ida);
     163           0 :         spin_lock_init(&adev->mes.queue_id_lock);
     164           0 :         spin_lock_init(&adev->mes.ring_lock);
     165           0 :         mutex_init(&adev->mes.mutex_hidden);
     166             : 
     167           0 :         adev->mes.total_max_queue = AMDGPU_FENCE_MES_QUEUE_ID_MASK;
     168           0 :         adev->mes.vmid_mask_mmhub = 0xffffff00;
     169           0 :         adev->mes.vmid_mask_gfxhub = 0xffffff00;
     170             : 
     171           0 :         for (i = 0; i < AMDGPU_MES_MAX_COMPUTE_PIPES; i++) {
     172             :                 /* use only 1st MEC pipes */
     173           0 :                 if (i >= 4)
     174           0 :                         continue;
     175           0 :                 adev->mes.compute_hqd_mask[i] = 0xc;
     176             :         }
     177             : 
     178           0 :         for (i = 0; i < AMDGPU_MES_MAX_GFX_PIPES; i++)
     179           0 :                 adev->mes.gfx_hqd_mask[i] = i ? 0 : 0xfffffffe;
     180             : 
     181           0 :         for (i = 0; i < AMDGPU_MES_MAX_SDMA_PIPES; i++) {
     182           0 :                 if (adev->ip_versions[SDMA0_HWIP][0] < IP_VERSION(6, 0, 0))
     183           0 :                         adev->mes.sdma_hqd_mask[i] = i ? 0 : 0x3fc;
     184             :                 /* zero sdma_hqd_mask for non-existent engine */
     185           0 :                 else if (adev->sdma.num_instances == 1)
     186           0 :                         adev->mes.sdma_hqd_mask[i] = i ? 0 : 0xfc;
     187             :                 else
     188           0 :                         adev->mes.sdma_hqd_mask[i] = 0xfc;
     189             :         }
     190             : 
     191           0 :         r = amdgpu_device_wb_get(adev, &adev->mes.sch_ctx_offs);
     192           0 :         if (r) {
     193           0 :                 dev_err(adev->dev,
     194             :                         "(%d) ring trail_fence_offs wb alloc failed\n", r);
     195           0 :                 goto error_ids;
     196             :         }
     197           0 :         adev->mes.sch_ctx_gpu_addr =
     198           0 :                 adev->wb.gpu_addr + (adev->mes.sch_ctx_offs * 4);
     199           0 :         adev->mes.sch_ctx_ptr =
     200           0 :                 (uint64_t *)&adev->wb.wb[adev->mes.sch_ctx_offs];
     201             : 
     202           0 :         r = amdgpu_device_wb_get(adev, &adev->mes.query_status_fence_offs);
     203           0 :         if (r) {
     204           0 :                 amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs);
     205           0 :                 dev_err(adev->dev,
     206             :                         "(%d) query_status_fence_offs wb alloc failed\n", r);
     207           0 :                 goto error_ids;
     208             :         }
     209           0 :         adev->mes.query_status_fence_gpu_addr =
     210           0 :                 adev->wb.gpu_addr + (adev->mes.query_status_fence_offs * 4);
     211           0 :         adev->mes.query_status_fence_ptr =
     212           0 :                 (uint64_t *)&adev->wb.wb[adev->mes.query_status_fence_offs];
     213             : 
     214           0 :         r = amdgpu_device_wb_get(adev, &adev->mes.read_val_offs);
     215           0 :         if (r) {
     216           0 :                 amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs);
     217           0 :                 amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs);
     218           0 :                 dev_err(adev->dev,
     219             :                         "(%d) read_val_offs alloc failed\n", r);
     220           0 :                 goto error_ids;
     221             :         }
     222           0 :         adev->mes.read_val_gpu_addr =
     223           0 :                 adev->wb.gpu_addr + (adev->mes.read_val_offs * 4);
     224           0 :         adev->mes.read_val_ptr =
     225           0 :                 (uint32_t *)&adev->wb.wb[adev->mes.read_val_offs];
     226             : 
     227           0 :         r = amdgpu_mes_doorbell_init(adev);
     228           0 :         if (r)
     229             :                 goto error;
     230             : 
     231             :         return 0;
     232             : 
     233             : error:
     234           0 :         amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs);
     235           0 :         amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs);
     236           0 :         amdgpu_device_wb_free(adev, adev->mes.read_val_offs);
     237             : error_ids:
     238           0 :         idr_destroy(&adev->mes.pasid_idr);
     239           0 :         idr_destroy(&adev->mes.gang_id_idr);
     240           0 :         idr_destroy(&adev->mes.queue_id_idr);
     241           0 :         ida_destroy(&adev->mes.doorbell_ida);
     242           0 :         mutex_destroy(&adev->mes.mutex_hidden);
     243           0 :         return r;
     244             : }
     245             : 
     246           0 : void amdgpu_mes_fini(struct amdgpu_device *adev)
     247             : {
     248           0 :         amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs);
     249           0 :         amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs);
     250           0 :         amdgpu_device_wb_free(adev, adev->mes.read_val_offs);
     251             : 
     252           0 :         idr_destroy(&adev->mes.pasid_idr);
     253           0 :         idr_destroy(&adev->mes.gang_id_idr);
     254           0 :         idr_destroy(&adev->mes.queue_id_idr);
     255           0 :         ida_destroy(&adev->mes.doorbell_ida);
     256           0 :         mutex_destroy(&adev->mes.mutex_hidden);
     257           0 : }
     258             : 
     259             : static void amdgpu_mes_queue_free_mqd(struct amdgpu_mes_queue *q)
     260             : {
     261           0 :         amdgpu_bo_free_kernel(&q->mqd_obj,
     262           0 :                               &q->mqd_gpu_addr,
     263             :                               &q->mqd_cpu_ptr);
     264             : }
     265             : 
     266           0 : int amdgpu_mes_create_process(struct amdgpu_device *adev, int pasid,
     267             :                               struct amdgpu_vm *vm)
     268             : {
     269             :         struct amdgpu_mes_process *process;
     270             :         int r;
     271             : 
     272             :         /* allocate the mes process buffer */
     273           0 :         process = kzalloc(sizeof(struct amdgpu_mes_process), GFP_KERNEL);
     274           0 :         if (!process) {
     275           0 :                 DRM_ERROR("no more memory to create mes process\n");
     276           0 :                 return -ENOMEM;
     277             :         }
     278             : 
     279           0 :         process->doorbell_bitmap =
     280           0 :                 kzalloc(DIV_ROUND_UP(AMDGPU_MES_MAX_NUM_OF_QUEUES_PER_PROCESS,
     281             :                                      BITS_PER_BYTE), GFP_KERNEL);
     282           0 :         if (!process->doorbell_bitmap) {
     283           0 :                 DRM_ERROR("failed to allocate doorbell bitmap\n");
     284           0 :                 kfree(process);
     285           0 :                 return -ENOMEM;
     286             :         }
     287             : 
     288             :         /* allocate the process context bo and map it */
     289           0 :         r = amdgpu_bo_create_kernel(adev, AMDGPU_MES_PROC_CTX_SIZE, PAGE_SIZE,
     290             :                                     AMDGPU_GEM_DOMAIN_GTT,
     291             :                                     &process->proc_ctx_bo,
     292           0 :                                     &process->proc_ctx_gpu_addr,
     293             :                                     &process->proc_ctx_cpu_ptr);
     294           0 :         if (r) {
     295           0 :                 DRM_ERROR("failed to allocate process context bo\n");
     296           0 :                 goto clean_up_memory;
     297             :         }
     298           0 :         memset(process->proc_ctx_cpu_ptr, 0, AMDGPU_MES_PROC_CTX_SIZE);
     299             : 
     300             :         /*
     301             :          * Avoid taking any other locks under MES lock to avoid circular
     302             :          * lock dependencies.
     303             :          */
     304           0 :         amdgpu_mes_lock(&adev->mes);
     305             : 
     306             :         /* add the mes process to idr list */
     307           0 :         r = idr_alloc(&adev->mes.pasid_idr, process, pasid, pasid + 1,
     308             :                       GFP_KERNEL);
     309           0 :         if (r < 0) {
     310           0 :                 DRM_ERROR("failed to lock pasid=%d\n", pasid);
     311           0 :                 goto clean_up_ctx;
     312             :         }
     313             : 
     314             :         /* allocate the starting doorbell index of the process */
     315           0 :         r = amdgpu_mes_alloc_process_doorbells(adev, &process->doorbell_index);
     316           0 :         if (r < 0) {
     317           0 :                 DRM_ERROR("failed to allocate doorbell for process\n");
     318             :                 goto clean_up_pasid;
     319             :         }
     320             : 
     321           0 :         DRM_DEBUG("process doorbell index = %d\n", process->doorbell_index);
     322             : 
     323           0 :         INIT_LIST_HEAD(&process->gang_list);
     324           0 :         process->vm = vm;
     325           0 :         process->pasid = pasid;
     326           0 :         process->process_quantum = adev->mes.default_process_quantum;
     327           0 :         process->pd_gpu_addr = amdgpu_bo_gpu_offset(vm->root.bo);
     328             : 
     329           0 :         amdgpu_mes_unlock(&adev->mes);
     330           0 :         return 0;
     331             : 
     332             : clean_up_pasid:
     333           0 :         idr_remove(&adev->mes.pasid_idr, pasid);
     334           0 :         amdgpu_mes_unlock(&adev->mes);
     335             : clean_up_ctx:
     336           0 :         amdgpu_bo_free_kernel(&process->proc_ctx_bo,
     337             :                               &process->proc_ctx_gpu_addr,
     338             :                               &process->proc_ctx_cpu_ptr);
     339             : clean_up_memory:
     340           0 :         kfree(process->doorbell_bitmap);
     341           0 :         kfree(process);
     342           0 :         return r;
     343             : }
     344             : 
/*
 * Tear down the MES process bound to @pasid: remove every queue of every
 * gang from the MES hardware scheduler, release the process doorbell
 * slice and pasid entry, then free all memory owned by the process.
 *
 * Runs in two phases: hardware/bookkeeping removal happens under the
 * MES lock; the actual freeing of BOs and structs happens afterwards,
 * outside the lock (see the locking comment below).
 */
void amdgpu_mes_destroy_process(struct amdgpu_device *adev, int pasid)
{
	struct amdgpu_mes_process *process;
	struct amdgpu_mes_gang *gang, *tmp1;
	struct amdgpu_mes_queue *queue, *tmp2;
	struct mes_remove_queue_input queue_input;
	unsigned long flags;
	int r;

	/*
	 * Avoid taking any other locks under MES lock to avoid circular
	 * lock dependencies.
	 */
	amdgpu_mes_lock(&adev->mes);

	process = idr_find(&adev->mes.pasid_idr, pasid);
	if (!process) {
		DRM_WARN("pasid %d doesn't exist\n", pasid);
		amdgpu_mes_unlock(&adev->mes);
		return;
	}

	/* Remove all queues from hardware */
	list_for_each_entry_safe(gang, tmp1, &process->gang_list, list) {
		list_for_each_entry_safe(queue, tmp2, &gang->queue_list, list) {
			/* queue_id_idr is guarded by an irq-safe spinlock */
			spin_lock_irqsave(&adev->mes.queue_id_lock, flags);
			idr_remove(&adev->mes.queue_id_idr, queue->queue_id);
			spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags);

			queue_input.doorbell_offset = queue->doorbell_off;
			queue_input.gang_context_addr = gang->gang_ctx_gpu_addr;

			/* best effort: log and keep going on FW failure */
			r = adev->mes.funcs->remove_hw_queue(&adev->mes,
							     &queue_input);
			if (r)
				DRM_WARN("failed to remove hardware queue\n");
		}

		idr_remove(&adev->mes.gang_id_idr, gang->gang_id);
	}

	amdgpu_mes_free_process_doorbells(adev, process->doorbell_index);
	idr_remove(&adev->mes.pasid_idr, pasid);
	amdgpu_mes_unlock(&adev->mes);

	/* free all memory allocated by the process */
	list_for_each_entry_safe(gang, tmp1, &process->gang_list, list) {
		/* free all queues in the gang */
		list_for_each_entry_safe(queue, tmp2, &gang->queue_list, list) {
			amdgpu_mes_queue_free_mqd(queue);
			list_del(&queue->list);
			kfree(queue);
		}
		amdgpu_bo_free_kernel(&gang->gang_ctx_bo,
				      &gang->gang_ctx_gpu_addr,
				      &gang->gang_ctx_cpu_ptr);
		list_del(&gang->list);
		kfree(gang);

	}
	amdgpu_bo_free_kernel(&process->proc_ctx_bo,
			      &process->proc_ctx_gpu_addr,
			      &process->proc_ctx_cpu_ptr);
	kfree(process->doorbell_bitmap);
	kfree(process);
}
     411             : 
/*
 * Create a new MES gang under the process identified by @pasid.
 *
 * Allocates the gang bookkeeping struct and its GPU context BO, then
 * (under the MES lock) registers the gang in the gang idr and links it
 * into the process gang list.  On success the allocated gang id is
 * returned through @gang_id.
 *
 * Returns 0 on success, -ENOMEM on allocation failure, -EINVAL if the
 * pasid is unknown, or a negative errno from idr_alloc().
 */
int amdgpu_mes_add_gang(struct amdgpu_device *adev, int pasid,
			struct amdgpu_mes_gang_properties *gprops,
			int *gang_id)
{
	struct amdgpu_mes_process *process;
	struct amdgpu_mes_gang *gang;
	int r;

	/* allocate the mes gang buffer */
	gang = kzalloc(sizeof(struct amdgpu_mes_gang), GFP_KERNEL);
	if (!gang) {
		return -ENOMEM;
	}

	/* allocate the gang context bo and map it to cpu space */
	r = amdgpu_bo_create_kernel(adev, AMDGPU_MES_GANG_CTX_SIZE, PAGE_SIZE,
				    AMDGPU_GEM_DOMAIN_GTT,
				    &gang->gang_ctx_bo,
				    &gang->gang_ctx_gpu_addr,
				    &gang->gang_ctx_cpu_ptr);
	if (r) {
		DRM_ERROR("failed to allocate process context bo\n");
		goto clean_up_mem;
	}
	memset(gang->gang_ctx_cpu_ptr, 0, AMDGPU_MES_GANG_CTX_SIZE);

	/*
	 * Avoid taking any other locks under MES lock to avoid circular
	 * lock dependencies.
	 */
	amdgpu_mes_lock(&adev->mes);

	process = idr_find(&adev->mes.pasid_idr, pasid);
	if (!process) {
		DRM_ERROR("pasid %d doesn't exist\n", pasid);
		r = -EINVAL;
		goto clean_up_ctx;
	}

	/* add the mes gang to idr list */
	r = idr_alloc(&adev->mes.gang_id_idr, gang, 1, 0,
		      GFP_KERNEL);
	if (r < 0) {
		DRM_ERROR("failed to allocate idr for gang\n");
		goto clean_up_ctx;
	}

	gang->gang_id = r;
	*gang_id = r;

	INIT_LIST_HEAD(&gang->queue_list);
	gang->process = process;
	gang->priority = gprops->priority;
	/* fall back to the device-wide default quantum when unset */
	gang->gang_quantum = gprops->gang_quantum ?
		gprops->gang_quantum : adev->mes.default_gang_quantum;
	gang->global_priority_level = gprops->global_priority_level;
	gang->inprocess_gang_priority = gprops->inprocess_gang_priority;
	list_add_tail(&gang->list, &process->gang_list);

	amdgpu_mes_unlock(&adev->mes);
	return 0;

clean_up_ctx:
	/* drop the lock first; BO freeing must not happen under it */
	amdgpu_mes_unlock(&adev->mes);
	amdgpu_bo_free_kernel(&gang->gang_ctx_bo,
			      &gang->gang_ctx_gpu_addr,
			      &gang->gang_ctx_cpu_ptr);
clean_up_mem:
	kfree(gang);
	return r;
}
     483             : 
     484           0 : int amdgpu_mes_remove_gang(struct amdgpu_device *adev, int gang_id)
     485             : {
     486             :         struct amdgpu_mes_gang *gang;
     487             : 
     488             :         /*
     489             :          * Avoid taking any other locks under MES lock to avoid circular
     490             :          * lock dependencies.
     491             :          */
     492           0 :         amdgpu_mes_lock(&adev->mes);
     493             : 
     494           0 :         gang = idr_find(&adev->mes.gang_id_idr, gang_id);
     495           0 :         if (!gang) {
     496           0 :                 DRM_ERROR("gang id %d doesn't exist\n", gang_id);
     497           0 :                 amdgpu_mes_unlock(&adev->mes);
     498           0 :                 return -EINVAL;
     499             :         }
     500             : 
     501           0 :         if (!list_empty(&gang->queue_list)) {
     502           0 :                 DRM_ERROR("queue list is not empty\n");
     503           0 :                 amdgpu_mes_unlock(&adev->mes);
     504           0 :                 return -EBUSY;
     505             :         }
     506             : 
     507           0 :         idr_remove(&adev->mes.gang_id_idr, gang->gang_id);
     508           0 :         list_del(&gang->list);
     509           0 :         amdgpu_mes_unlock(&adev->mes);
     510             : 
     511           0 :         amdgpu_bo_free_kernel(&gang->gang_ctx_bo,
     512           0 :                               &gang->gang_ctx_gpu_addr,
     513             :                               &gang->gang_ctx_cpu_ptr);
     514             : 
     515           0 :         kfree(gang);
     516             : 
     517           0 :         return 0;
     518             : }
     519             : 
/*
 * Ask the MES firmware backend to suspend every gang of every
 * registered process.
 *
 * Failures are only logged per gang; the function always returns 0.
 *
 * NOTE(review): @input is passed to suspend_gang() without being
 * initialized here — presumably the backend fills or ignores it;
 * confirm against the mes_suspend_gang_input consumers.
 */
int amdgpu_mes_suspend(struct amdgpu_device *adev)
{
	struct idr *idp;
	struct amdgpu_mes_process *process;
	struct amdgpu_mes_gang *gang;
	struct mes_suspend_gang_input input;
	int r, pasid;

	/*
	 * Avoid taking any other locks under MES lock to avoid circular
	 * lock dependencies.
	 */
	amdgpu_mes_lock(&adev->mes);

	idp = &adev->mes.pasid_idr;

	/* walk every process, then every gang within it */
	idr_for_each_entry(idp, process, pasid) {
		list_for_each_entry(gang, &process->gang_list, list) {
			r = adev->mes.funcs->suspend_gang(&adev->mes, &input);
			if (r)
				DRM_ERROR("failed to suspend pasid %d gangid %d",
					 pasid, gang->gang_id);
		}
	}

	amdgpu_mes_unlock(&adev->mes);
	return 0;
}
     548             : 
/*
 * amdgpu_mes_resume - ask the MES firmware to resume all gangs
 * @adev: amdgpu device pointer
 *
 * Mirror of amdgpu_mes_suspend(): walks every process in the pasid IDR
 * and issues a resume_gang call for each gang.  Failures are logged but
 * do not stop the walk; always returns 0.
 *
 * NOTE(review): as in the suspend path, 'input' is uninitialized and
 * does not identify the iterated gang -- verify against the backend.
 */
int amdgpu_mes_resume(struct amdgpu_device *adev)
{
	struct idr *idp;
	struct amdgpu_mes_process *process;
	struct amdgpu_mes_gang *gang;
	struct mes_resume_gang_input input;
	int r, pasid;

	/*
	 * Avoid taking any other locks under MES lock to avoid circular
	 * lock dependencies.
	 */
	amdgpu_mes_lock(&adev->mes);

	idp = &adev->mes.pasid_idr;

	idr_for_each_entry(idp, process, pasid) {
		list_for_each_entry(gang, &process->gang_list, list) {
			r = adev->mes.funcs->resume_gang(&adev->mes, &input);
			if (r)
				DRM_ERROR("failed to resume pasid %d gangid %d",
					 pasid, gang->gang_id);
		}
	}

	amdgpu_mes_unlock(&adev->mes);
	return 0;
}
     577             : 
/*
 * amdgpu_mes_queue_alloc_mqd - allocate and reserve the MQD buffer of a queue
 * @adev: amdgpu device pointer
 * @q: queue whose mqd_obj/mqd_gpu_addr/mqd_cpu_ptr fields are filled in
 * @p: queue properties; p->queue_type selects the per-type mqd manager
 *
 * Creates a zeroed GTT BO sized for the queue type's MQD and reserves it.
 * On success (return 0) the BO is left RESERVED -- the matching unreserve
 * happens in amdgpu_mes_queue_init_mqd().  On failure the BO is freed and
 * a negative errno is returned.
 */
static int amdgpu_mes_queue_alloc_mqd(struct amdgpu_device *adev,
				     struct amdgpu_mes_queue *q,
				     struct amdgpu_mes_queue_properties *p)
{
	struct amdgpu_mqd *mqd_mgr = &adev->mqds[p->queue_type];
	u32 mqd_size = mqd_mgr->mqd_size;
	int r;

	r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
				    AMDGPU_GEM_DOMAIN_GTT,
				    &q->mqd_obj,
				    &q->mqd_gpu_addr, &q->mqd_cpu_ptr);
	if (r) {
		dev_warn(adev->dev, "failed to create queue mqd bo (%d)", r);
		return r;
	}
	/* start from a clean MQD; init_mqd() only fills what it needs */
	memset(q->mqd_cpu_ptr, 0, mqd_size);

	r = amdgpu_bo_reserve(q->mqd_obj, false);
	if (unlikely(r != 0))
		goto clean_up;

	return 0;

clean_up:
	amdgpu_bo_free_kernel(&q->mqd_obj,
			      &q->mqd_gpu_addr,
			      &q->mqd_cpu_ptr);
	return r;
}
     608             : 
     609           0 : static void amdgpu_mes_queue_init_mqd(struct amdgpu_device *adev,
     610             :                                      struct amdgpu_mes_queue *q,
     611             :                                      struct amdgpu_mes_queue_properties *p)
     612             : {
     613           0 :         struct amdgpu_mqd *mqd_mgr = &adev->mqds[p->queue_type];
     614           0 :         struct amdgpu_mqd_prop mqd_prop = {0};
     615             : 
     616           0 :         mqd_prop.mqd_gpu_addr = q->mqd_gpu_addr;
     617           0 :         mqd_prop.hqd_base_gpu_addr = p->hqd_base_gpu_addr;
     618           0 :         mqd_prop.rptr_gpu_addr = p->rptr_gpu_addr;
     619           0 :         mqd_prop.wptr_gpu_addr = p->wptr_gpu_addr;
     620           0 :         mqd_prop.queue_size = p->queue_size;
     621           0 :         mqd_prop.use_doorbell = true;
     622           0 :         mqd_prop.doorbell_index = p->doorbell_off;
     623           0 :         mqd_prop.eop_gpu_addr = p->eop_gpu_addr;
     624           0 :         mqd_prop.hqd_pipe_priority = p->hqd_pipe_priority;
     625           0 :         mqd_prop.hqd_queue_priority = p->hqd_queue_priority;
     626             :         mqd_prop.hqd_active = false;
     627             : 
     628           0 :         mqd_mgr->init_mqd(adev, q->mqd_cpu_ptr, &mqd_prop);
     629             : 
     630           0 :         amdgpu_bo_unreserve(q->mqd_obj);
     631           0 : }
     632             : 
/*
 * amdgpu_mes_add_hw_queue - create a hardware queue and hand it to MES
 * @adev: amdgpu device pointer
 * @gang_id: IDR id of the gang the queue belongs to
 * @qprops: queue properties; qprops->doorbell_off is filled in here
 * @queue_id: out parameter, receives the allocated queue IDR id
 *
 * Allocates the queue bookkeeping struct and its MQD, registers the queue
 * in the queue_id IDR, grabs a doorbell, initializes the MQD and finally
 * asks the MES firmware (via funcs->add_hw_queue) to schedule it.
 * On success the queue is linked onto the gang's queue_list.
 *
 * Returns 0 on success or a negative errno; on failure everything
 * acquired so far is unwound through the goto-cleanup chain below.
 */
int amdgpu_mes_add_hw_queue(struct amdgpu_device *adev, int gang_id,
			    struct amdgpu_mes_queue_properties *qprops,
			    int *queue_id)
{
	struct amdgpu_mes_queue *queue;
	struct amdgpu_mes_gang *gang;
	struct mes_add_queue_input queue_input;
	unsigned long flags;
	int r;

	/* allocate the mes queue buffer */
	queue = kzalloc(sizeof(struct amdgpu_mes_queue), GFP_KERNEL);
	if (!queue) {
		DRM_ERROR("Failed to allocate memory for queue\n");
		return -ENOMEM;
	}

	/* Allocate the queue mqd */
	r = amdgpu_mes_queue_alloc_mqd(adev, queue, qprops);
	if (r)
		goto clean_up_memory;

	/*
	 * Avoid taking any other locks under MES lock to avoid circular
	 * lock dependencies.
	 */
	amdgpu_mes_lock(&adev->mes);

	gang = idr_find(&adev->mes.gang_id_idr, gang_id);
	if (!gang) {
		DRM_ERROR("gang id %d doesn't exist\n", gang_id);
		r = -EINVAL;
		goto clean_up_mqd;
	}

	/* add the mes gang to idr list */
	spin_lock_irqsave(&adev->mes.queue_id_lock, flags);
	r = idr_alloc(&adev->mes.queue_id_idr, queue, 1, 0,
		      GFP_ATOMIC);
	if (r < 0) {
		spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags);
		goto clean_up_mqd;
	}
	spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags);
	*queue_id = queue->queue_id = r;

	/* allocate a doorbell index for the queue */
	r = amdgpu_mes_queue_doorbell_get(adev, gang->process,
					  qprops->queue_type,
					  &qprops->doorbell_off);
	if (r)
		goto clean_up_queue_id;

	/* initialize the queue mqd */
	amdgpu_mes_queue_init_mqd(adev, queue, qprops);

	/* add hw queue to mes */
	queue_input.process_id = gang->process->pasid;

	/* page table base is expressed as an offset from the VRAM base */
	queue_input.page_table_base_addr =
		adev->vm_manager.vram_base_offset + gang->process->pd_gpu_addr -
		adev->gmc.vram_start;

	queue_input.process_va_start = 0;
	queue_input.process_va_end =
		(adev->vm_manager.max_pfn - 1) << AMDGPU_GPU_PAGE_SHIFT;
	queue_input.process_quantum = gang->process->process_quantum;
	queue_input.process_context_addr = gang->process->proc_ctx_gpu_addr;
	queue_input.gang_quantum = gang->gang_quantum;
	queue_input.gang_context_addr = gang->gang_ctx_gpu_addr;
	queue_input.inprocess_gang_priority = gang->inprocess_gang_priority;
	queue_input.gang_global_priority_level = gang->global_priority_level;
	queue_input.doorbell_offset = qprops->doorbell_off;
	queue_input.mqd_addr = queue->mqd_gpu_addr;
	queue_input.wptr_addr = qprops->wptr_gpu_addr;
	queue_input.wptr_mc_addr = qprops->wptr_mc_addr;
	queue_input.queue_type = qprops->queue_type;
	queue_input.paging = qprops->paging;
	queue_input.is_kfd_process = 0;

	r = adev->mes.funcs->add_hw_queue(&adev->mes, &queue_input);
	if (r) {
		DRM_ERROR("failed to add hardware queue to MES, doorbell=0x%llx\n",
			  qprops->doorbell_off);
		goto clean_up_doorbell;
	}

	DRM_DEBUG("MES hw queue was added, pasid=%d, gang id=%d, "
		  "queue type=%d, doorbell=0x%llx\n",
		  gang->process->pasid, gang_id, qprops->queue_type,
		  qprops->doorbell_off);

	queue->ring = qprops->ring;
	queue->doorbell_off = qprops->doorbell_off;
	queue->wptr_gpu_addr = qprops->wptr_gpu_addr;
	queue->queue_type = qprops->queue_type;
	queue->paging = qprops->paging;
	queue->gang = gang;
	queue->ring->mqd_ptr = queue->mqd_cpu_ptr;
	list_add_tail(&queue->list, &gang->queue_list);

	amdgpu_mes_unlock(&adev->mes);
	return 0;

clean_up_doorbell:
	amdgpu_mes_queue_doorbell_free(adev, gang->process,
				       qprops->doorbell_off);
clean_up_queue_id:
	spin_lock_irqsave(&adev->mes.queue_id_lock, flags);
	idr_remove(&adev->mes.queue_id_idr, queue->queue_id);
	spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags);
clean_up_mqd:
	/* drop MES lock before touching the BO (see locking comment above) */
	amdgpu_mes_unlock(&adev->mes);
	amdgpu_mes_queue_free_mqd(queue);
clean_up_memory:
	kfree(queue);
	return r;
}
     751             : 
/*
 * amdgpu_mes_remove_hw_queue - tear down a hardware queue managed by MES
 * @adev: amdgpu device pointer
 * @queue_id: IDR id returned by amdgpu_mes_add_hw_queue()
 *
 * Looks the queue up in the queue_id IDR (removing it under the IDR
 * spinlock), asks the MES firmware to remove the hardware queue, then
 * releases the doorbell, MQD and queue memory.  A firmware removal
 * failure is only logged -- the driver-side teardown proceeds anyway.
 *
 * Returns 0, or -EINVAL if the queue id does not exist.
 */
int amdgpu_mes_remove_hw_queue(struct amdgpu_device *adev, int queue_id)
{
	unsigned long flags;
	struct amdgpu_mes_queue *queue;
	struct amdgpu_mes_gang *gang;
	struct mes_remove_queue_input queue_input;
	int r;

	/*
	 * Avoid taking any other locks under MES lock to avoid circular
	 * lock dependencies.
	 */
	amdgpu_mes_lock(&adev->mes);

	/* remove the mes gang from idr list */
	spin_lock_irqsave(&adev->mes.queue_id_lock, flags);

	queue = idr_find(&adev->mes.queue_id_idr, queue_id);
	if (!queue) {
		spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags);
		amdgpu_mes_unlock(&adev->mes);
		DRM_ERROR("queue id %d doesn't exist\n", queue_id);
		return -EINVAL;
	}

	idr_remove(&adev->mes.queue_id_idr, queue_id);
	spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags);

	DRM_DEBUG("try to remove queue, doorbell off = 0x%llx\n",
		  queue->doorbell_off);

	gang = queue->gang;
	queue_input.doorbell_offset = queue->doorbell_off;
	queue_input.gang_context_addr = gang->gang_ctx_gpu_addr;

	r = adev->mes.funcs->remove_hw_queue(&adev->mes, &queue_input);
	if (r)
		DRM_ERROR("failed to remove hardware queue, queue id = %d\n",
			  queue_id);

	list_del(&queue->list);
	amdgpu_mes_queue_doorbell_free(adev, gang->process,
				       queue->doorbell_off);
	/* free the MQD only after dropping the MES lock */
	amdgpu_mes_unlock(&adev->mes);

	amdgpu_mes_queue_free_mqd(queue);
	kfree(queue);
	return 0;
}
     801             : 
     802           0 : int amdgpu_mes_unmap_legacy_queue(struct amdgpu_device *adev,
     803             :                                   struct amdgpu_ring *ring,
     804             :                                   enum amdgpu_unmap_queues_action action,
     805             :                                   u64 gpu_addr, u64 seq)
     806             : {
     807             :         struct mes_unmap_legacy_queue_input queue_input;
     808             :         int r;
     809             : 
     810           0 :         queue_input.action = action;
     811           0 :         queue_input.queue_type = ring->funcs->type;
     812           0 :         queue_input.doorbell_offset = ring->doorbell_index;
     813           0 :         queue_input.pipe_id = ring->pipe;
     814           0 :         queue_input.queue_id = ring->queue;
     815           0 :         queue_input.trail_fence_addr = gpu_addr;
     816           0 :         queue_input.trail_fence_data = seq;
     817             : 
     818           0 :         r = adev->mes.funcs->unmap_legacy_queue(&adev->mes, &queue_input);
     819           0 :         if (r)
     820           0 :                 DRM_ERROR("failed to unmap legacy queue\n");
     821             : 
     822           0 :         return r;
     823             : }
     824             : 
     825           0 : uint32_t amdgpu_mes_rreg(struct amdgpu_device *adev, uint32_t reg)
     826             : {
     827             :         struct mes_misc_op_input op_input;
     828           0 :         int r, val = 0;
     829             : 
     830           0 :         op_input.op = MES_MISC_OP_READ_REG;
     831           0 :         op_input.read_reg.reg_offset = reg;
     832           0 :         op_input.read_reg.buffer_addr = adev->mes.read_val_gpu_addr;
     833             : 
     834           0 :         if (!adev->mes.funcs->misc_op) {
     835           0 :                 DRM_ERROR("mes rreg is not supported!\n");
     836           0 :                 goto error;
     837             :         }
     838             : 
     839           0 :         r = adev->mes.funcs->misc_op(&adev->mes, &op_input);
     840           0 :         if (r)
     841           0 :                 DRM_ERROR("failed to read reg (0x%x)\n", reg);
     842             :         else
     843           0 :                 val = *(adev->mes.read_val_ptr);
     844             : 
     845             : error:
     846           0 :         return val;
     847             : }
     848             : 
     849           0 : int amdgpu_mes_wreg(struct amdgpu_device *adev,
     850             :                     uint32_t reg, uint32_t val)
     851             : {
     852             :         struct mes_misc_op_input op_input;
     853             :         int r;
     854             : 
     855           0 :         op_input.op = MES_MISC_OP_WRITE_REG;
     856           0 :         op_input.write_reg.reg_offset = reg;
     857           0 :         op_input.write_reg.reg_value = val;
     858             : 
     859           0 :         if (!adev->mes.funcs->misc_op) {
     860           0 :                 DRM_ERROR("mes wreg is not supported!\n");
     861           0 :                 r = -EINVAL;
     862           0 :                 goto error;
     863             :         }
     864             : 
     865           0 :         r = adev->mes.funcs->misc_op(&adev->mes, &op_input);
     866           0 :         if (r)
     867           0 :                 DRM_ERROR("failed to write reg (0x%x)\n", reg);
     868             : 
     869             : error:
     870           0 :         return r;
     871             : }
     872             : 
     873           0 : int amdgpu_mes_reg_write_reg_wait(struct amdgpu_device *adev,
     874             :                                   uint32_t reg0, uint32_t reg1,
     875             :                                   uint32_t ref, uint32_t mask)
     876             : {
     877             :         struct mes_misc_op_input op_input;
     878             :         int r;
     879             : 
     880           0 :         op_input.op = MES_MISC_OP_WRM_REG_WR_WAIT;
     881           0 :         op_input.wrm_reg.reg0 = reg0;
     882           0 :         op_input.wrm_reg.reg1 = reg1;
     883           0 :         op_input.wrm_reg.ref = ref;
     884           0 :         op_input.wrm_reg.mask = mask;
     885             : 
     886           0 :         if (!adev->mes.funcs->misc_op) {
     887           0 :                 DRM_ERROR("mes reg_write_reg_wait is not supported!\n");
     888           0 :                 r = -EINVAL;
     889           0 :                 goto error;
     890             :         }
     891             : 
     892           0 :         r = adev->mes.funcs->misc_op(&adev->mes, &op_input);
     893           0 :         if (r)
     894           0 :                 DRM_ERROR("failed to reg_write_reg_wait\n");
     895             : 
     896             : error:
     897           0 :         return r;
     898             : }
     899             : 
     900           0 : int amdgpu_mes_reg_wait(struct amdgpu_device *adev, uint32_t reg,
     901             :                         uint32_t val, uint32_t mask)
     902             : {
     903             :         struct mes_misc_op_input op_input;
     904             :         int r;
     905             : 
     906           0 :         op_input.op = MES_MISC_OP_WRM_REG_WAIT;
     907           0 :         op_input.wrm_reg.reg0 = reg;
     908           0 :         op_input.wrm_reg.ref = val;
     909           0 :         op_input.wrm_reg.mask = mask;
     910             : 
     911           0 :         if (!adev->mes.funcs->misc_op) {
     912           0 :                 DRM_ERROR("mes reg wait is not supported!\n");
     913           0 :                 r = -EINVAL;
     914           0 :                 goto error;
     915             :         }
     916             : 
     917           0 :         r = adev->mes.funcs->misc_op(&adev->mes, &op_input);
     918           0 :         if (r)
     919           0 :                 DRM_ERROR("failed to reg_write_reg_wait\n");
     920             : 
     921             : error:
     922           0 :         return r;
     923             : }
     924             : 
/*
 * amdgpu_mes_ring_to_queue_props - derive MES queue properties from a ring
 * @adev: amdgpu device pointer (unused here, kept for call-site symmetry)
 * @ring: source ring
 * @props: destination queue properties
 *
 * Copies ring addresses/sizes into @props and applies the default
 * scheduling priorities (normal pipe priority, minimum queue priority).
 * NOTE(review): wptr_mc_addr is computed as meta_data_mc_addr plus
 * ring->wptr_offs -- presumably wptr_offs is a byte offset into the MES
 * context metadata BO; confirm against amdgpu_mes_ctx_get_offs().
 */
static void
amdgpu_mes_ring_to_queue_props(struct amdgpu_device *adev,
			       struct amdgpu_ring *ring,
			       struct amdgpu_mes_queue_properties *props)
{
	props->queue_type = ring->funcs->type;
	props->hqd_base_gpu_addr = ring->gpu_addr;
	props->rptr_gpu_addr = ring->rptr_gpu_addr;
	props->wptr_gpu_addr = ring->wptr_gpu_addr;
	props->wptr_mc_addr =
		ring->mes_ctx->meta_data_mc_addr + ring->wptr_offs;
	props->queue_size = ring->ring_size;
	props->eop_gpu_addr = ring->eop_gpu_addr;
	props->hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_NORMAL;
	props->hqd_queue_priority = AMDGPU_GFX_QUEUE_PRIORITY_MINIMUM;
	props->paging = false;
	props->ring = ring;
}
     943             : 
/*
 * Helper for amdgpu_mes_ctx_get_offs(): maps an (engine, ring->idx,
 * id_offs) triple to the byte offset of the corresponding slot/ring/
 * ib/padding member inside struct amdgpu_mes_ctx_meta_data.
 * Expands to 'return' statements, so it may only be used inside a
 * function returning the offset; falls through (no return) when
 * id_offs matches none of the known ranges.
 */
#define DEFINE_AMDGPU_MES_CTX_GET_OFFS_ENG(_eng)                        \
do {                                                                    \
       if (id_offs < AMDGPU_MES_CTX_MAX_OFFS)                                \
                return offsetof(struct amdgpu_mes_ctx_meta_data,        \
                                _eng[ring->idx].slots[id_offs]);        \
       else if (id_offs == AMDGPU_MES_CTX_RING_OFFS)                    \
                return offsetof(struct amdgpu_mes_ctx_meta_data,        \
                                _eng[ring->idx].ring);                  \
       else if (id_offs == AMDGPU_MES_CTX_IB_OFFS)                      \
                return offsetof(struct amdgpu_mes_ctx_meta_data,        \
                                _eng[ring->idx].ib);                    \
       else if (id_offs == AMDGPU_MES_CTX_PADDING_OFFS)                 \
                return offsetof(struct amdgpu_mes_ctx_meta_data,        \
                                _eng[ring->idx].padding);               \
} while(0)
     959             : 
/*
 * amdgpu_mes_ctx_get_offs - offset of a context slot in the MES metadata BO
 * @ring: ring selecting the engine type and engine index
 * @id_offs: slot index, or one of the AMDGPU_MES_CTX_*_OFFS selectors
 *
 * Dispatches on the ring type (gfx/compute/sdma) and returns the byte
 * offset of the requested member within struct amdgpu_mes_ctx_meta_data.
 * Returns -EINVAL (with a WARN) for unknown ring types or selectors
 * that fall through the macro above.
 */
int amdgpu_mes_ctx_get_offs(struct amdgpu_ring *ring, unsigned int id_offs)
{
	switch (ring->funcs->type) {
	case AMDGPU_RING_TYPE_GFX:
		DEFINE_AMDGPU_MES_CTX_GET_OFFS_ENG(gfx);
		break;
	case AMDGPU_RING_TYPE_COMPUTE:
		DEFINE_AMDGPU_MES_CTX_GET_OFFS_ENG(compute);
		break;
	case AMDGPU_RING_TYPE_SDMA:
		DEFINE_AMDGPU_MES_CTX_GET_OFFS_ENG(sdma);
		break;
	default:
		break;
	}

	WARN_ON(1);
	return -EINVAL;
}
     979             : 
     980           0 : int amdgpu_mes_add_ring(struct amdgpu_device *adev, int gang_id,
     981             :                         int queue_type, int idx,
     982             :                         struct amdgpu_mes_ctx_data *ctx_data,
     983             :                         struct amdgpu_ring **out)
     984             : {
     985             :         struct amdgpu_ring *ring;
     986             :         struct amdgpu_mes_gang *gang;
     987           0 :         struct amdgpu_mes_queue_properties qprops = {0};
     988             :         int r, queue_id, pasid;
     989             : 
     990             :         /*
     991             :          * Avoid taking any other locks under MES lock to avoid circular
     992             :          * lock dependencies.
     993             :          */
     994           0 :         amdgpu_mes_lock(&adev->mes);
     995           0 :         gang = idr_find(&adev->mes.gang_id_idr, gang_id);
     996           0 :         if (!gang) {
     997           0 :                 DRM_ERROR("gang id %d doesn't exist\n", gang_id);
     998           0 :                 amdgpu_mes_unlock(&adev->mes);
     999           0 :                 return -EINVAL;
    1000             :         }
    1001           0 :         pasid = gang->process->pasid;
    1002             : 
    1003           0 :         ring = kzalloc(sizeof(struct amdgpu_ring), GFP_KERNEL);
    1004           0 :         if (!ring) {
    1005           0 :                 amdgpu_mes_unlock(&adev->mes);
    1006           0 :                 return -ENOMEM;
    1007             :         }
    1008             : 
    1009           0 :         ring->ring_obj = NULL;
    1010           0 :         ring->use_doorbell = true;
    1011           0 :         ring->is_mes_queue = true;
    1012           0 :         ring->mes_ctx = ctx_data;
    1013           0 :         ring->idx = idx;
    1014           0 :         ring->no_scheduler = true;
    1015             : 
    1016           0 :         if (queue_type == AMDGPU_RING_TYPE_COMPUTE) {
    1017           0 :                 int offset = offsetof(struct amdgpu_mes_ctx_meta_data,
    1018             :                                       compute[ring->idx].mec_hpd);
    1019           0 :                 ring->eop_gpu_addr =
    1020           0 :                         amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
    1021             :         }
    1022             : 
    1023           0 :         switch (queue_type) {
    1024             :         case AMDGPU_RING_TYPE_GFX:
    1025           0 :                 ring->funcs = adev->gfx.gfx_ring[0].funcs;
    1026           0 :                 break;
    1027             :         case AMDGPU_RING_TYPE_COMPUTE:
    1028           0 :                 ring->funcs = adev->gfx.compute_ring[0].funcs;
    1029           0 :                 break;
    1030             :         case AMDGPU_RING_TYPE_SDMA:
    1031           0 :                 ring->funcs = adev->sdma.instance[0].ring.funcs;
    1032           0 :                 break;
    1033             :         default:
    1034           0 :                 BUG();
    1035             :         }
    1036             : 
    1037           0 :         r = amdgpu_ring_init(adev, ring, 1024, NULL, 0,
    1038             :                              AMDGPU_RING_PRIO_DEFAULT, NULL);
    1039           0 :         if (r)
    1040             :                 goto clean_up_memory;
    1041             : 
    1042           0 :         amdgpu_mes_ring_to_queue_props(adev, ring, &qprops);
    1043             : 
    1044           0 :         dma_fence_wait(gang->process->vm->last_update, false);
    1045           0 :         dma_fence_wait(ctx_data->meta_data_va->last_pt_update, false);
    1046           0 :         amdgpu_mes_unlock(&adev->mes);
    1047             : 
    1048           0 :         r = amdgpu_mes_add_hw_queue(adev, gang_id, &qprops, &queue_id);
    1049           0 :         if (r)
    1050             :                 goto clean_up_ring;
    1051             : 
    1052           0 :         ring->hw_queue_id = queue_id;
    1053           0 :         ring->doorbell_index = qprops.doorbell_off;
    1054             : 
    1055           0 :         if (queue_type == AMDGPU_RING_TYPE_GFX)
    1056           0 :                 sprintf(ring->name, "gfx_%d.%d.%d", pasid, gang_id, queue_id);
    1057           0 :         else if (queue_type == AMDGPU_RING_TYPE_COMPUTE)
    1058           0 :                 sprintf(ring->name, "compute_%d.%d.%d", pasid, gang_id,
    1059             :                         queue_id);
    1060           0 :         else if (queue_type == AMDGPU_RING_TYPE_SDMA)
    1061           0 :                 sprintf(ring->name, "sdma_%d.%d.%d", pasid, gang_id,
    1062             :                         queue_id);
    1063             :         else
    1064           0 :                 BUG();
    1065             : 
    1066           0 :         *out = ring;
    1067           0 :         return 0;
    1068             : 
    1069             : clean_up_ring:
    1070           0 :         amdgpu_ring_fini(ring);
    1071             : clean_up_memory:
    1072           0 :         kfree(ring);
    1073           0 :         amdgpu_mes_unlock(&adev->mes);
    1074           0 :         return r;
    1075             : }
    1076             : 
    1077           0 : void amdgpu_mes_remove_ring(struct amdgpu_device *adev,
    1078             :                             struct amdgpu_ring *ring)
    1079             : {
    1080           0 :         if (!ring)
    1081             :                 return;
    1082             : 
    1083           0 :         amdgpu_mes_remove_hw_queue(adev, ring->hw_queue_id);
    1084           0 :         amdgpu_ring_fini(ring);
    1085           0 :         kfree(ring);
    1086             : }
    1087             : 
    1088           0 : uint32_t amdgpu_mes_get_aggregated_doorbell_index(struct amdgpu_device *adev,
    1089             :                                                    enum amdgpu_mes_priority_level prio)
    1090             : {
    1091           0 :         return adev->mes.aggregated_doorbells[prio];
    1092             : }
    1093             : 
    1094           0 : int amdgpu_mes_ctx_alloc_meta_data(struct amdgpu_device *adev,
    1095             :                                    struct amdgpu_mes_ctx_data *ctx_data)
    1096             : {
    1097             :         int r;
    1098             : 
    1099           0 :         r = amdgpu_bo_create_kernel(adev,
    1100             :                             sizeof(struct amdgpu_mes_ctx_meta_data),
    1101             :                             PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
    1102             :                             &ctx_data->meta_data_obj,
    1103           0 :                             &ctx_data->meta_data_mc_addr,
    1104             :                             &ctx_data->meta_data_ptr);
    1105           0 :         if (!ctx_data->meta_data_obj)
    1106             :                 return -ENOMEM;
    1107             : 
    1108           0 :         memset(ctx_data->meta_data_ptr, 0,
    1109             :                sizeof(struct amdgpu_mes_ctx_meta_data));
    1110             : 
    1111           0 :         return 0;
    1112             : }
    1113             : 
    1114           0 : void amdgpu_mes_ctx_free_meta_data(struct amdgpu_mes_ctx_data *ctx_data)
    1115             : {
    1116           0 :         if (ctx_data->meta_data_obj)
    1117           0 :                 amdgpu_bo_free_kernel(&ctx_data->meta_data_obj,
    1118           0 :                                       &ctx_data->meta_data_mc_addr,
    1119             :                                       &ctx_data->meta_data_ptr);
    1120           0 : }
    1121             : 
/*
 * Map the context meta data BO into the process VM at
 * ctx_data->meta_data_gpu_addr (RW+X PTEs), waiting for all page-table
 * updates to complete before returning.
 *
 * On success, stores the resulting bo_va in ctx_data->meta_data_va
 * (consumed later by amdgpu_mes_ctx_unmap_meta_data()). Returns 0 on
 * success or a negative errno; on failure the bo_va (if created) is
 * deleted again and nothing is left mapped.
 */
int amdgpu_mes_ctx_map_meta_data(struct amdgpu_device *adev,
                                 struct amdgpu_vm *vm,
                                 struct amdgpu_mes_ctx_data *ctx_data)
{
        struct amdgpu_bo_va *bo_va;
        struct ww_acquire_ctx ticket;
        struct list_head list;
        struct amdgpu_bo_list_entry pd;
        struct ttm_validate_buffer csa_tv;
        struct amdgpu_sync sync;
        int r;

        amdgpu_sync_create(&sync);
        INIT_LIST_HEAD(&list);
        INIT_LIST_HEAD(&csa_tv.head);

        /* Reserve the meta data BO together with the VM's page directory
         * so both can be safely modified below.
         */
        csa_tv.bo = &ctx_data->meta_data_obj->tbo;
        csa_tv.num_shared = 1;

        list_add(&csa_tv.head, &list);
        amdgpu_vm_get_pd_bo(vm, &list, &pd);

        r = ttm_eu_reserve_buffers(&ticket, &list, true, NULL);
        if (r) {
                DRM_ERROR("failed to reserve meta data BO: err=%d\n", r);
                /* NOTE(review): the sync object is not freed here; with an
                 * empty sync that appears harmless, but confirm against
                 * amdgpu_sync_create()/amdgpu_sync_free() semantics.
                 */
                return r;
        }

        bo_va = amdgpu_vm_bo_add(adev, vm, ctx_data->meta_data_obj);
        if (!bo_va) {
                ttm_eu_backoff_reservation(&ticket, &list);
                DRM_ERROR("failed to create bo_va for meta data BO\n");
                return -ENOMEM;
        }

        /* Map the whole meta data structure at the caller-chosen VA. */
        r = amdgpu_vm_bo_map(adev, bo_va, ctx_data->meta_data_gpu_addr, 0,
                             sizeof(struct amdgpu_mes_ctx_meta_data),
                             AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE |
                             AMDGPU_PTE_EXECUTABLE);

        if (r) {
                DRM_ERROR("failed to do bo_map on meta data, err=%d\n", r);
                goto error;
        }

        r = amdgpu_vm_bo_update(adev, bo_va, false);
        if (r) {
                DRM_ERROR("failed to do vm_bo_update on meta data\n");
                goto error;
        }
        /* Collect the PT-update fence so we can wait for it below. */
        amdgpu_sync_fence(&sync, bo_va->last_pt_update);

        r = amdgpu_vm_update_pdes(adev, vm, false);
        if (r) {
                DRM_ERROR("failed to update pdes on meta data\n");
                goto error;
        }
        amdgpu_sync_fence(&sync, vm->last_update);

        /* Block until both PTE and PDE updates have landed, then drop the
         * reservations.
         */
        amdgpu_sync_wait(&sync, false);
        ttm_eu_backoff_reservation(&ticket, &list);

        amdgpu_sync_free(&sync);
        ctx_data->meta_data_va = bo_va;
        return 0;

error:
        amdgpu_vm_bo_del(adev, bo_va);
        ttm_eu_backoff_reservation(&ticket, &list);
        amdgpu_sync_free(&sync);
        return r;
}
    1194             : 
/*
 * Undo amdgpu_mes_ctx_map_meta_data(): delete the meta data bo_va from
 * its VM, fence the BO against outstanding work and clear the freed
 * page-table mappings.
 *
 * NOTE(review): ctx_data->meta_data_va is dereferenced without a NULL
 * check, so this must only be called after a successful map — verify
 * all callers uphold that.
 *
 * Returns 0 on success or a negative errno (r is a long to hold
 * dma_fence results but is returned as int; error values are small
 * negatives, so the truncation looks benign).
 */
int amdgpu_mes_ctx_unmap_meta_data(struct amdgpu_device *adev,
                                   struct amdgpu_mes_ctx_data *ctx_data)
{
        struct amdgpu_bo_va *bo_va = ctx_data->meta_data_va;
        struct amdgpu_bo *bo = ctx_data->meta_data_obj;
        struct amdgpu_vm *vm = bo_va->base.vm;
        struct amdgpu_bo_list_entry vm_pd;
        struct list_head list, duplicates;
        struct dma_fence *fence = NULL;
        struct ttm_validate_buffer tv;
        struct ww_acquire_ctx ticket;
        long r = 0;

        INIT_LIST_HEAD(&list);
        INIT_LIST_HEAD(&duplicates);

        /* Reserve the BO plus the VM page directory before touching the
         * mapping.
         */
        tv.bo = &bo->tbo;
        tv.num_shared = 2;
        list_add(&tv.head, &list);

        amdgpu_vm_get_pd_bo(vm, &list, &vm_pd);

        r = ttm_eu_reserve_buffers(&ticket, &list, false, &duplicates);
        if (r) {
                dev_err(adev->dev, "leaking bo va because "
                        "we fail to reserve bo (%ld)\n", r);
                return r;
        }

        amdgpu_vm_bo_del(adev, bo_va);
        if (!amdgpu_vm_ready(vm))
                goto out_unlock;

        /* Fold any outstanding bookkeeping fences back onto the BO so it
         * is not reused while prior work may still reference it.
         */
        r = dma_resv_get_singleton(bo->tbo.base.resv, DMA_RESV_USAGE_BOOKKEEP, &fence);
        if (r)
                goto out_unlock;
        if (fence) {
                amdgpu_bo_fence(bo, fence, true);
                fence = NULL;
        }

        /* Clear the freed page-table entries and wait for that to finish. */
        r = amdgpu_vm_clear_freed(adev, vm, &fence);
        if (r || !fence)
                goto out_unlock;

        dma_fence_wait(fence, false);
        amdgpu_bo_fence(bo, fence, true);
        dma_fence_put(fence);

out_unlock:
        if (unlikely(r < 0))
                dev_err(adev->dev, "failed to clear page tables (%ld)\n", r);
        ttm_eu_backoff_reservation(&ticket, &list);

        return r;
}
    1251             : 
/*
 * Self-test helper: create one normal-priority gang for @pasid and add
 * @num_queue rings of @queue_type to it, recording each created ring in
 * @added_rings (indexed 0..num_queue-1) for later testing/cleanup.
 *
 * Returns a negative errno only when gang creation itself fails.
 */
static int amdgpu_mes_test_create_gang_and_queues(struct amdgpu_device *adev,
                                          int pasid, int *gang_id,
                                          int queue_type, int num_queue,
                                          struct amdgpu_ring **added_rings,
                                          struct amdgpu_mes_ctx_data *ctx_data)
{
        struct amdgpu_ring *ring;
        struct amdgpu_mes_gang_properties gprops = {0};
        int r, j;

        /* create a gang for the process */
        gprops.priority = AMDGPU_MES_PRIORITY_LEVEL_NORMAL;
        gprops.gang_quantum = adev->mes.default_gang_quantum;
        gprops.inprocess_gang_priority = AMDGPU_MES_PRIORITY_LEVEL_NORMAL;
        gprops.priority_level = AMDGPU_MES_PRIORITY_LEVEL_NORMAL;
        gprops.global_priority_level = AMDGPU_MES_PRIORITY_LEVEL_NORMAL;

        r = amdgpu_mes_add_gang(adev, pasid, &gprops, gang_id);
        if (r) {
                DRM_ERROR("failed to add gang\n");
                return r;
        }

        /* create queues for the gang */
        for (j = 0; j < num_queue; j++) {
                r = amdgpu_mes_add_ring(adev, *gang_id, queue_type, j,
                                        ctx_data, &ring);
                if (r) {
                        DRM_ERROR("failed to add ring\n");
                        break;
                }

                DRM_INFO("ring %s was added\n", ring->name);
                added_rings[j] = ring;
        }

        /* NOTE(review): a ring-add failure breaks out of the loop but
         * still returns 0, so the self-test proceeds with however many
         * rings were created — appears to be deliberate best-effort;
         * confirm before changing.
         */
        return 0;
}
    1290             : 
    1291           0 : static int amdgpu_mes_test_queues(struct amdgpu_ring **added_rings)
    1292             : {
    1293             :         struct amdgpu_ring *ring;
    1294             :         int i, r;
    1295             : 
    1296           0 :         for (i = 0; i < AMDGPU_MES_CTX_MAX_RINGS; i++) {
    1297           0 :                 ring = added_rings[i];
    1298           0 :                 if (!ring)
    1299           0 :                         continue;
    1300             : 
    1301           0 :                 r = amdgpu_ring_test_ring(ring);
    1302           0 :                 if (r) {
    1303           0 :                         DRM_DEV_ERROR(ring->adev->dev,
    1304             :                                       "ring %s test failed (%d)\n",
    1305             :                                       ring->name, r);
    1306           0 :                         return r;
    1307             :                 } else
    1308           0 :                         DRM_INFO("ring %s test pass\n", ring->name);
    1309             : 
    1310           0 :                 r = amdgpu_ring_test_ib(ring, 1000 * 10);
    1311           0 :                 if (r) {
    1312           0 :                         DRM_DEV_ERROR(ring->adev->dev,
    1313             :                                       "ring %s ib test failed (%d)\n",
    1314             :                                       ring->name, r);
    1315           0 :                         return r;
    1316             :                 } else
    1317           0 :                         DRM_INFO("ring %s ib test pass\n", ring->name);
    1318             :         }
    1319             : 
    1320             :         return 0;
    1321             : }
    1322             : 
    1323           0 : int amdgpu_mes_self_test(struct amdgpu_device *adev)
    1324             : {
    1325           0 :         struct amdgpu_vm *vm = NULL;
    1326           0 :         struct amdgpu_mes_ctx_data ctx_data = {0};
    1327           0 :         struct amdgpu_ring *added_rings[AMDGPU_MES_CTX_MAX_RINGS] = { NULL };
    1328           0 :         int gang_ids[3] = {0};
    1329           0 :         int queue_types[][2] = { { AMDGPU_RING_TYPE_GFX,
    1330             :                                    AMDGPU_MES_CTX_MAX_GFX_RINGS},
    1331             :                                  { AMDGPU_RING_TYPE_COMPUTE,
    1332             :                                    AMDGPU_MES_CTX_MAX_COMPUTE_RINGS},
    1333             :                                  { AMDGPU_RING_TYPE_SDMA,
    1334             :                                    AMDGPU_MES_CTX_MAX_SDMA_RINGS } };
    1335           0 :         int i, r, pasid, k = 0;
    1336             : 
    1337           0 :         pasid = amdgpu_pasid_alloc(16);
    1338           0 :         if (pasid < 0) {
    1339           0 :                 dev_warn(adev->dev, "No more PASIDs available!");
    1340           0 :                 pasid = 0;
    1341             :         }
    1342             : 
    1343           0 :         vm = kzalloc(sizeof(*vm), GFP_KERNEL);
    1344           0 :         if (!vm) {
    1345             :                 r = -ENOMEM;
    1346             :                 goto error_pasid;
    1347             :         }
    1348             : 
    1349           0 :         r = amdgpu_vm_init(adev, vm);
    1350           0 :         if (r) {
    1351           0 :                 DRM_ERROR("failed to initialize vm\n");
    1352           0 :                 goto error_pasid;
    1353             :         }
    1354             : 
    1355           0 :         r = amdgpu_mes_ctx_alloc_meta_data(adev, &ctx_data);
    1356           0 :         if (r) {
    1357           0 :                 DRM_ERROR("failed to alloc ctx meta data\n");
    1358           0 :                 goto error_fini;
    1359             :         }
    1360             : 
    1361           0 :         ctx_data.meta_data_gpu_addr = AMDGPU_VA_RESERVED_SIZE;
    1362           0 :         r = amdgpu_mes_ctx_map_meta_data(adev, vm, &ctx_data);
    1363           0 :         if (r) {
    1364           0 :                 DRM_ERROR("failed to map ctx meta data\n");
    1365           0 :                 goto error_vm;
    1366             :         }
    1367             : 
    1368           0 :         r = amdgpu_mes_create_process(adev, pasid, vm);
    1369           0 :         if (r) {
    1370           0 :                 DRM_ERROR("failed to create MES process\n");
    1371           0 :                 goto error_vm;
    1372             :         }
    1373             : 
    1374           0 :         for (i = 0; i < ARRAY_SIZE(queue_types); i++) {
    1375             :                 /* On GFX v10.3, fw hasn't supported to map sdma queue. */
    1376           0 :                 if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 3, 0) &&
    1377           0 :                     adev->ip_versions[GC_HWIP][0] < IP_VERSION(11, 0, 0) &&
    1378           0 :                     queue_types[i][0] == AMDGPU_RING_TYPE_SDMA)
    1379           0 :                         continue;
    1380             : 
    1381           0 :                 r = amdgpu_mes_test_create_gang_and_queues(adev, pasid,
    1382             :                                                            &gang_ids[i],
    1383             :                                                            queue_types[i][0],
    1384             :                                                            queue_types[i][1],
    1385             :                                                            &added_rings[k],
    1386             :                                                            &ctx_data);
    1387           0 :                 if (r)
    1388             :                         goto error_queues;
    1389             : 
    1390           0 :                 k += queue_types[i][1];
    1391             :         }
    1392             : 
    1393             :         /* start ring test and ib test for MES queues */
    1394           0 :         amdgpu_mes_test_queues(added_rings);
    1395             : 
    1396             : error_queues:
    1397             :         /* remove all queues */
    1398           0 :         for (i = 0; i < ARRAY_SIZE(added_rings); i++) {
    1399           0 :                 if (!added_rings[i])
    1400           0 :                         continue;
    1401           0 :                 amdgpu_mes_remove_ring(adev, added_rings[i]);
    1402             :         }
    1403             : 
    1404           0 :         for (i = 0; i < ARRAY_SIZE(gang_ids); i++) {
    1405           0 :                 if (!gang_ids[i])
    1406           0 :                         continue;
    1407           0 :                 amdgpu_mes_remove_gang(adev, gang_ids[i]);
    1408             :         }
    1409             : 
    1410           0 :         amdgpu_mes_destroy_process(adev, pasid);
    1411             : 
    1412             : error_vm:
    1413           0 :         amdgpu_mes_ctx_unmap_meta_data(adev, &ctx_data);
    1414             : 
    1415             : error_fini:
    1416           0 :         amdgpu_vm_fini(adev, vm);
    1417             : 
    1418             : error_pasid:
    1419           0 :         if (pasid)
    1420           0 :                 amdgpu_pasid_free(pasid);
    1421             : 
    1422           0 :         amdgpu_mes_ctx_free_meta_data(&ctx_data);
    1423           0 :         kfree(vm);
    1424           0 :         return 0;
    1425             : }

Generated by: LCOV version 1.14