LCOV - code coverage report
Current view: top level - drivers/gpu/drm/amd/amdgpu - amdgpu_gmc.c (source / functions)
Test: coverage.info
Date: 2022-12-09 01:23:36
            Hit   Total   Coverage
Lines:        0     280      0.0 %
Functions:    0      24      0.0 %

          Line data    Source code
       1             : /*
       2             :  * Copyright 2018 Advanced Micro Devices, Inc.
       3             :  * All Rights Reserved.
       4             :  *
       5             :  * Permission is hereby granted, free of charge, to any person obtaining a
       6             :  * copy of this software and associated documentation files (the
       7             :  * "Software"), to deal in the Software without restriction, including
       8             :  * without limitation the rights to use, copy, modify, merge, publish,
       9             :  * distribute, sub license, and/or sell copies of the Software, and to
      10             :  * permit persons to whom the Software is furnished to do so, subject to
      11             :  * the following conditions:
      12             :  *
      13             :  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
      14             :  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
      15             :  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
      16             :  * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
      17             :  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
      18             :  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
      19             :  * USE OR OTHER DEALINGS IN THE SOFTWARE.
      20             :  *
      21             :  * The above copyright notice and this permission notice (including the
      22             :  * next paragraph) shall be included in all copies or substantial portions
      23             :  * of the Software.
      24             :  *
      25             :  */
      26             : 
      27             : #include <linux/io-64-nonatomic-lo-hi.h>
      28             : #ifdef CONFIG_X86
      29             : #include <asm/hypervisor.h>
      30             : #endif
      31             : 
      32             : #include "amdgpu.h"
      33             : #include "amdgpu_gmc.h"
      34             : #include "amdgpu_ras.h"
      35             : #include "amdgpu_xgmi.h"
      36             : 
      37             : #include <drm/drm_drv.h>
      38             : 
      39             : /**
      40             :  * amdgpu_gmc_pdb0_alloc - allocate vram for pdb0
      41             :  *
      42             :  * @adev: amdgpu_device pointer
      43             :  *
       44             :  * Allocate video memory for pdb0 and map it for CPU access.
       45             :  * Returns 0 on success, error code on failure.
      46             :  */
      47           0 : int amdgpu_gmc_pdb0_alloc(struct amdgpu_device *adev)
      48             : {
      49             :         int r;
      50             :         struct amdgpu_bo_param bp;
      51           0 :         u64 vram_size = adev->gmc.xgmi.node_segment_size * adev->gmc.xgmi.num_physical_nodes;
      52           0 :         uint32_t pde0_page_shift = adev->gmc.vmid0_page_table_block_size + 21;
       53           0 :         uint32_t npdes = (vram_size + (1ULL << pde0_page_shift) - 1) >> pde0_page_shift;
      54             : 
      55           0 :         memset(&bp, 0, sizeof(bp));
      56           0 :         bp.size = PAGE_ALIGN((npdes + 1) * 8);
      57           0 :         bp.byte_align = PAGE_SIZE;
      58           0 :         bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
      59           0 :         bp.flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
      60             :                 AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
      61           0 :         bp.type = ttm_bo_type_kernel;
      62           0 :         bp.resv = NULL;
      63           0 :         bp.bo_ptr_size = sizeof(struct amdgpu_bo);
      64             : 
      65           0 :         r = amdgpu_bo_create(adev, &bp, &adev->gmc.pdb0_bo);
      66           0 :         if (r)
      67             :                 return r;
      68             : 
      69           0 :         r = amdgpu_bo_reserve(adev->gmc.pdb0_bo, false);
      70           0 :         if (unlikely(r != 0))
      71             :                 goto bo_reserve_failure;
      72             : 
      73           0 :         r = amdgpu_bo_pin(adev->gmc.pdb0_bo, AMDGPU_GEM_DOMAIN_VRAM);
      74           0 :         if (r)
      75             :                 goto bo_pin_failure;
      76           0 :         r = amdgpu_bo_kmap(adev->gmc.pdb0_bo, &adev->gmc.ptr_pdb0);
      77           0 :         if (r)
      78             :                 goto bo_kmap_failure;
      79             : 
      80           0 :         amdgpu_bo_unreserve(adev->gmc.pdb0_bo);
      81           0 :         return 0;
      82             : 
      83             : bo_kmap_failure:
      84           0 :         amdgpu_bo_unpin(adev->gmc.pdb0_bo);
      85             : bo_pin_failure:
      86           0 :         amdgpu_bo_unreserve(adev->gmc.pdb0_bo);
      87             : bo_reserve_failure:
      88           0 :         amdgpu_bo_unref(&adev->gmc.pdb0_bo);
      89           0 :         return r;
      90             : }
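                      : /* A worked sizing example (illustrative numbers): for a 4-node hive
                      :  * with an 8 GiB node_segment_size (vram_size = 32 GiB) and
                      :  * vmid0_page_table_block_size = 9, pde0_page_shift = 9 + 21 = 30,
                      :  * so each PDE0 entry covers 1 GiB and npdes = 32. The BO size is
                      :  * then PAGE_ALIGN((32 + 1) * 8) = PAGE_ALIGN(264) = 4096 bytes:
                      :  * 32 entries used as PTEs for VRAM plus one for the GART PTB.
                      :  */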
      91             : 
      92             : /**
      93             :  * amdgpu_gmc_get_pde_for_bo - get the PDE for a BO
      94             :  *
      95             :  * @bo: the BO to get the PDE for
       96             :  * @level: the level in the PD hierarchy
      97             :  * @addr: resulting addr
      98             :  * @flags: resulting flags
      99             :  *
     100             :  * Get the address and flags to be used for a PDE (Page Directory Entry).
     101             :  */
     102           0 : void amdgpu_gmc_get_pde_for_bo(struct amdgpu_bo *bo, int level,
     103             :                                uint64_t *addr, uint64_t *flags)
     104             : {
     105           0 :         struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
     106             : 
     107           0 :         switch (bo->tbo.resource->mem_type) {
     108             :         case TTM_PL_TT:
     109           0 :                 *addr = bo->tbo.ttm->dma_address[0];
     110           0 :                 break;
     111             :         case TTM_PL_VRAM:
     112           0 :                 *addr = amdgpu_bo_gpu_offset(bo);
     113           0 :                 break;
     114             :         default:
     115           0 :                 *addr = 0;
     116           0 :                 break;
     117             :         }
     118           0 :         *flags = amdgpu_ttm_tt_pde_flags(bo->tbo.ttm, bo->tbo.resource);
     119           0 :         amdgpu_gmc_get_vm_pde(adev, level, addr, flags);
     120           0 : }
     121             : 
     122             : /*
     123             :  * amdgpu_gmc_pd_addr - return the address of the root directory
     124             :  */
     125           0 : uint64_t amdgpu_gmc_pd_addr(struct amdgpu_bo *bo)
     126             : {
     127           0 :         struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
     128             :         uint64_t pd_addr;
     129             : 
     130             :         /* TODO: move that into ASIC specific code */
     131           0 :         if (adev->asic_type >= CHIP_VEGA10) {
     132           0 :                 uint64_t flags = AMDGPU_PTE_VALID;
     133             : 
     134           0 :                 amdgpu_gmc_get_pde_for_bo(bo, -1, &pd_addr, &flags);
     135           0 :                 pd_addr |= flags;
     136             :         } else {
     137           0 :                 pd_addr = amdgpu_bo_gpu_offset(bo);
     138             :         }
     139           0 :         return pd_addr;
     140             : }
     141             : 
     142             : /**
     143             :  * amdgpu_gmc_set_pte_pde - update the page tables using CPU
     144             :  *
     145             :  * @adev: amdgpu_device pointer
     146             :  * @cpu_pt_addr: cpu address of the page table
     147             :  * @gpu_page_idx: entry in the page table to update
     148             :  * @addr: dst addr to write into pte/pde
     149             :  * @flags: access flags
     150             :  *
     151             :  * Update the page tables using CPU.
     152             :  */
     153           0 : int amdgpu_gmc_set_pte_pde(struct amdgpu_device *adev, void *cpu_pt_addr,
     154             :                                 uint32_t gpu_page_idx, uint64_t addr,
     155             :                                 uint64_t flags)
     156             : {
     157           0 :         void __iomem *ptr = (void *)cpu_pt_addr;
     158             :         uint64_t value;
     159             : 
     160             :         /*
     161             :          * The following is for PTE only. GART does not have PDEs.
      162             :          */
     163           0 :         value = addr & 0x0000FFFFFFFFF000ULL;
     164           0 :         value |= flags;
     165           0 :         writeq(value, ptr + (gpu_page_idx * 8));
     166             : 
     167           0 :         return 0;
     168             : }
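                      : /* An illustrative use (hypothetical values): writing GART entry 3 for
                      :  * a page at physical address 0x123456000 computes
                      :  *   value = (0x123456000 & 0x0000FFFFFFFFF000ULL) | flags
                      :  * and writeq()s it at byte offset 3 * 8 = 24 from cpu_pt_addr. The
                      :  * mask keeps bits 47:12 of the address, leaving the low 12 bits to
                      :  * the access flags.
                      :  */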
     169             : 
     170             : /**
     171             :  * amdgpu_gmc_agp_addr - return the address in the AGP address space
     172             :  *
     173             :  * @bo: TTM BO which needs the address, must be in GTT domain
     174             :  *
     175             :  * Tries to figure out how to access the BO through the AGP aperture. Returns
     176             :  * AMDGPU_BO_INVALID_OFFSET if that is not possible.
     177             :  */
     178           0 : uint64_t amdgpu_gmc_agp_addr(struct ttm_buffer_object *bo)
     179             : {
     180           0 :         struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
     181             : 
     182           0 :         if (bo->ttm->num_pages != 1 || bo->ttm->caching == ttm_cached)
     183             :                 return AMDGPU_BO_INVALID_OFFSET;
     184             : 
     185           0 :         if (bo->ttm->dma_address[0] + PAGE_SIZE >= adev->gmc.agp_size)
     186             :                 return AMDGPU_BO_INVALID_OFFSET;
     187             : 
     188           0 :         return adev->gmc.agp_start + bo->ttm->dma_address[0];
     189             : }
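                      : /* The two checks above limit AGP use to single-page, write-combined
                      :  * or uncached TTM BOs whose DMA address (plus one page) still fits
                      :  * below agp_size; for everything else the caller is expected to fall
                      :  * back to a regular GART mapping when it sees
                      :  * AMDGPU_BO_INVALID_OFFSET.
                      :  */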
     190             : 
     191             : /**
     192             :  * amdgpu_gmc_vram_location - try to find VRAM location
     193             :  *
     194             :  * @adev: amdgpu device structure holding all necessary information
     195             :  * @mc: memory controller structure holding memory information
     196             :  * @base: base address at which to put VRAM
     197             :  *
      198             :  * Function will try to place VRAM at the base address provided
      199             :  * as parameter.
     200             :  */
     201           0 : void amdgpu_gmc_vram_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc,
     202             :                               u64 base)
     203             : {
     204           0 :         uint64_t limit = (uint64_t)amdgpu_vram_limit << 20;
     205             : 
     206           0 :         mc->vram_start = base;
     207           0 :         mc->vram_end = mc->vram_start + mc->mc_vram_size - 1;
     208           0 :         if (limit && limit < mc->real_vram_size)
     209           0 :                 mc->real_vram_size = limit;
     210             : 
     211           0 :         if (mc->xgmi.num_physical_nodes == 0) {
     212           0 :                 mc->fb_start = mc->vram_start;
     213           0 :                 mc->fb_end = mc->vram_end;
     214             :         }
     215           0 :         dev_info(adev->dev, "VRAM: %lluM 0x%016llX - 0x%016llX (%lluM used)\n",
     216             :                         mc->mc_vram_size >> 20, mc->vram_start,
     217             :                         mc->vram_end, mc->real_vram_size >> 20);
     218           0 : }
     219             : 
     220             : /** amdgpu_gmc_sysvm_location - place vram and gart in sysvm aperture
     221             :  *
     222             :  * @adev: amdgpu device structure holding all necessary information
     223             :  * @mc: memory controller structure holding memory information
     224             :  *
      225             :  * This function is only used when GART is used for FB translation.
      226             :  * In that case, we use the sysvm aperture (vmid0 page tables) for
      227             :  * both vram and gart (aka system memory) access.
      228             :  *
      229             :  * GPUVM (and our organization of vmid0 page tables) requires the
      230             :  * sysvm aperture to be placed at a location aligned to 8 times the
      231             :  * native page size. For example, if vm_context0_cntl.page_table_block_size
      232             :  * is 12, then the native page size is 8G (2M*2^12), so sysvm should
      233             :  * start at a 64G-aligned address. For simplicity, we just put sysvm
      234             :  * at address 0, so vram starts at address 0 and gart comes right after.
     235             :  */
     236           0 : void amdgpu_gmc_sysvm_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc)
     237             : {
     238           0 :         u64 hive_vram_start = 0;
     239           0 :         u64 hive_vram_end = mc->xgmi.node_segment_size * mc->xgmi.num_physical_nodes - 1;
     240           0 :         mc->vram_start = mc->xgmi.node_segment_size * mc->xgmi.physical_node_id;
     241           0 :         mc->vram_end = mc->vram_start + mc->xgmi.node_segment_size - 1;
     242           0 :         mc->gart_start = hive_vram_end + 1;
     243           0 :         mc->gart_end = mc->gart_start + mc->gart_size - 1;
     244           0 :         mc->fb_start = hive_vram_start;
     245           0 :         mc->fb_end = hive_vram_end;
     246           0 :         dev_info(adev->dev, "VRAM: %lluM 0x%016llX - 0x%016llX (%lluM used)\n",
     247             :                         mc->mc_vram_size >> 20, mc->vram_start,
     248             :                         mc->vram_end, mc->real_vram_size >> 20);
     249           0 :         dev_info(adev->dev, "GART: %lluM 0x%016llX - 0x%016llX\n",
     250             :                         mc->gart_size >> 20, mc->gart_start, mc->gart_end);
     251           0 : }
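                      : /* A worked layout example (illustrative numbers): for a 4-node hive
                      :  * with node_segment_size = 16 GiB, the node with physical_node_id = 2
                      :  * gets vram_start = 32 GiB and vram_end = 48 GiB - 1, the shared FB
                      :  * window spans [0, 64 GiB), and the GART aperture starts at 64 GiB,
                      :  * immediately after the hive's VRAM.
                      :  */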
     252             : 
     253             : /**
     254             :  * amdgpu_gmc_gart_location - try to find GART location
     255             :  *
     256             :  * @adev: amdgpu device structure holding all necessary information
     257             :  * @mc: memory controller structure holding memory information
     258             :  *
      259             :  * Function will try to place GART before or after VRAM.
      260             :  * If GART size is bigger than the space left then we adjust GART size.
      261             :  * Thus this function will never fail.
     262             :  */
     263           0 : void amdgpu_gmc_gart_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc)
     264             : {
     265           0 :         const uint64_t four_gb = 0x100000000ULL;
     266             :         u64 size_af, size_bf;
     267             :         /*To avoid the hole, limit the max mc address to AMDGPU_GMC_HOLE_START*/
     268           0 :         u64 max_mc_address = min(adev->gmc.mc_mask, AMDGPU_GMC_HOLE_START - 1);
     269             : 
     270             :         /* VCE doesn't like it when BOs cross a 4GB segment, so align
     271             :          * the GART base on a 4GB boundary as well.
     272             :          */
     273           0 :         size_bf = mc->fb_start;
     274           0 :         size_af = max_mc_address + 1 - ALIGN(mc->fb_end + 1, four_gb);
     275             : 
     276           0 :         if (mc->gart_size > max(size_bf, size_af)) {
     277           0 :                 dev_warn(adev->dev, "limiting GART\n");
     278           0 :                 mc->gart_size = max(size_bf, size_af);
     279             :         }
     280             : 
     281           0 :         if ((size_bf >= mc->gart_size && size_bf < size_af) ||
     282             :             (size_af < mc->gart_size))
     283           0 :                 mc->gart_start = 0;
     284             :         else
     285           0 :                 mc->gart_start = max_mc_address - mc->gart_size + 1;
     286             : 
     287           0 :         mc->gart_start &= ~(four_gb - 1);
     288           0 :         mc->gart_end = mc->gart_start + mc->gart_size - 1;
     289           0 :         dev_info(adev->dev, "GART: %lluM 0x%016llX - 0x%016llX\n",
     290             :                         mc->gart_size >> 20, mc->gart_start, mc->gart_end);
     291           0 : }
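                      : /* An illustrative placement (hypothetical numbers): with fb_start = 0
                      :  * and fb_end = 16 GiB - 1, size_bf is 0 and size_af is everything
                      :  * between the 4 GiB-aligned end of the FB and max_mc_address, so a
                      :  * 512 MiB GART is placed at the top of the usable MC space with its
                      :  * base rounded down to a 4 GiB boundary.
                      :  */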
     292             : 
     293             : /**
     294             :  * amdgpu_gmc_agp_location - try to find AGP location
     295             :  * @adev: amdgpu device structure holding all necessary information
     296             :  * @mc: memory controller structure holding memory information
     297             :  *
      298             :  * Function will try to find a place for the AGP BAR in the MC address
     299             :  * space.
     300             :  *
     301             :  * AGP BAR will be assigned the largest available hole in the address space.
     302             :  * Should be called after VRAM and GART locations are setup.
     303             :  */
     304           0 : void amdgpu_gmc_agp_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc)
     305             : {
     306           0 :         const uint64_t sixteen_gb = 1ULL << 34;
     307           0 :         const uint64_t sixteen_gb_mask = ~(sixteen_gb - 1);
     308             :         u64 size_af, size_bf;
     309             : 
     310           0 :         if (amdgpu_sriov_vf(adev)) {
     311           0 :                 mc->agp_start = 0xffffffffffff;
     312           0 :                 mc->agp_end = 0x0;
     313           0 :                 mc->agp_size = 0;
     314             : 
     315           0 :                 return;
     316             :         }
     317             : 
     318           0 :         if (mc->fb_start > mc->gart_start) {
     319           0 :                 size_bf = (mc->fb_start & sixteen_gb_mask) -
     320           0 :                         ALIGN(mc->gart_end + 1, sixteen_gb);
     321           0 :                 size_af = mc->mc_mask + 1 - ALIGN(mc->fb_end + 1, sixteen_gb);
     322             :         } else {
     323           0 :                 size_bf = mc->fb_start & sixteen_gb_mask;
     324           0 :                 size_af = (mc->gart_start & sixteen_gb_mask) -
     325           0 :                         ALIGN(mc->fb_end + 1, sixteen_gb);
     326             :         }
     327             : 
     328           0 :         if (size_bf > size_af) {
     329           0 :                 mc->agp_start = (mc->fb_start - size_bf) & sixteen_gb_mask;
     330           0 :                 mc->agp_size = size_bf;
     331             :         } else {
     332           0 :                 mc->agp_start = ALIGN(mc->fb_end + 1, sixteen_gb);
     333           0 :                 mc->agp_size = size_af;
     334             :         }
     335             : 
     336           0 :         mc->agp_end = mc->agp_start + mc->agp_size - 1;
     337           0 :         dev_info(adev->dev, "AGP: %lluM 0x%016llX - 0x%016llX\n",
     338             :                         mc->agp_size >> 20, mc->agp_start, mc->agp_end);
     339             : }
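                      : /* An illustrative outcome (hypothetical numbers): with the FB at
                      :  * [0, 16 GiB) and the GART at the top of the MC space, size_bf is 0
                      :  * and size_af spans from the 16 GiB-aligned end of the FB up to the
                      :  * 16 GiB-aligned start of the GART, so the AGP window fills that
                      :  * hole starting at agp_start = 16 GiB.
                      :  */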
     340             : 
     341             : /**
      342             :  * amdgpu_gmc_fault_key - get hash key from VM fault address and pasid
     343             :  *
     344             :  * @addr: 48 bit physical address, page aligned (36 significant bits)
     345             :  * @pasid: 16 bit process address space identifier
     346             :  */
     347             : static inline uint64_t amdgpu_gmc_fault_key(uint64_t addr, uint16_t pasid)
     348             : {
     349           0 :         return addr << 4 | pasid;
     350             : }
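                      : /* Note on the packing: since @addr is page aligned its low 12 bits
                      :  * are zero, so addr << 4 clears the low 16 bits, which then hold the
                      :  * 16-bit @pasid without overlap. For example (hypothetical values),
                      :  * addr = 0x1000 and pasid = 0x1234 give key = 0x11234.
                      :  */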
     351             : 
     352             : /**
     353             :  * amdgpu_gmc_filter_faults - filter VM faults
     354             :  *
     355             :  * @adev: amdgpu device structure
      356             :  * @ih: interrupt ring the fault was received from
     357             :  * @addr: address of the VM fault
     358             :  * @pasid: PASID of the process causing the fault
     359             :  * @timestamp: timestamp of the fault
     360             :  *
     361             :  * Returns:
     362             :  * True if the fault was filtered and should not be processed further.
     363             :  * False if the fault is a new one and needs to be handled.
     364             :  */
     365           0 : bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev,
     366             :                               struct amdgpu_ih_ring *ih, uint64_t addr,
     367             :                               uint16_t pasid, uint64_t timestamp)
     368             : {
     369           0 :         struct amdgpu_gmc *gmc = &adev->gmc;
     370           0 :         uint64_t stamp, key = amdgpu_gmc_fault_key(addr, pasid);
     371             :         struct amdgpu_gmc_fault *fault;
     372             :         uint32_t hash;
     373             : 
     374             :         /* Stale retry fault if timestamp goes backward */
     375           0 :         if (amdgpu_ih_ts_after(timestamp, ih->processed_timestamp))
     376             :                 return true;
     377             : 
     378             :         /* If we don't have space left in the ring buffer return immediately */
     379           0 :         stamp = max(timestamp, AMDGPU_GMC_FAULT_TIMEOUT + 1) -
     380             :                 AMDGPU_GMC_FAULT_TIMEOUT;
     381           0 :         if (gmc->fault_ring[gmc->last_fault].timestamp >= stamp)
     382             :                 return true;
     383             : 
     384             :         /* Try to find the fault in the hash */
     385           0 :         hash = hash_64(key, AMDGPU_GMC_FAULT_HASH_ORDER);
     386           0 :         fault = &gmc->fault_ring[gmc->fault_hash[hash].idx];
     387           0 :         while (fault->timestamp >= stamp) {
     388             :                 uint64_t tmp;
     389             : 
     390           0 :                 if (atomic64_read(&fault->key) == key)
     391             :                         return true;
     392             : 
     393           0 :                 tmp = fault->timestamp;
     394           0 :                 fault = &gmc->fault_ring[fault->next];
     395             : 
     396             :                 /* Check if the entry was reused */
     397           0 :                 if (fault->timestamp >= tmp)
     398             :                         break;
     399             :         }
     400             : 
     401             :         /* Add the fault to the ring */
     402           0 :         fault = &gmc->fault_ring[gmc->last_fault];
     403           0 :         atomic64_set(&fault->key, key);
     404           0 :         fault->timestamp = timestamp;
     405             : 
     406             :         /* And update the hash */
     407           0 :         fault->next = gmc->fault_hash[hash].idx;
     408           0 :         gmc->fault_hash[hash].idx = gmc->last_fault++;
     409           0 :         return false;
     410             : }
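                      : /* The filter is a fixed-size ring of recent faults plus a hash table
                      :  * of list heads: fault_hash[hash].idx points at the newest ring
                      :  * entry for that hash and each entry's ->next chains to the previous
                      :  * one. The walk stops at entries older than AMDGPU_GMC_FAULT_TIMEOUT
                      :  * or at a reused slot, detected by a timestamp that did not
                      :  * decrease.
                      :  */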
     411             : 
     412             : /**
     413             :  * amdgpu_gmc_filter_faults_remove - remove address from VM faults filter
     414             :  *
     415             :  * @adev: amdgpu device structure
     416             :  * @addr: address of the VM fault
     417             :  * @pasid: PASID of the process causing the fault
     418             :  *
      419             :  * Remove the address from the fault filter, so that future VM faults
      420             :  * on this address will be passed to the retry fault handler to recover.
     421             :  */
     422           0 : void amdgpu_gmc_filter_faults_remove(struct amdgpu_device *adev, uint64_t addr,
     423             :                                      uint16_t pasid)
     424             : {
     425           0 :         struct amdgpu_gmc *gmc = &adev->gmc;
     426           0 :         uint64_t key = amdgpu_gmc_fault_key(addr, pasid);
     427             :         struct amdgpu_gmc_fault *fault;
     428             :         uint32_t hash;
     429             :         uint64_t tmp;
     430             : 
     431           0 :         hash = hash_64(key, AMDGPU_GMC_FAULT_HASH_ORDER);
     432           0 :         fault = &gmc->fault_ring[gmc->fault_hash[hash].idx];
     433             :         do {
     434           0 :                 if (atomic64_cmpxchg(&fault->key, key, 0) == key)
     435             :                         break;
     436             : 
     437           0 :                 tmp = fault->timestamp;
     438           0 :                 fault = &gmc->fault_ring[fault->next];
     439           0 :         } while (fault->timestamp < tmp);
     440           0 : }
     441             : 
     442           0 : int amdgpu_gmc_ras_early_init(struct amdgpu_device *adev)
     443             : {
     444           0 :         if (!adev->gmc.xgmi.connected_to_cpu) {
     445           0 :                 adev->gmc.xgmi.ras = &xgmi_ras;
     446           0 :                 amdgpu_ras_register_ras_block(adev, &adev->gmc.xgmi.ras->ras_block);
     447           0 :                 adev->gmc.xgmi.ras_if = &adev->gmc.xgmi.ras->ras_block.ras_comm;
     448             :         }
     449             : 
     450           0 :         return 0;
     451             : }
     452             : 
     453           0 : int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev)
     454             : {
     455           0 :         return 0;
     456             : }
     457             : 
     458           0 : void amdgpu_gmc_ras_fini(struct amdgpu_device *adev)
     459             : {
     460             : 
     461           0 : }
     462             : 
     463             :         /*
     464             :          * The latest engine allocation on gfx9/10 is:
     465             :          * Engine 2, 3: firmware
     466             :          * Engine 0, 1, 4~16: amdgpu ring,
     467             :          *                    subject to change when ring number changes
     468             :          * Engine 17: Gart flushes
     469             :          */
     470             : #define GFXHUB_FREE_VM_INV_ENGS_BITMAP          0x1FFF3
     471             : #define MMHUB_FREE_VM_INV_ENGS_BITMAP           0x1FFF3
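                      : /* Checking the bitmap against the allocation described above:
                      :  * 0x1FFF3 = 0b1'1111'1111'1111'0011, i.e. bits 0-1 and 4-16 are set
                      :  * (free for rings) while bits 2-3 are clear (firmware) and bit 17 is
                      :  * beyond the mask (reserved for GART flushes).
                      :  */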
     472             : 
     473           0 : int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev)
     474             : {
     475             :         struct amdgpu_ring *ring;
     476           0 :         unsigned vm_inv_engs[AMDGPU_MAX_VMHUBS] =
     477             :                 {GFXHUB_FREE_VM_INV_ENGS_BITMAP, MMHUB_FREE_VM_INV_ENGS_BITMAP,
     478             :                 GFXHUB_FREE_VM_INV_ENGS_BITMAP};
     479             :         unsigned i;
     480             :         unsigned vmhub, inv_eng;
     481             : 
     482           0 :         for (i = 0; i < adev->num_rings; ++i) {
     483           0 :                 ring = adev->rings[i];
     484           0 :                 vmhub = ring->funcs->vmhub;
     485             : 
     486           0 :                 if (ring == &adev->mes.ring)
     487           0 :                         continue;
     488             : 
     489           0 :                 inv_eng = ffs(vm_inv_engs[vmhub]);
     490           0 :                 if (!inv_eng) {
     491           0 :                         dev_err(adev->dev, "no VM inv eng for ring %s\n",
     492             :                                 ring->name);
     493           0 :                         return -EINVAL;
     494             :                 }
     495             : 
     496           0 :                 ring->vm_inv_eng = inv_eng - 1;
     497           0 :                 vm_inv_engs[vmhub] &= ~(1 << ring->vm_inv_eng);
     498             : 
     499           0 :                 dev_info(adev->dev, "ring %s uses VM inv eng %u on hub %u\n",
     500             :                          ring->name, ring->vm_inv_eng, ring->funcs->vmhub);
     501             :         }
     502             : 
     503             :         return 0;
     504             : }
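                      : /* An illustrative run of the ffs() allocation above: starting from
                      :  * 0x1FFF3, the first ring gets ffs(0x1FFF3) - 1 = engine 0 and the
                      :  * bitmap becomes 0x1FFF2; the next ring on the same hub gets engine
                      :  * 1, and later rings get engines 4, 5, ... as the low bits are
                      :  * consumed.
                      :  */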
     505             : 
     506             : /**
     507             :  * amdgpu_gmc_tmz_set -- check and set if a device supports TMZ
     508             :  * @adev: amdgpu_device pointer
     509             :  *
      510             :  * Check and set whether the device @adev supports Trusted Memory
     511             :  * Zones (TMZ).
     512             :  */
     513           0 : void amdgpu_gmc_tmz_set(struct amdgpu_device *adev)
     514             : {
     515           0 :         switch (adev->ip_versions[GC_HWIP][0]) {
     516             :         /* RAVEN */
     517             :         case IP_VERSION(9, 2, 2):
     518             :         case IP_VERSION(9, 1, 0):
     519             :         /* RENOIR looks like RAVEN */
     520             :         case IP_VERSION(9, 3, 0):
     521             :         /* GC 10.3.7 */
     522             :         case IP_VERSION(10, 3, 7):
     523           0 :                 if (amdgpu_tmz == 0) {
     524           0 :                         adev->gmc.tmz_enabled = false;
     525           0 :                         dev_info(adev->dev,
     526             :                                  "Trusted Memory Zone (TMZ) feature disabled (cmd line)\n");
     527             :                 } else {
     528           0 :                         adev->gmc.tmz_enabled = true;
     529           0 :                         dev_info(adev->dev,
     530             :                                  "Trusted Memory Zone (TMZ) feature enabled\n");
     531             :                 }
     532             :                 break;
     533             :         case IP_VERSION(10, 1, 10):
     534             :         case IP_VERSION(10, 1, 1):
     535             :         case IP_VERSION(10, 1, 2):
     536             :         case IP_VERSION(10, 1, 3):
     537             :         case IP_VERSION(10, 3, 0):
     538             :         case IP_VERSION(10, 3, 2):
     539             :         case IP_VERSION(10, 3, 4):
     540             :         case IP_VERSION(10, 3, 5):
     541             :         /* VANGOGH */
     542             :         case IP_VERSION(10, 3, 1):
     543             :         /* YELLOW_CARP*/
     544             :         case IP_VERSION(10, 3, 3):
     545             :                 /* Don't enable it by default yet.
     546             :                  */
     547           0 :                 if (amdgpu_tmz < 1) {
     548           0 :                         adev->gmc.tmz_enabled = false;
     549           0 :                         dev_info(adev->dev,
     550             :                                  "Trusted Memory Zone (TMZ) feature disabled as experimental (default)\n");
     551             :                 } else {
     552           0 :                         adev->gmc.tmz_enabled = true;
     553           0 :                         dev_info(adev->dev,
     554             :                                  "Trusted Memory Zone (TMZ) feature enabled as experimental (cmd line)\n");
     555             :                 }
     556             :                 break;
     557             :         default:
     558           0 :                 adev->gmc.tmz_enabled = false;
     559           0 :                 dev_info(adev->dev,
     560             :                          "Trusted Memory Zone (TMZ) feature not supported\n");
     561           0 :                 break;
     562             :         }
     563           0 : }
     564             : 
     565             : /**
      566             :  * amdgpu_gmc_noretry_set -- set per-ASIC noretry defaults
      567             :  * @adev: amdgpu_device pointer
      568             :  *
      569             :  * Set a per-ASIC default for the no-retry parameter.
     570             :  *
     571             :  */
     572           0 : void amdgpu_gmc_noretry_set(struct amdgpu_device *adev)
     573             : {
     574           0 :         struct amdgpu_gmc *gmc = &adev->gmc;
     575             : 
     576           0 :         switch (adev->ip_versions[GC_HWIP][0]) {
     577             :         case IP_VERSION(9, 0, 1):
     578             :         case IP_VERSION(9, 3, 0):
     579             :         case IP_VERSION(9, 4, 0):
     580             :         case IP_VERSION(9, 4, 1):
     581             :         case IP_VERSION(9, 4, 2):
     582             :         case IP_VERSION(10, 3, 3):
     583             :         case IP_VERSION(10, 3, 4):
     584             :         case IP_VERSION(10, 3, 5):
     585             :         case IP_VERSION(10, 3, 6):
     586             :         case IP_VERSION(10, 3, 7):
     587             :                 /*
      588             :                  * noretry = 0 will cause kfd page fault tests to fail
     589             :                  * for some ASICs, so set default to 1 for these ASICs.
     590             :                  */
     591           0 :                 if (amdgpu_noretry == -1)
     592           0 :                         gmc->noretry = 1;
     593             :                 else
     594           0 :                         gmc->noretry = amdgpu_noretry;
     595             :                 break;
     596             :         default:
      597             :                 /* Raven currently has issues with noretry;
      598             :                  * regardless of what we decide for other
      599             :                  * ASICs, we should leave Raven with
      600             :                  * noretry = 0 until we root-cause the
      601             :                  * issues.
     602             :                  *
     603             :                  * default this to 0 for now, but we may want
     604             :                  * to change this in the future for certain
     605             :                  * GPUs as it can increase performance in
     606             :                  * certain cases.
     607             :                  */
     608           0 :                 if (amdgpu_noretry == -1)
     609           0 :                         gmc->noretry = 0;
     610             :                 else
     611           0 :                         gmc->noretry = amdgpu_noretry;
     612             :                 break;
     613             :         }
     614           0 : }
     615             : 
     616           0 : void amdgpu_gmc_set_vm_fault_masks(struct amdgpu_device *adev, int hub_type,
     617             :                                    bool enable)
     618             : {
     619             :         struct amdgpu_vmhub *hub;
     620             :         u32 tmp, reg, i;
     621             : 
     622           0 :         hub = &adev->vmhub[hub_type];
     623           0 :         for (i = 0; i < 16; i++) {
     624           0 :                 reg = hub->vm_context0_cntl + hub->ctx_distance * i;
     625             : 
     626           0 :                 tmp = (hub_type == AMDGPU_GFXHUB_0) ?
     627           0 :                         RREG32_SOC15_IP(GC, reg) :
     628           0 :                         RREG32_SOC15_IP(MMHUB, reg);
     629             : 
     630           0 :                 if (enable)
     631           0 :                         tmp |= hub->vm_cntx_cntl_vm_fault;
     632             :                 else
     633           0 :                         tmp &= ~hub->vm_cntx_cntl_vm_fault;
     634             : 
     635             :                 (hub_type == AMDGPU_GFXHUB_0) ?
     636           0 :                         WREG32_SOC15_IP(GC, reg, tmp) :
     637           0 :                         WREG32_SOC15_IP(MMHUB, reg, tmp);
     638             :         }
     639           0 : }
     640             : 
     641           0 : void amdgpu_gmc_get_vbios_allocations(struct amdgpu_device *adev)
     642             : {
     643             :         unsigned size;
     644             : 
     645             :         /*
      646             :          * Some ASICs need to reserve a region of video memory to prevent
      647             :          * access from the driver
     648             :          */
     649           0 :         adev->mman.stolen_reserved_offset = 0;
     650           0 :         adev->mman.stolen_reserved_size = 0;
     651             : 
     652             :         /*
     653             :          * TODO:
      654             :          * Currently there is a bug where some memory client outside
      655             :          * of the driver writes to the first 8M of VRAM on S3 resume;
      656             :          * this overwrites the GART, which by default is placed in the
      657             :          * first 8M, and causes VM_FAULTS once GTT is accessed.
      658             :          * Keep the stolen memory reservation until this is solved.
     659             :          */
     660           0 :         switch (adev->asic_type) {
     661             :         case CHIP_VEGA10:
     662           0 :                 adev->mman.keep_stolen_vga_memory = true;
     663             :                 /*
     664             :                  * VEGA10 SRIOV VF with MS_HYPERV host needs some firmware reserved area.
     665             :                  */
     666             : #ifdef CONFIG_X86
     667             :                 if (amdgpu_sriov_vf(adev) && hypervisor_is_type(X86_HYPER_MS_HYPERV)) {
     668             :                         adev->mman.stolen_reserved_offset = 0x500000;
     669             :                         adev->mman.stolen_reserved_size = 0x200000;
     670             :                 }
     671             : #endif
     672           0 :                 break;
     673             :         case CHIP_RAVEN:
     674             :         case CHIP_RENOIR:
     675           0 :                 adev->mman.keep_stolen_vga_memory = true;
     676           0 :                 break;
     677             :         case CHIP_YELLOW_CARP:
     678           0 :                 if (amdgpu_discovery == 0) {
     679           0 :                         adev->mman.stolen_reserved_offset = 0x1ffb0000;
     680           0 :                         adev->mman.stolen_reserved_size = 64 * PAGE_SIZE;
     681             :                 }
     682             :                 break;
     683             :         default:
     684           0 :                 adev->mman.keep_stolen_vga_memory = false;
     685           0 :                 break;
     686             :         }
     687             : 
     688           0 :         if (amdgpu_sriov_vf(adev) ||
     689           0 :             !amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_DCE)) {
     690             :                 size = 0;
     691             :         } else {
     692           0 :                 size = amdgpu_gmc_get_vbios_fb_size(adev);
     693             : 
     694           0 :                 if (adev->mman.keep_stolen_vga_memory)
     695           0 :                         size = max(size, (unsigned)AMDGPU_VBIOS_VGA_ALLOCATION);
     696             :         }
     697             : 
     698             :         /* set to 0 if the pre-OS buffer uses up most of vram */
     699           0 :         if ((adev->gmc.real_vram_size - size) < (8 * 1024 * 1024))
     700           0 :                 size = 0;
     701             : 
     702           0 :         if (size > AMDGPU_VBIOS_VGA_ALLOCATION) {
     703           0 :                 adev->mman.stolen_vga_size = AMDGPU_VBIOS_VGA_ALLOCATION;
     704           0 :                 adev->mman.stolen_extended_size = size - adev->mman.stolen_vga_size;
     705             :         } else {
     706           0 :                 adev->mman.stolen_vga_size = size;
     707           0 :                 adev->mman.stolen_extended_size = 0;
     708             :         }
     709           0 : }
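                      : /* An illustrative split (hypothetical size): if the vbios reports a
                      :  * pre-OS framebuffer larger than AMDGPU_VBIOS_VGA_ALLOCATION, the
                      :  * first AMDGPU_VBIOS_VGA_ALLOCATION bytes become stolen_vga_size and
                      :  * the remainder becomes stolen_extended_size; a smaller size goes
                      :  * entirely to stolen_vga_size.
                      :  */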
     710             : 
     711             : /**
     712             :  * amdgpu_gmc_init_pdb0 - initialize PDB0
     713             :  *
     714             :  * @adev: amdgpu_device pointer
     715             :  *
      716             :  * This function is only used when the GART page table is used
      717             :  * for FB address translation. In such a case, we construct
     718             :  * a 2-level system VM page table: PDB0->PTB, to cover both
     719             :  * VRAM of the hive and system memory.
     720             :  *
     721             :  * PDB0 is static, initialized once on driver initialization.
      722             :  * The first n entries of PDB0 are used as PTEs by setting
      723             :  * the P bit to 1, pointing to VRAM. The n+1'th entry points
     724             :  * to a big PTB covering system memory.
     725             :  *
     726             :  */
     727           0 : void amdgpu_gmc_init_pdb0(struct amdgpu_device *adev)
     728             : {
     729             :         int i;
     730           0 :         uint64_t flags = adev->gart.gart_pte_flags; //TODO it is UC. explore NC/RW?
     731             :         /* Each PDE0 (used as PTE) covers (2^vmid0_page_table_block_size)*2M
     732             :          */
     733           0 :         u64 vram_size = adev->gmc.xgmi.node_segment_size * adev->gmc.xgmi.num_physical_nodes;
     734           0 :         u64 pde0_page_size = (1ULL<<adev->gmc.vmid0_page_table_block_size)<<21;
     735           0 :         u64 vram_addr = adev->vm_manager.vram_base_offset -
     736           0 :                 adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size;
     737           0 :         u64 vram_end = vram_addr + vram_size;
     738           0 :         u64 gart_ptb_gpu_pa = amdgpu_gmc_vram_pa(adev, adev->gart.bo);
     739             :         int idx;
     740             : 
     741           0 :         if (!drm_dev_enter(adev_to_drm(adev), &idx))
     742           0 :                 return;
     743             : 
     744           0 :         flags |= AMDGPU_PTE_VALID | AMDGPU_PTE_READABLE;
     745           0 :         flags |= AMDGPU_PTE_WRITEABLE;
     746           0 :         flags |= AMDGPU_PTE_SNOOPED;
     747           0 :         flags |= AMDGPU_PTE_FRAG((adev->gmc.vmid0_page_table_block_size + 9*1));
     748           0 :         flags |= AMDGPU_PDE_PTE;
     749             : 
     750             :         /* The first n PDE0 entries are used as PTE,
     751             :          * pointing to vram
     752             :          */
     753           0 :         for (i = 0; vram_addr < vram_end; i++, vram_addr += pde0_page_size)
     754           0 :                 amdgpu_gmc_set_pte_pde(adev, adev->gmc.ptr_pdb0, i, vram_addr, flags);
     755             : 
     756             :         /* The n+1'th PDE0 entry points to a huge
     757             :          * PTB who has more than 512 entries each
     758             :          * pointing to a 4K system page
     759             :          */
     760           0 :         flags = AMDGPU_PTE_VALID;
     761           0 :         flags |= AMDGPU_PDE_BFS(0) | AMDGPU_PTE_SNOOPED;
     762             :         /* Requires gart_ptb_gpu_pa to be 4K aligned */
     763           0 :         amdgpu_gmc_set_pte_pde(adev, adev->gmc.ptr_pdb0, i, gart_ptb_gpu_pa, flags);
     764           0 :         drm_dev_exit(idx);
     765             : }
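                      : /* A worked example (illustrative numbers): with vram_size = 32 GiB
                      :  * and vmid0_page_table_block_size = 9, pde0_page_size =
                      :  * 2^9 * 2 MiB = 1 GiB, so the loop fills PDB0 entries 0..31 with
                      :  * huge PTEs mapping VRAM and entry 32 becomes the PDE pointing at
                      :  * the GART PTB.
                      :  */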
     766             : 
     767             : /**
     768             :  * amdgpu_gmc_vram_mc2pa - calculate vram buffer's physical address from MC
     769             :  * address
     770             :  *
     771             :  * @adev: amdgpu_device pointer
     772             :  * @mc_addr: MC address of buffer
     773             :  */
     774           0 : uint64_t amdgpu_gmc_vram_mc2pa(struct amdgpu_device *adev, uint64_t mc_addr)
     775             : {
     776           0 :         return mc_addr - adev->gmc.vram_start + adev->vm_manager.vram_base_offset;
     777             : }
     778             : 
     779             : /**
     780             :  * amdgpu_gmc_vram_pa - calculate vram buffer object's physical address from
     781             :  * GPU's view
     782             :  *
     783             :  * @adev: amdgpu_device pointer
     784             :  * @bo: amdgpu buffer object
     785             :  */
     786           0 : uint64_t amdgpu_gmc_vram_pa(struct amdgpu_device *adev, struct amdgpu_bo *bo)
     787             : {
     788           0 :         return amdgpu_gmc_vram_mc2pa(adev, amdgpu_bo_gpu_offset(bo));
     789             : }
     790             : 
     791             : /**
     792             :  * amdgpu_gmc_vram_cpu_pa - calculate vram buffer object's physical address
     793             :  * from CPU's view
     794             :  *
     795             :  * @adev: amdgpu_device pointer
     796             :  * @bo: amdgpu buffer object
     797             :  */
     798           0 : uint64_t amdgpu_gmc_vram_cpu_pa(struct amdgpu_device *adev, struct amdgpu_bo *bo)
     799             : {
     800           0 :         return amdgpu_bo_gpu_offset(bo) - adev->gmc.vram_start + adev->gmc.aper_base;
     801             : }
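                      : /* How the three helpers relate (illustrative offset): for a BO at MC
                      :  * address vram_start + 0x1000, amdgpu_gmc_vram_pa() yields
                      :  * vram_base_offset + 0x1000 (the GPU's physical view) while
                      :  * amdgpu_gmc_vram_cpu_pa() yields aper_base + 0x1000 (the CPU's view
                      :  * through the VRAM BAR); both subtract the same vram_start.
                      :  */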
     802             : 
     803           0 : int amdgpu_gmc_vram_checking(struct amdgpu_device *adev)
     804             : {
     805           0 :         struct amdgpu_bo *vram_bo = NULL;
     806           0 :         uint64_t vram_gpu = 0;
     807           0 :         void *vram_ptr = NULL;
     808             : 
     809           0 :         int ret, size = 0x100000;
     810             :         uint8_t cptr[10];
     811             : 
     812           0 :         ret = amdgpu_bo_create_kernel(adev, size, PAGE_SIZE,
     813             :                                 AMDGPU_GEM_DOMAIN_VRAM,
     814             :                                 &vram_bo,
     815             :                                 &vram_gpu,
     816             :                                 &vram_ptr);
     817           0 :         if (ret)
     818             :                 return ret;
     819             : 
     820           0 :         memset(vram_ptr, 0x86, size);
     821           0 :         memset(cptr, 0x86, 10);
     822             : 
      823             :         /*
      824             :          * Check the start, middle, and end of the memory to see whether
      825             :          * each byte matches the pattern 0x86. If so, we assume the vram
      826             :          * bo is usable.
      827             :          *
      828             :          * Note: checking every byte of the whole 1M bo would take too
      829             :          * long, so we just sample these three parts.
      830             :          */
      831           0 :         ret = memcmp(vram_ptr, cptr, 10);
      832           0 :         if (ret)
      833             :                 goto release_buffer;
      834             : 
      835           0 :         ret = memcmp(vram_ptr + (size / 2), cptr, 10);
      836           0 :         if (ret)
      837             :                 goto release_buffer;
      838             : 
      839           0 :         ret = memcmp(vram_ptr + size - 10, cptr, 10);
      840           0 :         if (ret)
      841             :                 goto release_buffer;
      842             : 
      843             : release_buffer:
      844           0 :         amdgpu_bo_free_kernel(&vram_bo, &vram_gpu,
      845             :                         &vram_ptr);
      846             : 
      847           0 :         return ret;
      848             : }

Generated by: LCOV version 1.14