LCOV - code coverage report
Current view: top level - drivers/gpu/drm/amd/amdgpu - amdgpu_vm_pt.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 0 293 0.0 %
Date: 2022-12-09 01:23:36 Functions: 0 16 0.0 %

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0 OR MIT
       2             : /*
       3             :  * Copyright 2022 Advanced Micro Devices, Inc.
       4             :  *
       5             :  * Permission is hereby granted, free of charge, to any person obtaining a
       6             :  * copy of this software and associated documentation files (the "Software"),
       7             :  * to deal in the Software without restriction, including without limitation
       8             :  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
       9             :  * and/or sell copies of the Software, and to permit persons to whom the
      10             :  * Software is furnished to do so, subject to the following conditions:
      11             :  *
      12             :  * The above copyright notice and this permission notice shall be included in
      13             :  * all copies or substantial portions of the Software.
      14             :  *
      15             :  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
      16             :  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
      17             :  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
      18             :  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
      19             :  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
      20             :  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
      21             :  * OTHER DEALINGS IN THE SOFTWARE.
      22             :  */
      23             : 
      24             : #include <drm/drm_drv.h>
      25             : 
      26             : #include "amdgpu.h"
      27             : #include "amdgpu_trace.h"
      28             : #include "amdgpu_vm.h"
      29             : 
      30             : /*
       31             :  * amdgpu_vm_pt_cursor - state for for_each_amdgpu_vm_pt_dfs_safe
      32             :  */
      33             : struct amdgpu_vm_pt_cursor {
      34             :         uint64_t pfn;
      35             :         struct amdgpu_vm_bo_base *parent;
      36             :         struct amdgpu_vm_bo_base *entry;
      37             :         unsigned int level;
      38             : };
      39             : 
      40             : /**
      41             :  * amdgpu_vm_pt_level_shift - return the addr shift for each level
      42             :  *
      43             :  * @adev: amdgpu_device pointer
      44             :  * @level: VMPT level
      45             :  *
      46             :  * Returns:
      47             :  * The number of bits the pfn needs to be right shifted for a level.
      48             :  */
      49             : static unsigned int amdgpu_vm_pt_level_shift(struct amdgpu_device *adev,
      50             :                                              unsigned int level)
      51             : {
      52           0 :         switch (level) {
      53             :         case AMDGPU_VM_PDB2:
      54             :         case AMDGPU_VM_PDB1:
      55             :         case AMDGPU_VM_PDB0:
      56           0 :                 return 9 * (AMDGPU_VM_PDB0 - level) +
      57           0 :                         adev->vm_manager.block_size;
      58             :         case AMDGPU_VM_PTB:
      59             :                 return 0;
      60             :         default:
      61             :                 return ~0;
      62             :         }
      63             : }
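
/*
 * Worked example (added for illustration, assuming the common configuration
 * block_size = 9): the formula above gives a shift of 9 for AMDGPU_VM_PDB0,
 * 18 for AMDGPU_VM_PDB1 and 27 for AMDGPU_VM_PDB2, while AMDGPU_VM_PTB is 0
 * because page-table indices start at bit 0 of the pfn.
 */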
      64             : 
      65             : /**
      66             :  * amdgpu_vm_pt_num_entries - return the number of entries in a PD/PT
      67             :  *
      68             :  * @adev: amdgpu_device pointer
      69             :  * @level: VMPT level
      70             :  *
      71             :  * Returns:
      72             :  * The number of entries in a page directory or page table.
      73             :  */
      74           0 : static unsigned int amdgpu_vm_pt_num_entries(struct amdgpu_device *adev,
      75             :                                              unsigned int level)
      76             : {
      77             :         unsigned int shift;
      78             : 
      79           0 :         shift = amdgpu_vm_pt_level_shift(adev, adev->vm_manager.root_level);
      80           0 :         if (level == adev->vm_manager.root_level)
      81             :                 /* For the root directory */
      82           0 :                 return round_up(adev->vm_manager.max_pfn, 1ULL << shift)
      83           0 :                         >> shift;
      84           0 :         else if (level != AMDGPU_VM_PTB)
      85             :                 /* Everything in between */
      86             :                 return 512;
      87             : 
      88             :         /* For the page tables on the leaves */
      89           0 :         return AMDGPU_VM_PTE_COUNT(adev);
      90             : }
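
/*
 * Worked example (added for illustration; the concrete values are assumed,
 * not taken from the driver): for a 48-bit address space max_pfn is
 * 1ULL << 36 (4KB GPU pages). With root_level = AMDGPU_VM_PDB2 and
 * block_size = 9 the shift is 27, so the root PD holds
 * (1ULL << 36) >> 27 = 512 entries, the same as the intermediate levels.
 */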
      91             : 
      92             : /**
      93             :  * amdgpu_vm_pt_num_ats_entries - return the number of ATS entries in the root PD
      94             :  *
      95             :  * @adev: amdgpu_device pointer
      96             :  *
      97             :  * Returns:
       98             :  * The number of entries in the root page directory which need the ATS setting.
      99             :  */
     100             : static unsigned int amdgpu_vm_pt_num_ats_entries(struct amdgpu_device *adev)
     101             : {
     102             :         unsigned int shift;
     103             : 
     104           0 :         shift = amdgpu_vm_pt_level_shift(adev, adev->vm_manager.root_level);
     105           0 :         return AMDGPU_GMC_HOLE_START >> (shift + AMDGPU_GPU_PAGE_SHIFT);
     106             : }
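
/*
 * Worked example (added for illustration; assumes AMDGPU_GMC_HOLE_START marks
 * the start of the upper half of a 48-bit address space, i.e. 1ULL << 47, and
 * a root shift of 27 as above): the number of ATS entries would then be
 * (1ULL << 47) >> (27 + 12) = 256, i.e. the lower half of a 512-entry root PD.
 */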
     107             : 
     108             : /**
     109             :  * amdgpu_vm_pt_entries_mask - the mask to get the entry number of a PD/PT
     110             :  *
     111             :  * @adev: amdgpu_device pointer
     112             :  * @level: VMPT level
     113             :  *
     114             :  * Returns:
     115             :  * The mask to extract the entry number of a PD/PT from an address.
     116             :  */
     117             : static uint32_t amdgpu_vm_pt_entries_mask(struct amdgpu_device *adev,
     118             :                                           unsigned int level)
     119             : {
     120           0 :         if (level <= adev->vm_manager.root_level)
     121             :                 return 0xffffffff;
     122           0 :         else if (level != AMDGPU_VM_PTB)
     123             :                 return 0x1ff;
     124             :         else
     125           0 :                 return AMDGPU_VM_PTE_COUNT(adev) - 1;
     126             : }
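
/*
 * Note (added for clarity): intermediate directories use the 9-bit mask 0x1ff
 * (512 entries); the root level returns 0xffffffff because the root PD can be
 * larger than 512 entries and its index must not be truncated. The PTB mask
 * follows AMDGPU_VM_PTE_COUNT(), i.e. the configured block size.
 */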
     127             : 
     128             : /**
     129             :  * amdgpu_vm_pt_size - returns the size of the page table in bytes
     130             :  *
     131             :  * @adev: amdgpu_device pointer
     132             :  * @level: VMPT level
     133             :  *
     134             :  * Returns:
     135             :  * The size of the BO for a page directory or page table in bytes.
     136             :  */
     137             : static unsigned int amdgpu_vm_pt_size(struct amdgpu_device *adev,
     138             :                                       unsigned int level)
     139             : {
     140           0 :         return AMDGPU_GPU_PAGE_ALIGN(amdgpu_vm_pt_num_entries(adev, level) * 8);
     141             : }
     142             : 
     143             : /**
     144             :  * amdgpu_vm_pt_parent - get the parent page directory
     145             :  *
     146             :  * @pt: child page table
     147             :  *
     148             :  * Helper to get the parent entry for the child page table. NULL if we are at
     149             :  * the root page directory.
     150             :  */
     151             : static struct amdgpu_vm_bo_base *
     152             : amdgpu_vm_pt_parent(struct amdgpu_vm_bo_base *pt)
     153             : {
     154           0 :         struct amdgpu_bo *parent = pt->bo->parent;
     155             : 
     156           0 :         if (!parent)
     157             :                 return NULL;
     158             : 
     159           0 :         return parent->vm_bo;
     160             : }
     161             : 
     162             : /**
     163             :  * amdgpu_vm_pt_start - start PD/PT walk
     164             :  *
     165             :  * @adev: amdgpu_device pointer
     166             :  * @vm: amdgpu_vm structure
     167             :  * @start: start address of the walk
     168             :  * @cursor: state to initialize
     169             :  *
      170             :  * Initialize an amdgpu_vm_pt_cursor to start a walk.
     171             :  */
     172             : static void amdgpu_vm_pt_start(struct amdgpu_device *adev,
     173             :                                struct amdgpu_vm *vm, uint64_t start,
     174             :                                struct amdgpu_vm_pt_cursor *cursor)
     175             : {
     176           0 :         cursor->pfn = start;
     177           0 :         cursor->parent = NULL;
     178           0 :         cursor->entry = &vm->root;
     179           0 :         cursor->level = adev->vm_manager.root_level;
     180             : }
     181             : 
     182             : /**
     183             :  * amdgpu_vm_pt_descendant - go to child node
     184             :  *
     185             :  * @adev: amdgpu_device pointer
     186             :  * @cursor: current state
     187             :  *
     188             :  * Walk to the child node of the current node.
     189             :  * Returns:
     190             :  * True if the walk was possible, false otherwise.
     191             :  */
     192           0 : static bool amdgpu_vm_pt_descendant(struct amdgpu_device *adev,
     193             :                                     struct amdgpu_vm_pt_cursor *cursor)
     194             : {
     195             :         unsigned int mask, shift, idx;
     196             : 
     197           0 :         if ((cursor->level == AMDGPU_VM_PTB) || !cursor->entry ||
     198           0 :             !cursor->entry->bo)
     199             :                 return false;
     200             : 
     201           0 :         mask = amdgpu_vm_pt_entries_mask(adev, cursor->level);
     202           0 :         shift = amdgpu_vm_pt_level_shift(adev, cursor->level);
     203             : 
     204           0 :         ++cursor->level;
     205           0 :         idx = (cursor->pfn >> shift) & mask;
     206           0 :         cursor->parent = cursor->entry;
     207           0 :         cursor->entry = &to_amdgpu_bo_vm(cursor->entry->bo)->entries[idx];
     208           0 :         return true;
     209             : }
     210             : 
     211             : /**
     212             :  * amdgpu_vm_pt_sibling - go to sibling node
     213             :  *
     214             :  * @adev: amdgpu_device pointer
     215             :  * @cursor: current state
     216             :  *
     217             :  * Walk to the sibling node of the current node.
     218             :  * Returns:
     219             :  * True if the walk was possible, false otherwise.
     220             :  */
     221           0 : static bool amdgpu_vm_pt_sibling(struct amdgpu_device *adev,
     222             :                                  struct amdgpu_vm_pt_cursor *cursor)
     223             : {
     224             : 
     225             :         unsigned int shift, num_entries;
     226             :         struct amdgpu_bo_vm *parent;
     227             : 
     228             :         /* Root doesn't have a sibling */
     229           0 :         if (!cursor->parent)
     230             :                 return false;
     231             : 
      232             :         /* Go to our parent and see if we have a sibling */
     233           0 :         shift = amdgpu_vm_pt_level_shift(adev, cursor->level - 1);
     234           0 :         num_entries = amdgpu_vm_pt_num_entries(adev, cursor->level - 1);
     235           0 :         parent = to_amdgpu_bo_vm(cursor->parent->bo);
     236             : 
     237           0 :         if (cursor->entry == &parent->entries[num_entries - 1])
     238             :                 return false;
     239             : 
     240           0 :         cursor->pfn += 1ULL << shift;
     241           0 :         cursor->pfn &= ~((1ULL << shift) - 1);
     242           0 :         ++cursor->entry;
     243           0 :         return true;
     244             : }
     245             : 
     246             : /**
     247             :  * amdgpu_vm_pt_ancestor - go to parent node
     248             :  *
     249             :  * @cursor: current state
     250             :  *
     251             :  * Walk to the parent node of the current node.
     252             :  * Returns:
     253             :  * True if the walk was possible, false otherwise.
     254             :  */
     255             : static bool amdgpu_vm_pt_ancestor(struct amdgpu_vm_pt_cursor *cursor)
     256             : {
     257           0 :         if (!cursor->parent)
     258             :                 return false;
     259             : 
     260           0 :         --cursor->level;
     261           0 :         cursor->entry = cursor->parent;
     262           0 :         cursor->parent = amdgpu_vm_pt_parent(cursor->parent);
     263             :         return true;
     264             : }
     265             : 
     266             : /**
      267             :  * amdgpu_vm_pt_next - get next PD/PT in hierarchy
     268             :  *
     269             :  * @adev: amdgpu_device pointer
     270             :  * @cursor: current state
     271             :  *
     272             :  * Walk the PD/PT tree to the next node.
     273             :  */
     274           0 : static void amdgpu_vm_pt_next(struct amdgpu_device *adev,
     275             :                               struct amdgpu_vm_pt_cursor *cursor)
     276             : {
     277             :         /* First try a newborn child */
     278           0 :         if (amdgpu_vm_pt_descendant(adev, cursor))
     279             :                 return;
     280             : 
      281             :         /* If that didn't work, try to find a sibling */
     282           0 :         while (!amdgpu_vm_pt_sibling(adev, cursor)) {
     283             :                 /* No sibling, go to our parents and grandparents */
     284           0 :                 if (!amdgpu_vm_pt_ancestor(cursor)) {
     285           0 :                         cursor->pfn = ~0ll;
     286           0 :                         return;
     287             :                 }
     288             :         }
     289             : }
     290             : 
     291             : /**
      292             :  * amdgpu_vm_pt_first_dfs - start a depth-first search
     293             :  *
     294             :  * @adev: amdgpu_device structure
     295             :  * @vm: amdgpu_vm structure
     296             :  * @start: optional cursor to start with
     297             :  * @cursor: state to initialize
     298             :  *
      299             :  * Starts a depth-first traversal of the PD/PT tree.
     300             :  */
     301           0 : static void amdgpu_vm_pt_first_dfs(struct amdgpu_device *adev,
     302             :                                    struct amdgpu_vm *vm,
     303             :                                    struct amdgpu_vm_pt_cursor *start,
     304             :                                    struct amdgpu_vm_pt_cursor *cursor)
     305             : {
     306           0 :         if (start)
     307           0 :                 *cursor = *start;
     308             :         else
     309           0 :                 amdgpu_vm_pt_start(adev, vm, 0, cursor);
     310             : 
     311           0 :         while (amdgpu_vm_pt_descendant(adev, cursor))
     312             :                 ;
     313           0 : }
     314             : 
     315             : /**
      316             :  * amdgpu_vm_pt_continue_dfs - check if the depth-first search should continue
     317             :  *
     318             :  * @start: starting point for the search
     319             :  * @entry: current entry
     320             :  *
     321             :  * Returns:
     322             :  * True when the search should continue, false otherwise.
     323             :  */
     324             : static bool amdgpu_vm_pt_continue_dfs(struct amdgpu_vm_pt_cursor *start,
     325             :                                       struct amdgpu_vm_bo_base *entry)
     326             : {
     327           0 :         return entry && (!start || entry != start->entry);
     328             : }
     329             : 
     330             : /**
      331             :  * amdgpu_vm_pt_next_dfs - get the next node for a depth-first search
     332             :  *
     333             :  * @adev: amdgpu_device structure
     334             :  * @cursor: current state
     335             :  *
      336             :  * Move the cursor to the next node in a depth-first search.
     337             :  */
     338           0 : static void amdgpu_vm_pt_next_dfs(struct amdgpu_device *adev,
     339             :                                   struct amdgpu_vm_pt_cursor *cursor)
     340             : {
     341           0 :         if (!cursor->entry)
     342             :                 return;
     343             : 
     344           0 :         if (!cursor->parent)
     345           0 :                 cursor->entry = NULL;
     346           0 :         else if (amdgpu_vm_pt_sibling(adev, cursor))
     347           0 :                 while (amdgpu_vm_pt_descendant(adev, cursor))
     348             :                         ;
     349             :         else
     350             :                 amdgpu_vm_pt_ancestor(cursor);
     351             : }
     352             : 
     353             : /*
      354             :  * for_each_amdgpu_vm_pt_dfs_safe - safe depth-first search of all PDs/PTs
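 *
 * Note (added for clarity): the cursor is advanced to the next node before
 * the loop body runs on @entry, so the body may free @entry without breaking
 * the traversal. A minimal usage sketch, mirroring amdgpu_vm_pt_free_dfs()
 * below:
 *
 *	struct amdgpu_vm_pt_cursor cursor;
 *	struct amdgpu_vm_bo_base *entry;
 *
 *	for_each_amdgpu_vm_pt_dfs_safe(adev, vm, NULL, cursor, entry)
 *		amdgpu_vm_pt_free(entry);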
     355             :  */
     356             : #define for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry)          \
     357             :         for (amdgpu_vm_pt_first_dfs((adev), (vm), (start), &(cursor)),              \
     358             :              (entry) = (cursor).entry, amdgpu_vm_pt_next_dfs((adev), &(cursor));\
     359             :              amdgpu_vm_pt_continue_dfs((start), (entry));                       \
     360             :              (entry) = (cursor).entry, amdgpu_vm_pt_next_dfs((adev), &(cursor)))
     361             : 
     362             : /**
     363             :  * amdgpu_vm_pt_clear - initially clear the PDs/PTs
     364             :  *
     365             :  * @adev: amdgpu_device pointer
     366             :  * @vm: VM to clear BO from
     367             :  * @vmbo: BO to clear
     368             :  * @immediate: use an immediate update
     369             :  *
     370             :  * Root PD needs to be reserved when calling this.
     371             :  *
     372             :  * Returns:
     373             :  * 0 on success, errno otherwise.
     374             :  */
     375           0 : int amdgpu_vm_pt_clear(struct amdgpu_device *adev, struct amdgpu_vm *vm,
     376             :                        struct amdgpu_bo_vm *vmbo, bool immediate)
     377             : {
     378           0 :         unsigned int level = adev->vm_manager.root_level;
     379           0 :         struct ttm_operation_ctx ctx = { true, false };
     380             :         struct amdgpu_vm_update_params params;
     381           0 :         struct amdgpu_bo *ancestor = &vmbo->bo;
     382             :         unsigned int entries, ats_entries;
     383           0 :         struct amdgpu_bo *bo = &vmbo->bo;
     384             :         uint64_t addr;
     385             :         int r, idx;
     386             : 
     387             :         /* Figure out our place in the hierarchy */
     388           0 :         if (ancestor->parent) {
     389           0 :                 ++level;
     390           0 :                 while (ancestor->parent->parent) {
     391           0 :                         ++level;
     392           0 :                         ancestor = ancestor->parent;
     393             :                 }
     394             :         }
     395             : 
     396           0 :         entries = amdgpu_bo_size(bo) / 8;
     397           0 :         if (!vm->pte_support_ats) {
     398             :                 ats_entries = 0;
     399             : 
     400           0 :         } else if (!bo->parent) {
     401           0 :                 ats_entries = amdgpu_vm_pt_num_ats_entries(adev);
     402           0 :                 ats_entries = min(ats_entries, entries);
     403           0 :                 entries -= ats_entries;
     404             : 
     405             :         } else {
     406             :                 struct amdgpu_vm_bo_base *pt;
     407             : 
     408           0 :                 pt = ancestor->vm_bo;
     409           0 :                 ats_entries = amdgpu_vm_pt_num_ats_entries(adev);
     410           0 :                 if ((pt - to_amdgpu_bo_vm(vm->root.bo)->entries) >=
     411             :                     ats_entries) {
     412             :                         ats_entries = 0;
     413             :                 } else {
     414           0 :                         ats_entries = entries;
     415           0 :                         entries = 0;
     416             :                 }
     417             :         }
     418             : 
     419           0 :         r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
     420           0 :         if (r)
     421             :                 return r;
     422             : 
     423           0 :         if (vmbo->shadow) {
     424           0 :                 struct amdgpu_bo *shadow = vmbo->shadow;
     425             : 
     426           0 :                 r = ttm_bo_validate(&shadow->tbo, &shadow->placement, &ctx);
     427           0 :                 if (r)
     428             :                         return r;
     429             :         }
     430             : 
     431           0 :         if (!drm_dev_enter(adev_to_drm(adev), &idx))
     432             :                 return -ENODEV;
     433             : 
     434           0 :         r = vm->update_funcs->map_table(vmbo);
     435           0 :         if (r)
     436             :                 goto exit;
     437             : 
     438           0 :         memset(&params, 0, sizeof(params));
     439           0 :         params.adev = adev;
     440           0 :         params.vm = vm;
     441           0 :         params.immediate = immediate;
     442             : 
     443           0 :         r = vm->update_funcs->prepare(&params, NULL, AMDGPU_SYNC_EXPLICIT);
     444           0 :         if (r)
     445             :                 goto exit;
     446             : 
     447           0 :         addr = 0;
     448           0 :         if (ats_entries) {
     449           0 :                 uint64_t value = 0, flags;
     450             : 
     451           0 :                 flags = AMDGPU_PTE_DEFAULT_ATC;
     452           0 :                 if (level != AMDGPU_VM_PTB) {
     453             :                         /* Handle leaf PDEs as PTEs */
     454           0 :                         flags |= AMDGPU_PDE_PTE;
     455           0 :                         amdgpu_gmc_get_vm_pde(adev, level, &value, &flags);
     456             :                 }
     457             : 
     458           0 :                 r = vm->update_funcs->update(&params, vmbo, addr, 0,
     459             :                                              ats_entries, value, flags);
     460           0 :                 if (r)
     461             :                         goto exit;
     462             : 
     463           0 :                 addr += ats_entries * 8;
     464             :         }
     465             : 
     466           0 :         if (entries) {
     467           0 :                 uint64_t value = 0, flags = 0;
     468             : 
     469           0 :                 if (adev->asic_type >= CHIP_VEGA10) {
     470           0 :                         if (level != AMDGPU_VM_PTB) {
     471             :                                 /* Handle leaf PDEs as PTEs */
     472           0 :                                 flags |= AMDGPU_PDE_PTE;
     473           0 :                                 amdgpu_gmc_get_vm_pde(adev, level,
     474             :                                                       &value, &flags);
     475             :                         } else {
     476             :                                 /* Workaround for fault priority problem on GMC9 */
     477           0 :                                 flags = AMDGPU_PTE_EXECUTABLE;
     478             :                         }
     479             :                 }
     480             : 
     481           0 :                 r = vm->update_funcs->update(&params, vmbo, addr, 0, entries,
     482             :                                              value, flags);
     483           0 :                 if (r)
     484             :                         goto exit;
     485             :         }
     486             : 
     487           0 :         r = vm->update_funcs->commit(&params, NULL);
     488             : exit:
     489           0 :         drm_dev_exit(idx);
     490           0 :         return r;
     491             : }
     492             : 
     493             : /**
     494             :  * amdgpu_vm_pt_create - create bo for PD/PT
     495             :  *
     496             :  * @adev: amdgpu_device pointer
     497             :  * @vm: requesting vm
     498             :  * @level: the page table level
      499             :  * @immediate: use an immediate update
     500             :  * @vmbo: pointer to the buffer object pointer
     501             :  */
     502           0 : int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct amdgpu_vm *vm,
     503             :                         int level, bool immediate, struct amdgpu_bo_vm **vmbo)
     504             : {
     505             :         struct amdgpu_bo_param bp;
     506             :         struct amdgpu_bo *bo;
     507             :         struct dma_resv *resv;
     508             :         unsigned int num_entries;
     509             :         int r;
     510             : 
     511           0 :         memset(&bp, 0, sizeof(bp));
     512             : 
     513           0 :         bp.size = amdgpu_vm_pt_size(adev, level);
     514           0 :         bp.byte_align = AMDGPU_GPU_PAGE_SIZE;
     515           0 :         bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
     516           0 :         bp.domain = amdgpu_bo_get_preferred_domain(adev, bp.domain);
     517           0 :         bp.flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
     518             :                 AMDGPU_GEM_CREATE_CPU_GTT_USWC;
     519             : 
     520           0 :         if (level < AMDGPU_VM_PTB)
     521           0 :                 num_entries = amdgpu_vm_pt_num_entries(adev, level);
     522             :         else
     523             :                 num_entries = 0;
     524             : 
     525           0 :         bp.bo_ptr_size = struct_size((*vmbo), entries, num_entries);
     526             : 
     527           0 :         if (vm->use_cpu_for_update)
     528           0 :                 bp.flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
     529             : 
     530           0 :         bp.type = ttm_bo_type_kernel;
     531           0 :         bp.no_wait_gpu = immediate;
     532           0 :         if (vm->root.bo)
     533           0 :                 bp.resv = vm->root.bo->tbo.base.resv;
     534             : 
     535           0 :         r = amdgpu_bo_create_vm(adev, &bp, vmbo);
     536           0 :         if (r)
     537             :                 return r;
     538             : 
     539           0 :         bo = &(*vmbo)->bo;
     540           0 :         if (vm->is_compute_context || (adev->flags & AMD_IS_APU)) {
     541           0 :                 (*vmbo)->shadow = NULL;
     542           0 :                 return 0;
     543             :         }
     544             : 
     545           0 :         if (!bp.resv)
     546           0 :                 WARN_ON(dma_resv_lock(bo->tbo.base.resv,
     547             :                                       NULL));
     548           0 :         resv = bp.resv;
     549           0 :         memset(&bp, 0, sizeof(bp));
     550           0 :         bp.size = amdgpu_vm_pt_size(adev, level);
     551           0 :         bp.domain = AMDGPU_GEM_DOMAIN_GTT;
     552           0 :         bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC;
     553           0 :         bp.type = ttm_bo_type_kernel;
     554           0 :         bp.resv = bo->tbo.base.resv;
     555           0 :         bp.bo_ptr_size = sizeof(struct amdgpu_bo);
     556             : 
     557           0 :         r = amdgpu_bo_create(adev, &bp, &(*vmbo)->shadow);
     558             : 
     559           0 :         if (!resv)
     560           0 :                 dma_resv_unlock(bo->tbo.base.resv);
     561             : 
     562           0 :         if (r) {
     563           0 :                 amdgpu_bo_unref(&bo);
     564           0 :                 return r;
     565             :         }
     566             : 
     567           0 :         (*vmbo)->shadow->parent = amdgpu_bo_ref(bo);
     568           0 :         amdgpu_bo_add_to_shadow_list(*vmbo);
     569             : 
     570           0 :         return 0;
     571             : }
     572             : 
     573             : /**
     574             :  * amdgpu_vm_pt_alloc - Allocate a specific page table
     575             :  *
     576             :  * @adev: amdgpu_device pointer
     577             :  * @vm: VM to allocate page tables for
     578             :  * @cursor: Which page table to allocate
     579             :  * @immediate: use an immediate update
     580             :  *
     581             :  * Make sure a specific page table or directory is allocated.
     582             :  *
     583             :  * Returns:
      584             :  * 0 on success (whether the page table was newly allocated or already
      585             :  * present), negative errno if an error occurred.
     586             :  */
     587           0 : static int amdgpu_vm_pt_alloc(struct amdgpu_device *adev,
     588             :                               struct amdgpu_vm *vm,
     589             :                               struct amdgpu_vm_pt_cursor *cursor,
     590             :                               bool immediate)
     591             : {
     592           0 :         struct amdgpu_vm_bo_base *entry = cursor->entry;
     593             :         struct amdgpu_bo *pt_bo;
     594             :         struct amdgpu_bo_vm *pt;
     595             :         int r;
     596             : 
     597           0 :         if (entry->bo)
     598             :                 return 0;
     599             : 
     600           0 :         r = amdgpu_vm_pt_create(adev, vm, cursor->level, immediate, &pt);
     601           0 :         if (r)
     602             :                 return r;
     603             : 
      604             :         /* Keep a reference to the root directory to avoid
      605             :          * freeing the page tables in the wrong order.
     606             :          */
     607           0 :         pt_bo = &pt->bo;
     608           0 :         pt_bo->parent = amdgpu_bo_ref(cursor->parent->bo);
     609           0 :         amdgpu_vm_bo_base_init(entry, vm, pt_bo);
     610           0 :         r = amdgpu_vm_pt_clear(adev, vm, pt, immediate);
     611           0 :         if (r)
     612             :                 goto error_free_pt;
     613             : 
     614             :         return 0;
     615             : 
     616             : error_free_pt:
     617           0 :         amdgpu_bo_unref(&pt->shadow);
     618           0 :         amdgpu_bo_unref(&pt_bo);
     619           0 :         return r;
     620             : }
     621             : 
     622             : /**
     623             :  * amdgpu_vm_pt_free - free one PD/PT
     624             :  *
     625             :  * @entry: PDE to free
     626             :  */
     627           0 : static void amdgpu_vm_pt_free(struct amdgpu_vm_bo_base *entry)
     628             : {
     629             :         struct amdgpu_bo *shadow;
     630             : 
     631           0 :         if (!entry->bo)
     632           0 :                 return;
     633           0 :         shadow = amdgpu_bo_shadowed(entry->bo);
     634           0 :         if (shadow) {
     635           0 :                 ttm_bo_set_bulk_move(&shadow->tbo, NULL);
     636           0 :                 amdgpu_bo_unref(&shadow);
     637             :         }
     638           0 :         ttm_bo_set_bulk_move(&entry->bo->tbo, NULL);
     639           0 :         entry->bo->vm_bo = NULL;
     640           0 :         list_del(&entry->vm_status);
     641           0 :         amdgpu_bo_unref(&entry->bo);
     642             : }
     643             : 
     644             : /**
     645             :  * amdgpu_vm_pt_free_dfs - free PD/PT levels
     646             :  *
     647             :  * @adev: amdgpu device structure
     648             :  * @vm: amdgpu vm structure
     649             :  * @start: optional cursor where to start freeing PDs/PTs
     650             :  *
     651             :  * Free the page directory or page table level and all sub levels.
     652             :  */
     653           0 : static void amdgpu_vm_pt_free_dfs(struct amdgpu_device *adev,
     654             :                                   struct amdgpu_vm *vm,
     655             :                                   struct amdgpu_vm_pt_cursor *start)
     656             : {
     657             :         struct amdgpu_vm_pt_cursor cursor;
     658             :         struct amdgpu_vm_bo_base *entry;
     659             : 
     660           0 :         for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry)
     661           0 :                 amdgpu_vm_pt_free(entry);
     662             : 
     663           0 :         if (start)
     664           0 :                 amdgpu_vm_pt_free(start->entry);
     665           0 : }
     666             : 
     667             : /**
     668             :  * amdgpu_vm_pt_free_root - free root PD
     669             :  * @adev: amdgpu device structure
     670             :  * @vm: amdgpu vm structure
     671             :  *
     672             :  * Free the root page directory and everything below it.
     673             :  */
     674           0 : void amdgpu_vm_pt_free_root(struct amdgpu_device *adev, struct amdgpu_vm *vm)
     675             : {
     676           0 :         amdgpu_vm_pt_free_dfs(adev, vm, NULL);
     677           0 : }
     678             : 
     679             : /**
     680             :  * amdgpu_vm_pt_is_root_clean - check if a root PD is clean
     681             :  *
     682             :  * @adev: amdgpu_device pointer
     683             :  * @vm: the VM to check
     684             :  *
     685             :  * Check all entries of the root PD, if any subsequent PDs are allocated,
     686             :  * it means there are page table creating and filling, and is no a clean
     687             :  * VM
     688             :  *
     689             :  * Returns:
     690             :  *      0 if this VM is clean
     691             :  */
     692           0 : bool amdgpu_vm_pt_is_root_clean(struct amdgpu_device *adev,
     693             :                                 struct amdgpu_vm *vm)
     694             : {
     695           0 :         enum amdgpu_vm_level root = adev->vm_manager.root_level;
     696           0 :         unsigned int entries = amdgpu_vm_pt_num_entries(adev, root);
     697           0 :         unsigned int i = 0;
     698             : 
     699           0 :         for (i = 0; i < entries; i++) {
     700           0 :                 if (to_amdgpu_bo_vm(vm->root.bo)->entries[i].bo)
     701             :                         return false;
     702             :         }
     703             :         return true;
     704             : }
     705             : 
     706             : /**
     707             :  * amdgpu_vm_pde_update - update a single level in the hierarchy
     708             :  *
     709             :  * @params: parameters for the update
     710             :  * @entry: entry to update
     711             :  *
     712             :  * Makes sure the requested entry in parent is up to date.
     713             :  */
     714           0 : int amdgpu_vm_pde_update(struct amdgpu_vm_update_params *params,
     715             :                          struct amdgpu_vm_bo_base *entry)
     716             : {
     717           0 :         struct amdgpu_vm_bo_base *parent = amdgpu_vm_pt_parent(entry);
     718           0 :         struct amdgpu_bo *bo = parent->bo, *pbo;
     719           0 :         struct amdgpu_vm *vm = params->vm;
     720             :         uint64_t pde, pt, flags;
     721             :         unsigned int level;
     722             : 
     723           0 :         for (level = 0, pbo = bo->parent; pbo; ++level)
     724           0 :                 pbo = pbo->parent;
     725             : 
     726           0 :         level += params->adev->vm_manager.root_level;
     727           0 :         amdgpu_gmc_get_pde_for_bo(entry->bo, level, &pt, &flags);
     728           0 :         pde = (entry - to_amdgpu_bo_vm(parent->bo)->entries) * 8;
     729           0 :         return vm->update_funcs->update(params, to_amdgpu_bo_vm(bo), pde, pt,
     730             :                                         1, 0, flags);
     731             : }
     732             : 
     733             : /*
     734             :  * amdgpu_vm_pte_update_flags - figure out flags for PTE updates
     735             :  *
     736             :  * Make sure to set the right flags for the PTEs at the desired level.
     737             :  */
     738           0 : static void amdgpu_vm_pte_update_flags(struct amdgpu_vm_update_params *params,
     739             :                                        struct amdgpu_bo_vm *pt,
     740             :                                        unsigned int level,
     741             :                                        uint64_t pe, uint64_t addr,
     742             :                                        unsigned int count, uint32_t incr,
     743             :                                        uint64_t flags)
     744             : 
     745             : {
     746           0 :         if (level != AMDGPU_VM_PTB) {
     747           0 :                 flags |= AMDGPU_PDE_PTE;
     748           0 :                 amdgpu_gmc_get_vm_pde(params->adev, level, &addr, &flags);
     749             : 
     750           0 :         } else if (params->adev->asic_type >= CHIP_VEGA10 &&
     751           0 :                    !(flags & AMDGPU_PTE_VALID) &&
     752             :                    !(flags & AMDGPU_PTE_PRT)) {
     753             : 
     754             :                 /* Workaround for fault priority problem on GMC9 */
     755           0 :                 flags |= AMDGPU_PTE_EXECUTABLE;
     756             :         }
     757             : 
     758           0 :         params->vm->update_funcs->update(params, pt, pe, addr, count, incr,
     759             :                                          flags);
     760           0 : }
     761             : 
     762             : /**
     763             :  * amdgpu_vm_pte_fragment - get fragment for PTEs
     764             :  *
     765             :  * @params: see amdgpu_vm_update_params definition
     766             :  * @start: first PTE to handle
     767             :  * @end: last PTE to handle
     768             :  * @flags: hw mapping flags
     769             :  * @frag: resulting fragment size
     770             :  * @frag_end: end of this fragment
     771             :  *
     772             :  * Returns the first possible fragment for the start and end address.
     773             :  */
     774             : static void amdgpu_vm_pte_fragment(struct amdgpu_vm_update_params *params,
     775             :                                    uint64_t start, uint64_t end, uint64_t flags,
     776             :                                    unsigned int *frag, uint64_t *frag_end)
     777             : {
     778             :         /**
     779             :          * The MC L1 TLB supports variable sized pages, based on a fragment
     780             :          * field in the PTE. When this field is set to a non-zero value, page
     781             :          * granularity is increased from 4KB to (1 << (12 + frag)). The PTE
     782             :          * flags are considered valid for all PTEs within the fragment range
     783             :          * and corresponding mappings are assumed to be physically contiguous.
     784             :          *
     785             :          * The L1 TLB can store a single PTE for the whole fragment,
     786             :          * significantly increasing the space available for translation
     787             :          * caching. This leads to large improvements in throughput when the
     788             :          * TLB is under pressure.
     789             :          *
     790             :          * The L2 TLB distributes small and large fragments into two
     791             :          * asymmetric partitions. The large fragment cache is significantly
     792             :          * larger. Thus, we try to use large fragments wherever possible.
     793             :          * Userspace can support this by aligning virtual base address and
     794             :          * allocation size to the fragment size.
     795             :          *
     796             :          * Starting with Vega10 the fragment size only controls the L1. The L2
      797             :          * is now directly fed with small/huge/giant pages from the walker.
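         *
         * Worked example (added for illustration): a fragment value of 4 gives
         * a granularity of 1 << (12 + 4) = 64KB, i.e. 16 consecutive 4KB pages
         * that are mapped physically contiguous and share the same PTE flags.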
     798             :          */
     799             :         unsigned int max_frag;
     800             : 
     801           0 :         if (params->adev->asic_type < CHIP_VEGA10)
     802           0 :                 max_frag = params->adev->vm_manager.fragment_size;
     803             :         else
     804             :                 max_frag = 31;
     805             : 
      806             :         /* system pages are not physically contiguous */
     807           0 :         if (params->pages_addr) {
     808             :                 *frag = 0;
     809             :                 *frag_end = end;
     810             :                 return;
     811             :         }
     812             : 
     813             :         /* This intentionally wraps around if no bit is set */
     814           0 :         *frag = min_t(unsigned int, ffs(start) - 1, fls64(end - start) - 1);
     815           0 :         if (*frag >= max_frag) {
     816           0 :                 *frag = max_frag;
     817           0 :                 *frag_end = end & ~((1ULL << max_frag) - 1);
     818             :         } else {
     819           0 :                 *frag_end = start + (1 << *frag);
     820             :         }
     821             : }
     822             : 
     823             : /**
     824             :  * amdgpu_vm_ptes_update - make sure that page tables are valid
     825             :  *
     826             :  * @params: see amdgpu_vm_update_params definition
     827             :  * @start: start of GPU address range
     828             :  * @end: end of GPU address range
      829             :  * @dst: destination address to map to; advanced to the next dst inside the function
     830             :  * @flags: mapping flags
     831             :  *
     832             :  * Update the page tables in the range @start - @end.
     833             :  *
     834             :  * Returns:
      835             :  * 0 for success, negative errno for failure.
     836             :  */
     837           0 : int amdgpu_vm_ptes_update(struct amdgpu_vm_update_params *params,
     838             :                           uint64_t start, uint64_t end,
     839             :                           uint64_t dst, uint64_t flags)
     840             : {
     841           0 :         struct amdgpu_device *adev = params->adev;
     842             :         struct amdgpu_vm_pt_cursor cursor;
     843           0 :         uint64_t frag_start = start, frag_end;
     844             :         unsigned int frag;
     845             :         int r;
     846             : 
     847             :         /* figure out the initial fragment */
     848           0 :         amdgpu_vm_pte_fragment(params, frag_start, end, flags, &frag,
     849             :                                &frag_end);
     850             : 
     851             :         /* walk over the address space and update the PTs */
     852           0 :         amdgpu_vm_pt_start(adev, params->vm, start, &cursor);
     853           0 :         while (cursor.pfn < end) {
     854             :                 unsigned int shift, parent_shift, mask;
     855             :                 uint64_t incr, entry_end, pe_start;
     856             :                 struct amdgpu_bo *pt;
     857             : 
     858           0 :                 if (!params->unlocked) {
     859             :                         /* make sure that the page tables covering the
     860             :                          * address range are actually allocated
     861             :                          */
     862           0 :                         r = amdgpu_vm_pt_alloc(params->adev, params->vm,
     863           0 :                                                &cursor, params->immediate);
     864           0 :                         if (r)
     865             :                                 return r;
     866             :                 }
     867             : 
     868           0 :                 shift = amdgpu_vm_pt_level_shift(adev, cursor.level);
     869           0 :                 parent_shift = amdgpu_vm_pt_level_shift(adev, cursor.level - 1);
     870           0 :                 if (params->unlocked) {
     871             :                         /* Unlocked updates are only allowed on the leaves */
     872           0 :                         if (amdgpu_vm_pt_descendant(adev, &cursor))
     873           0 :                                 continue;
     874           0 :                 } else if (adev->asic_type < CHIP_VEGA10 &&
     875           0 :                            (flags & AMDGPU_PTE_VALID)) {
     876             :                         /* No huge page support before GMC v9 */
     877           0 :                         if (cursor.level != AMDGPU_VM_PTB) {
     878           0 :                                 if (!amdgpu_vm_pt_descendant(adev, &cursor))
     879             :                                         return -ENOENT;
     880           0 :                                 continue;
     881             :                         }
     882           0 :                 } else if (frag < shift) {
     883             :                         /* We can't use this level when the fragment size is
     884             :                          * smaller than the address shift. Go to the next
     885             :                          * child entry and try again.
     886             :                          */
     887           0 :                         if (amdgpu_vm_pt_descendant(adev, &cursor))
     888           0 :                                 continue;
     889           0 :                 } else if (frag >= parent_shift) {
     890             :                         /* If the fragment size is even larger than the parent
     891             :                          * shift we should go up one level and check it again.
     892             :                          */
     893           0 :                         if (!amdgpu_vm_pt_ancestor(&cursor))
     894             :                                 return -EINVAL;
     895           0 :                         continue;
     896             :                 }
     897             : 
     898           0 :                 pt = cursor.entry->bo;
     899           0 :                 if (!pt) {
     900             :                         /* We need all PDs and PTs for mapping something, */
     901           0 :                         if (flags & AMDGPU_PTE_VALID)
     902             :                                 return -ENOENT;
     903             : 
     904             :                         /* but unmapping something can happen at a higher
     905             :                          * level.
     906             :                          */
     907           0 :                         if (!amdgpu_vm_pt_ancestor(&cursor))
     908             :                                 return -EINVAL;
     909             : 
     910           0 :                         pt = cursor.entry->bo;
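                /*
                 * Added note: incr is the amount of address space one entry
                 * covers at this level, pe_start the byte offset of the first
                 * entry to update inside the PD/PT (8 bytes per entry), and
                 * entry_end the end of the pfn range covered by this PD/PT,
                 * clamped to @end.
                 */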
     911           0 :                         shift = parent_shift;
     912           0 :                         frag_end = max(frag_end, ALIGN(frag_start + 1,
     913             :                                    1ULL << shift));
     914             :                 }
     915             : 
     916             :                 /* Looks good so far, calculate parameters for the update */
     917           0 :                 incr = (uint64_t)AMDGPU_GPU_PAGE_SIZE << shift;
     918           0 :                 mask = amdgpu_vm_pt_entries_mask(adev, cursor.level);
     919           0 :                 pe_start = ((cursor.pfn >> shift) & mask) * 8;
     920           0 :                 entry_end = ((uint64_t)mask + 1) << shift;
     921           0 :                 entry_end += cursor.pfn & ~(entry_end - 1);
     922           0 :                 entry_end = min(entry_end, end);
     923             : 
     924             :                 do {
     925           0 :                         struct amdgpu_vm *vm = params->vm;
     926           0 :                         uint64_t upd_end = min(entry_end, frag_end);
     927           0 :                         unsigned int nptes = (upd_end - frag_start) >> shift;
     928           0 :                         uint64_t upd_flags = flags | AMDGPU_PTE_FRAG(frag);
     929             : 
      930             :                         /* This can happen when we mark higher-level PDs as
      931             :                          * silent to stop fault floods.
      932             :                          */
     933           0 :                         nptes = max(nptes, 1u);
     934             : 
     935           0 :                         trace_amdgpu_vm_update_ptes(params, frag_start, upd_end,
     936           0 :                                                     min(nptes, 32u), dst, incr,
     937             :                                                     upd_flags,
     938             :                                                     vm->task_info.pid,
     939             :                                                     vm->immediate.fence_context);
     940           0 :                         amdgpu_vm_pte_update_flags(params, to_amdgpu_bo_vm(pt),
     941             :                                                    cursor.level, pe_start, dst,
     942             :                                                    nptes, incr, upd_flags);
     943             : 
     944           0 :                         pe_start += nptes * 8;
     945           0 :                         dst += nptes * incr;
     946             : 
     947           0 :                         frag_start = upd_end;
     948           0 :                         if (frag_start >= frag_end) {
     949             :                                 /* figure out the next fragment */
     950           0 :                                 amdgpu_vm_pte_fragment(params, frag_start, end,
     951             :                                                        flags, &frag, &frag_end);
     952           0 :                                 if (frag < shift)
     953             :                                         break;
     954             :                         }
     955           0 :                 } while (frag_start < entry_end);
     956             : 
     957           0 :                 if (amdgpu_vm_pt_descendant(adev, &cursor)) {
      958             :                         /* Free all child entries.
      959             :                          * The tables have been updated with the flags and addresses, so
      960             :                          * sub-tables that became unused (huge pages or freed-up areas) can be
      961             :                          * released. This is the most that can be freed; the remaining page
      962             :                          * tables are not completely covered by the range and may still be in use.
     963             :                          */
     964           0 :                         while (cursor.pfn < frag_start) {
     965             :                                 /* Make sure previous mapping is freed */
     966           0 :                                 if (cursor.entry->bo) {
     967           0 :                                         params->table_freed = true;
     968           0 :                                         amdgpu_vm_pt_free_dfs(adev, params->vm,
     969             :                                                               &cursor);
     970             :                                 }
     971           0 :                                 amdgpu_vm_pt_next(adev, &cursor);
     972             :                         }
     973             : 
     974           0 :                 } else if (frag >= shift) {
     975             :                         /* or just move on to the next on the same level. */
     976           0 :                         amdgpu_vm_pt_next(adev, &cursor);
     977             :                 }
     978             :         }
     979             : 
     980             :         return 0;
     981             : }

Generated by: LCOV version 1.14