LCOV - code coverage report
Current view: top level - drivers/gpu/drm/amd/amdgpu - amdgpu_device.c (source / functions)
Test: coverage.info
Date: 2022-12-09 01:23:36
Coverage:   Lines: 0 / 2130 (0.0 %)   Functions: 0 / 128 (0.0 %)

          Line data    Source code
       1             : /*
       2             :  * Copyright 2008 Advanced Micro Devices, Inc.
       3             :  * Copyright 2008 Red Hat Inc.
       4             :  * Copyright 2009 Jerome Glisse.
       5             :  *
       6             :  * Permission is hereby granted, free of charge, to any person obtaining a
       7             :  * copy of this software and associated documentation files (the "Software"),
       8             :  * to deal in the Software without restriction, including without limitation
       9             :  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      10             :  * and/or sell copies of the Software, and to permit persons to whom the
      11             :  * Software is furnished to do so, subject to the following conditions:
      12             :  *
      13             :  * The above copyright notice and this permission notice shall be included in
      14             :  * all copies or substantial portions of the Software.
      15             :  *
      16             :  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
      17             :  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
      18             :  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
      19             :  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
      20             :  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
      21             :  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
      22             :  * OTHER DEALINGS IN THE SOFTWARE.
      23             :  *
      24             :  * Authors: Dave Airlie
      25             :  *          Alex Deucher
      26             :  *          Jerome Glisse
      27             :  */
      28             : #include <linux/power_supply.h>
      29             : #include <linux/kthread.h>
      30             : #include <linux/module.h>
      31             : #include <linux/console.h>
      32             : #include <linux/slab.h>
      33             : #include <linux/iommu.h>
      34             : #include <linux/pci.h>
      35             : #include <linux/devcoredump.h>
      36             : #include <generated/utsrelease.h>
      37             : #include <linux/pci-p2pdma.h>
      38             : 
      39             : #include <drm/drm_atomic_helper.h>
      40             : #include <drm/drm_probe_helper.h>
      41             : #include <drm/amdgpu_drm.h>
      42             : #include <linux/vgaarb.h>
      43             : #include <linux/vga_switcheroo.h>
      44             : #include <linux/efi.h>
      45             : #include "amdgpu.h"
      46             : #include "amdgpu_trace.h"
      47             : #include "amdgpu_i2c.h"
      48             : #include "atom.h"
      49             : #include "amdgpu_atombios.h"
      50             : #include "amdgpu_atomfirmware.h"
      51             : #include "amd_pcie.h"
      52             : #ifdef CONFIG_DRM_AMDGPU_SI
      53             : #include "si.h"
      54             : #endif
      55             : #ifdef CONFIG_DRM_AMDGPU_CIK
      56             : #include "cik.h"
      57             : #endif
      58             : #include "vi.h"
      59             : #include "soc15.h"
      60             : #include "nv.h"
      61             : #include "bif/bif_4_1_d.h"
      62             : #include <linux/firmware.h>
      63             : #include "amdgpu_vf_error.h"
      64             : 
      65             : #include "amdgpu_amdkfd.h"
      66             : #include "amdgpu_pm.h"
      67             : 
      68             : #include "amdgpu_xgmi.h"
      69             : #include "amdgpu_ras.h"
      70             : #include "amdgpu_pmu.h"
      71             : #include "amdgpu_fru_eeprom.h"
      72             : #include "amdgpu_reset.h"
      73             : 
      74             : #include <linux/suspend.h>
      75             : #include <drm/task_barrier.h>
      76             : #include <linux/pm_runtime.h>
      77             : 
      78             : #include <drm/drm_drv.h>
      79             : 
      80             : MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
      81             : MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
      82             : MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
      83             : MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
      84             : MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
      85             : MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
      86             : MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
      87             : 
      88             : #define AMDGPU_RESUME_MS                2000
      89             : #define AMDGPU_MAX_RETRY_LIMIT          2
      90             : #define AMDGPU_RETRY_SRIOV_RESET(r) ((r) == -EBUSY || (r) == -ETIMEDOUT || (r) == -EINVAL)
      91             : 
      92             : const char *amdgpu_asic_name[] = {
      93             :         "TAHITI",
      94             :         "PITCAIRN",
      95             :         "VERDE",
      96             :         "OLAND",
      97             :         "HAINAN",
      98             :         "BONAIRE",
      99             :         "KAVERI",
     100             :         "KABINI",
     101             :         "HAWAII",
     102             :         "MULLINS",
     103             :         "TOPAZ",
     104             :         "TONGA",
     105             :         "FIJI",
     106             :         "CARRIZO",
     107             :         "STONEY",
     108             :         "POLARIS10",
     109             :         "POLARIS11",
     110             :         "POLARIS12",
     111             :         "VEGAM",
     112             :         "VEGA10",
     113             :         "VEGA12",
     114             :         "VEGA20",
     115             :         "RAVEN",
     116             :         "ARCTURUS",
     117             :         "RENOIR",
     118             :         "ALDEBARAN",
     119             :         "NAVI10",
     120             :         "CYAN_SKILLFISH",
     121             :         "NAVI14",
     122             :         "NAVI12",
     123             :         "SIENNA_CICHLID",
     124             :         "NAVY_FLOUNDER",
     125             :         "VANGOGH",
     126             :         "DIMGREY_CAVEFISH",
     127             :         "BEIGE_GOBY",
     128             :         "YELLOW_CARP",
     129             :         "IP DISCOVERY",
     130             :         "LAST",
     131             : };
     132             : 
     133             : /**
     134             :  * DOC: pcie_replay_count
     135             :  *
     136             :  * The amdgpu driver provides a sysfs API for reporting the total number
      137             :  * of PCIe replays (NAKs).
      138             :  * The file pcie_replay_count is used for this and returns the total
      139             :  * number of replays as a sum of the NAKs generated and NAKs received.
     140             :  */
     141             : 
     142           0 : static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
     143             :                 struct device_attribute *attr, char *buf)
     144             : {
     145           0 :         struct drm_device *ddev = dev_get_drvdata(dev);
     146           0 :         struct amdgpu_device *adev = drm_to_adev(ddev);
     147           0 :         uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);
     148             : 
     149           0 :         return sysfs_emit(buf, "%llu\n", cnt);
     150             : }
     151             : 
     152             : static DEVICE_ATTR(pcie_replay_count, S_IRUGO,
     153             :                 amdgpu_device_get_pcie_replay_count, NULL);
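
/*
 * Illustrative sketch (not part of amdgpu_device.c): a minimal userspace
 * program that consumes the pcie_replay_count attribute defined above.
 * The card0 sysfs path is an assumption; the card index varies per system.
 *
 *   #include <stdio.h>
 *
 *   int main(void)
 *   {
 *           unsigned long long replays;
 *           FILE *f = fopen("/sys/class/drm/card0/device/pcie_replay_count", "r");
 *
 *           if (!f)
 *                   return 1;
 *           if (fscanf(f, "%llu", &replays) == 1)
 *                   printf("PCIe replay count: %llu\n", replays);
 *           fclose(f);
 *           return 0;
 *   }
 */
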
     154             : 
     155             : static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
     156             : 
     157             : /**
     158             :  * DOC: product_name
     159             :  *
     160             :  * The amdgpu driver provides a sysfs API for reporting the product name
      161             :  * for the device.
      162             :  * The file product_name is used for this and returns the product name
      163             :  * as returned from the FRU.
      164             :  * NOTE: This is only available for certain server cards.
     165             :  */
     166             : 
     167           0 : static ssize_t amdgpu_device_get_product_name(struct device *dev,
     168             :                 struct device_attribute *attr, char *buf)
     169             : {
     170           0 :         struct drm_device *ddev = dev_get_drvdata(dev);
     171           0 :         struct amdgpu_device *adev = drm_to_adev(ddev);
     172             : 
     173           0 :         return sysfs_emit(buf, "%s\n", adev->product_name);
     174             : }
     175             : 
     176             : static DEVICE_ATTR(product_name, S_IRUGO,
     177             :                 amdgpu_device_get_product_name, NULL);
     178             : 
     179             : /**
     180             :  * DOC: product_number
     181             :  *
     182             :  * The amdgpu driver provides a sysfs API for reporting the part number
      183             :  * for the device.
      184             :  * The file product_number is used for this and returns the part number
      185             :  * as returned from the FRU.
      186             :  * NOTE: This is only available for certain server cards.
     187             :  */
     188             : 
     189           0 : static ssize_t amdgpu_device_get_product_number(struct device *dev,
     190             :                 struct device_attribute *attr, char *buf)
     191             : {
     192           0 :         struct drm_device *ddev = dev_get_drvdata(dev);
     193           0 :         struct amdgpu_device *adev = drm_to_adev(ddev);
     194             : 
     195           0 :         return sysfs_emit(buf, "%s\n", adev->product_number);
     196             : }
     197             : 
     198             : static DEVICE_ATTR(product_number, S_IRUGO,
     199             :                 amdgpu_device_get_product_number, NULL);
     200             : 
     201             : /**
     202             :  * DOC: serial_number
     203             :  *
     204             :  * The amdgpu driver provides a sysfs API for reporting the serial number
      205             :  * for the device.
      206             :  * The file serial_number is used for this and returns the serial number
      207             :  * as returned from the FRU.
      208             :  * NOTE: This is only available for certain server cards.
     209             :  */
     210             : 
     211           0 : static ssize_t amdgpu_device_get_serial_number(struct device *dev,
     212             :                 struct device_attribute *attr, char *buf)
     213             : {
     214           0 :         struct drm_device *ddev = dev_get_drvdata(dev);
     215           0 :         struct amdgpu_device *adev = drm_to_adev(ddev);
     216             : 
     217           0 :         return sysfs_emit(buf, "%s\n", adev->serial);
     218             : }
     219             : 
     220             : static DEVICE_ATTR(serial_number, S_IRUGO,
     221             :                 amdgpu_device_get_serial_number, NULL);
     222             : 
     223             : /**
     224             :  * amdgpu_device_supports_px - Is the device a dGPU with ATPX power control
     225             :  *
     226             :  * @dev: drm_device pointer
     227             :  *
     228             :  * Returns true if the device is a dGPU with ATPX power control,
      229             :  * otherwise returns false.
     230             :  */
     231           0 : bool amdgpu_device_supports_px(struct drm_device *dev)
     232             : {
     233           0 :         struct amdgpu_device *adev = drm_to_adev(dev);
     234             : 
     235           0 :         if ((adev->flags & AMD_IS_PX) && !amdgpu_is_atpx_hybrid())
     236             :                 return true;
     237             :         return false;
     238             : }
     239             : 
     240             : /**
     241             :  * amdgpu_device_supports_boco - Is the device a dGPU with ACPI power resources
     242             :  *
     243             :  * @dev: drm_device pointer
     244             :  *
     245             :  * Returns true if the device is a dGPU with ACPI power control,
      246             :  * otherwise returns false.
     247             :  */
     248           0 : bool amdgpu_device_supports_boco(struct drm_device *dev)
     249             : {
     250           0 :         struct amdgpu_device *adev = drm_to_adev(dev);
     251             : 
     252           0 :         if (adev->has_pr3 ||
     253             :             ((adev->flags & AMD_IS_PX) && amdgpu_is_atpx_hybrid()))
     254             :                 return true;
     255           0 :         return false;
     256             : }
     257             : 
     258             : /**
     259             :  * amdgpu_device_supports_baco - Does the device support BACO
     260             :  *
     261             :  * @dev: drm_device pointer
     262             :  *
      263             :  * Returns true if the device supports BACO,
      264             :  * otherwise returns false.
     265             :  */
     266           0 : bool amdgpu_device_supports_baco(struct drm_device *dev)
     267             : {
     268           0 :         struct amdgpu_device *adev = drm_to_adev(dev);
     269             : 
     270           0 :         return amdgpu_asic_supports_baco(adev);
     271             : }
     272             : 
     273             : /**
      274             :  * amdgpu_device_supports_smart_shift - Is the device a dGPU with
     275             :  * smart shift support
     276             :  *
     277             :  * @dev: drm_device pointer
     278             :  *
     279             :  * Returns true if the device is a dGPU with Smart Shift support,
     280             :  * otherwise returns false.
     281             :  */
     282           0 : bool amdgpu_device_supports_smart_shift(struct drm_device *dev)
     283             : {
     284           0 :         return (amdgpu_device_supports_boco(dev) &&
     285             :                 amdgpu_acpi_is_power_shift_control_supported());
     286             : }
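
/*
 * Illustrative sketch (hypothetical helper, not part of amdgpu_device.c):
 * the predicates above are typically consulted in order to pick a runtime
 * power-off strategy. The enum and function names below are assumptions
 * made up for this example.
 */
enum example_rpm_mode {
        EXAMPLE_RPM_NONE,
        EXAMPLE_RPM_PX,         /* ATPX-controlled power switch */
        EXAMPLE_RPM_BOCO,       /* ACPI power resources (_PR3) */
        EXAMPLE_RPM_BACO,       /* bus active, chip off */
};

static enum example_rpm_mode example_pick_rpm_mode(struct drm_device *dev)
{
        if (amdgpu_device_supports_px(dev))
                return EXAMPLE_RPM_PX;
        if (amdgpu_device_supports_boco(dev))
                return EXAMPLE_RPM_BOCO;
        if (amdgpu_device_supports_baco(dev))
                return EXAMPLE_RPM_BACO;
        return EXAMPLE_RPM_NONE;
}
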
     287             : 
     288             : /*
     289             :  * VRAM access helper functions
     290             :  */
     291             : 
     292             : /**
     293             :  * amdgpu_device_mm_access - access vram by MM_INDEX/MM_DATA
     294             :  *
     295             :  * @adev: amdgpu_device pointer
     296             :  * @pos: offset of the buffer in vram
     297             :  * @buf: virtual address of the buffer in system memory
      298             :  * @size: read/write size; the buffer at @buf must hold at least @size bytes
     299             :  * @write: true - write to vram, otherwise - read from vram
     300             :  */
     301           0 : void amdgpu_device_mm_access(struct amdgpu_device *adev, loff_t pos,
     302             :                              void *buf, size_t size, bool write)
     303             : {
     304             :         unsigned long flags;
     305           0 :         uint32_t hi = ~0, tmp = 0;
     306           0 :         uint32_t *data = buf;
     307             :         uint64_t last;
     308             :         int idx;
     309             : 
     310           0 :         if (!drm_dev_enter(adev_to_drm(adev), &idx))
     311           0 :                 return;
     312             : 
     313           0 :         BUG_ON(!IS_ALIGNED(pos, 4) || !IS_ALIGNED(size, 4));
     314             : 
     315           0 :         spin_lock_irqsave(&adev->mmio_idx_lock, flags);
     316           0 :         for (last = pos + size; pos < last; pos += 4) {
     317           0 :                 tmp = pos >> 31;
     318             : 
     319           0 :                 WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
     320           0 :                 if (tmp != hi) {
     321           0 :                         WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
     322           0 :                         hi = tmp;
     323             :                 }
     324           0 :                 if (write)
     325           0 :                         WREG32_NO_KIQ(mmMM_DATA, *data++);
     326             :                 else
     327           0 :                         *data++ = RREG32_NO_KIQ(mmMM_DATA);
     328             :         }
     329             : 
     330           0 :         spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
     331           0 :         drm_dev_exit(idx);
     332             : }
     333             : 
     334             : /**
      335             :  * amdgpu_device_aper_access - access vram through the vram aperture
     336             :  *
     337             :  * @adev: amdgpu_device pointer
     338             :  * @pos: offset of the buffer in vram
     339             :  * @buf: virtual address of the buffer in system memory
      340             :  * @size: read/write size; the buffer at @buf must hold at least @size bytes
     341             :  * @write: true - write to vram, otherwise - read from vram
     342             :  *
      343             :  * Returns the number of bytes that have been transferred.
     344             :  */
     345           0 : size_t amdgpu_device_aper_access(struct amdgpu_device *adev, loff_t pos,
     346             :                                  void *buf, size_t size, bool write)
     347             : {
     348             : #ifdef CONFIG_64BIT
     349             :         void __iomem *addr;
     350           0 :         size_t count = 0;
     351             :         uint64_t last;
     352             : 
     353           0 :         if (!adev->mman.aper_base_kaddr)
     354             :                 return 0;
     355             : 
     356           0 :         last = min(pos + size, adev->gmc.visible_vram_size);
     357           0 :         if (last > pos) {
     358           0 :                 addr = adev->mman.aper_base_kaddr + pos;
     359           0 :                 count = last - pos;
     360             : 
     361           0 :                 if (write) {
     362           0 :                         memcpy_toio(addr, buf, count);
     363           0 :                         mb();
     364           0 :                         amdgpu_device_flush_hdp(adev, NULL);
     365             :                 } else {
     366           0 :                         amdgpu_device_invalidate_hdp(adev, NULL);
     367           0 :                         mb();
     368           0 :                         memcpy_fromio(buf, addr, count);
     369             :                 }
     370             : 
     371             :         }
     372             : 
     373             :         return count;
     374             : #else
     375             :         return 0;
     376             : #endif
     377             : }
     378             : 
     379             : /**
     380             :  * amdgpu_device_vram_access - read/write a buffer in vram
     381             :  *
     382             :  * @adev: amdgpu_device pointer
     383             :  * @pos: offset of the buffer in vram
     384             :  * @buf: virtual address of the buffer in system memory
      385             :  * @size: read/write size; the buffer at @buf must hold at least @size bytes
     386             :  * @write: true - write to vram, otherwise - read from vram
     387             :  */
     388           0 : void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
     389             :                                void *buf, size_t size, bool write)
     390             : {
     391             :         size_t count;
     392             : 
      393             :         /* try using the vram aperture to access vram first */
     394           0 :         count = amdgpu_device_aper_access(adev, pos, buf, size, write);
     395           0 :         size -= count;
     396           0 :         if (size) {
      397             :                 /* use MM_INDEX/MM_DATA to access the rest of vram */
     398           0 :                 pos += count;
     399           0 :                 buf += count;
     400           0 :                 amdgpu_device_mm_access(adev, pos, buf, size, write);
     401             :         }
     402           0 : }
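
/*
 * Illustrative sketch (hypothetical caller, not part of amdgpu_device.c)
 * for the helper above. Both @pos and @size must stay 4-byte aligned,
 * since the MM_INDEX/MM_DATA fallback BUG()s on unaligned access.
 */
static void example_peek_vram(struct amdgpu_device *adev)
{
        uint32_t data[4] = {};

        /* read 16 bytes from the start of VRAM into system memory */
        amdgpu_device_vram_access(adev, 0, data, sizeof(data), false);
        dev_info(adev->dev, "VRAM[0..3]: %08x %08x %08x %08x\n",
                 data[0], data[1], data[2], data[3]);
}
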
     403             : 
     404             : /*
     405             :  * register access helper functions.
     406             :  */
     407             : 
     408             : /* Check if hw access should be skipped because of hotplug or device error */
     409           0 : bool amdgpu_device_skip_hw_access(struct amdgpu_device *adev)
     410             : {
     411           0 :         if (adev->no_hw_access)
     412             :                 return true;
     413             : 
     414             : #ifdef CONFIG_LOCKDEP
     415             :         /*
     416             :          * This is a bit complicated to understand, so worth a comment. What we assert
     417             :          * here is that the GPU reset is not running on another thread in parallel.
     418             :          *
     419             :          * For this we trylock the read side of the reset semaphore, if that succeeds
         * we know that the reset is not running in parallel.
     421             :          *
     422             :          * If the trylock fails we assert that we are either already holding the read
     423             :          * side of the lock or are the reset thread itself and hold the write side of
     424             :          * the lock.
     425             :          */
     426             :         if (in_task()) {
     427             :                 if (down_read_trylock(&adev->reset_domain->sem))
     428             :                         up_read(&adev->reset_domain->sem);
     429             :                 else
     430             :                         lockdep_assert_held(&adev->reset_domain->sem);
     431             :         }
     432             : #endif
     433           0 :         return false;
     434             : }
     435             : 
     436             : /**
     437             :  * amdgpu_device_rreg - read a memory mapped IO or indirect register
     438             :  *
     439             :  * @adev: amdgpu_device pointer
     440             :  * @reg: dword aligned register offset
     441             :  * @acc_flags: access flags which require special behavior
     442             :  *
     443             :  * Returns the 32 bit value from the offset specified.
     444             :  */
     445           0 : uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
     446             :                             uint32_t reg, uint32_t acc_flags)
     447             : {
     448             :         uint32_t ret;
     449             : 
     450           0 :         if (amdgpu_device_skip_hw_access(adev))
     451             :                 return 0;
     452             : 
     453           0 :         if ((reg * 4) < adev->rmmio_size) {
     454           0 :                 if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
     455           0 :                     amdgpu_sriov_runtime(adev) &&
     456           0 :                     down_read_trylock(&adev->reset_domain->sem)) {
     457           0 :                         ret = amdgpu_kiq_rreg(adev, reg);
     458           0 :                         up_read(&adev->reset_domain->sem);
     459             :                 } else {
     460           0 :                         ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
     461             :                 }
     462             :         } else {
     463           0 :                 ret = adev->pcie_rreg(adev, reg * 4);
     464             :         }
     465             : 
     466           0 :         trace_amdgpu_device_rreg(adev->pdev->device, reg, ret);
     467             : 
     468           0 :         return ret;
     469             : }
     470             : 
     471             : /*
      472             :  * MMIO register read with byte offset helper function
      473             :  * @offset: byte offset from MMIO start
     474             :  *
     475             : */
     476             : 
     477             : /**
     478             :  * amdgpu_mm_rreg8 - read a memory mapped IO register
     479             :  *
     480             :  * @adev: amdgpu_device pointer
     481             :  * @offset: byte aligned register offset
     482             :  *
     483             :  * Returns the 8 bit value from the offset specified.
     484             :  */
     485           0 : uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset)
     486             : {
     487           0 :         if (amdgpu_device_skip_hw_access(adev))
     488             :                 return 0;
     489             : 
     490           0 :         if (offset < adev->rmmio_size)
     491           0 :                 return (readb(adev->rmmio + offset));
     492           0 :         BUG();
     493             : }
     494             : 
     495             : /*
      496             :  * MMIO register write with byte offset helper function
      497             :  * @offset: byte offset from MMIO start
      498             :  * @value: the value to write to the register
     499             :  *
     500             : */
     501             : /**
      502             :  * amdgpu_mm_wreg8 - write a memory mapped IO register
     503             :  *
     504             :  * @adev: amdgpu_device pointer
     505             :  * @offset: byte aligned register offset
     506             :  * @value: 8 bit value to write
     507             :  *
     508             :  * Writes the value specified to the offset specified.
     509             :  */
     510           0 : void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value)
     511             : {
     512           0 :         if (amdgpu_device_skip_hw_access(adev))
     513             :                 return;
     514             : 
     515           0 :         if (offset < adev->rmmio_size)
     516           0 :                 writeb(value, adev->rmmio + offset);
     517             :         else
     518           0 :                 BUG();
     519             : }
     520             : 
     521             : /**
     522             :  * amdgpu_device_wreg - write to a memory mapped IO or indirect register
     523             :  *
     524             :  * @adev: amdgpu_device pointer
     525             :  * @reg: dword aligned register offset
     526             :  * @v: 32 bit value to write to the register
     527             :  * @acc_flags: access flags which require special behavior
     528             :  *
     529             :  * Writes the value specified to the offset specified.
     530             :  */
     531           0 : void amdgpu_device_wreg(struct amdgpu_device *adev,
     532             :                         uint32_t reg, uint32_t v,
     533             :                         uint32_t acc_flags)
     534             : {
     535           0 :         if (amdgpu_device_skip_hw_access(adev))
     536             :                 return;
     537             : 
     538           0 :         if ((reg * 4) < adev->rmmio_size) {
     539           0 :                 if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
     540           0 :                     amdgpu_sriov_runtime(adev) &&
     541           0 :                     down_read_trylock(&adev->reset_domain->sem)) {
     542           0 :                         amdgpu_kiq_wreg(adev, reg, v);
     543           0 :                         up_read(&adev->reset_domain->sem);
     544             :                 } else {
     545           0 :                         writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
     546             :                 }
     547             :         } else {
     548           0 :                 adev->pcie_wreg(adev, reg * 4, v);
     549             :         }
     550             : 
     551           0 :         trace_amdgpu_device_wreg(adev->pdev->device, reg, v);
     552             : }
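
/*
 * Illustrative sketch (hypothetical helper, not part of amdgpu_device.c):
 * a read-modify-write built on the paired rreg/wreg functions above.
 * Driver code usually spells such accesses with the RREG32()/WREG32()
 * convenience macros instead of calling these functions directly.
 */
static void example_rmw(struct amdgpu_device *adev, uint32_t reg,
                        uint32_t mask, uint32_t bits)
{
        uint32_t v = amdgpu_device_rreg(adev, reg, 0);

        v = (v & ~mask) | (bits & mask);
        amdgpu_device_wreg(adev, reg, v, 0);
}
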
     553             : 
     554             : /**
     555             :  * amdgpu_mm_wreg_mmio_rlc -  write register either with direct/indirect mmio or with RLC path if in range
     556             :  *
     557             :  * @adev: amdgpu_device pointer
     558             :  * @reg: mmio/rlc register
     559             :  * @v: value to write
     560             :  *
      561             :  * This function is invoked only for debugfs register access.
     562             :  */
     563           0 : void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
     564             :                              uint32_t reg, uint32_t v)
     565             : {
     566           0 :         if (amdgpu_device_skip_hw_access(adev))
     567             :                 return;
     568             : 
     569           0 :         if (amdgpu_sriov_fullaccess(adev) &&
     570           0 :             adev->gfx.rlc.funcs &&
     571           0 :             adev->gfx.rlc.funcs->is_rlcg_access_range) {
     572           0 :                 if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg))
     573           0 :                         return amdgpu_sriov_wreg(adev, reg, v, 0, 0);
     574           0 :         } else if ((reg * 4) >= adev->rmmio_size) {
     575           0 :                 adev->pcie_wreg(adev, reg * 4, v);
     576             :         } else {
     577           0 :                 writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
     578             :         }
     579             : }
     580             : 
     581             : /**
     582             :  * amdgpu_mm_rdoorbell - read a doorbell dword
     583             :  *
     584             :  * @adev: amdgpu_device pointer
     585             :  * @index: doorbell index
     586             :  *
     587             :  * Returns the value in the doorbell aperture at the
     588             :  * requested doorbell index (CIK).
     589             :  */
     590           0 : u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index)
     591             : {
     592           0 :         if (amdgpu_device_skip_hw_access(adev))
     593             :                 return 0;
     594             : 
     595           0 :         if (index < adev->doorbell.num_doorbells) {
     596           0 :                 return readl(adev->doorbell.ptr + index);
     597             :         } else {
     598           0 :                 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
     599           0 :                 return 0;
     600             :         }
     601             : }
     602             : 
     603             : /**
     604             :  * amdgpu_mm_wdoorbell - write a doorbell dword
     605             :  *
     606             :  * @adev: amdgpu_device pointer
     607             :  * @index: doorbell index
     608             :  * @v: value to write
     609             :  *
     610             :  * Writes @v to the doorbell aperture at the
     611             :  * requested doorbell index (CIK).
     612             :  */
     613           0 : void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v)
     614             : {
     615           0 :         if (amdgpu_device_skip_hw_access(adev))
     616             :                 return;
     617             : 
     618           0 :         if (index < adev->doorbell.num_doorbells) {
     619           0 :                 writel(v, adev->doorbell.ptr + index);
     620             :         } else {
     621           0 :                 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
     622             :         }
     623             : }
     624             : 
     625             : /**
     626             :  * amdgpu_mm_rdoorbell64 - read a doorbell Qword
     627             :  *
     628             :  * @adev: amdgpu_device pointer
     629             :  * @index: doorbell index
     630             :  *
     631             :  * Returns the value in the doorbell aperture at the
     632             :  * requested doorbell index (VEGA10+).
     633             :  */
     634           0 : u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index)
     635             : {
     636           0 :         if (amdgpu_device_skip_hw_access(adev))
     637             :                 return 0;
     638             : 
     639           0 :         if (index < adev->doorbell.num_doorbells) {
     640           0 :                 return atomic64_read((atomic64_t *)(adev->doorbell.ptr + index));
     641             :         } else {
     642           0 :                 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
     643           0 :                 return 0;
     644             :         }
     645             : }
     646             : 
     647             : /**
     648             :  * amdgpu_mm_wdoorbell64 - write a doorbell Qword
     649             :  *
     650             :  * @adev: amdgpu_device pointer
     651             :  * @index: doorbell index
     652             :  * @v: value to write
     653             :  *
     654             :  * Writes @v to the doorbell aperture at the
     655             :  * requested doorbell index (VEGA10+).
     656             :  */
     657           0 : void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v)
     658             : {
     659           0 :         if (amdgpu_device_skip_hw_access(adev))
     660             :                 return;
     661             : 
     662           0 :         if (index < adev->doorbell.num_doorbells) {
     663           0 :                 atomic64_set((atomic64_t *)(adev->doorbell.ptr + index), v);
     664             :         } else {
     665           0 :                 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
     666             :         }
     667             : }
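
/*
 * Illustrative sketch (hypothetical, not part of amdgpu_device.c): the
 * usual consumer of the 64-bit doorbell write is ring submission, which
 * publishes a ring's new write pointer so the engine starts fetching.
 * Whether the value is byte- or dword-based depends on the engine.
 */
static void example_commit_wptr(struct amdgpu_device *adev,
                                u32 doorbell_index, u64 wptr)
{
        /* one atomic 64-bit store into the doorbell aperture */
        amdgpu_mm_wdoorbell64(adev, doorbell_index, wptr);
}
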
     668             : 
     669             : /**
     670             :  * amdgpu_device_indirect_rreg - read an indirect register
     671             :  *
     672             :  * @adev: amdgpu_device pointer
      673             :  * @pcie_index: mmio register offset of the index register
      674             :  * @pcie_data: mmio register offset of the data register
     675             :  * @reg_addr: indirect register address to read from
     676             :  *
     677             :  * Returns the value of indirect register @reg_addr
     678             :  */
     679           0 : u32 amdgpu_device_indirect_rreg(struct amdgpu_device *adev,
     680             :                                 u32 pcie_index, u32 pcie_data,
     681             :                                 u32 reg_addr)
     682             : {
     683             :         unsigned long flags;
     684             :         u32 r;
     685             :         void __iomem *pcie_index_offset;
     686             :         void __iomem *pcie_data_offset;
     687             : 
     688           0 :         spin_lock_irqsave(&adev->pcie_idx_lock, flags);
     689           0 :         pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
     690           0 :         pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
     691             : 
     692           0 :         writel(reg_addr, pcie_index_offset);
     693           0 :         readl(pcie_index_offset);
     694           0 :         r = readl(pcie_data_offset);
     695           0 :         spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
     696             : 
     697           0 :         return r;
     698             : }
     699             : 
     700             : /**
      701             :  * amdgpu_device_indirect_rreg64 - read a 64-bit indirect register
     702             :  *
     703             :  * @adev: amdgpu_device pointer
      704             :  * @pcie_index: mmio register offset of the index register
      705             :  * @pcie_data: mmio register offset of the data register
     706             :  * @reg_addr: indirect register address to read from
     707             :  *
     708             :  * Returns the value of indirect register @reg_addr
     709             :  */
     710           0 : u64 amdgpu_device_indirect_rreg64(struct amdgpu_device *adev,
     711             :                                   u32 pcie_index, u32 pcie_data,
     712             :                                   u32 reg_addr)
     713             : {
     714             :         unsigned long flags;
     715             :         u64 r;
     716             :         void __iomem *pcie_index_offset;
     717             :         void __iomem *pcie_data_offset;
     718             : 
     719           0 :         spin_lock_irqsave(&adev->pcie_idx_lock, flags);
     720           0 :         pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
     721           0 :         pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
     722             : 
     723             :         /* read low 32 bits */
     724           0 :         writel(reg_addr, pcie_index_offset);
     725           0 :         readl(pcie_index_offset);
     726           0 :         r = readl(pcie_data_offset);
     727             :         /* read high 32 bits */
     728           0 :         writel(reg_addr + 4, pcie_index_offset);
     729           0 :         readl(pcie_index_offset);
     730           0 :         r |= ((u64)readl(pcie_data_offset) << 32);
     731           0 :         spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
     732             : 
     733           0 :         return r;
     734             : }
     735             : 
     736             : /**
      737             :  * amdgpu_device_indirect_wreg - write an indirect register
     738             :  *
     739             :  * @adev: amdgpu_device pointer
      740             :  * @pcie_index: mmio register offset of the index register
      741             :  * @pcie_data: mmio register offset of the data register
     742             :  * @reg_addr: indirect register offset
     743             :  * @reg_data: indirect register data
     744             :  *
     745             :  */
     746           0 : void amdgpu_device_indirect_wreg(struct amdgpu_device *adev,
     747             :                                  u32 pcie_index, u32 pcie_data,
     748             :                                  u32 reg_addr, u32 reg_data)
     749             : {
     750             :         unsigned long flags;
     751             :         void __iomem *pcie_index_offset;
     752             :         void __iomem *pcie_data_offset;
     753             : 
     754           0 :         spin_lock_irqsave(&adev->pcie_idx_lock, flags);
     755           0 :         pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
     756           0 :         pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
     757             : 
     758           0 :         writel(reg_addr, pcie_index_offset);
     759           0 :         readl(pcie_index_offset);
     760           0 :         writel(reg_data, pcie_data_offset);
     761           0 :         readl(pcie_data_offset);
     762           0 :         spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
     763           0 : }
     764             : 
     765             : /**
      766             :  * amdgpu_device_indirect_wreg64 - write a 64-bit indirect register
     767             :  *
     768             :  * @adev: amdgpu_device pointer
      769             :  * @pcie_index: mmio register offset of the index register
      770             :  * @pcie_data: mmio register offset of the data register
     771             :  * @reg_addr: indirect register offset
     772             :  * @reg_data: indirect register data
     773             :  *
     774             :  */
     775           0 : void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev,
     776             :                                    u32 pcie_index, u32 pcie_data,
     777             :                                    u32 reg_addr, u64 reg_data)
     778             : {
     779             :         unsigned long flags;
     780             :         void __iomem *pcie_index_offset;
     781             :         void __iomem *pcie_data_offset;
     782             : 
     783           0 :         spin_lock_irqsave(&adev->pcie_idx_lock, flags);
     784           0 :         pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
     785           0 :         pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
     786             : 
     787             :         /* write low 32 bits */
     788           0 :         writel(reg_addr, pcie_index_offset);
     789           0 :         readl(pcie_index_offset);
     790           0 :         writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset);
     791           0 :         readl(pcie_data_offset);
     792             :         /* write high 32 bits */
     793           0 :         writel(reg_addr + 4, pcie_index_offset);
     794           0 :         readl(pcie_index_offset);
     795           0 :         writel((u32)(reg_data >> 32), pcie_data_offset);
     796           0 :         readl(pcie_data_offset);
     797           0 :         spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
     798           0 : }
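
/*
 * Illustrative sketch (hypothetical per-ASIC wrapper, not part of
 * amdgpu_device.c): ASIC files typically wire adev->pcie_rreg64 and
 * friends to the indirect helpers above, passing their own
 * PCIE_INDEX/PCIE_DATA mmio offsets. The nbio callback names below are
 * assumptions about how such offsets are usually looked up.
 */
static u64 example_pcie_rreg64(struct amdgpu_device *adev, u32 reg)
{
        u32 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
        u32 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);

        return amdgpu_device_indirect_rreg64(adev, pcie_index, pcie_data, reg);
}
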
     799             : 
     800             : /**
     801             :  * amdgpu_invalid_rreg - dummy reg read function
     802             :  *
     803             :  * @adev: amdgpu_device pointer
     804             :  * @reg: offset of register
     805             :  *
     806             :  * Dummy register read function.  Used for register blocks
     807             :  * that certain asics don't have (all asics).
     808             :  * Returns the value in the register.
     809             :  */
     810           0 : static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
     811             : {
     812           0 :         DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
     813           0 :         BUG();
     814             :         return 0;
     815             : }
     816             : 
     817             : /**
     818             :  * amdgpu_invalid_wreg - dummy reg write function
     819             :  *
     820             :  * @adev: amdgpu_device pointer
     821             :  * @reg: offset of register
     822             :  * @v: value to write to the register
     823             :  *
      824             :  * Dummy register write function.  Used for register blocks
     825             :  * that certain asics don't have (all asics).
     826             :  */
     827           0 : static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
     828             : {
     829           0 :         DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
     830             :                   reg, v);
     831           0 :         BUG();
     832             : }
     833             : 
     834             : /**
     835             :  * amdgpu_invalid_rreg64 - dummy 64 bit reg read function
     836             :  *
     837             :  * @adev: amdgpu_device pointer
     838             :  * @reg: offset of register
     839             :  *
     840             :  * Dummy register read function.  Used for register blocks
     841             :  * that certain asics don't have (all asics).
     842             :  * Returns the value in the register.
     843             :  */
     844           0 : static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
     845             : {
     846           0 :         DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg);
     847           0 :         BUG();
     848             :         return 0;
     849             : }
     850             : 
     851             : /**
     852             :  * amdgpu_invalid_wreg64 - dummy reg write function
     853             :  *
     854             :  * @adev: amdgpu_device pointer
     855             :  * @reg: offset of register
     856             :  * @v: value to write to the register
     857             :  *
      858             :  * Dummy register write function.  Used for register blocks
     859             :  * that certain asics don't have (all asics).
     860             :  */
     861           0 : static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
     862             : {
     863           0 :         DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
     864             :                   reg, v);
     865           0 :         BUG();
     866             : }
     867             : 
     868             : /**
     869             :  * amdgpu_block_invalid_rreg - dummy reg read function
     870             :  *
     871             :  * @adev: amdgpu_device pointer
     872             :  * @block: offset of instance
     873             :  * @reg: offset of register
     874             :  *
     875             :  * Dummy register read function.  Used for register blocks
     876             :  * that certain asics don't have (all asics).
     877             :  * Returns the value in the register.
     878             :  */
     879           0 : static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
     880             :                                           uint32_t block, uint32_t reg)
     881             : {
     882           0 :         DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
     883             :                   reg, block);
     884           0 :         BUG();
     885             :         return 0;
     886             : }
     887             : 
     888             : /**
     889             :  * amdgpu_block_invalid_wreg - dummy reg write function
     890             :  *
     891             :  * @adev: amdgpu_device pointer
     892             :  * @block: offset of instance
     893             :  * @reg: offset of register
     894             :  * @v: value to write to the register
     895             :  *
      896             :  * Dummy register write function.  Used for register blocks
     897             :  * that certain asics don't have (all asics).
     898             :  */
     899           0 : static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
     900             :                                       uint32_t block,
     901             :                                       uint32_t reg, uint32_t v)
     902             : {
     903           0 :         DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
     904             :                   reg, block, v);
     905           0 :         BUG();
     906             : }
     907             : 
     908             : /**
     909             :  * amdgpu_device_asic_init - Wrapper for atom asic_init
     910             :  *
     911             :  * @adev: amdgpu_device pointer
     912             :  *
     913             :  * Does any asic specific work and then calls atom asic init.
     914             :  */
     915           0 : static int amdgpu_device_asic_init(struct amdgpu_device *adev)
     916             : {
     917           0 :         amdgpu_asic_pre_asic_init(adev);
     918             : 
     919           0 :         if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(11, 0, 0))
     920           0 :                 return amdgpu_atomfirmware_asic_init(adev, true);
     921             :         else
     922           0 :                 return amdgpu_atom_asic_init(adev->mode_info.atom_context);
     923             : }
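
/*
 * Note: IP_VERSION() packs major/minor/revision into one comparable
 * integer, which is what makes the ">=" check above work. A sketch of the
 * packing, assuming the common kernel definition:
 *
 *   #define IP_VERSION(mj, mn, rv) (((mj) << 16) | ((mn) << 8) | (rv))
 *
 * so IP_VERSION(11, 0, 0) == 0xb0000, and any GFX11-or-newer part
 * compares greater than or equal to it.
 */
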
     924             : 
     925             : /**
     926             :  * amdgpu_device_vram_scratch_init - allocate the VRAM scratch page
     927             :  *
     928             :  * @adev: amdgpu_device pointer
     929             :  *
     930             :  * Allocates a scratch page of VRAM for use by various things in the
     931             :  * driver.
     932             :  */
     933             : static int amdgpu_device_vram_scratch_init(struct amdgpu_device *adev)
     934             : {
     935           0 :         return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE,
     936             :                                        PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
     937             :                                        &adev->vram_scratch.robj,
     938             :                                        &adev->vram_scratch.gpu_addr,
     939           0 :                                        (void **)&adev->vram_scratch.ptr);
     940             : }
     941             : 
     942             : /**
     943             :  * amdgpu_device_vram_scratch_fini - Free the VRAM scratch page
     944             :  *
     945             :  * @adev: amdgpu_device pointer
     946             :  *
     947             :  * Frees the VRAM scratch page.
     948             :  */
     949             : static void amdgpu_device_vram_scratch_fini(struct amdgpu_device *adev)
     950             : {
     951           0 :         amdgpu_bo_free_kernel(&adev->vram_scratch.robj, NULL, NULL);
     952             : }
     953             : 
     954             : /**
     955             :  * amdgpu_device_program_register_sequence - program an array of registers.
     956             :  *
     957             :  * @adev: amdgpu_device pointer
     958             :  * @registers: pointer to the register array
     959             :  * @array_size: size of the register array
     960             :  *
      961             :  * Programs an array of registers with AND and OR masks.
     962             :  * This is a helper for setting golden registers.
     963             :  */
     964           0 : void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
     965             :                                              const u32 *registers,
     966             :                                              const u32 array_size)
     967             : {
     968             :         u32 tmp, reg, and_mask, or_mask;
     969             :         int i;
     970             : 
     971           0 :         if (array_size % 3)
     972             :                 return;
     973             : 
      974           0 :         for (i = 0; i < array_size; i += 3) {
     975           0 :                 reg = registers[i + 0];
     976           0 :                 and_mask = registers[i + 1];
     977           0 :                 or_mask = registers[i + 2];
     978             : 
     979           0 :                 if (and_mask == 0xffffffff) {
     980             :                         tmp = or_mask;
     981             :                 } else {
     982           0 :                         tmp = RREG32(reg);
     983           0 :                         tmp &= ~and_mask;
     984           0 :                         if (adev->family >= AMDGPU_FAMILY_AI)
     985           0 :                                 tmp |= (or_mask & and_mask);
     986             :                         else
     987           0 :                                 tmp |= or_mask;
     988             :                 }
     989           0 :                 WREG32(reg, tmp);
     990             :         }
     991             : }
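
/*
 * Illustrative sketch (hypothetical values, not part of amdgpu_device.c):
 * a golden-register list is a flat array of {offset, and_mask, or_mask}
 * triplets. An and_mask of 0xffffffff writes or_mask verbatim; anything
 * else is a read-modify-write. The offsets below are placeholders, not
 * real registers.
 */
static const u32 example_golden_settings[] = {
        /* offset   and_mask    or_mask */
        0x1234, 0xffffffff, 0x00000001,   /* full overwrite */
        0x5678, 0x0000ff00, 0x00004200,   /* RMW of bits 15:8 */
};

/*
 * Usage sketch:
 *   amdgpu_device_program_register_sequence(adev, example_golden_settings,
 *                                           ARRAY_SIZE(example_golden_settings));
 */
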
     992             : 
     993             : /**
     994             :  * amdgpu_device_pci_config_reset - reset the GPU
     995             :  *
     996             :  * @adev: amdgpu_device pointer
     997             :  *
     998             :  * Resets the GPU using the pci config reset sequence.
     999             :  * Only applicable to asics prior to vega10.
    1000             :  */
    1001           0 : void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
    1002             : {
    1003           0 :         pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
    1004           0 : }
    1005             : 
    1006             : /**
    1007             :  * amdgpu_device_pci_reset - reset the GPU using generic PCI means
    1008             :  *
    1009             :  * @adev: amdgpu_device pointer
    1010             :  *
    1011             :  * Resets the GPU using generic pci reset interfaces (FLR, SBR, etc.).
    1012             :  */
    1013           0 : int amdgpu_device_pci_reset(struct amdgpu_device *adev)
    1014             : {
    1015           0 :         return pci_reset_function(adev->pdev);
    1016             : }
    1017             : 
    1018             : /*
     1019             :  * GPU doorbell aperture helper functions.
    1020             :  */
    1021             : /**
    1022             :  * amdgpu_device_doorbell_init - Init doorbell driver information.
    1023             :  *
    1024             :  * @adev: amdgpu_device pointer
    1025             :  *
    1026             :  * Init doorbell driver information (CIK)
    1027             :  * Returns 0 on success, error on failure.
    1028             :  */
    1029           0 : static int amdgpu_device_doorbell_init(struct amdgpu_device *adev)
    1030             : {
    1031             : 
    1032             :         /* No doorbell on SI hardware generation */
    1033           0 :         if (adev->asic_type < CHIP_BONAIRE) {
    1034           0 :                 adev->doorbell.base = 0;
    1035           0 :                 adev->doorbell.size = 0;
    1036           0 :                 adev->doorbell.num_doorbells = 0;
    1037           0 :                 adev->doorbell.ptr = NULL;
    1038           0 :                 return 0;
    1039             :         }
    1040             : 
    1041           0 :         if (pci_resource_flags(adev->pdev, 2) & IORESOURCE_UNSET)
    1042             :                 return -EINVAL;
    1043             : 
    1044           0 :         amdgpu_asic_init_doorbell_index(adev);
    1045             : 
    1046             :         /* doorbell bar mapping */
    1047           0 :         adev->doorbell.base = pci_resource_start(adev->pdev, 2);
    1048           0 :         adev->doorbell.size = pci_resource_len(adev->pdev, 2);
    1049             : 
    1050           0 :         if (adev->enable_mes) {
    1051           0 :                 adev->doorbell.num_doorbells =
    1052           0 :                         adev->doorbell.size / sizeof(u32);
    1053             :         } else {
    1054           0 :                 adev->doorbell.num_doorbells =
    1055           0 :                         min_t(u32, adev->doorbell.size / sizeof(u32),
    1056             :                               adev->doorbell_index.max_assignment+1);
    1057           0 :                 if (adev->doorbell.num_doorbells == 0)
    1058             :                         return -EINVAL;
    1059             : 
    1060             :                 /* For Vega, reserve and map two pages on doorbell BAR since SDMA
     1061             :                  * paging queue doorbells use the second page. The
     1062             :                  * AMDGPU_DOORBELL64_MAX_ASSIGNMENT definition assumes all the
     1063             :                  * doorbells are in the first page. So with the paging queue enabled,
     1064             :                  * num_doorbells needs one extra page (0x400 in dwords).
    1065             :                  */
    1066           0 :                 if (adev->asic_type >= CHIP_VEGA10)
    1067           0 :                         adev->doorbell.num_doorbells += 0x400;
    1068             :         }
    1069             : 
    1070           0 :         adev->doorbell.ptr = ioremap(adev->doorbell.base,
    1071           0 :                                      adev->doorbell.num_doorbells *
    1072             :                                      sizeof(u32));
    1073           0 :         if (adev->doorbell.ptr == NULL)
    1074             :                 return -ENOMEM;
    1075             : 
    1076           0 :         return 0;
    1077             : }
    1078             : 
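/*
 * Worked example with a hypothetical 2 MiB doorbell BAR:
 * size / sizeof(u32) = 2097152 / 4 = 524288 dword doorbells.  Without
 * MES the count is instead clamped to max_assignment + 1 and, on
 * vega10 and later, extended by 0x400 dwords (one extra 4 KiB page)
 * for the SDMA paging queue; the ioremap above then maps
 * num_doorbells * 4 bytes of the BAR.
 */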
    1079             : /**
    1080             :  * amdgpu_device_doorbell_fini - Tear down doorbell driver information.
    1081             :  *
    1082             :  * @adev: amdgpu_device pointer
    1083             :  *
    1084             :  * Tear down doorbell driver information (CIK)
    1085             :  */
    1086             : static void amdgpu_device_doorbell_fini(struct amdgpu_device *adev)
    1087             : {
    1088           0 :         iounmap(adev->doorbell.ptr);
    1089           0 :         adev->doorbell.ptr = NULL;
    1090             : }
    1091             : 
    1092             : 
    1093             : 
    1094             : /*
    1095             :  * amdgpu_device_wb_*()
    1096             :  * Writeback is the method by which the GPU updates special pages in memory
    1097             :  * with the status of certain GPU events (fences, ring pointers, etc.).
    1098             :  */
    1099             : 
    1100             : /**
    1101             :  * amdgpu_device_wb_fini - Disable Writeback and free memory
    1102             :  *
    1103             :  * @adev: amdgpu_device pointer
    1104             :  *
    1105             :  * Disables Writeback and frees the Writeback memory (all asics).
    1106             :  * Used at driver shutdown.
    1107             :  */
    1108             : static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
    1109             : {
    1110           0 :         if (adev->wb.wb_obj) {
    1111           0 :                 amdgpu_bo_free_kernel(&adev->wb.wb_obj,
    1112           0 :                                       &adev->wb.gpu_addr,
    1113           0 :                                       (void **)&adev->wb.wb);
    1114           0 :                 adev->wb.wb_obj = NULL;
    1115             :         }
    1116             : }
    1117             : 
    1118             : /**
    1119             :  * amdgpu_device_wb_init - Init Writeback driver info and allocate memory
    1120             :  *
    1121             :  * @adev: amdgpu_device pointer
    1122             :  *
    1123             :  * Initializes writeback and allocates writeback memory (all asics).
    1124             :  * Used at driver startup.
    1125             :  * Returns 0 on success or a negative error code on failure.
    1126             :  */
    1127           0 : static int amdgpu_device_wb_init(struct amdgpu_device *adev)
    1128             : {
    1129             :         int r;
    1130             : 
    1131           0 :         if (adev->wb.wb_obj == NULL) {
    1132             :                 /* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
    1133           0 :                 r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
    1134             :                                             PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
    1135           0 :                                             &adev->wb.wb_obj, &adev->wb.gpu_addr,
    1136           0 :                                             (void **)&adev->wb.wb);
    1137           0 :                 if (r) {
    1138           0 :                         dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
    1139           0 :                         return r;
    1140             :                 }
    1141             : 
    1142           0 :                 adev->wb.num_wb = AMDGPU_MAX_WB;
    1143           0 :                 memset(&adev->wb.used, 0, sizeof(adev->wb.used));
    1144             : 
    1145             :                 /* clear wb memory */
    1146           0 :                 memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
    1147             :         }
    1148             : 
    1149             :         return 0;
    1150             : }
    1151             : 
    1152             : /**
    1153             :  * amdgpu_device_wb_get - Allocate a wb entry
    1154             :  *
    1155             :  * @adev: amdgpu_device pointer
    1156             :  * @wb: wb index
    1157             :  *
    1158             :  * Allocate a wb slot for use by the driver (all asics).
    1159             :  * Returns 0 on success or -EINVAL on failure.
    1160             :  */
    1161           0 : int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
    1162             : {
    1163           0 :         unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
    1164             : 
    1165           0 :         if (offset < adev->wb.num_wb) {
    1166           0 :                 __set_bit(offset, adev->wb.used);
    1167           0 :                 *wb = offset << 3; /* convert to dw offset */
    1168           0 :                 return 0;
    1169             :         } else {
    1170             :                 return -EINVAL;
    1171             :         }
    1172             : }
    1173             : 
    1174             : /**
    1175             :  * amdgpu_device_wb_free - Free a wb entry
    1176             :  *
    1177             :  * @adev: amdgpu_device pointer
    1178             :  * @wb: wb index
    1179             :  *
    1180             :  * Free a wb slot allocated for use by the driver (all asics)
    1181             :  */
    1182           0 : void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
    1183             : {
    1184           0 :         wb >>= 3;
    1185           0 :         if (wb < adev->wb.num_wb)
    1186           0 :                 __clear_bit(wb, adev->wb.used);
    1187           0 : }
    1188             : 
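/*
 * Usage sketch for the writeback API (illustrative; example_wb_usage
 * is a hypothetical caller, though ring and fence code follows this
 * pattern): allocate a slot, derive its CPU pointer and GPU address,
 * and free it on teardown.  The index returned by
 * amdgpu_device_wb_get() is already a dword offset.
 */
static int example_wb_usage(struct amdgpu_device *adev)
{
        volatile u32 *cpu_ptr;
        u64 gpu_addr;
        u32 wb;
        int r;

        r = amdgpu_device_wb_get(adev, &wb);
        if (r)
                return r;

        cpu_ptr = &adev->wb.wb[wb];              /* CPU view of the slot */
        gpu_addr = adev->wb.gpu_addr + (wb * 4); /* GPU address of the slot */

        *cpu_ptr = 0;           /* e.g. clear before the GPU writes status */
        (void)gpu_addr;         /* would be programmed into a ring or fence */

        amdgpu_device_wb_free(adev, wb);
        return 0;
}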
    1189             : /**
    1190             :  * amdgpu_device_resize_fb_bar - try to resize FB BAR
    1191             :  *
    1192             :  * @adev: amdgpu_device pointer
    1193             :  *
    1194             :  * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
    1195             :  * to fail, but if any of the BARs is not accessible after the resize we abort
    1196             :  * driver loading by returning -ENODEV.
    1197             :  */
    1198           0 : int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
    1199             : {
    1200           0 :         int rbar_size = pci_rebar_bytes_to_size(adev->gmc.real_vram_size);
    1201             :         struct pci_bus *root;
    1202             :         struct resource *res;
    1203             :         unsigned i;
    1204             :         u16 cmd;
    1205             :         int r;
    1206             : 
    1207             :         /* Bypass for VF */
    1208           0 :         if (amdgpu_sriov_vf(adev))
    1209             :                 return 0;
    1210             : 
    1211             :         /* skip if the bios has already enabled large BAR */
    1212           0 :         if (adev->gmc.real_vram_size &&
    1213           0 :             (pci_resource_len(adev->pdev, 0) >= adev->gmc.real_vram_size))
    1214             :                 return 0;
    1215             : 
    1216             :         /* Check if the root bus has 64-bit memory resources */
    1217           0 :         root = adev->pdev->bus;
    1218           0 :         while (root->parent)
    1219             :                 root = root->parent;
    1220             : 
    1221           0 :         pci_bus_for_each_resource(root, res, i) {
    1222           0 :                 if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
    1223           0 :                     res->start > 0x100000000ull)
    1224             :                         break;
    1225             :         }
    1226             : 
    1227             :         /* Trying to resize is pointless without a root hub window above 4GB */
    1228           0 :         if (!res)
    1229             :                 return 0;
    1230             : 
    1231             :         /* Limit the BAR size to what is available */
    1232           0 :         rbar_size = min(fls(pci_rebar_get_possible_sizes(adev->pdev, 0)) - 1,
    1233             :                         rbar_size);
    1234             : 
    1235             :         /* Disable memory decoding while we change the BAR addresses and size */
    1236           0 :         pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
    1237           0 :         pci_write_config_word(adev->pdev, PCI_COMMAND,
    1238           0 :                               cmd & ~PCI_COMMAND_MEMORY);
    1239             : 
    1240             :         /* Free the VRAM and doorbell BAR, we most likely need to move both. */
    1241           0 :         amdgpu_device_doorbell_fini(adev);
    1242           0 :         if (adev->asic_type >= CHIP_BONAIRE)
    1243           0 :                 pci_release_resource(adev->pdev, 2);
    1244             : 
    1245           0 :         pci_release_resource(adev->pdev, 0);
    1246             : 
    1247           0 :         r = pci_resize_resource(adev->pdev, 0, rbar_size);
    1248           0 :         if (r == -ENOSPC)
    1249           0 :                 DRM_INFO("Not enough PCI address space for a large BAR.");
    1250           0 :         else if (r && r != -ENOTSUPP)
    1251           0 :                 DRM_ERROR("Problem resizing BAR0 (%d).", r);
    1252             : 
    1253           0 :         pci_assign_unassigned_bus_resources(adev->pdev->bus);
    1254             : 
    1255             :         /* When the doorbell or fb BAR isn't available we have no chance of
    1256             :          * using the device.
    1257             :          */
    1258           0 :         r = amdgpu_device_doorbell_init(adev);
    1259           0 :         if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
    1260             :                 return -ENODEV;
    1261             : 
    1262           0 :         pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);
    1263             : 
    1264           0 :         return 0;
    1265             : }
    1266             : 
    1267             : /*
    1268             :  * GPU helper functions.
    1269             :  */
    1270             : /**
    1271             :  * amdgpu_device_need_post - check if the hw needs post or not
    1272             :  *
    1273             :  * @adev: amdgpu_device pointer
    1274             :  *
    1275             :  * Check if the asic has been initialized (all asics) at driver startup,
    1276             :  * or if a post is needed because a hw reset was performed.
    1277             :  * Returns true if a post is needed or false if not.
    1278             :  */
    1279           0 : bool amdgpu_device_need_post(struct amdgpu_device *adev)
    1280             : {
    1281             :         uint32_t reg;
    1282             : 
    1283           0 :         if (amdgpu_sriov_vf(adev))
    1284             :                 return false;
    1285             : 
    1286           0 :         if (amdgpu_passthrough(adev)) {
    1287             :                 /* for FIJI: In the whole-GPU pass-through virtualization case, after a
    1288             :                  * VM reboot some old smc fw versions still need the driver to do a
    1289             :                  * vPost, otherwise the gpu hangs. smc fw versions above 22.15 don't
    1290             :                  * have this flaw, so we force a vPost for smc versions below 22.15.
    1291             :                  */
    1292           0 :                 if (adev->asic_type == CHIP_FIJI) {
    1293             :                         int err;
    1294             :                         uint32_t fw_ver;
    1295           0 :                         err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
    1296             :                         /* force vPost if an error occurred */
    1297           0 :                         if (err)
    1298             :                                 return true;
    1299             : 
    1300           0 :                         fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
    1301           0 :                         if (fw_ver < 0x00160e00)
    1302             :                                 return true;
    1303             :                 }
    1304             :         }
    1305             : 
    1306             :         /* Don't post if we need to reset whole hive on init */
    1307           0 :         if (adev->gmc.xgmi.pending_reset)
    1308             :                 return false;
    1309             : 
    1310           0 :         if (adev->has_hw_reset) {
    1311           0 :                 adev->has_hw_reset = false;
    1312           0 :                 return true;
    1313             :         }
    1314             : 
    1315             :         /* bios scratch used on CIK+ */
    1316           0 :         if (adev->asic_type >= CHIP_BONAIRE)
    1317           0 :                 return amdgpu_atombios_scratch_need_asic_init(adev);
    1318             : 
    1319             :         /* check MEM_SIZE for older asics */
    1320           0 :         reg = amdgpu_asic_get_config_memsize(adev);
    1321             : 
    1322           0 :         if ((reg != 0) && (reg != 0xffffffff))
    1323             :                 return false;
    1324             : 
    1325           0 :         return true;
    1326             : }
    1327             : 
    1328             : /**
    1329             :  * amdgpu_device_should_use_aspm - check if the device should program ASPM
    1330             :  *
    1331             :  * @adev: amdgpu_device pointer
    1332             :  *
    1333             :  * Confirm whether the module parameter and pcie bridge agree that ASPM should
    1334             :  * be set for this device.
    1335             :  *
    1336             :  * Returns true if it should be used or false if not.
    1337             :  */
    1338           0 : bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev)
    1339             : {
    1340           0 :         switch (amdgpu_aspm) {
    1341             :         case -1:
    1342             :                 break;
    1343             :         case 0:
    1344             :                 return false;
    1345             :         case 1:
    1346           0 :                 return true;
    1347             :         default:
    1348             :                 return false;
    1349             :         }
    1350           0 :         return pcie_aspm_enabled(adev->pdev);
    1351             : }
    1352             : 
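/*
 * The switch above mirrors the amdgpu.aspm module parameter: -1 (the
 * default) defers to whatever the PCIe bridge already negotiated via
 * pcie_aspm_enabled(), 0 forces ASPM off, and 1 forces it on, e.g. on
 * the kernel command line:
 *
 *      amdgpu.aspm=0
 */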
    1353             : /* if we get transitioned to only one device, take VGA back */
    1354             : /**
    1355             :  * amdgpu_device_vga_set_decode - enable/disable vga decode
    1356             :  *
    1357             :  * @pdev: PCI device pointer
    1358             :  * @state: enable/disable vga decode
    1359             :  *
    1360             :  * Enable/disable vga decode (all asics).
    1361             :  * Returns VGA resource flags.
    1362             :  */
    1363           0 : static unsigned int amdgpu_device_vga_set_decode(struct pci_dev *pdev,
    1364             :                 bool state)
    1365             : {
    1366           0 :         struct amdgpu_device *adev = drm_to_adev(pci_get_drvdata(pdev));
    1367           0 :         amdgpu_asic_set_vga_state(adev, state);
    1368           0 :         if (state)
    1369             :                 return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
    1370             :                        VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
    1371             :         else
    1372           0 :                 return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
    1373             : }
    1374             : 
    1375             : /**
    1376             :  * amdgpu_device_check_block_size - validate the vm block size
    1377             :  *
    1378             :  * @adev: amdgpu_device pointer
    1379             :  *
    1380             :  * Validates the vm block size specified via module parameter.
    1381             :  * The vm block size defines number of bits in page table versus page directory,
    1382             :  * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
    1383             :  * page table and the remaining bits are in the page directory.
    1384             :  */
    1385           0 : static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
    1386             : {
    1387             :         /* defines number of bits in page table versus page directory,
    1388             :          * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
    1389             :          * page table and the remaining bits are in the page directory */
    1390           0 :         if (amdgpu_vm_block_size == -1)
    1391             :                 return;
    1392             : 
    1393           0 :         if (amdgpu_vm_block_size < 9) {
    1394           0 :                 dev_warn(adev->dev, "VM page table size (%d) too small\n",
    1395             :                          amdgpu_vm_block_size);
    1396           0 :                 amdgpu_vm_block_size = -1;
    1397             :         }
    1398             : }
    1399             : 
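/*
 * Worked example: with 4 KiB pages the low 12 bits of an address are
 * the page offset.  A block size of 9 gives 2^9 = 512 entries per
 * page table, so each last-level table covers 512 * 4 KiB = 2 MiB,
 * and the remaining virtual address bits select page directory
 * entries.
 */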
    1400             : /**
    1401             :  * amdgpu_device_check_vm_size - validate the vm size
    1402             :  *
    1403             :  * @adev: amdgpu_device pointer
    1404             :  *
    1405             :  * Validates the vm size in GB specified via module parameter.
    1406             :  * The VM size is the size of the GPU virtual memory space in GB.
    1407             :  */
    1408           0 : static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
    1409             : {
    1410             :         /* no need to check the default value */
    1411           0 :         if (amdgpu_vm_size == -1)
    1412             :                 return;
    1413             : 
    1414           0 :         if (amdgpu_vm_size < 1) {
    1415           0 :                 dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
    1416             :                          amdgpu_vm_size);
    1417           0 :                 amdgpu_vm_size = -1;
    1418             :         }
    1419             : }
    1420             : 
    1421           0 : static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
    1422             : {
    1423             :         struct sysinfo si;
    1424           0 :         bool is_os_64 = (sizeof(void *) == 8);
    1425             :         uint64_t total_memory;
    1426           0 :         uint64_t dram_size_seven_GB = 0x1B8000000;
    1427           0 :         uint64_t dram_size_three_GB = 0xB8000000;
    1428             : 
    1429           0 :         if (amdgpu_smu_memory_pool_size == 0)
    1430           0 :                 return;
    1431             : 
    1432             :         if (!is_os_64) {
    1433             :                 DRM_WARN("Not 64-bit OS, feature not supported\n");
    1434             :                 goto def_value;
    1435             :         }
    1436           0 :         si_meminfo(&si);
    1437           0 :         total_memory = (uint64_t)si.totalram * si.mem_unit;
    1438             : 
    1439           0 :         if ((amdgpu_smu_memory_pool_size == 1) ||
    1440             :                 (amdgpu_smu_memory_pool_size == 2)) {
    1441           0 :                 if (total_memory < dram_size_three_GB)
    1442             :                         goto def_value1;
    1443           0 :         } else if ((amdgpu_smu_memory_pool_size == 4) ||
    1444             :                 (amdgpu_smu_memory_pool_size == 8)) {
    1445           0 :                 if (total_memory < dram_size_seven_GB)
    1446             :                         goto def_value1;
    1447             :         } else {
    1448           0 :                 DRM_WARN("Smu memory pool size not supported\n");
    1449             :                 goto def_value;
    1450             :         }
    1451           0 :         adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;
    1452             : 
    1453             :         return;
    1454             : 
    1455             : def_value1:
    1456           0 :         DRM_WARN("Not enough system memory\n");
    1457             : def_value:
    1458           0 :         adev->pm.smu_prv_buffer_size = 0;
    1459             : }
    1460             : 
    1461           0 : static int amdgpu_device_init_apu_flags(struct amdgpu_device *adev)
    1462             : {
    1463           0 :         if (!(adev->flags & AMD_IS_APU) ||
    1464           0 :             adev->asic_type < CHIP_RAVEN)
    1465             :                 return 0;
    1466             : 
    1467           0 :         switch (adev->asic_type) {
    1468             :         case CHIP_RAVEN:
    1469           0 :                 if (adev->pdev->device == 0x15dd)
    1470           0 :                         adev->apu_flags |= AMD_APU_IS_RAVEN;
    1471           0 :                 if (adev->pdev->device == 0x15d8)
    1472           0 :                         adev->apu_flags |= AMD_APU_IS_PICASSO;
    1473             :                 break;
    1474             :         case CHIP_RENOIR:
    1475           0 :                 if ((adev->pdev->device == 0x1636) ||
    1476             :                     (adev->pdev->device == 0x164c))
    1477           0 :                         adev->apu_flags |= AMD_APU_IS_RENOIR;
    1478             :                 else
    1479           0 :                         adev->apu_flags |= AMD_APU_IS_GREEN_SARDINE;
    1480             :                 break;
    1481             :         case CHIP_VANGOGH:
    1482           0 :                 adev->apu_flags |= AMD_APU_IS_VANGOGH;
    1483           0 :                 break;
    1484             :         case CHIP_YELLOW_CARP:
    1485             :                 break;
    1486             :         case CHIP_CYAN_SKILLFISH:
    1487           0 :                 if ((adev->pdev->device == 0x13FE) ||
    1488             :                     (adev->pdev->device == 0x143F))
    1489           0 :                         adev->apu_flags |= AMD_APU_IS_CYAN_SKILLFISH2;
    1490             :                 break;
    1491             :         default:
    1492             :                 break;
    1493             :         }
    1494             : 
    1495             :         return 0;
    1496             : }
    1497             : 
    1498             : /**
    1499             :  * amdgpu_device_check_arguments - validate module params
    1500             :  *
    1501             :  * @adev: amdgpu_device pointer
    1502             :  *
    1503             :  * Validates certain module parameters and updates
    1504             :  * the associated values used by the driver (all asics).
    1505             :  */
    1506           0 : static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
    1507             : {
    1508           0 :         if (amdgpu_sched_jobs < 4) {
    1509           0 :                 dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
    1510             :                          amdgpu_sched_jobs);
    1511           0 :                 amdgpu_sched_jobs = 4;
    1512           0 :         } else if (!is_power_of_2(amdgpu_sched_jobs)){
    1513           0 :                 dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
    1514             :                          amdgpu_sched_jobs);
    1515           0 :                 amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
    1516             :         }
    1517             : 
    1518           0 :         if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
    1519             :                 /* gart size must be greater or equal to 32M */
    1520           0 :                 dev_warn(adev->dev, "gart size (%d) too small\n",
    1521             :                          amdgpu_gart_size);
    1522           0 :                 amdgpu_gart_size = -1;
    1523             :         }
    1524             : 
    1525           0 :         if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
    1526             :                 /* gtt size must be greater or equal to 32M */
    1527           0 :                 dev_warn(adev->dev, "gtt size (%d) too small\n",
    1528             :                                  amdgpu_gtt_size);
    1529           0 :                 amdgpu_gtt_size = -1;
    1530             :         }
    1531             : 
    1532             :         /* valid range is between 4 and 9 inclusive */
    1533           0 :         if (amdgpu_vm_fragment_size != -1 &&
    1534             :             (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
    1535           0 :                 dev_warn(adev->dev, "valid range is between 4 and 9\n");
    1536           0 :                 amdgpu_vm_fragment_size = -1;
    1537             :         }
    1538             : 
    1539           0 :         if (amdgpu_sched_hw_submission < 2) {
    1540           0 :                 dev_warn(adev->dev, "sched hw submission jobs (%d) must be at least 2\n",
    1541             :                          amdgpu_sched_hw_submission);
    1542           0 :                 amdgpu_sched_hw_submission = 2;
    1543           0 :         } else if (!is_power_of_2(amdgpu_sched_hw_submission)) {
    1544           0 :                 dev_warn(adev->dev, "sched hw submission jobs (%d) must be a power of 2\n",
    1545             :                          amdgpu_sched_hw_submission);
    1546           0 :                 amdgpu_sched_hw_submission = roundup_pow_of_two(amdgpu_sched_hw_submission);
    1547             :         }
    1548             : 
    1549           0 :         if (amdgpu_reset_method < -1 || amdgpu_reset_method > 4) {
    1550           0 :                 dev_warn(adev->dev, "invalid option for reset method, reverting to default\n");
    1551           0 :                 amdgpu_reset_method = -1;
    1552             :         }
    1553             : 
    1554           0 :         amdgpu_device_check_smu_prv_buffer_size(adev);
    1555             : 
    1556           0 :         amdgpu_device_check_vm_size(adev);
    1557             : 
    1558           0 :         amdgpu_device_check_block_size(adev);
    1559             : 
    1560           0 :         adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
    1561             : 
    1562           0 :         return 0;
    1563             : }
    1564             : 
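/*
 * Example (illustrative values) of setting the parameters validated
 * above via modprobe configuration:
 *
 *      options amdgpu vm_size=8 sched_jobs=64 sched_hw_submission=4
 *
 * Out-of-range values are warned about and clamped or reset to the
 * defaults rather than failing the module load.
 */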
    1565             : /**
    1566             :  * amdgpu_switcheroo_set_state - set switcheroo state
    1567             :  *
    1568             :  * @pdev: pci dev pointer
    1569             :  * @state: vga_switcheroo state
    1570             :  *
    1571             :  * Callback for the switcheroo driver.  Suspends or resumes
    1572             :  * the asic before or after it is powered up using ACPI methods.
    1573             :  */
    1574             : static void amdgpu_switcheroo_set_state(struct pci_dev *pdev,
    1575             :                                         enum vga_switcheroo_state state)
    1576             : {
    1577             :         struct drm_device *dev = pci_get_drvdata(pdev);
    1578             :         int r;
    1579             : 
    1580             :         if (amdgpu_device_supports_px(dev) && state == VGA_SWITCHEROO_OFF)
    1581             :                 return;
    1582             : 
    1583             :         if (state == VGA_SWITCHEROO_ON) {
    1584             :                 pr_info("switched on\n");
    1585             :                 /* don't suspend or resume card normally */
    1586             :                 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
    1587             : 
    1588             :                 pci_set_power_state(pdev, PCI_D0);
    1589             :                 amdgpu_device_load_pci_state(pdev);
    1590             :                 r = pci_enable_device(pdev);
    1591             :                 if (r)
    1592             :                         DRM_WARN("pci_enable_device failed (%d)\n", r);
    1593             :                 amdgpu_device_resume(dev, true);
    1594             : 
    1595             :                 dev->switch_power_state = DRM_SWITCH_POWER_ON;
    1596             :         } else {
    1597             :                 pr_info("switched off\n");
    1598             :                 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
    1599             :                 amdgpu_device_suspend(dev, true);
    1600             :                 amdgpu_device_cache_pci_state(pdev);
    1601             :                 /* Shut down the device */
    1602             :                 pci_disable_device(pdev);
    1603             :                 pci_set_power_state(pdev, PCI_D3cold);
    1604             :                 dev->switch_power_state = DRM_SWITCH_POWER_OFF;
    1605             :         }
    1606             : }
    1607             : 
    1608             : /**
    1609             :  * amdgpu_switcheroo_can_switch - see if switcheroo state can change
    1610             :  *
    1611             :  * @pdev: pci dev pointer
    1612             :  *
    1613             :  * Callback for the switcheroo driver.  Checks if the switcheroo
    1614             :  * state can be changed.
    1615             :  * Returns true if the state can be changed, false if not.
    1616             :  */
    1617             : static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
    1618             : {
    1619             :         struct drm_device *dev = pci_get_drvdata(pdev);
    1620             : 
    1621             :         /*
    1622             :         * FIXME: open_count is protected by drm_global_mutex but that would lead to
    1623             :         * locking inversion with the driver load path. And the access here is
    1624             :         * completely racy anyway. So don't bother with locking for now.
    1625             :         */
    1626             :         return atomic_read(&dev->open_count) == 0;
    1627             : }
    1628             : 
    1629             : static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
    1630             :         .set_gpu_state = amdgpu_switcheroo_set_state,
    1631             :         .reprobe = NULL,
    1632             :         .can_switch = amdgpu_switcheroo_can_switch,
    1633             : };
    1634             : 
    1635             : /**
    1636             :  * amdgpu_device_ip_set_clockgating_state - set the CG state
    1637             :  *
    1638             :  * @dev: amdgpu_device pointer
    1639             :  * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
    1640             :  * @state: clockgating state (gate or ungate)
    1641             :  *
    1642             :  * Sets the requested clockgating state for all instances of
    1643             :  * the hardware IP specified.
    1644             :  * Returns the error code from the last instance.
    1645             :  */
    1646           0 : int amdgpu_device_ip_set_clockgating_state(void *dev,
    1647             :                                            enum amd_ip_block_type block_type,
    1648             :                                            enum amd_clockgating_state state)
    1649             : {
    1650           0 :         struct amdgpu_device *adev = dev;
    1651           0 :         int i, r = 0;
    1652             : 
    1653           0 :         for (i = 0; i < adev->num_ip_blocks; i++) {
    1654           0 :                 if (!adev->ip_blocks[i].status.valid)
    1655           0 :                         continue;
    1656           0 :                 if (adev->ip_blocks[i].version->type != block_type)
    1657           0 :                         continue;
    1658           0 :                 if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
    1659           0 :                         continue;
    1660           0 :                 r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
    1661             :                         (void *)adev, state);
    1662           0 :                 if (r)
    1663           0 :                         DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
    1664             :                                   adev->ip_blocks[i].version->funcs->name, r);
    1665             :         }
    1666           0 :         return r;
    1667             : }
    1668             : 
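/*
 * Usage sketch (example_gate_gfx_clocks is a hypothetical caller;
 * block type and state are illustrative): gate clocks for all GFX IP
 * instances on the device.
 */
static void example_gate_gfx_clocks(struct amdgpu_device *adev)
{
        int r;

        r = amdgpu_device_ip_set_clockgating_state(adev,
                                                   AMD_IP_BLOCK_TYPE_GFX,
                                                   AMD_CG_STATE_GATE);
        if (r)
                DRM_ERROR("failed to gate GFX clocks (%d)\n", r);
}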
    1669             : /**
    1670             :  * amdgpu_device_ip_set_powergating_state - set the PG state
    1671             :  *
    1672             :  * @dev: amdgpu_device pointer
    1673             :  * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
    1674             :  * @state: powergating state (gate or ungate)
    1675             :  *
    1676             :  * Sets the requested powergating state for all instances of
    1677             :  * the hardware IP specified.
    1678             :  * Returns the error code from the last instance.
    1679             :  */
    1680           0 : int amdgpu_device_ip_set_powergating_state(void *dev,
    1681             :                                            enum amd_ip_block_type block_type,
    1682             :                                            enum amd_powergating_state state)
    1683             : {
    1684           0 :         struct amdgpu_device *adev = dev;
    1685           0 :         int i, r = 0;
    1686             : 
    1687           0 :         for (i = 0; i < adev->num_ip_blocks; i++) {
    1688           0 :                 if (!adev->ip_blocks[i].status.valid)
    1689           0 :                         continue;
    1690           0 :                 if (adev->ip_blocks[i].version->type != block_type)
    1691           0 :                         continue;
    1692           0 :                 if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
    1693           0 :                         continue;
    1694           0 :                 r = adev->ip_blocks[i].version->funcs->set_powergating_state(
    1695             :                         (void *)adev, state);
    1696           0 :                 if (r)
    1697           0 :                         DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
    1698             :                                   adev->ip_blocks[i].version->funcs->name, r);
    1699             :         }
    1700           0 :         return r;
    1701             : }
    1702             : 
    1703             : /**
    1704             :  * amdgpu_device_ip_get_clockgating_state - get the CG state
    1705             :  *
    1706             :  * @adev: amdgpu_device pointer
    1707             :  * @flags: clockgating feature flags
    1708             :  *
    1709             :  * Walks the list of IPs on the device and updates the clockgating
    1710             :  * flags for each IP.
    1711             :  * Updates @flags with the feature flags for each hardware IP where
    1712             :  * clockgating is enabled.
    1713             :  */
    1714           0 : void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
    1715             :                                             u64 *flags)
    1716             : {
    1717             :         int i;
    1718             : 
    1719           0 :         for (i = 0; i < adev->num_ip_blocks; i++) {
    1720           0 :                 if (!adev->ip_blocks[i].status.valid)
    1721           0 :                         continue;
    1722           0 :                 if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
    1723           0 :                         adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
    1724             :         }
    1725           0 : }
    1726             : 
    1727             : /**
    1728             :  * amdgpu_device_ip_wait_for_idle - wait for idle
    1729             :  *
    1730             :  * @adev: amdgpu_device pointer
    1731             :  * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
    1732             :  *
    1733             :  * Waits for the requested hardware IP to be idle.
    1734             :  * Returns 0 for success or a negative error code on failure.
    1735             :  */
    1736           0 : int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
    1737             :                                    enum amd_ip_block_type block_type)
    1738             : {
    1739             :         int i, r;
    1740             : 
    1741           0 :         for (i = 0; i < adev->num_ip_blocks; i++) {
    1742           0 :                 if (!adev->ip_blocks[i].status.valid)
    1743           0 :                         continue;
    1744           0 :                 if (adev->ip_blocks[i].version->type == block_type) {
    1745           0 :                         r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
    1746           0 :                         if (r)
    1747             :                                 return r;
    1748             :                         break;
    1749             :                 }
    1750             :         }
    1751             :         return 0;
    1752             : 
    1753             : }
    1754             : 
    1755             : /**
    1756             :  * amdgpu_device_ip_is_idle - is the hardware IP idle
    1757             :  *
    1758             :  * @adev: amdgpu_device pointer
    1759             :  * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
    1760             :  *
    1761             :  * Check if the hardware IP is idle or not.
    1762             :  * Returns true if the IP is idle, false if not.
    1763             :  */
    1764           0 : bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
    1765             :                               enum amd_ip_block_type block_type)
    1766             : {
    1767             :         int i;
    1768             : 
    1769           0 :         for (i = 0; i < adev->num_ip_blocks; i++) {
    1770           0 :                 if (!adev->ip_blocks[i].status.valid)
    1771           0 :                         continue;
    1772           0 :                 if (adev->ip_blocks[i].version->type == block_type)
    1773           0 :                         return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
    1774             :         }
    1775             :         return true;
    1776             : 
    1777             : }
    1778             : 
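/*
 * Usage sketch (block type is illustrative): check whether an IP is
 * already idle and wait for it otherwise, e.g. before reprogramming:
 *
 *      if (!amdgpu_device_ip_is_idle(adev, AMD_IP_BLOCK_TYPE_GMC))
 *              r = amdgpu_device_ip_wait_for_idle(adev,
 *                                                 AMD_IP_BLOCK_TYPE_GMC);
 */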
    1779             : /**
    1780             :  * amdgpu_device_ip_get_ip_block - get a hw IP pointer
    1781             :  *
    1782             :  * @adev: amdgpu_device pointer
    1783             :  * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
    1784             :  *
    1785             :  * Returns a pointer to the hardware IP block structure
    1786             :  * if it exists for the asic, otherwise NULL.
    1787             :  */
    1788             : struct amdgpu_ip_block *
    1789           0 : amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
    1790             :                               enum amd_ip_block_type type)
    1791             : {
    1792             :         int i;
    1793             : 
    1794           0 :         for (i = 0; i < adev->num_ip_blocks; i++)
    1795           0 :                 if (adev->ip_blocks[i].version->type == type)
    1796           0 :                         return &adev->ip_blocks[i];
    1797             : 
    1798             :         return NULL;
    1799             : }
    1800             : 
    1801             : /**
    1802             :  * amdgpu_device_ip_block_version_cmp
    1803             :  *
    1804             :  * @adev: amdgpu_device pointer
    1805             :  * @type: enum amd_ip_block_type
    1806             :  * @major: major version
    1807             :  * @minor: minor version
    1808             :  *
    1809             :  * Returns 0 if the installed IP block version is equal or greater,
    1810             :  * 1 if it is smaller or the ip_block doesn't exist.
    1811             :  */
    1812           0 : int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
    1813             :                                        enum amd_ip_block_type type,
    1814             :                                        u32 major, u32 minor)
    1815             : {
    1816           0 :         struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);
    1817             : 
    1818           0 :         if (ip_block && ((ip_block->version->major > major) ||
    1819           0 :                         ((ip_block->version->major == major) &&
    1820           0 :                         (ip_block->version->minor >= minor))))
    1821             :                 return 0;
    1822             : 
    1823           0 :         return 1;
    1824             : }
    1825             : 
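/*
 * Usage sketch (example_has_smu_v7 is a hypothetical helper; block
 * type and version are illustrative): feature-gate on a minimum IP
 * version.
 */
static bool example_has_smu_v7(struct amdgpu_device *adev)
{
        /* returns 0 when the installed block is >= the given version */
        return amdgpu_device_ip_block_version_cmp(adev,
                        AMD_IP_BLOCK_TYPE_SMC, 7, 0) == 0;
}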
    1826             : /**
    1827             :  * amdgpu_device_ip_block_add
    1828             :  *
    1829             :  * @adev: amdgpu_device pointer
    1830             :  * @ip_block_version: pointer to the IP to add
    1831             :  *
    1832             :  * Adds the IP block driver information to the collection of IPs
    1833             :  * on the asic.
    1834             :  */
    1835           0 : int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
    1836             :                                const struct amdgpu_ip_block_version *ip_block_version)
    1837             : {
    1838           0 :         if (!ip_block_version)
    1839             :                 return -EINVAL;
    1840             : 
    1841           0 :         switch (ip_block_version->type) {
    1842             :         case AMD_IP_BLOCK_TYPE_VCN:
    1843           0 :                 if (adev->harvest_ip_mask & AMD_HARVEST_IP_VCN_MASK)
    1844             :                         return 0;
    1845             :                 break;
    1846             :         case AMD_IP_BLOCK_TYPE_JPEG:
    1847           0 :                 if (adev->harvest_ip_mask & AMD_HARVEST_IP_JPEG_MASK)
    1848             :                         return 0;
    1849             :                 break;
    1850             :         default:
    1851             :                 break;
    1852             :         }
    1853             : 
    1854           0 :         DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
    1855             :                   ip_block_version->funcs->name);
    1856             : 
    1857           0 :         adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;
    1858             : 
    1859           0 :         return 0;
    1860             : }
    1861             : 
    1862             : /**
    1863             :  * amdgpu_device_enable_virtual_display - enable virtual display feature
    1864             :  *
    1865             :  * @adev: amdgpu_device pointer
    1866             :  *
    1867             :  * Enables the virtual display feature if the user has enabled it via
    1868             :  * the module parameter virtual_display.  This feature provides virtual
    1869             :  * display hardware on headless boards or in virtualized environments.
    1870             :  * This function parses and validates the configuration string specified by
    1871             :  * the user and configures the virtual display configuration (number of
    1872             :  * virtual connectors, crtcs, etc.) specified.
    1873             :  */
    1874           0 : static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
    1875             : {
    1876           0 :         adev->enable_virtual_display = false;
    1877             : 
    1878           0 :         if (amdgpu_virtual_display) {
    1879           0 :                 const char *pci_address_name = pci_name(adev->pdev);
    1880             :                 char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;
    1881             : 
    1882           0 :                 pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
    1883           0 :                 pciaddstr_tmp = pciaddstr;
    1884           0 :                 while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
    1885           0 :                         pciaddname = strsep(&pciaddname_tmp, ",");
    1886           0 :                         if (!strcmp("all", pciaddname)
    1887           0 :                             || !strcmp(pci_address_name, pciaddname)) {
    1888             :                                 long num_crtc;
    1889           0 :                                 int res = -1;
    1890             : 
    1891           0 :                                 adev->enable_virtual_display = true;
    1892             : 
    1893           0 :                                 if (pciaddname_tmp)
    1894           0 :                                         res = kstrtol(pciaddname_tmp, 10,
    1895             :                                                       &num_crtc);
    1896             : 
    1897           0 :                                 if (!res) {
    1898           0 :                                         if (num_crtc < 1)
    1899           0 :                                                 num_crtc = 1;
    1900           0 :                                         if (num_crtc > 6)
    1901           0 :                                                 num_crtc = 6;
    1902           0 :                                         adev->mode_info.num_crtc = num_crtc;
    1903             :                                 } else {
    1904           0 :                                         adev->mode_info.num_crtc = 1;
    1905             :                                 }
    1906             :                                 break;
    1907             :                         }
    1908             :                 }
    1909             : 
    1910           0 :                 DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
    1911             :                          amdgpu_virtual_display, pci_address_name,
    1912             :                          adev->enable_virtual_display, adev->mode_info.num_crtc);
    1913             : 
    1914           0 :                 kfree(pciaddstr);
    1915             :         }
    1916           0 : }
    1917             : 
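/*
 * The virtual_display string parsed above is a semicolon-separated
 * list of "<pci address>,<crtc count>" entries, where the address may
 * also be "all".  E.g. on the kernel command line (address
 * illustrative):
 *
 *      amdgpu.virtual_display=0000:01:00.0,2
 *
 * enables two virtual crtcs on the device at 0000:01:00.0.
 */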
    1918             : /**
    1919             :  * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
    1920             :  *
    1921             :  * @adev: amdgpu_device pointer
    1922             :  *
    1923             :  * Parses the asic configuration parameters specified in the gpu info
    1924             :  * firmware and makes them available to the driver for use in configuring
    1925             :  * the asic.
    1926             :  * Returns 0 on success, -EINVAL on failure.
    1927             :  */
    1928           0 : static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
    1929             : {
    1930             :         const char *chip_name;
    1931             :         char fw_name[40];
    1932             :         int err;
    1933             :         const struct gpu_info_firmware_header_v1_0 *hdr;
    1934             : 
    1935           0 :         adev->firmware.gpu_info_fw = NULL;
    1936             : 
    1937           0 :         if (adev->mman.discovery_bin) {
    1938             :                 /*
    1939             :                  * FIXME: The bounding box is still needed by Navi12, so
    1940             :                  * temporarily read it from gpu_info firmware. Should be dropped
    1941             :                  * when DAL no longer needs it.
    1942             :                  */
    1943           0 :                 if (adev->asic_type != CHIP_NAVI12)
    1944             :                         return 0;
    1945             :         }
    1946             : 
    1947           0 :         switch (adev->asic_type) {
    1948             :         default:
    1949             :                 return 0;
    1950             :         case CHIP_VEGA10:
    1951             :                 chip_name = "vega10";
    1952             :                 break;
    1953             :         case CHIP_VEGA12:
    1954           0 :                 chip_name = "vega12";
    1955           0 :                 break;
    1956             :         case CHIP_RAVEN:
    1957           0 :                 if (adev->apu_flags & AMD_APU_IS_RAVEN2)
    1958             :                         chip_name = "raven2";
    1959           0 :                 else if (adev->apu_flags & AMD_APU_IS_PICASSO)
    1960             :                         chip_name = "picasso";
    1961             :                 else
    1962           0 :                         chip_name = "raven";
    1963             :                 break;
    1964             :         case CHIP_ARCTURUS:
    1965           0 :                 chip_name = "arcturus";
    1966           0 :                 break;
    1967             :         case CHIP_NAVI12:
    1968           0 :                 chip_name = "navi12";
    1969           0 :                 break;
    1970             :         }
    1971             : 
    1972           0 :         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
    1973           0 :         err = request_firmware(&adev->firmware.gpu_info_fw, fw_name, adev->dev);
    1974           0 :         if (err) {
    1975           0 :                 dev_err(adev->dev,
    1976             :                         "Failed to load gpu_info firmware \"%s\"\n",
    1977             :                         fw_name);
    1978           0 :                 goto out;
    1979             :         }
    1980           0 :         err = amdgpu_ucode_validate(adev->firmware.gpu_info_fw);
    1981           0 :         if (err) {
    1982           0 :                 dev_err(adev->dev,
    1983             :                         "Failed to validate gpu_info firmware \"%s\"\n",
    1984             :                         fw_name);
    1985           0 :                 goto out;
    1986             :         }
    1987             : 
    1988           0 :         hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
    1989           0 :         amdgpu_ucode_print_gpu_info_hdr(&hdr->header);
    1990             : 
    1991           0 :         switch (hdr->version_major) {
    1992             :         case 1:
    1993             :         {
    1994           0 :                 const struct gpu_info_firmware_v1_0 *gpu_info_fw =
    1995           0 :                         (const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
    1996           0 :                                                                 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
    1997             : 
    1998             :                 /*
    1999             :                  * Should be dropped when DAL no longer needs it.
    2000             :                  */
    2001           0 :                 if (adev->asic_type == CHIP_NAVI12)
    2002             :                         goto parse_soc_bounding_box;
    2003             : 
    2004           0 :                 adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
    2005           0 :                 adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
    2006           0 :                 adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
    2007           0 :                 adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
    2008           0 :                 adev->gfx.config.max_texture_channel_caches =
    2009           0 :                         le32_to_cpu(gpu_info_fw->gc_num_tccs);
    2010           0 :                 adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
    2011           0 :                 adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
    2012           0 :                 adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
    2013           0 :                 adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
    2014           0 :                 adev->gfx.config.double_offchip_lds_buf =
    2015           0 :                         le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
    2016           0 :                 adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
    2017           0 :                 adev->gfx.cu_info.max_waves_per_simd =
    2018           0 :                         le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
    2019           0 :                 adev->gfx.cu_info.max_scratch_slots_per_cu =
    2020           0 :                         le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
    2021           0 :                 adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
    2022           0 :                 if (hdr->version_minor >= 1) {
    2023           0 :                         const struct gpu_info_firmware_v1_1 *gpu_info_fw =
    2024           0 :                                 (const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data +
    2025           0 :                                                                         le32_to_cpu(hdr->header.ucode_array_offset_bytes));
    2026           0 :                         adev->gfx.config.num_sc_per_sh =
    2027           0 :                                 le32_to_cpu(gpu_info_fw->num_sc_per_sh);
    2028           0 :                         adev->gfx.config.num_packer_per_sc =
    2029           0 :                                 le32_to_cpu(gpu_info_fw->num_packer_per_sc);
    2030             :                 }
    2031             : 
    2032             : parse_soc_bounding_box:
    2033             :                 /*
    2034             :                  * soc bounding box info is not integrated in the discovery table,
    2035             :                  * so we always need to parse it from the gpu info firmware if needed.
    2036             :                  */
    2037           0 :                 if (hdr->version_minor == 2) {
    2038           0 :                         const struct gpu_info_firmware_v1_2 *gpu_info_fw =
    2039           0 :                                 (const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
    2040           0 :                                                                         le32_to_cpu(hdr->header.ucode_array_offset_bytes));
    2041           0 :                         adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
    2042             :                 }
    2043             :                 break;
    2044             :         }
    2045             :         default:
    2046           0 :                 dev_err(adev->dev,
    2047             :                         "Unsupported gpu_info table %d\n", hdr->header.ucode_version);
    2048           0 :                 err = -EINVAL;
    2049           0 :                 goto out;
    2050             :         }
    2051             : out:
    2052             :         return err;
    2053             : }
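
The switch above illustrates the idiom this driver uses for versioned firmware tables: cast the raw ucode blob at ucode_array_offset_bytes to the matching struct, convert every field with le32_to_cpu(), and gate optional fields on hdr->version_minor. A minimal sketch of how a newer minor revision would be handled; struct gpu_info_firmware_v1_3, gc_extra_field and extra_field are hypothetical, shown only to demonstrate the pattern:

        if (hdr->version_minor >= 3) {
                /* hypothetical v1_3 layout, not a real struct in this driver */
                const struct gpu_info_firmware_v1_3 *fw_v13 =
                        (const struct gpu_info_firmware_v1_3 *)(adev->firmware.gpu_info_fw->data +
                                le32_to_cpu(hdr->header.ucode_array_offset_bytes));

                /* firmware fields are little-endian; always convert on read */
                adev->gfx.config.extra_field = le32_to_cpu(fw_v13->gc_extra_field);
        }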
    2054             : 
    2055             : /**
    2056             :  * amdgpu_device_ip_early_init - run early init for hardware IPs
    2057             :  *
    2058             :  * @adev: amdgpu_device pointer
    2059             :  *
    2060             :  * Early initialization pass for hardware IPs.  The hardware IPs that make
    2061             :  * up each asic are discovered and each IP's early_init callback is run.  This
    2062             :  * is the first stage in initializing the asic.
    2063             :  * Returns 0 on success, negative error code on failure.
    2064             :  */
    2065           0 : static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
    2066             : {
    2067           0 :         struct drm_device *dev = adev_to_drm(adev);
    2068             :         struct pci_dev *parent;
    2069             :         int i, r;
    2070             : 
    2071           0 :         amdgpu_device_enable_virtual_display(adev);
    2072             : 
    2073           0 :         if (amdgpu_sriov_vf(adev)) {
    2074           0 :                 r = amdgpu_virt_request_full_gpu(adev, true);
    2075           0 :                 if (r)
    2076             :                         return r;
    2077             :         }
    2078             : 
    2079           0 :         switch (adev->asic_type) {
    2080             : #ifdef CONFIG_DRM_AMDGPU_SI
    2081             :         case CHIP_VERDE:
    2082             :         case CHIP_TAHITI:
    2083             :         case CHIP_PITCAIRN:
    2084             :         case CHIP_OLAND:
    2085             :         case CHIP_HAINAN:
    2086             :                 adev->family = AMDGPU_FAMILY_SI;
    2087             :                 r = si_set_ip_blocks(adev);
    2088             :                 if (r)
    2089             :                         return r;
    2090             :                 break;
    2091             : #endif
    2092             : #ifdef CONFIG_DRM_AMDGPU_CIK
    2093             :         case CHIP_BONAIRE:
    2094             :         case CHIP_HAWAII:
    2095             :         case CHIP_KAVERI:
    2096             :         case CHIP_KABINI:
    2097             :         case CHIP_MULLINS:
    2098             :                 if (adev->flags & AMD_IS_APU)
    2099             :                         adev->family = AMDGPU_FAMILY_KV;
    2100             :                 else
    2101             :                         adev->family = AMDGPU_FAMILY_CI;
    2102             : 
    2103             :                 r = cik_set_ip_blocks(adev);
    2104             :                 if (r)
    2105             :                         return r;
    2106             :                 break;
    2107             : #endif
    2108             :         case CHIP_TOPAZ:
    2109             :         case CHIP_TONGA:
    2110             :         case CHIP_FIJI:
    2111             :         case CHIP_POLARIS10:
    2112             :         case CHIP_POLARIS11:
    2113             :         case CHIP_POLARIS12:
    2114             :         case CHIP_VEGAM:
    2115             :         case CHIP_CARRIZO:
    2116             :         case CHIP_STONEY:
    2117           0 :                 if (adev->flags & AMD_IS_APU)
    2118           0 :                         adev->family = AMDGPU_FAMILY_CZ;
    2119             :                 else
    2120           0 :                         adev->family = AMDGPU_FAMILY_VI;
    2121             : 
    2122           0 :                 r = vi_set_ip_blocks(adev);
    2123           0 :                 if (r)
    2124             :                         return r;
    2125             :                 break;
    2126             :         default:
    2127           0 :                 r = amdgpu_discovery_set_ip_blocks(adev);
    2128           0 :                 if (r)
    2129             :                         return r;
    2130             :                 break;
    2131             :         }
    2132             : 
    2133             :         if (amdgpu_has_atpx() &&
    2134             :             (amdgpu_is_atpx_hybrid() ||
    2135             :              amdgpu_has_atpx_dgpu_power_cntl()) &&
    2136             :             ((adev->flags & AMD_IS_APU) == 0) &&
    2137             :             !pci_is_thunderbolt_attached(to_pci_dev(dev->dev)))
    2138             :                 adev->flags |= AMD_IS_PX;
    2139             : 
    2140           0 :         if (!(adev->flags & AMD_IS_APU)) {
    2141           0 :                 parent = pci_upstream_bridge(adev->pdev);
    2142           0 :                 adev->has_pr3 = parent ? pci_pr3_present(parent) : false;
    2143             :         }
    2144             : 
    2145           0 :         amdgpu_amdkfd_device_probe(adev);
    2146             : 
    2147           0 :         adev->pm.pp_feature = amdgpu_pp_feature_mask;
    2148           0 :         if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
    2149           0 :                 adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
    2150           0 :         if (amdgpu_sriov_vf(adev) && adev->asic_type == CHIP_SIENNA_CICHLID)
    2151           0 :                 adev->pm.pp_feature &= ~PP_OVERDRIVE_MASK;
    2152             : 
    2153           0 :         for (i = 0; i < adev->num_ip_blocks; i++) {
    2154           0 :                 if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
    2155           0 :                         DRM_ERROR("disabled ip block: %d <%s>\n",
    2156             :                                   i, adev->ip_blocks[i].version->funcs->name);
    2157           0 :                         adev->ip_blocks[i].status.valid = false;
    2158             :                 } else {
    2159           0 :                         if (adev->ip_blocks[i].version->funcs->early_init) {
    2160           0 :                                 r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
    2161           0 :                                 if (r == -ENOENT) {
    2162           0 :                                         adev->ip_blocks[i].status.valid = false;
    2163           0 :                                 } else if (r) {
    2164           0 :                                         DRM_ERROR("early_init of IP block <%s> failed %d\n",
    2165             :                                                   adev->ip_blocks[i].version->funcs->name, r);
    2166           0 :                                         return r;
    2167             :                                 } else {
    2168           0 :                                         adev->ip_blocks[i].status.valid = true;
    2169             :                                 }
    2170             :                         } else {
    2171           0 :                                 adev->ip_blocks[i].status.valid = true;
    2172             :                         }
    2173             :                 }
    2174             :                 /* get the vbios after the asic_funcs are set up */
    2175           0 :                 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
    2176           0 :                         r = amdgpu_device_parse_gpu_info_fw(adev);
    2177           0 :                         if (r)
    2178             :                                 return r;
    2179             : 
    2180             :                         /* Read BIOS */
    2181           0 :                         if (!amdgpu_get_bios(adev))
    2182             :                                 return -EINVAL;
    2183             : 
    2184           0 :                         r = amdgpu_atombios_init(adev);
    2185           0 :                         if (r) {
    2186           0 :                                 dev_err(adev->dev, "amdgpu_atombios_init failed\n");
    2187           0 :                                 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
    2188           0 :                                 return r;
    2189             :                         }
    2190             : 
    2191             :                         /* get pf2vf msg info at the earliest time */
    2192           0 :                         if (amdgpu_sriov_vf(adev))
    2193           0 :                                 amdgpu_virt_init_data_exchange(adev);
    2194             : 
    2195             :                 }
    2196             :         }
    2197             : 
    2198           0 :         adev->cg_flags &= amdgpu_cg_mask;
    2199           0 :         adev->pg_flags &= amdgpu_pg_mask;
    2200             : 
    2201           0 :         return 0;
    2202             : }
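
One detail worth noting in the loop above: bit i of the amdgpu_ip_block_mask module parameter maps directly to adev->ip_blocks[i], and a cleared bit marks that block invalid so every later init/fini pass skips it. The test reduces to a plain bit check (a sketch; the example mask value is illustrative):

        /* loading with ip_block_mask=0xfffffff7 would disable block 3 */
        static bool amdgpu_ip_block_enabled(uint32_t ip_block_mask, int i)
        {
                return (ip_block_mask & (1U << i)) != 0;
        }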
    2203             : 
    2204           0 : static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
    2205             : {
    2206             :         int i, r;
    2207             : 
    2208           0 :         for (i = 0; i < adev->num_ip_blocks; i++) {
    2209           0 :                 if (!adev->ip_blocks[i].status.sw)
    2210           0 :                         continue;
    2211           0 :                 if (adev->ip_blocks[i].status.hw)
    2212           0 :                         continue;
    2213           0 :                 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
    2214           0 :                     (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
    2215             :                     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
    2216           0 :                         r = adev->ip_blocks[i].version->funcs->hw_init(adev);
    2217           0 :                         if (r) {
    2218           0 :                                 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
    2219             :                                           adev->ip_blocks[i].version->funcs->name, r);
    2220           0 :                                 return r;
    2221             :                         }
    2222           0 :                         adev->ip_blocks[i].status.hw = true;
    2223             :                 }
    2224             :         }
    2225             : 
    2226             :         return 0;
    2227             : }
    2228             : 
    2229           0 : static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
    2230             : {
    2231             :         int i, r;
    2232             : 
    2233           0 :         for (i = 0; i < adev->num_ip_blocks; i++) {
    2234           0 :                 if (!adev->ip_blocks[i].status.sw)
    2235           0 :                         continue;
    2236           0 :                 if (adev->ip_blocks[i].status.hw)
    2237           0 :                         continue;
    2238           0 :                 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
    2239           0 :                 if (r) {
    2240           0 :                         DRM_ERROR("hw_init of IP block <%s> failed %d\n",
    2241             :                                   adev->ip_blocks[i].version->funcs->name, r);
    2242           0 :                         return r;
    2243             :                 }
    2244           0 :                 adev->ip_blocks[i].status.hw = true;
    2245             :         }
    2246             : 
    2247             :         return 0;
    2248             : }
    2249             : 
    2250           0 : static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
    2251             : {
    2252           0 :         int r = 0;
    2253             :         int i;
    2254             :         uint32_t smu_version;
    2255             : 
    2256           0 :         if (adev->asic_type >= CHIP_VEGA10) {
    2257           0 :                 for (i = 0; i < adev->num_ip_blocks; i++) {
    2258           0 :                         if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
    2259           0 :                                 continue;
    2260             : 
    2261           0 :                         if (!adev->ip_blocks[i].status.sw)
    2262           0 :                                 continue;
    2263             : 
    2264             :                         /* no need to do the fw loading again if already done */
    2265           0 :                         if (adev->ip_blocks[i].status.hw)
    2266             :                                 break;
    2267             : 
    2268           0 :                         if (amdgpu_in_reset(adev) || adev->in_suspend) {
    2269           0 :                                 r = adev->ip_blocks[i].version->funcs->resume(adev);
    2270           0 :                                 if (r) {
    2271           0 :                                         DRM_ERROR("resume of IP block <%s> failed %d\n",
    2272             :                                                           adev->ip_blocks[i].version->funcs->name, r);
    2273           0 :                                         return r;
    2274             :                                 }
    2275             :                         } else {
    2276           0 :                                 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
    2277           0 :                                 if (r) {
    2278           0 :                                         DRM_ERROR("hw_init of IP block <%s> failed %d\n",
    2279             :                                                           adev->ip_blocks[i].version->funcs->name, r);
    2280           0 :                                         return r;
    2281             :                                 }
    2282             :                         }
    2283             : 
    2284           0 :                         adev->ip_blocks[i].status.hw = true;
    2285           0 :                         break;
    2286             :                 }
    2287             :         }
    2288             : 
    2289           0 :         if (!amdgpu_sriov_vf(adev) || adev->asic_type == CHIP_TONGA)
    2290           0 :                 r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
    2291             : 
    2292             :         return r;
    2293             : }
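
Together with the two hw-init phases above, this helper forms a fixed three-step bring-up that amdgpu_device_ip_init() below runs in order (a condensed sketch of that call sequence):

        r = amdgpu_device_ip_hw_init_phase1(adev);   /* COMMON, IH, PSP (SR-IOV) */
        if (r)
                goto init_failed;
        r = amdgpu_device_fw_loading(adev);          /* PSP/SMU firmware */
        if (r)
                goto init_failed;
        r = amdgpu_device_ip_hw_init_phase2(adev);   /* all remaining IP blocks */
        if (r)
                goto init_failed;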
    2294             : 
    2295           0 : static int amdgpu_device_init_schedulers(struct amdgpu_device *adev)
    2296             : {
    2297             :         long timeout;
    2298             :         int r, i;
    2299             : 
    2300           0 :         for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
    2301           0 :                 struct amdgpu_ring *ring = adev->rings[i];
    2302             : 
    2303             :                 /* No need to set up the GPU scheduler for rings that don't need it */
    2304           0 :                 if (!ring || ring->no_scheduler)
    2305           0 :                         continue;
    2306             : 
    2307           0 :                 switch (ring->funcs->type) {
    2308             :                 case AMDGPU_RING_TYPE_GFX:
    2309           0 :                         timeout = adev->gfx_timeout;
    2310           0 :                         break;
    2311             :                 case AMDGPU_RING_TYPE_COMPUTE:
    2312           0 :                         timeout = adev->compute_timeout;
    2313           0 :                         break;
    2314             :                 case AMDGPU_RING_TYPE_SDMA:
    2315           0 :                         timeout = adev->sdma_timeout;
    2316           0 :                         break;
    2317             :                 default:
    2318           0 :                         timeout = adev->video_timeout;
    2319           0 :                         break;
    2320             :                 }
    2321             : 
    2322           0 :                 r = drm_sched_init(&ring->sched, &amdgpu_sched_ops,
    2323             :                                    ring->num_hw_submission, amdgpu_job_hang_limit,
    2324           0 :                                    timeout, adev->reset_domain->wq,
    2325           0 :                                    ring->sched_score, ring->name,
    2326             :                                    adev->dev);
    2327           0 :                 if (r) {
    2328           0 :                         DRM_ERROR("Failed to create scheduler on ring %s.\n",
    2329             :                                   ring->name);
    2330           0 :                         return r;
    2331             :                 }
    2332             :         }
    2333             : 
    2334             :         return 0;
    2335             : }
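
The switch on ring->funcs->type is the only per-ring variation here; the rest of the drm_sched_init() arguments are uniform. The timeout selection could equally be read as a small helper (a refactoring sketch, not code from this file):

        static long amdgpu_ring_timeout(struct amdgpu_device *adev,
                                        struct amdgpu_ring *ring)
        {
                switch (ring->funcs->type) {
                case AMDGPU_RING_TYPE_GFX:     return adev->gfx_timeout;
                case AMDGPU_RING_TYPE_COMPUTE: return adev->compute_timeout;
                case AMDGPU_RING_TYPE_SDMA:    return adev->sdma_timeout;
                default:                       return adev->video_timeout;
                }
        }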
    2336             : 
    2337             : 
    2338             : /**
    2339             :  * amdgpu_device_ip_init - run init for hardware IPs
    2340             :  *
    2341             :  * @adev: amdgpu_device pointer
    2342             :  *
    2343             :  * Main initialization pass for hardware IPs.  The list of all the hardware
    2344             :  * IPs that make up the asic is walked and the sw_init and hw_init callbacks
    2345             :  * are run.  sw_init initializes the software state associated with each IP
    2346             :  * and hw_init initializes the hardware associated with each IP.
    2347             :  * Returns 0 on success, negative error code on failure.
    2348             :  */
    2349           0 : static int amdgpu_device_ip_init(struct amdgpu_device *adev)
    2350             : {
    2351             :         int i, r;
    2352             : 
    2353           0 :         r = amdgpu_ras_init(adev);
    2354           0 :         if (r)
    2355             :                 return r;
    2356             : 
    2357           0 :         for (i = 0; i < adev->num_ip_blocks; i++) {
    2358           0 :                 if (!adev->ip_blocks[i].status.valid)
    2359           0 :                         continue;
    2360           0 :                 r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
    2361           0 :                 if (r) {
    2362           0 :                         DRM_ERROR("sw_init of IP block <%s> failed %d\n",
    2363             :                                   adev->ip_blocks[i].version->funcs->name, r);
    2364           0 :                         goto init_failed;
    2365             :                 }
    2366           0 :                 adev->ip_blocks[i].status.sw = true;
    2367             : 
    2368             :                 /* need to do gmc hw init early so we can allocate gpu mem */
    2369           0 :                 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
    2370             :                         /* Try to reserve bad pages early */
    2371           0 :                         if (amdgpu_sriov_vf(adev))
    2372           0 :                                 amdgpu_virt_exchange_data(adev);
    2373             : 
    2374           0 :                         r = amdgpu_device_vram_scratch_init(adev);
    2375           0 :                         if (r) {
    2376           0 :                                 DRM_ERROR("amdgpu_vram_scratch_init failed %d\n", r);
    2377           0 :                                 goto init_failed;
    2378             :                         }
    2379           0 :                         r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
    2380           0 :                         if (r) {
    2381           0 :                                 DRM_ERROR("hw_init %d failed %d\n", i, r);
    2382           0 :                                 goto init_failed;
    2383             :                         }
    2384           0 :                         r = amdgpu_device_wb_init(adev);
    2385           0 :                         if (r) {
    2386           0 :                                 DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
    2387           0 :                                 goto init_failed;
    2388             :                         }
    2389           0 :                         adev->ip_blocks[i].status.hw = true;
    2390             : 
    2391             :                         /* right after GMC hw init, we create CSA */
    2392           0 :                         if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) {
    2393           0 :                                 r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
    2394             :                                                                 AMDGPU_GEM_DOMAIN_VRAM,
    2395             :                                                                 AMDGPU_CSA_SIZE);
    2396           0 :                                 if (r) {
    2397           0 :                                         DRM_ERROR("allocate CSA failed %d\n", r);
    2398           0 :                                         goto init_failed;
    2399             :                                 }
    2400             :                         }
    2401             :                 }
    2402             :         }
    2403             : 
    2404           0 :         if (amdgpu_sriov_vf(adev))
    2405           0 :                 amdgpu_virt_init_data_exchange(adev);
    2406             : 
    2407           0 :         r = amdgpu_ib_pool_init(adev);
    2408           0 :         if (r) {
    2409           0 :                 dev_err(adev->dev, "IB initialization failed (%d).\n", r);
    2410           0 :                 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
    2411           0 :                 goto init_failed;
    2412             :         }
    2413             : 
    2414           0 :         r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init is complete */
    2415           0 :         if (r)
    2416             :                 goto init_failed;
    2417             : 
    2418           0 :         r = amdgpu_device_ip_hw_init_phase1(adev);
    2419           0 :         if (r)
    2420             :                 goto init_failed;
    2421             : 
    2422           0 :         r = amdgpu_device_fw_loading(adev);
    2423           0 :         if (r)
    2424             :                 goto init_failed;
    2425             : 
    2426           0 :         r = amdgpu_device_ip_hw_init_phase2(adev);
    2427           0 :         if (r)
    2428             :                 goto init_failed;
    2429             : 
    2430             :         /*
    2431             :          * Retired pages will be loaded from eeprom and reserved here;
    2432             :          * this must be called after amdgpu_device_ip_hw_init_phase2 since,
    2433             :          * for some ASICs, the RAS EEPROM code relies on the SMU being fully
    2434             :          * functional for I2C communication, which is only true at this point.
    2435             :          *
    2436             :          * amdgpu_ras_recovery_init may fail, but the caller only cares about
    2437             :          * failures caused by a bad GPU state, which stop the amdgpu init
    2438             :          * process. For other failures, it still releases all the resources
    2439             :          * and prints an error message, rather than returning a negative
    2440             :          * value to the upper level.
    2441             :          *
    2442             :          * Note: theoretically, this should be called before any VRAM
    2443             :          * allocations, to protect retired pages from being reused.
    2444             :          */
    2445           0 :         r = amdgpu_ras_recovery_init(adev);
    2446           0 :         if (r)
    2447             :                 goto init_failed;
    2448             : 
    2449             :         /*
    2450             :          * In the case of XGMI, grab an extra reference on the reset domain for this device
    2451             :          */
    2452           0 :         if (adev->gmc.xgmi.num_physical_nodes > 1) {
    2453           0 :                 if (amdgpu_xgmi_add_device(adev) == 0) {
    2454           0 :                         if (!amdgpu_sriov_vf(adev)) {
    2455           0 :                                 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
    2456             : 
    2457           0 :                                 if (!hive->reset_domain ||
    2458           0 :                                     !amdgpu_reset_get_reset_domain(hive->reset_domain)) {
    2459           0 :                                         r = -ENOENT;
    2460           0 :                                         amdgpu_put_xgmi_hive(hive);
    2461           0 :                                         goto init_failed;
    2462             :                                 }
    2463             : 
    2464             :                                 /* Drop the early temporary reset domain we created for the device */
    2465           0 :                                 amdgpu_reset_put_reset_domain(adev->reset_domain);
    2466           0 :                                 adev->reset_domain = hive->reset_domain;
    2467           0 :                                 amdgpu_put_xgmi_hive(hive);
    2468             :                         }
    2469             :                 }
    2470             :         }
    2471             : 
    2472           0 :         r = amdgpu_device_init_schedulers(adev);
    2473           0 :         if (r)
    2474             :                 goto init_failed;
    2475             : 
    2476             :         /* Don't init kfd if the whole hive needs to be reset during init */
    2477           0 :         if (!adev->gmc.xgmi.pending_reset)
    2478           0 :                 amdgpu_amdkfd_device_init(adev);
    2479             : 
    2480           0 :         amdgpu_fru_get_product_info(adev);
    2481             : 
    2482             : init_failed:
    2483           0 :         if (amdgpu_sriov_vf(adev))
    2484           0 :                 amdgpu_virt_release_full_gpu(adev, true);
    2485             : 
    2486             :         return r;
    2487             : }
    2488             : 
    2489             : /**
    2490             :  * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
    2491             :  *
    2492             :  * @adev: amdgpu_device pointer
    2493             :  *
    2494             :  * Writes a reset magic value to the gart pointer in VRAM.  The driver calls
    2495             :  * this function before a GPU reset.  If the value is retained after a
    2496             :  * GPU reset, VRAM has not been lost.  Some GPU resets may destroy VRAM contents.
    2497             :  */
    2498             : static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
    2499             : {
    2500           0 :         memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
    2501             : }
    2502             : 
    2503             : /**
    2504             :  * amdgpu_device_check_vram_lost - check if vram is valid
    2505             :  *
    2506             :  * @adev: amdgpu_device pointer
    2507             :  *
    2508             :  * Checks the reset magic value written to the gart pointer in VRAM.
    2509             :  * The driver calls this after a GPU reset to see if the contents of
    2510             :  * VRAM have been lost or not.
    2511             :  * Returns true if vram is lost, false if not.
    2512             :  */
    2513           0 : static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
    2514             : {
    2515           0 :         if (memcmp(adev->gart.ptr, adev->reset_magic,
    2516             :                         AMDGPU_RESET_MAGIC_NUM))
    2517             :                 return true;
    2518             : 
    2519           0 :         if (!amdgpu_in_reset(adev))
    2520             :                 return false;
    2521             : 
    2522             :         /*
    2523             :          * For all ASICs with baco/mode1 reset, the VRAM is
    2524             :          * always assumed to be lost.
    2525             :          */
    2526           0 :         switch (amdgpu_asic_reset_method(adev)) {
    2527             :         case AMD_RESET_METHOD_BACO:
    2528             :         case AMD_RESET_METHOD_MODE1:
    2529             :                 return true;
    2530             :         default:
    2531           0 :                 return false;
    2532             :         }
    2533             : }
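
These two helpers work as a pair around a reset: the magic bytes are snapshotted from the GART pointer beforehand and compared afterwards to decide whether buffer contents must be restored. The intended call pattern looks roughly like this (the reset step itself is elided):

        amdgpu_device_fill_reset_magic(adev);        /* before the reset */

        /* ... ASIC reset happens here ... */

        if (amdgpu_device_check_vram_lost(adev)) {
                /* VRAM contents are gone: buffers must be re-validated
                 * and re-uploaded before the rings restart */
        }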
    2534             : 
    2535             : /**
    2536             :  * amdgpu_device_set_cg_state - set clockgating for amdgpu device
    2537             :  *
    2538             :  * @adev: amdgpu_device pointer
    2539             :  * @state: clockgating state (gate or ungate)
    2540             :  *
    2541             :  * The list of all the hardware IPs that make up the asic is walked and the
    2542             :  * set_clockgating_state callbacks are run with the requested state.
    2543             :  * On late init, this pass enables clockgating for the hardware IPs;
    2544             :  * on fini or suspend, it disables clockgating for the hardware IPs.
    2545             :  * Returns 0 on success, negative error code on failure.
    2546             :  */
    2547             : 
    2548           0 : int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
    2549             :                                enum amd_clockgating_state state)
    2550             : {
    2551             :         int i, j, r;
    2552             : 
    2553           0 :         if (amdgpu_emu_mode == 1)
    2554             :                 return 0;
    2555             : 
    2556           0 :         for (j = 0; j < adev->num_ip_blocks; j++) {
    2557           0 :                 i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
    2558           0 :                 if (!adev->ip_blocks[i].status.late_initialized)
    2559           0 :                         continue;
    2560             :                 /* skip CG for GFX on S0ix */
    2561           0 :                 if (adev->in_s0ix &&
    2562           0 :                     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX)
    2563           0 :                         continue;
    2564             :                 /* skip CG for VCE/UVD, it's handled specially */
    2565           0 :                 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
    2566           0 :                     adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
    2567           0 :                     adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
    2568           0 :                     adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
    2569           0 :                     adev->ip_blocks[i].version->funcs->set_clockgating_state) {
    2570             :                         /* enable clockgating to save power */
    2571           0 :                         r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
    2572             :                                                                                      state);
    2573           0 :                         if (r) {
    2574           0 :                                 DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
    2575             :                                           adev->ip_blocks[i].version->funcs->name, r);
    2576           0 :                                 return r;
    2577             :                         }
    2578             :                 }
    2579             :         }
    2580             : 
    2581             :         return 0;
    2582             : }
    2583             : 
    2584           0 : int amdgpu_device_set_pg_state(struct amdgpu_device *adev,
    2585             :                                enum amd_powergating_state state)
    2586             : {
    2587             :         int i, j, r;
    2588             : 
    2589           0 :         if (amdgpu_emu_mode == 1)
    2590             :                 return 0;
    2591             : 
    2592           0 :         for (j = 0; j < adev->num_ip_blocks; j++) {
    2593           0 :                 i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
    2594           0 :                 if (!adev->ip_blocks[i].status.late_initialized)
    2595           0 :                         continue;
    2596             :                 /* skip PG for GFX on S0ix */
    2597           0 :                 if (adev->in_s0ix &&
    2598           0 :                     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX)
    2599           0 :                         continue;
    2600             :                 /* skip PG for VCE/UVD, it's handled specially */
    2601           0 :                 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
    2602           0 :                     adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
    2603           0 :                     adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
    2604           0 :                     adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
    2605           0 :                     adev->ip_blocks[i].version->funcs->set_powergating_state) {
    2606             :                         /* enable powergating to save power */
    2607           0 :                         r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
    2608             :                                                                                         state);
    2609           0 :                         if (r) {
    2610           0 :                                 DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
    2611             :                                           adev->ip_blocks[i].version->funcs->name, r);
    2612           0 :                                 return r;
    2613             :                         }
    2614             :                 }
    2615             :         }
    2616             :         return 0;
    2617             : }
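
Note the index computation shared by both functions: when gating, i walks the IP list forward; when ungating, it walks in reverse, so states are torn down in the opposite order they were established. In this file the two functions are always called as a pair, gating on late init and ungating on suspend/fini:

        /* late init (see amdgpu_device_ip_late_init() below) */
        amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
        amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);

        /* suspend/fini: powergating is dropped first, then clockgating */
        amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
        amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);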
    2618             : 
    2619           0 : static int amdgpu_device_enable_mgpu_fan_boost(void)
    2620             : {
    2621             :         struct amdgpu_gpu_instance *gpu_ins;
    2622             :         struct amdgpu_device *adev;
    2623           0 :         int i, ret = 0;
    2624             : 
    2625           0 :         mutex_lock(&mgpu_info.mutex);
    2626             : 
    2627             :         /*
    2628             :          * MGPU fan boost feature should be enabled
    2629             :          * only when there are two or more dGPUs in
    2630             :          * the system
    2631             :          */
    2632           0 :         if (mgpu_info.num_dgpu < 2)
    2633             :                 goto out;
    2634             : 
    2635           0 :         for (i = 0; i < mgpu_info.num_dgpu; i++) {
    2636           0 :                 gpu_ins = &(mgpu_info.gpu_ins[i]);
    2637           0 :                 adev = gpu_ins->adev;
    2638           0 :                 if (!(adev->flags & AMD_IS_APU) &&
    2639           0 :                     !gpu_ins->mgpu_fan_enabled) {
    2640           0 :                         ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
    2641           0 :                         if (ret)
    2642             :                                 break;
    2643             : 
    2644           0 :                         gpu_ins->mgpu_fan_enabled = 1;
    2645             :                 }
    2646             :         }
    2647             : 
    2648             : out:
    2649           0 :         mutex_unlock(&mgpu_info.mutex);
    2650             : 
    2651           0 :         return ret;
    2652             : }
    2653             : 
    2654             : /**
    2655             :  * amdgpu_device_ip_late_init - run late init for hardware IPs
    2656             :  *
    2657             :  * @adev: amdgpu_device pointer
    2658             :  *
    2659             :  * Late initialization pass for hardware IPs.  The list of all the hardware
    2660             :  * IPs that make up the asic is walked and the late_init callbacks are run.
    2661             :  * late_init covers any special initialization that an IP requires
    2662             :  * after all of them have been initialized or something that needs to happen
    2663             :  * late in the init process.
    2664             :  * Returns 0 on success, negative error code on failure.
    2665             :  */
    2666           0 : static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
    2667             : {
    2668             :         struct amdgpu_gpu_instance *gpu_instance;
    2669           0 :         int i = 0, r;
    2670             : 
    2671           0 :         for (i = 0; i < adev->num_ip_blocks; i++) {
    2672           0 :                 if (!adev->ip_blocks[i].status.hw)
    2673           0 :                         continue;
    2674           0 :                 if (adev->ip_blocks[i].version->funcs->late_init) {
    2675           0 :                         r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
    2676           0 :                         if (r) {
    2677           0 :                                 DRM_ERROR("late_init of IP block <%s> failed %d\n",
    2678             :                                           adev->ip_blocks[i].version->funcs->name, r);
    2679           0 :                                 return r;
    2680             :                         }
    2681             :                 }
    2682           0 :                 adev->ip_blocks[i].status.late_initialized = true;
    2683             :         }
    2684             : 
    2685           0 :         r = amdgpu_ras_late_init(adev);
    2686           0 :         if (r) {
    2687           0 :                 DRM_ERROR("amdgpu_ras_late_init failed %d", r);
    2688           0 :                 return r;
    2689             :         }
    2690             : 
    2691           0 :         amdgpu_ras_set_error_query_ready(adev, true);
    2692             : 
    2693           0 :         amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
    2694           0 :         amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
    2695             : 
    2696           0 :         amdgpu_device_fill_reset_magic(adev);
    2697             : 
    2698           0 :         r = amdgpu_device_enable_mgpu_fan_boost();
    2699           0 :         if (r)
    2700           0 :                 DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
    2701             : 
    2702             :         /* For passthrough configurations on arcturus and aldebaran, enable special SBR handling */
    2703           0 :         if (amdgpu_passthrough(adev) && ((adev->asic_type == CHIP_ARCTURUS && adev->gmc.xgmi.num_physical_nodes > 1) ||
    2704             :                                adev->asic_type == CHIP_ALDEBARAN))
    2705           0 :                 amdgpu_dpm_handle_passthrough_sbr(adev, true);
    2706             : 
    2707           0 :         if (adev->gmc.xgmi.num_physical_nodes > 1) {
    2708           0 :                 mutex_lock(&mgpu_info.mutex);
    2709             : 
    2710             :                 /*
    2711             :                  * Reset the device p-state to low, as it was booted with high.
    2712             :                  *
    2713             :                  * This should be performed only after all devices from the same
    2714             :                  * hive have been initialized.
    2715             :                  *
    2716             :                  * However, the number of devices in the hive is not known in
    2717             :                  * advance; it is counted one by one as the devices initialize.
    2718             :                  *
    2719             :                  * So we wait until all XGMI-interlinked devices are initialized.
    2720             :                  * This may introduce some delay, as those devices may come from
    2721             :                  * different hives. But that should be OK.
    2722             :                  */
    2723           0 :                 if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
    2724           0 :                         for (i = 0; i < mgpu_info.num_gpu; i++) {
    2725           0 :                                 gpu_instance = &(mgpu_info.gpu_ins[i]);
    2726           0 :                                 if (gpu_instance->adev->flags & AMD_IS_APU)
    2727           0 :                                         continue;
    2728             : 
    2729           0 :                                 r = amdgpu_xgmi_set_pstate(gpu_instance->adev,
    2730             :                                                 AMDGPU_XGMI_PSTATE_MIN);
    2731           0 :                                 if (r) {
    2732           0 :                                         DRM_ERROR("pstate setting failed (%d).\n", r);
    2733           0 :                                         break;
    2734             :                                 }
    2735             :                         }
    2736             :                 }
    2737             : 
    2738           0 :                 mutex_unlock(&mgpu_info.mutex);
    2739             :         }
    2740             : 
    2741             :         return 0;
    2742             : }
    2743             : 
    2744             : /**
    2745             :  * amdgpu_device_smu_fini_early - smu hw_fini wrapper
    2746             :  *
    2747             :  * @adev: amdgpu_device pointer
    2748             :  *
    2749             :  * For ASICs that need to disable the SMC first
    2750             :  */
    2751           0 : static void amdgpu_device_smu_fini_early(struct amdgpu_device *adev)
    2752             : {
    2753             :         int i, r;
    2754             : 
    2755           0 :         if (adev->ip_versions[GC_HWIP][0] > IP_VERSION(9, 0, 0))
    2756             :                 return;
    2757             : 
    2758           0 :         for (i = 0; i < adev->num_ip_blocks; i++) {
    2759           0 :                 if (!adev->ip_blocks[i].status.hw)
    2760           0 :                         continue;
    2761           0 :                 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
    2762           0 :                         r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
    2763             :                         /* XXX handle errors */
    2764           0 :                         if (r) {
    2765           0 :                                 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
    2766             :                                           adev->ip_blocks[i].version->funcs->name, r);
    2767             :                         }
    2768           0 :                         adev->ip_blocks[i].status.hw = false;
    2769           0 :                         break;
    2770             :                 }
    2771             :         }
    2772             : }
    2773             : 
    2774           0 : static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev)
    2775             : {
    2776             :         int i, r;
    2777             : 
    2778           0 :         for (i = 0; i < adev->num_ip_blocks; i++) {
    2779           0 :                 if (!adev->ip_blocks[i].version->funcs->early_fini)
    2780           0 :                         continue;
    2781             : 
    2782           0 :                 r = adev->ip_blocks[i].version->funcs->early_fini((void *)adev);
    2783           0 :                 if (r) {
    2784           0 :                         DRM_DEBUG("early_fini of IP block <%s> failed %d\n",
    2785             :                                   adev->ip_blocks[i].version->funcs->name, r);
    2786             :                 }
    2787             :         }
    2788             : 
    2789           0 :         amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
    2790           0 :         amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
    2791             : 
    2792           0 :         amdgpu_amdkfd_suspend(adev, false);
    2793             : 
    2794             :         /* Workaround for ASICs that need to disable the SMC first */
    2795           0 :         amdgpu_device_smu_fini_early(adev);
    2796             : 
    2797           0 :         for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
    2798           0 :                 if (!adev->ip_blocks[i].status.hw)
    2799           0 :                         continue;
    2800             : 
    2801           0 :                 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
    2802             :                 /* XXX handle errors */
    2803           0 :                 if (r) {
    2804           0 :                         DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
    2805             :                                   adev->ip_blocks[i].version->funcs->name, r);
    2806             :                 }
    2807             : 
    2808           0 :                 adev->ip_blocks[i].status.hw = false;
    2809             :         }
    2810             : 
    2811           0 :         if (amdgpu_sriov_vf(adev)) {
    2812           0 :                 if (amdgpu_virt_release_full_gpu(adev, false))
    2813           0 :                         DRM_ERROR("failed to release exclusive mode on fini\n");
    2814             :         }
    2815             : 
    2816           0 :         return 0;
    2817             : }
    2818             : 
    2819             : /**
    2820             :  * amdgpu_device_ip_fini - run fini for hardware IPs
    2821             :  *
    2822             :  * @adev: amdgpu_device pointer
    2823             :  *
    2824             :  * Main teardown pass for hardware IPs.  The list of all the hardware
    2825             :  * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
    2826             :  * are run.  hw_fini tears down the hardware associated with each IP
    2827             :  * and sw_fini tears down any software state associated with each IP.
    2828             :  * Returns 0 on success, negative error code on failure.
    2829             :  */
    2830           0 : static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
    2831             : {
    2832             :         int i, r;
    2833             : 
    2834           0 :         if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done)
    2835           0 :                 amdgpu_virt_release_ras_err_handler_data(adev);
    2836             : 
    2837           0 :         if (adev->gmc.xgmi.num_physical_nodes > 1)
    2838           0 :                 amdgpu_xgmi_remove_device(adev);
    2839             : 
    2840           0 :         amdgpu_amdkfd_device_fini_sw(adev);
    2841             : 
    2842           0 :         for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
    2843           0 :                 if (!adev->ip_blocks[i].status.sw)
    2844           0 :                         continue;
    2845             : 
    2846           0 :                 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
    2847           0 :                         amdgpu_ucode_free_bo(adev);
    2848           0 :                         amdgpu_free_static_csa(&adev->virt.csa_obj);
    2849           0 :                         amdgpu_device_wb_fini(adev);
    2850           0 :                         amdgpu_device_vram_scratch_fini(adev);
    2851           0 :                         amdgpu_ib_pool_fini(adev);
    2852             :                 }
    2853             : 
    2854           0 :                 r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
    2855             :                 /* XXX handle errors */
    2856           0 :                 if (r) {
    2857           0 :                         DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
    2858             :                                   adev->ip_blocks[i].version->funcs->name, r);
    2859             :                 }
    2860           0 :                 adev->ip_blocks[i].status.sw = false;
    2861           0 :                 adev->ip_blocks[i].status.valid = false;
    2862             :         }
    2863             : 
    2864           0 :         for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
    2865           0 :                 if (!adev->ip_blocks[i].status.late_initialized)
    2866           0 :                         continue;
    2867           0 :                 if (adev->ip_blocks[i].version->funcs->late_fini)
    2868           0 :                         adev->ip_blocks[i].version->funcs->late_fini((void *)adev);
    2869           0 :                 adev->ip_blocks[i].status.late_initialized = false;
    2870             :         }
    2871             : 
    2872           0 :         amdgpu_ras_fini(adev);
    2873             : 
    2874           0 :         return 0;
    2875             : }
    2876             : 
    2877             : /**
    2878             :  * amdgpu_device_delayed_init_work_handler - work handler for IB tests
    2879             :  *
    2880             :  * @work: work_struct.
    2881             :  */
    2882           0 : static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
    2883             : {
    2884           0 :         struct amdgpu_device *adev =
    2885           0 :                 container_of(work, struct amdgpu_device, delayed_init_work.work);
    2886             :         int r;
    2887             : 
    2888           0 :         r = amdgpu_ib_ring_tests(adev);
    2889           0 :         if (r)
    2890           0 :                 DRM_ERROR("ib ring test failed (%d).\n", r);
    2891           0 : }
    2892             : 
    2893           0 : static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
    2894             : {
    2895           0 :         struct amdgpu_device *adev =
    2896           0 :                 container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
    2897             : 
    2898           0 :         WARN_ON_ONCE(adev->gfx.gfx_off_state);
    2899           0 :         WARN_ON_ONCE(adev->gfx.gfx_off_req_count);
    2900             : 
    2901           0 :         if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
    2902           0 :                 adev->gfx.gfx_off_state = true;
    2903           0 : }
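
Both work handlers recover the owning device with container_of() on the embedded work member. Setting up and arming such a handler elsewhere follows the usual delayed-work recipe (the 2000 ms delay below is illustrative, not a value taken from this file):

        /* at init time, typically in amdgpu_device_init() */
        INIT_DELAYED_WORK(&adev->delayed_init_work,
                          amdgpu_device_delayed_init_work_handler);

        /* later: run the IB ring tests once init/resume has settled */
        schedule_delayed_work(&adev->delayed_init_work,
                              msecs_to_jiffies(2000));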
    2904             : 
    2905             : /**
    2906             :  * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
    2907             :  *
    2908             :  * @adev: amdgpu_device pointer
    2909             :  *
    2910             :  * Main suspend function for hardware IPs.  The list of all the hardware
    2911             :  * IPs that make up the asic is walked, clockgating is disabled and the
    2912             :  * suspend callbacks are run.  suspend puts the hardware and software state
    2913             :  * in each IP into a state suitable for suspend.
    2914             :  * Returns 0 on success, negative error code on failure.
    2915             :  */
    2916           0 : static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
    2917             : {
    2918             :         int i, r;
    2919             : 
    2920           0 :         amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
    2921           0 :         amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
    2922             : 
    2923           0 :         for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
    2924           0 :                 if (!adev->ip_blocks[i].status.valid)
    2925           0 :                         continue;
    2926             : 
    2927             :                 /* displays are handled separately */
    2928           0 :                 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_DCE)
    2929           0 :                         continue;
    2930             : 
    2932           0 :                 r = adev->ip_blocks[i].version->funcs->suspend(adev);
    2933             :                 /* XXX handle errors */
    2934           0 :                 if (r) {
    2935           0 :                         DRM_ERROR("suspend of IP block <%s> failed %d\n",
    2936             :                                   adev->ip_blocks[i].version->funcs->name, r);
    2937           0 :                         return r;
    2938             :                 }
    2939             : 
    2940           0 :                 adev->ip_blocks[i].status.hw = false;
    2941             :         }
    2942             : 
    2943             :         return 0;
    2944             : }
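
As with hw init, suspend is split in two: phase 1 quiesces only the display (DCE) blocks, and phase 2 below walks everything else in reverse order. A full suspend therefore runs the two phases back to back, roughly as follows (fence waits and buffer eviction typically happen between them):

        r = amdgpu_device_ip_suspend_phase1(adev);   /* displays first */
        if (r)
                return r;

        /* ... fence waits / VRAM eviction ... */

        r = amdgpu_device_ip_suspend_phase2(adev);   /* everything else, reverse order */
        if (r)
                return r;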
    2945             : 
    2946             : /**
    2947             :  * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
    2948             :  *
    2949             :  * @adev: amdgpu_device pointer
    2950             :  *
    2951             :  * Main suspend function for hardware IPs.  The list of all the hardware
    2952             :  * IPs that make up the asic is walked, clockgating is disabled and the
    2953             :  * suspend callbacks are run.  suspend puts the hardware and software state
    2954             :  * in each IP into a state suitable for suspend.
    2955             :  * Returns 0 on success, negative error code on failure.
    2956             :  */
    2957           0 : static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
    2958             : {
    2959             :         int i, r;
    2960             : 
    2961           0 :         if (adev->in_s0ix)
    2962           0 :                 amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D3Entry);
    2963             : 
    2964           0 :         for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
    2965           0 :                 if (!adev->ip_blocks[i].status.valid)
    2966           0 :                         continue;
    2967             :                 /* displays are handled in phase1 */
    2968           0 :                 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
    2969           0 :                         continue;
    2970             :                 /* PSP loses its connection when err_event_athub occurs */
    2971           0 :                 if (amdgpu_ras_intr_triggered() &&
    2972             :                     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
    2973           0 :                         adev->ip_blocks[i].status.hw = false;
    2974           0 :                         continue;
    2975             :                 }
    2976             : 
    2977             :                 /* skip unnecessary suspend for IP blocks that have not been initialized yet */
    2978           0 :                 if (adev->gmc.xgmi.pending_reset &&
    2979           0 :                     !(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
    2980           0 :                       adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC ||
    2981             :                       adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
    2982             :                       adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH)) {
    2983           0 :                         adev->ip_blocks[i].status.hw = false;
    2984           0 :                         continue;
    2985             :                 }
    2986             : 
    2987             :                 /* skip suspend of gfx and psp for S0ix
    2988             :                  * gfx is in gfxoff state, so on resume it will exit gfxoff just
    2989             :                  * like at runtime. PSP is also part of the always on hardware
    2990             :                  * so no need to suspend it.
    2991             :                  */
    2992           0 :                 if (adev->in_s0ix &&
    2993           0 :                     (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP ||
    2994             :                      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX))
    2995           0 :                         continue;
    2996             : 
    2997             :                 /* XXX handle errors */
    2998           0 :                 r = adev->ip_blocks[i].version->funcs->suspend(adev);
    3000           0 :                 if (r) {
    3001           0 :                         DRM_ERROR("suspend of IP block <%s> failed %d\n",
    3002             :                                   adev->ip_blocks[i].version->funcs->name, r);
    3003             :                 }
    3004           0 :                 adev->ip_blocks[i].status.hw = false;
    3005             :                 /* handle putting the SMC in the appropriate state */
    3006           0 :                 if (!amdgpu_sriov_vf(adev)) {
    3007           0 :                         if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
    3008           0 :                                 r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state);
    3009           0 :                                 if (r) {
    3010           0 :                                         DRM_ERROR("SMC failed to set mp1 state %d, %d\n",
    3011             :                                                         adev->mp1_state, r);
    3012           0 :                                         return r;
    3013             :                                 }
    3014             :                         }
    3015             :                 }
    3016             :         }
    3017             : 
    3018             :         return 0;
    3019             : }
    3020             : 
    3021             : /**
    3022             :  * amdgpu_device_ip_suspend - run suspend for hardware IPs
    3023             :  *
    3024             :  * @adev: amdgpu_device pointer
    3025             :  *
    3026             :  * Main suspend function for hardware IPs.  The list of all the hardware
    3027             :  * IPs that make up the asic is walked, clockgating is disabled and the
    3028             :  * suspend callbacks are run.  suspend puts the hardware and software state
    3029             :  * in each IP into a state suitable for suspend.
    3030             :  * Returns 0 on success, negative error code on failure.
    3031             :  */
    3032           0 : int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
    3033             : {
    3034             :         int r;
    3035             : 
    3036           0 :         if (amdgpu_sriov_vf(adev)) {
    3037           0 :                 amdgpu_virt_fini_data_exchange(adev);
    3038           0 :                 amdgpu_virt_request_full_gpu(adev, false);
    3039             :         }
    3040             : 
    3041           0 :         r = amdgpu_device_ip_suspend_phase1(adev);
    3042           0 :         if (r)
    3043             :                 return r;
    3044           0 :         r = amdgpu_device_ip_suspend_phase2(adev);
    3045             : 
    3046           0 :         if (amdgpu_sriov_vf(adev))
    3047           0 :                 amdgpu_virt_release_full_gpu(adev, false);
    3048             : 
    3049             :         return r;
    3050             : }
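
The reverse iteration in both suspend phases is deliberate: IP blocks are initialized front-to-back, so teardown walks the array back-to-front to quiesce dependent blocks before the blocks they rely on. A minimal, self-contained sketch of that convention (hypothetical types, not driver code):

    #include <stdbool.h>
    #include <stdio.h>

    struct ip_block {
            const char *name;
            bool valid;
            int (*suspend)(void);
    };

    static int noop_suspend(void) { return 0; }

    /* Suspend in the reverse of init order, stopping on the first failure. */
    static int suspend_all(struct ip_block *blocks, int count)
    {
            int i, r;

            for (i = count - 1; i >= 0; i--) {
                    if (!blocks[i].valid)
                            continue;
                    r = blocks[i].suspend();
                    if (r) {
                            fprintf(stderr, "suspend of %s failed %d\n",
                                    blocks[i].name, r);
                            return r;
                    }
            }
            return 0;
    }

    int main(void)
    {
            struct ip_block blocks[] = {
                    { "common", true, noop_suspend },   /* initialized first */
                    { "gmc",    true, noop_suspend },
                    { "gfx",    true, noop_suspend },   /* initialized last */
            };

            return suspend_all(blocks, 3);  /* suspends gfx, gmc, common */
    }
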
    3051             : 
    3052           0 : static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
    3053             : {
    3054             :         int i, r;
    3055             : 
    3056             :         static enum amd_ip_block_type ip_order[] = {
    3057             :                 AMD_IP_BLOCK_TYPE_GMC,
    3058             :                 AMD_IP_BLOCK_TYPE_COMMON,
    3059             :                 AMD_IP_BLOCK_TYPE_PSP,
    3060             :                 AMD_IP_BLOCK_TYPE_IH,
    3061             :         };
    3062             : 
    3063           0 :         for (i = 0; i < adev->num_ip_blocks; i++) {
    3064             :                 int j;
    3065             :                 struct amdgpu_ip_block *block;
    3066             : 
    3067           0 :                 block = &adev->ip_blocks[i];
    3068           0 :                 block->status.hw = false;
    3069             : 
    3070           0 :                 for (j = 0; j < ARRAY_SIZE(ip_order); j++) {
    3071             : 
    3072           0 :                         if (block->version->type != ip_order[j] ||
    3073           0 :                                 !block->status.valid)
    3074           0 :                                 continue;
    3075             : 
    3076           0 :                         r = block->version->funcs->hw_init(adev);
    3077           0 :                 DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r ? "failed" : "succeeded");
    3078           0 :                         if (r)
    3079             :                                 return r;
    3080           0 :                         block->status.hw = true;
    3081             :                 }
    3082             :         }
    3083             : 
    3084             :         return 0;
    3085             : }
    3086             : 
    3087           0 : static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
    3088             : {
    3089             :         int i, r;
    3090             : 
    3091             :         static enum amd_ip_block_type ip_order[] = {
    3092             :                 AMD_IP_BLOCK_TYPE_SMC,
    3093             :                 AMD_IP_BLOCK_TYPE_DCE,
    3094             :                 AMD_IP_BLOCK_TYPE_GFX,
    3095             :                 AMD_IP_BLOCK_TYPE_SDMA,
    3096             :                 AMD_IP_BLOCK_TYPE_UVD,
    3097             :                 AMD_IP_BLOCK_TYPE_VCE,
    3098             :                 AMD_IP_BLOCK_TYPE_VCN
    3099             :         };
    3100             : 
    3101           0 :         for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
    3102             :                 int j;
    3103             :                 struct amdgpu_ip_block *block;
    3104             : 
    3105           0 :                 for (j = 0; j < adev->num_ip_blocks; j++) {
    3106           0 :                         block = &adev->ip_blocks[j];
    3107             : 
    3108           0 :                         if (block->version->type != ip_order[i] ||
    3109           0 :                                 !block->status.valid ||
    3110           0 :                                 block->status.hw)
    3111           0 :                                 continue;
    3112             : 
    3113           0 :                         if (block->version->type == AMD_IP_BLOCK_TYPE_SMC)
    3114           0 :                                 r = block->version->funcs->resume(adev);
    3115             :                         else
    3116           0 :                                 r = block->version->funcs->hw_init(adev);
    3117             : 
    3118           0 :                         DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r ? "failed" : "succeeded");
    3119           0 :                         if (r)
    3120             :                                 return r;
    3121           0 :                         block->status.hw = true;
    3122             :                 }
    3123             :         }
    3124             : 
    3125             :         return 0;
    3126             : }
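
Both SR-IOV re-init helpers above substitute an explicit order table for the discovery order: the outer loop walks the table and the inner loop picks out matching blocks, an O(order × blocks) pass that brings the SMC back up before the engines that depend on it. A sketch of the same selection pattern (illustrative names only):

    #include <stdio.h>
    #include <string.h>

    #define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

    int main(void)
    {
            /* Blocks in discovery order, not the order we want to re-init. */
            static const char *blocks[] = { "gfx", "dce", "sdma", "smc" };
            /* Explicit re-init order: power management (smc) first. */
            static const char *order[] = { "smc", "dce", "gfx", "sdma" };
            size_t i, j;

            for (i = 0; i < ARRAY_SIZE(order); i++)
                    for (j = 0; j < ARRAY_SIZE(blocks); j++)
                            if (!strcmp(blocks[j], order[i]))
                                    printf("re-init %s\n", blocks[j]);
            return 0;
    }
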
    3127             : 
    3128             : /**
    3129             :  * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
    3130             :  *
    3131             :  * @adev: amdgpu_device pointer
    3132             :  *
    3133             :  * First resume function for hardware IPs.  The list of all the hardware
    3134             :  * IPs that make up the asic is walked and the resume callbacks are run for
    3135             :  * COMMON, GMC, and IH.  resume puts the hardware into a functional state
    3136             :  * after a suspend and updates the software state as necessary.  This
    3137             :  * function is also used for restoring the GPU after a GPU reset.
    3138             :  * Returns 0 on success, negative error code on failure.
    3139             :  */
    3140           0 : static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
    3141             : {
    3142             :         int i, r;
    3143             : 
    3144           0 :         for (i = 0; i < adev->num_ip_blocks; i++) {
    3145           0 :                 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
    3146           0 :                         continue;
    3147           0 :                 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
    3148             :                     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
    3149             :                     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
    3150             : 
    3151           0 :                         r = adev->ip_blocks[i].version->funcs->resume(adev);
    3152           0 :                         if (r) {
    3153           0 :                                 DRM_ERROR("resume of IP block <%s> failed %d\n",
    3154             :                                           adev->ip_blocks[i].version->funcs->name, r);
    3155           0 :                                 return r;
    3156             :                         }
    3157           0 :                         adev->ip_blocks[i].status.hw = true;
    3158             :                 }
    3159             :         }
    3160             : 
    3161             :         return 0;
    3162             : }
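
Phase 1 brings up only the blocks everything else depends on: COMMON for basic SOC and register access, GMC for VRAM and GART mappings, and IH for interrupt delivery. A hedged helper expressing that filter (the driver open-codes the comparison as above; the enum header path is assumed from amd/include):

    #include <stdbool.h>
    #include "amd_shared.h"     /* enum amd_ip_block_type */

    /* Illustrative only: the block types phase 1 must resume before
     * firmware loading and the remaining engines can run. */
    static bool resumes_in_phase1(enum amd_ip_block_type type)
    {
            return type == AMD_IP_BLOCK_TYPE_COMMON ||  /* SOC/register access */
                   type == AMD_IP_BLOCK_TYPE_GMC ||     /* VRAM + GART */
                   type == AMD_IP_BLOCK_TYPE_IH;        /* interrupt delivery */
    }
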
    3163             : 
    3164             : /**
    3165             :  * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
    3166             :  *
    3167             :  * @adev: amdgpu_device pointer
    3168             :  *
    3169             :  * Second resume function for hardware IPs.  The list of all the hardware
    3170             :  * IPs that make up the asic is walked and the resume callbacks are run for
    3171             :  * all blocks except COMMON, GMC, IH, and PSP.  resume puts the hardware into a
    3172             :  * functional state after a suspend and updates the software state as
    3173             :  * necessary.  This function is also used for restoring the GPU after a GPU
    3174             :  * reset.
    3175             :  * Returns 0 on success, negative error code on failure.
    3176             :  */
    3177           0 : static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
    3178             : {
    3179             :         int i, r;
    3180             : 
    3181           0 :         for (i = 0; i < adev->num_ip_blocks; i++) {
    3182           0 :                 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
    3183           0 :                         continue;
    3184           0 :                 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
    3185             :                     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
    3186           0 :                     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
    3187             :                     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
    3188           0 :                         continue;
    3189           0 :                 r = adev->ip_blocks[i].version->funcs->resume(adev);
    3190           0 :                 if (r) {
    3191           0 :                         DRM_ERROR("resume of IP block <%s> failed %d\n",
    3192             :                                   adev->ip_blocks[i].version->funcs->name, r);
    3193           0 :                         return r;
    3194             :                 }
    3195           0 :                 adev->ip_blocks[i].status.hw = true;
    3196             :         }
    3197             : 
    3198             :         return 0;
    3199             : }
    3200             : 
    3201             : /**
    3202             :  * amdgpu_device_ip_resume - run resume for hardware IPs
    3203             :  *
    3204             :  * @adev: amdgpu_device pointer
    3205             :  *
    3206             :  * Main resume function for hardware IPs.  The hardware IPs
    3207             :  * are split into two resume functions because they are also used in
    3208             :  * recovering from a GPU reset, where some additional steps need to be
    3209             :  * taken between them.  In this case (S3/S4) they are
    3210             :  * run sequentially.
    3211             :  * Returns 0 on success, negative error code on failure.
    3212             :  */
    3213           0 : static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
    3214             : {
    3215             :         int r;
    3216             : 
    3217           0 :         r = amdgpu_amdkfd_resume_iommu(adev);
    3218           0 :         if (r)
    3219             :                 return r;
    3220             : 
    3221           0 :         r = amdgpu_device_ip_resume_phase1(adev);
    3222           0 :         if (r)
    3223             :                 return r;
    3224             : 
    3225           0 :         r = amdgpu_device_fw_loading(adev);
    3226           0 :         if (r)
    3227             :                 return r;
    3228             : 
    3229           0 :         r = amdgpu_device_ip_resume_phase2(adev);
    3230             : 
    3231           0 :         return r;
    3232             : }
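
The step between the phases is the reason for the split: the firmware upload needs the phase-1 blocks (register access, memory controller, interrupts) up first, but must finish before the phase-2 engines start executing microcode. A condensed sketch of that staging (hypothetical helper signature, not driver code):

    /* Staged bring-up mirroring amdgpu_device_ip_resume; sketch only. */
    static int resume_staged(void *ctx,
                             int (*core)(void *),       /* COMMON/GMC/IH */
                             int (*microcode)(void *),  /* firmware upload */
                             int (*engines)(void *))    /* everything else */
    {
            int r;

            r = core(ctx);          /* register, memory and irq paths first */
            if (r)
                    return r;
            r = microcode(ctx);     /* needs core; required by the engines */
            if (r)
                    return r;
            return engines(ctx);    /* now safe to start remaining blocks */
    }
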
    3233             : 
    3234             : /**
    3235             :  * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
    3236             :  *
    3237             :  * @adev: amdgpu_device pointer
    3238             :  *
    3239             :  * Query the VBIOS data tables to determine if the board supports SR-IOV.
    3240             :  */
    3241           0 : static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
    3242             : {
    3243           0 :         if (amdgpu_sriov_vf(adev)) {
    3244           0 :                 if (adev->is_atom_fw) {
    3245           0 :                         if (amdgpu_atomfirmware_gpu_virtualization_supported(adev))
    3246           0 :                                 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
    3247             :                 } else {
    3248           0 :                         if (amdgpu_atombios_has_gpu_virtualization_table(adev))
    3249           0 :                                 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
    3250             :                 }
    3251             : 
    3252           0 :                 if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
    3253           0 :                         amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
    3254             :         }
    3255           0 : }
    3256             : 
    3257             : /**
    3258             :  * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
    3259             :  *
    3260             :  * @asic_type: AMD asic type
    3261             :  *
    3262             :  * Check if there is DC (new modesetting infrastructure) support for an asic.
    3263             :  * Returns true if DC has support, false if not.
    3264             :  */
    3265           0 : bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
    3266             : {
    3267             :         switch (asic_type) {
    3268             : #ifdef CONFIG_DRM_AMDGPU_SI
    3269             :         case CHIP_HAINAN:
    3270             : #endif
    3271             :         case CHIP_TOPAZ:
    3272             :                 /* chips with no display hardware */
    3273             :                 return false;
    3274             : #if defined(CONFIG_DRM_AMD_DC)
    3275             :         case CHIP_TAHITI:
    3276             :         case CHIP_PITCAIRN:
    3277             :         case CHIP_VERDE:
    3278             :         case CHIP_OLAND:
    3279             :                 /*
    3280             :                  * We have systems in the wild with these ASICs that require
    3281             :                  * LVDS and VGA support which is not supported with DC.
    3282             :                  *
    3283             :          * Fall back to the non-DC driver here by default so as not to
    3284             :                  * cause regressions.
    3285             :                  */
    3286             : #if defined(CONFIG_DRM_AMD_DC_SI)
    3287             :                 return amdgpu_dc > 0;
    3288             : #else
    3289             :                 return false;
    3290             : #endif
    3291             :         case CHIP_BONAIRE:
    3292             :         case CHIP_KAVERI:
    3293             :         case CHIP_KABINI:
    3294             :         case CHIP_MULLINS:
    3295             :                 /*
    3296             :                  * We have systems in the wild with these ASICs that require
    3297             :                  * VGA support which is not supported with DC.
    3298             :                  *
    3299             :          * Fall back to the non-DC driver here by default so as not to
    3300             :                  * cause regressions.
    3301             :                  */
    3302           0 :                 return amdgpu_dc > 0;
    3303             :         default:
    3304           0 :                 return amdgpu_dc != 0;
    3305             : #else
    3306             :         default:
    3307             :                 if (amdgpu_dc > 0)
    3308             :                         DRM_INFO_ONCE("Display Core has been requested via kernel parameter "
    3309             :                                          "but isn't supported by ASIC, ignoring\n");
    3310             :                 return false;
    3311             : #endif
    3312             :         }
    3313             : }
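
The two comparisons above encode the tri-state amdgpu_dc module parameter (-1 auto by default, 0 force off, 1 force on): amdgpu_dc > 0 requires an explicit opt-in on ASICs where DC defaults off, while amdgpu_dc != 0 only requires that DC was not explicitly disabled. A minimal sketch of that tri-state convention (generic helper, not driver code):

    #include <stdbool.h>

    /* Tri-state module-parameter check: a negative value means "auto". */
    static bool tristate_enabled(int param, bool default_on)
    {
            if (param < 0)
                    return default_on;      /* auto: per-ASIC default */
            return param != 0;              /* 0: forced off, >0: forced on */
    }
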
    3314             : 
    3315             : /**
    3316             :  * amdgpu_device_has_dc_support - check if dc is supported
    3317             :  *
    3318             :  * @adev: amdgpu_device pointer
    3319             :  *
    3320             :  * Returns true for supported, false for not supported
    3321             :  */
    3322           0 : bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
    3323             : {
    3324           0 :         if (amdgpu_sriov_vf(adev) ||
    3325           0 :             adev->enable_virtual_display ||
    3326           0 :             (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
    3327             :                 return false;
    3328             : 
    3329           0 :         return amdgpu_device_asic_has_dc_support(adev->asic_type);
    3330             : }
    3331             : 
    3332           0 : static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
    3333             : {
    3334           0 :         struct amdgpu_device *adev =
    3335           0 :                 container_of(__work, struct amdgpu_device, xgmi_reset_work);
    3336           0 :         struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
    3337             : 
    3338             :         /* It's a bug to not have a hive within this function */
    3339           0 :         if (WARN_ON(!hive))
    3340             :                 return;
    3341             : 
    3342             :         /*
    3343             :          * Use task barrier to synchronize all xgmi reset works across the
    3344             :          * hive. task_barrier_enter and task_barrier_exit will block
    3345             :          * until all the threads running the xgmi reset works reach
    3346             :          * those points. task_barrier_full will do both blocks.
    3347             :          */
    3348           0 :         if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
    3349             : 
    3350           0 :                 task_barrier_enter(&hive->tb);
    3351           0 :                 adev->asic_reset_res = amdgpu_device_baco_enter(adev_to_drm(adev));
    3352             : 
    3353           0 :                 if (adev->asic_reset_res)
    3354             :                         goto fail;
    3355             : 
    3356           0 :                 task_barrier_exit(&hive->tb);
    3357           0 :                 adev->asic_reset_res = amdgpu_device_baco_exit(adev_to_drm(adev));
    3358             : 
    3359           0 :                 if (adev->asic_reset_res)
    3360             :                         goto fail;
    3361             : 
    3362           0 :                 if (adev->mmhub.ras && adev->mmhub.ras->ras_block.hw_ops &&
    3363           0 :                     adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count)
    3364           0 :                         adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count(adev);
    3365             :         } else {
    3366             : 
    3367           0 :                 task_barrier_full(&hive->tb);
    3368           0 :                 adev->asic_reset_res = amdgpu_asic_reset(adev);
    3369             :         }
    3370             : 
    3371             : fail:
    3372           0 :         if (adev->asic_reset_res)
    3373           0 :                 DRM_WARN("ASIC reset failed with error, %d for drm dev, %s",
    3374             :                          adev->asic_reset_res, adev_to_drm(adev)->unique);
    3375           0 :         amdgpu_put_xgmi_hive(hive);
    3376             : }
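
The enter/exit split matters for BACO: every GPU in the hive must be inside BACO before any GPU leaves it, so each work rendezvouses twice. A user-space analogue of that double barrier built on pthread_barrier_t (a sketch of the pattern, not the driver's task-barrier implementation):

    #include <pthread.h>
    #include <stdio.h>

    #define WORKERS 4
    static pthread_barrier_t tb;

    /* Each worker mirrors one device's xgmi_reset_work: no one enters
     * BACO until all works have started, and no one exits BACO until
     * every device has finished entering it. */
    static void *reset_work(void *arg)
    {
            long id = (long)arg;

            pthread_barrier_wait(&tb);      /* task_barrier_enter analogue */
            printf("gpu%ld: baco enter\n", id);
            pthread_barrier_wait(&tb);      /* task_barrier_exit analogue */
            printf("gpu%ld: baco exit\n", id);
            return NULL;
    }

    int main(void)
    {
            pthread_t t[WORKERS];
            long i;

            pthread_barrier_init(&tb, NULL, WORKERS);
            for (i = 0; i < WORKERS; i++)
                    pthread_create(&t[i], NULL, reset_work, (void *)i);
            for (i = 0; i < WORKERS; i++)
                    pthread_join(t[i], NULL);
            pthread_barrier_destroy(&tb);
            return 0;
    }
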
    3377             : 
    3378           0 : static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
    3379             : {
    3380           0 :         char *input = amdgpu_lockup_timeout;
    3381           0 :         char *timeout_setting = NULL;
    3382           0 :         int index = 0;
    3383             :         long timeout;
    3384           0 :         int ret = 0;
    3385             : 
    3386             :         /*
    3387             :          * By default the timeout is 10000 ms for non-compute jobs
    3388             :          * and 60000 ms for compute jobs.
    3389             :          * Under SR-IOV, the compute timeout defaults to 60000 ms in
    3390             :          * one-VF mode and 10000 ms otherwise.
    3391             :          */
    3392           0 :         adev->gfx_timeout = msecs_to_jiffies(10000);
    3393           0 :         adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
    3394           0 :         if (amdgpu_sriov_vf(adev))
    3395           0 :                 adev->compute_timeout = amdgpu_sriov_is_pp_one_vf(adev) ?
    3396           0 :                                         msecs_to_jiffies(60000) : msecs_to_jiffies(10000);
    3397             :         else
    3398           0 :                 adev->compute_timeout = msecs_to_jiffies(60000);
    3399             : 
    3400           0 :         if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
    3401           0 :                 while ((timeout_setting = strsep(&input, ",")) &&
    3402           0 :                                 strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
    3403           0 :                         ret = kstrtol(timeout_setting, 0, &timeout);
    3404           0 :                         if (ret)
    3405             :                                 return ret;
    3406             : 
    3407           0 :                         if (timeout == 0) {
    3408           0 :                                 index++;
    3409           0 :                                 continue;
    3410           0 :                         } else if (timeout < 0) {
    3411           0 :                                 timeout = MAX_SCHEDULE_TIMEOUT;
    3412           0 :                                 dev_warn(adev->dev, "lockup timeout disabled");
    3413           0 :                                 add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
    3414             :                         } else {
    3415           0 :                                 timeout = msecs_to_jiffies(timeout);
    3416             :                         }
    3417             : 
    3418           0 :                         switch (index++) {
    3419             :                         case 0:
    3420           0 :                                 adev->gfx_timeout = timeout;
    3421           0 :                                 break;
    3422             :                         case 1:
    3423           0 :                                 adev->compute_timeout = timeout;
    3424           0 :                                 break;
    3425             :                         case 2:
    3426           0 :                                 adev->sdma_timeout = timeout;
    3427           0 :                                 break;
    3428             :                         case 3:
    3429           0 :                                 adev->video_timeout = timeout;
    3430           0 :                                 break;
    3431             :                         default:
    3432             :                                 break;
    3433             :                         }
    3434             :                 }
    3435             :                 /*
    3436             :                  * There is only one value specified and
    3437             :                  * it should apply to all non-compute jobs.
    3438             :                  */
    3439           0 :                 if (index == 1) {
    3440           0 :                         adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
    3441           0 :                         if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
    3442           0 :                                 adev->compute_timeout = adev->gfx_timeout;
    3443             :                 }
    3444             :         }
    3445             : 
    3446             :         return ret;
    3447             : }
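
The positional mapping in the switch above is gfx, compute, sdma, video; a value of 0 keeps the per-queue default and a negative value disables the timeout entirely. A user-space analogue of the strsep/kstrtol loop, using strtok_r/strtol (illustrative only):

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    int main(void)
    {
            char input[] = "10000,60000,0,-1";  /* gfx,compute,sdma,video */
            static const char *names[] = { "gfx", "compute", "sdma", "video" };
            char *save, *tok;
            int index = 0;

            for (tok = strtok_r(input, ",", &save); tok && index < 4;
                 tok = strtok_r(NULL, ",", &save), index++) {
                    long ms = strtol(tok, NULL, 0);

                    if (ms == 0)
                            printf("%s: keep default\n", names[index]);
                    else if (ms < 0)
                            printf("%s: timeout disabled\n", names[index]);
                    else
                            printf("%s: %ld ms\n", names[index], ms);
            }
            return 0;
    }
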
    3448             : 
    3449             : /**
    3450             :  * amdgpu_device_check_iommu_direct_map - check if RAM direct mapped to GPU
    3451             :  *
    3452             :  * @adev: amdgpu_device pointer
    3453             :  *
    3454             :  * RAM is direct mapped to the GPU if the IOMMU is disabled or in passthrough mode
    3455             :  */
    3456             : static void amdgpu_device_check_iommu_direct_map(struct amdgpu_device *adev)
    3457             : {
    3458             :         struct iommu_domain *domain;
    3459             : 
    3460           0 :         domain = iommu_get_domain_for_dev(adev->dev);
    3461             :         if (!domain || domain->type == IOMMU_DOMAIN_IDENTITY)
    3462           0 :                 adev->ram_is_direct_mapped = true;
    3463             : }
    3464             : 
    3465             : static const struct attribute *amdgpu_dev_attributes[] = {
    3466             :         &dev_attr_product_name.attr,
    3467             :         &dev_attr_product_number.attr,
    3468             :         &dev_attr_serial_number.attr,
    3469             :         &dev_attr_pcie_replay_count.attr,
    3470             :         NULL
    3471             : };
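
Each entry above names a dev_attr_<name> generated by the DEVICE_ATTR family of macros earlier in this file; the NULL-terminated array is then registered in one call via sysfs_create_files() further down in amdgpu_device_init(). A sketch of how one such read-only attribute is typically defined (the driver's actual show body may differ):

    static ssize_t product_name_show(struct device *dev,
                                     struct device_attribute *attr, char *buf)
    {
            struct drm_device *ddev = dev_get_drvdata(dev);
            struct amdgpu_device *adev = drm_to_adev(ddev);

            return sysfs_emit(buf, "%s\n", adev->product_name);
    }
    /* Expands to dev_attr_product_name with ->show = product_name_show. */
    static DEVICE_ATTR_RO(product_name);
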
    3472             : 
    3473             : /**
    3474             :  * amdgpu_device_init - initialize the driver
    3475             :  *
    3476             :  * @adev: amdgpu_device pointer
    3477             :  * @flags: driver flags
    3478             :  *
    3479             :  * Initializes the driver info and hw (all asics).
    3480             :  * Returns 0 for success or an error on failure.
    3481             :  * Called at driver startup.
    3482             :  */
    3483           0 : int amdgpu_device_init(struct amdgpu_device *adev,
    3484             :                        uint32_t flags)
    3485             : {
    3486           0 :         struct drm_device *ddev = adev_to_drm(adev);
    3487           0 :         struct pci_dev *pdev = adev->pdev;
    3488             :         int r, i;
    3489           0 :         bool px = false;
    3490             :         u32 max_MBps;
    3491             : 
    3492           0 :         adev->shutdown = false;
    3493           0 :         adev->flags = flags;
    3494             : 
    3495           0 :         if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST)
    3496           0 :                 adev->asic_type = amdgpu_force_asic_type;
    3497             :         else
    3498           0 :                 adev->asic_type = flags & AMD_ASIC_MASK;
    3499             : 
    3500           0 :         adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
    3501           0 :         if (amdgpu_emu_mode == 1)
    3502           0 :                 adev->usec_timeout *= 10;
    3503           0 :         adev->gmc.gart_size = 512 * 1024 * 1024;
    3504           0 :         adev->accel_working = false;
    3505           0 :         adev->num_rings = 0;
    3506           0 :         adev->mman.buffer_funcs = NULL;
    3507           0 :         adev->mman.buffer_funcs_ring = NULL;
    3508           0 :         adev->vm_manager.vm_pte_funcs = NULL;
    3509           0 :         adev->vm_manager.vm_pte_num_scheds = 0;
    3510           0 :         adev->gmc.gmc_funcs = NULL;
    3511           0 :         adev->harvest_ip_mask = 0x0;
    3512           0 :         adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
    3513           0 :         bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
    3514             : 
    3515           0 :         adev->smc_rreg = &amdgpu_invalid_rreg;
    3516           0 :         adev->smc_wreg = &amdgpu_invalid_wreg;
    3517           0 :         adev->pcie_rreg = &amdgpu_invalid_rreg;
    3518           0 :         adev->pcie_wreg = &amdgpu_invalid_wreg;
    3519           0 :         adev->pciep_rreg = &amdgpu_invalid_rreg;
    3520           0 :         adev->pciep_wreg = &amdgpu_invalid_wreg;
    3521           0 :         adev->pcie_rreg64 = &amdgpu_invalid_rreg64;
    3522           0 :         adev->pcie_wreg64 = &amdgpu_invalid_wreg64;
    3523           0 :         adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
    3524           0 :         adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
    3525           0 :         adev->didt_rreg = &amdgpu_invalid_rreg;
    3526           0 :         adev->didt_wreg = &amdgpu_invalid_wreg;
    3527           0 :         adev->gc_cac_rreg = &amdgpu_invalid_rreg;
    3528           0 :         adev->gc_cac_wreg = &amdgpu_invalid_wreg;
    3529           0 :         adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
    3530           0 :         adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
    3531             : 
    3532           0 :         DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
    3533             :                  amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
    3534             :                  pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
    3535             : 
    3536             :         /* mutex initialization is all done here so we can
    3537             :          * safely call these functions again without locking issues */
    3538           0 :         mutex_init(&adev->firmware.mutex);
    3539           0 :         mutex_init(&adev->pm.mutex);
    3540           0 :         mutex_init(&adev->gfx.gpu_clock_mutex);
    3541           0 :         mutex_init(&adev->srbm_mutex);
    3542           0 :         mutex_init(&adev->gfx.pipe_reserve_mutex);
    3543           0 :         mutex_init(&adev->gfx.gfx_off_mutex);
    3544           0 :         mutex_init(&adev->grbm_idx_mutex);
    3545           0 :         mutex_init(&adev->mn_lock);
    3546           0 :         mutex_init(&adev->virt.vf_errors.lock);
    3547           0 :         hash_init(adev->mn_hash);
    3548           0 :         mutex_init(&adev->psp.mutex);
    3549           0 :         mutex_init(&adev->notifier_lock);
    3550           0 :         mutex_init(&adev->pm.stable_pstate_ctx_lock);
    3551           0 :         mutex_init(&adev->benchmark_mutex);
    3552             : 
    3553           0 :         amdgpu_device_init_apu_flags(adev);
    3554             : 
    3555           0 :         r = amdgpu_device_check_arguments(adev);
    3556           0 :         if (r)
    3557             :                 return r;
    3558             : 
    3559           0 :         spin_lock_init(&adev->mmio_idx_lock);
    3560           0 :         spin_lock_init(&adev->smc_idx_lock);
    3561           0 :         spin_lock_init(&adev->pcie_idx_lock);
    3562           0 :         spin_lock_init(&adev->uvd_ctx_idx_lock);
    3563           0 :         spin_lock_init(&adev->didt_idx_lock);
    3564           0 :         spin_lock_init(&adev->gc_cac_idx_lock);
    3565           0 :         spin_lock_init(&adev->se_cac_idx_lock);
    3566           0 :         spin_lock_init(&adev->audio_endpt_idx_lock);
    3567           0 :         spin_lock_init(&adev->mm_stats.lock);
    3568             : 
    3569           0 :         INIT_LIST_HEAD(&adev->shadow_list);
    3570           0 :         mutex_init(&adev->shadow_list_lock);
    3571             : 
    3572           0 :         INIT_LIST_HEAD(&adev->reset_list);
    3573             : 
    3574           0 :         INIT_LIST_HEAD(&adev->ras_list);
    3575             : 
    3576           0 :         INIT_DELAYED_WORK(&adev->delayed_init_work,
    3577             :                           amdgpu_device_delayed_init_work_handler);
    3578           0 :         INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
    3579             :                           amdgpu_device_delay_enable_gfx_off);
    3580             : 
    3581           0 :         INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
    3582             : 
    3583           0 :         adev->gfx.gfx_off_req_count = 1;
    3584           0 :         adev->gfx.gfx_off_residency = 0;
    3585           0 :         adev->gfx.gfx_off_entrycount = 0;
    3586           0 :         adev->pm.ac_power = power_supply_is_system_supplied() > 0;
    3587             : 
    3588           0 :         atomic_set(&adev->throttling_logging_enabled, 1);
    3589             :         /*
    3590             :          * If throttling continues, logging will be performed every minute
    3591             :          * to avoid log flooding. "-1" is subtracted since the thermal
    3592             :          * throttling interrupt comes every second. Thus, the total logging
    3593             :          * interval is 59 seconds (ratelimited printk interval) + 1 (waiting
    3594             :          * for throttling interrupt) = 60 seconds.
    3595             :          */
    3596           0 :         ratelimit_state_init(&adev->throttling_logging_rs, (60 - 1) * HZ, 1);
    3597           0 :         ratelimit_set_flags(&adev->throttling_logging_rs, RATELIMIT_MSG_ON_RELEASE);
    3598             : 
    3599             :         /* Registers mapping */
    3600             :         /* TODO: block userspace mapping of io register */
    3601           0 :         if (adev->asic_type >= CHIP_BONAIRE) {
    3602           0 :                 adev->rmmio_base = pci_resource_start(adev->pdev, 5);
    3603           0 :                 adev->rmmio_size = pci_resource_len(adev->pdev, 5);
    3604             :         } else {
    3605           0 :                 adev->rmmio_base = pci_resource_start(adev->pdev, 2);
    3606           0 :                 adev->rmmio_size = pci_resource_len(adev->pdev, 2);
    3607             :         }
    3608             : 
    3609           0 :         for (i = 0; i < AMD_IP_BLOCK_TYPE_NUM; i++)
    3610           0 :                 atomic_set(&adev->pm.pwr_state[i], POWER_STATE_UNKNOWN);
    3611             : 
    3612           0 :         adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
    3613           0 :         if (adev->rmmio == NULL) {
    3614             :                 return -ENOMEM;
    3615             :         }
    3616           0 :         DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
    3617           0 :         DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size);
    3618             : 
    3619           0 :         amdgpu_device_get_pcie_info(adev);
    3620             : 
    3621           0 :         if (amdgpu_mcbp)
    3622           0 :                 DRM_INFO("MCBP is enabled\n");
    3623             : 
    3624             :         /*
    3625             :          * The reset domain needs to be present early, before the XGMI hive is
    3626             :          * discovered (if any) and initialized, so the reset sem and in_gpu reset
    3627             :          * flag can be used early on during init and before calling RREG32.
    3628             :          */
    3629           0 :         adev->reset_domain = amdgpu_reset_create_reset_domain(SINGLE_DEVICE, "amdgpu-reset-dev");
    3630           0 :         if (!adev->reset_domain)
    3631             :                 return -ENOMEM;
    3632             : 
    3633             :         /* detect hw virtualization here */
    3634           0 :         amdgpu_detect_virtualization(adev);
    3635             : 
    3636           0 :         r = amdgpu_device_get_job_timeout_settings(adev);
    3637           0 :         if (r) {
    3638           0 :                 dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
    3639           0 :                 return r;
    3640             :         }
    3641             : 
    3642             :         /* early init functions */
    3643           0 :         r = amdgpu_device_ip_early_init(adev);
    3644           0 :         if (r)
    3645             :                 return r;
    3646             : 
    3647             :         /* Enable TMZ based on IP_VERSION */
    3648           0 :         amdgpu_gmc_tmz_set(adev);
    3649             : 
    3650           0 :         amdgpu_gmc_noretry_set(adev);
    3651             :         /* Need to get xgmi info early to decide the reset behavior */
    3652           0 :         if (adev->gmc.xgmi.supported) {
    3653           0 :                 r = adev->gfxhub.funcs->get_xgmi_info(adev);
    3654           0 :                 if (r)
    3655             :                         return r;
    3656             :         }
    3657             : 
    3658             :         /* enable PCIE atomic ops */
    3659           0 :         if (amdgpu_sriov_vf(adev))
    3660           0 :                 adev->have_atomics_support = ((struct amd_sriov_msg_pf2vf_info *)
    3661           0 :                         adev->virt.fw_reserve.p_pf2vf)->pcie_atomic_ops_support_flags ==
    3662             :                         (PCI_EXP_DEVCAP2_ATOMIC_COMP32 | PCI_EXP_DEVCAP2_ATOMIC_COMP64);
    3663             :         else
    3664           0 :                 adev->have_atomics_support =
    3665           0 :                         !pci_enable_atomic_ops_to_root(adev->pdev,
    3666             :                                           PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
    3667             :                                           PCI_EXP_DEVCAP2_ATOMIC_COMP64);
    3668           0 :         if (!adev->have_atomics_support)
    3669           0 :                 dev_info(adev->dev, "PCIE atomic ops are not supported\n");
    3670             : 
    3671             :         /* doorbell bar mapping and doorbell index init */
    3672           0 :         amdgpu_device_doorbell_init(adev);
    3673             : 
    3674           0 :         if (amdgpu_emu_mode == 1) {
    3675             :                 /* post the asic on emulation mode */
    3676           0 :                 emu_soc_asic_init(adev);
    3677           0 :                 goto fence_driver_init;
    3678             :         }
    3679             : 
    3680           0 :         amdgpu_reset_init(adev);
    3681             : 
    3682             :         /* detect if we are with an SRIOV vbios */
    3683           0 :         amdgpu_device_detect_sriov_bios(adev);
    3684             : 
    3685             :         /* check if we need to reset the asic
    3686             :          *  E.g., driver was not cleanly unloaded previously, etc.
    3687             :          */
    3688           0 :         if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
    3689           0 :                 if (adev->gmc.xgmi.num_physical_nodes) {
    3690           0 :                         dev_info(adev->dev, "Pending hive reset.\n");
    3691           0 :                         adev->gmc.xgmi.pending_reset = true;
    3692             :                         /* Only need to init necessary block for SMU to handle the reset */
    3693           0 :                         for (i = 0; i < adev->num_ip_blocks; i++) {
    3694           0 :                                 if (!adev->ip_blocks[i].status.valid)
    3695           0 :                                         continue;
    3696           0 :                                 if (!(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
    3697             :                                       adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
    3698             :                                       adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
    3699             :                                       adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC)) {
    3700           0 :                                         DRM_DEBUG("IP %s disabled for hw_init.\n",
    3701             :                                                 adev->ip_blocks[i].version->funcs->name);
    3702           0 :                                         adev->ip_blocks[i].status.hw = true;
    3703             :                                 }
    3704             :                         }
    3705             :                 } else {
    3706           0 :                         r = amdgpu_asic_reset(adev);
    3707           0 :                         if (r) {
    3708           0 :                                 dev_err(adev->dev, "asic reset on init failed\n");
    3709           0 :                                 goto failed;
    3710             :                         }
    3711             :                 }
    3712             :         }
    3713             : 
    3714           0 :         pci_enable_pcie_error_reporting(adev->pdev);
    3715             : 
    3716             :         /* Post card if necessary */
    3717           0 :         if (amdgpu_device_need_post(adev)) {
    3718           0 :                 if (!adev->bios) {
    3719           0 :                         dev_err(adev->dev, "no vBIOS found\n");
    3720           0 :                         r = -EINVAL;
    3721           0 :                         goto failed;
    3722             :                 }
    3723           0 :                 DRM_INFO("GPU posting now...\n");
    3724           0 :                 r = amdgpu_device_asic_init(adev);
    3725           0 :                 if (r) {
    3726           0 :                         dev_err(adev->dev, "gpu post error!\n");
    3727           0 :                         goto failed;
    3728             :                 }
    3729             :         }
    3730             : 
    3731           0 :         if (adev->is_atom_fw) {
    3732             :                 /* Initialize clocks */
    3733           0 :                 r = amdgpu_atomfirmware_get_clock_info(adev);
    3734           0 :                 if (r) {
    3735           0 :                         dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
    3736           0 :                         amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
    3737           0 :                         goto failed;
    3738             :                 }
    3739             :         } else {
    3740             :                 /* Initialize clocks */
    3741           0 :                 r = amdgpu_atombios_get_clock_info(adev);
    3742           0 :                 if (r) {
    3743           0 :                         dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
    3744           0 :                         amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
    3745           0 :                         goto failed;
    3746             :                 }
    3747             :                 /* init i2c buses */
    3748           0 :                 if (!amdgpu_device_has_dc_support(adev))
    3749           0 :                         amdgpu_atombios_i2c_init(adev);
    3750             :         }
    3751             : 
    3752             : fence_driver_init:
    3753             :         /* Fence driver */
    3754           0 :         r = amdgpu_fence_driver_sw_init(adev);
    3755           0 :         if (r) {
    3756           0 :                 dev_err(adev->dev, "amdgpu_fence_driver_sw_init failed\n");
    3757           0 :                 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
    3758           0 :                 goto failed;
    3759             :         }
    3760             : 
    3761             :         /* init the mode config */
    3762           0 :         drm_mode_config_init(adev_to_drm(adev));
    3763             : 
    3764           0 :         r = amdgpu_device_ip_init(adev);
    3765           0 :         if (r) {
    3766             :                 /* failed in exclusive mode due to timeout */
    3767           0 :                 if (amdgpu_sriov_vf(adev) &&
    3768           0 :                     !amdgpu_sriov_runtime(adev) &&
    3769           0 :                     amdgpu_virt_mmio_blocked(adev) &&
    3770           0 :                     !amdgpu_virt_wait_reset(adev)) {
    3771           0 :                         dev_err(adev->dev, "VF exclusive mode timeout\n");
    3772             :                         /* Don't send request since VF is inactive. */
    3773           0 :                         adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
    3774           0 :                         adev->virt.ops = NULL;
    3775           0 :                         r = -EAGAIN;
    3776           0 :                         goto release_ras_con;
    3777             :                 }
    3778           0 :                 dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
    3779           0 :                 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
    3780           0 :                 goto release_ras_con;
    3781             :         }
    3782             : 
    3783           0 :         amdgpu_fence_driver_hw_init(adev);
    3784             : 
    3785           0 :         dev_info(adev->dev,
    3786             :                 "SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n",
    3787             :                         adev->gfx.config.max_shader_engines,
    3788             :                         adev->gfx.config.max_sh_per_se,
    3789             :                         adev->gfx.config.max_cu_per_sh,
    3790             :                         adev->gfx.cu_info.number);
    3791             : 
    3792           0 :         adev->accel_working = true;
    3793             : 
    3794           0 :         amdgpu_vm_check_compute_bug(adev);
    3795             : 
    3796             :         /* Initialize the buffer migration limit. */
    3797           0 :         if (amdgpu_moverate >= 0)
    3798           0 :                 max_MBps = amdgpu_moverate;
    3799             :         else
    3800             :                 max_MBps = 8; /* Allow 8 MB/s. */
    3801             :         /* Get a log2 for easy divisions. */
    3802           0 :         adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
    3803             : 
    3804           0 :         r = amdgpu_pm_sysfs_init(adev);
    3805           0 :         if (r) {
    3806           0 :                 adev->pm_sysfs_en = false;
    3807           0 :                 DRM_ERROR("registering pm debugfs failed (%d).\n", r);
    3808             :         } else
    3809           0 :                 adev->pm_sysfs_en = true;
    3810             : 
    3811           0 :         r = amdgpu_ucode_sysfs_init(adev);
    3812           0 :         if (r) {
    3813           0 :                 adev->ucode_sysfs_en = false;
    3814           0 :                 DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
    3815             :         } else
    3816           0 :                 adev->ucode_sysfs_en = true;
    3817             : 
    3818           0 :         r = amdgpu_psp_sysfs_init(adev);
    3819           0 :         if (r) {
    3820           0 :                 adev->psp_sysfs_en = false;
    3821           0 :                 if (!amdgpu_sriov_vf(adev))
    3822           0 :                         DRM_ERROR("Creating psp sysfs failed\n");
    3823             :         } else
    3824           0 :                 adev->psp_sysfs_en = true;
    3825             : 
    3826             :         /*
    3827             :          * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost.
    3828             :          * Otherwise the mgpu fan boost feature will be skipped because the
    3829             :          * gpu instance count would be too low at that point.
    3830             :          */
    3831           0 :         amdgpu_register_gpu_instance(adev);
    3832             : 
    3833             :         /* enable clockgating, etc. after ib tests since some blocks require
    3834             :          * explicit gating rather than handling it automatically.
    3835             :          */
    3836           0 :         if (!adev->gmc.xgmi.pending_reset) {
    3837           0 :                 r = amdgpu_device_ip_late_init(adev);
    3838           0 :                 if (r) {
    3839           0 :                         dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
    3840           0 :                         amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
    3841           0 :                         goto release_ras_con;
    3842             :                 }
    3843             :                 /* must succeed. */
    3844           0 :                 amdgpu_ras_resume(adev);
    3845           0 :                 queue_delayed_work(system_wq, &adev->delayed_init_work,
    3846             :                                    msecs_to_jiffies(AMDGPU_RESUME_MS));
    3847             :         }
    3848             : 
    3849           0 :         if (amdgpu_sriov_vf(adev))
    3850           0 :                 flush_delayed_work(&adev->delayed_init_work);
    3851             : 
    3852           0 :         r = sysfs_create_files(&adev->dev->kobj, amdgpu_dev_attributes);
    3853           0 :         if (r)
    3854           0 :                 dev_err(adev->dev, "Could not create amdgpu device attr\n");
    3855             : 
    3856             :         if (IS_ENABLED(CONFIG_PERF_EVENTS)) {
    3857             :                 r = amdgpu_pmu_init(adev);
    3858           0 :                 if (r)
    3859           0 :                         dev_err(adev->dev, "amdgpu_pmu_init failed\n");
    3860             :         }
    3861             :         /* Have stored pci confspace at hand for restore in sudden PCI error */
    3862           0 :         if (amdgpu_device_cache_pci_state(adev->pdev))
    3863           0 :                 pci_restore_state(pdev);
    3864             : 
    3865             :         /* if we have > 1 VGA card, then disable the amdgpu VGA resources.
    3866             :          * This will fail for cards that aren't VGA class devices; just
    3867             :          * ignore it */
    3868           0 :         if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
    3869           0 :                 vga_client_register(adev->pdev, amdgpu_device_vga_set_decode);
    3870             : 
    3871           0 :         if (amdgpu_device_supports_px(ddev)) {
    3872             :                 px = true;
    3873             :                 vga_switcheroo_register_client(adev->pdev,
    3874             :                                                &amdgpu_switcheroo_ops, px);
    3875             :                 vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
    3876             :         }
    3877             : 
    3878           0 :         if (adev->gmc.xgmi.pending_reset)
    3879           0 :                 queue_delayed_work(system_wq, &mgpu_info.delayed_reset_work,
    3880             :                                    msecs_to_jiffies(AMDGPU_RESUME_MS));
    3881             : 
    3882           0 :         amdgpu_device_check_iommu_direct_map(adev);
    3883             : 
    3884           0 :         return 0;
    3885             : 
    3886             : release_ras_con:
    3887           0 :         amdgpu_release_ras_context(adev);
    3888             : 
    3889             : failed:
    3890           0 :         amdgpu_vf_error_trans_all(adev);
    3891             : 
    3892           0 :         return r;
    3893             : }
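
The release_ras_con and failed labels follow the kernel's goto-unwind idiom: jump to the label that releases exactly what has been acquired so far, with later labels falling through to earlier ones. A self-contained illustration of the shape (stub functions, not driver code):

    #include <errno.h>
    #include <stdbool.h>
    #include <stdio.h>

    static bool a_held;

    static int acquire_a(void) { a_held = true; return 0; }
    static void release_a(void) { a_held = false; }
    static int acquire_b(void) { return -EAGAIN; } /* simulate a failure */

    static int init_sketch(void)
    {
            int r;

            r = acquire_a();
            if (r)
                    goto fail;              /* nothing acquired yet */
            r = acquire_b();
            if (r)
                    goto err_release_a;     /* undo only what succeeded */
            return 0;

    err_release_a:
            release_a();
    fail:
            return r;
    }

    int main(void)
    {
            int r = init_sketch();

            printf("init_sketch() = %d, a_held = %d\n", r, a_held);
            return 0;
    }
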
    3894             : 
    3895           0 : static void amdgpu_device_unmap_mmio(struct amdgpu_device *adev)
    3896             : {
    3898             :         /* Clear all CPU mappings pointing to this device */
    3899           0 :         unmap_mapping_range(adev->ddev.anon_inode->i_mapping, 0, 0, 1);
    3900             : 
     3901             :         /* Unmap all mapped BARs - doorbell, registers and VRAM */
    3902           0 :         amdgpu_device_doorbell_fini(adev);
    3903             : 
    3904           0 :         iounmap(adev->rmmio);
    3905           0 :         adev->rmmio = NULL;
    3906           0 :         if (adev->mman.aper_base_kaddr)
    3907           0 :                 iounmap(adev->mman.aper_base_kaddr);
    3908           0 :         adev->mman.aper_base_kaddr = NULL;
    3909             : 
    3910             :         /* Memory manager related */
    3911             :         if (!adev->gmc.xgmi.connected_to_cpu) {
    3912             :                 arch_phys_wc_del(adev->gmc.vram_mtrr);
    3913             :                 arch_io_free_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size);
    3914             :         }
    3915           0 : }
    3916             : 
    3917             : /**
    3918             :  * amdgpu_device_fini_hw - tear down the driver
    3919             :  *
    3920             :  * @adev: amdgpu_device pointer
    3921             :  *
    3922             :  * Tear down the driver info (all asics).
    3923             :  * Called at driver shutdown.
    3924             :  */
    3925           0 : void amdgpu_device_fini_hw(struct amdgpu_device *adev)
    3926             : {
    3927           0 :         dev_info(adev->dev, "amdgpu: finishing device.\n");
    3928           0 :         flush_delayed_work(&adev->delayed_init_work);
    3929           0 :         adev->shutdown = true;
    3930             : 
     3931             :         /* make sure IB tests are finished before entering exclusive mode
     3932             :          * to avoid preempting the IB tests
     3933             :          */
    3934           0 :         if (amdgpu_sriov_vf(adev)) {
    3935           0 :                 amdgpu_virt_request_full_gpu(adev, false);
    3936           0 :                 amdgpu_virt_fini_data_exchange(adev);
    3937             :         }
    3938             : 
    3939             :         /* disable all interrupts */
    3940           0 :         amdgpu_irq_disable_all(adev);
     3941           0 :         if (adev->mode_info.mode_config_initialized) {
    3942           0 :                 if (!drm_drv_uses_atomic_modeset(adev_to_drm(adev)))
    3943           0 :                         drm_helper_force_disable_all(adev_to_drm(adev));
    3944             :                 else
    3945           0 :                         drm_atomic_helper_shutdown(adev_to_drm(adev));
    3946             :         }
    3947           0 :         amdgpu_fence_driver_hw_fini(adev);
    3948             : 
    3949           0 :         if (adev->mman.initialized) {
    3950           0 :                 flush_delayed_work(&adev->mman.bdev.wq);
    3951           0 :                 ttm_bo_lock_delayed_workqueue(&adev->mman.bdev);
    3952             :         }
    3953             : 
    3954           0 :         if (adev->pm_sysfs_en)
    3955           0 :                 amdgpu_pm_sysfs_fini(adev);
    3956           0 :         if (adev->ucode_sysfs_en)
    3957           0 :                 amdgpu_ucode_sysfs_fini(adev);
    3958           0 :         if (adev->psp_sysfs_en)
    3959           0 :                 amdgpu_psp_sysfs_fini(adev);
    3960           0 :         sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes);
    3961             : 
     3962             :         /* RAS features must be disabled before hw fini */
    3963           0 :         amdgpu_ras_pre_fini(adev);
    3964             : 
    3965           0 :         amdgpu_device_ip_fini_early(adev);
    3966             : 
    3967           0 :         amdgpu_irq_fini_hw(adev);
    3968             : 
    3969           0 :         if (adev->mman.initialized)
    3970           0 :                 ttm_device_clear_dma_mappings(&adev->mman.bdev);
    3971             : 
    3972           0 :         amdgpu_gart_dummy_page_fini(adev);
    3973             : 
    3974           0 :         amdgpu_device_unmap_mmio(adev);
    3975             : 
    3976           0 : }
    3977             : 
    3978           0 : void amdgpu_device_fini_sw(struct amdgpu_device *adev)
    3979             : {
    3980             :         int idx;
    3981             : 
    3982           0 :         amdgpu_fence_driver_sw_fini(adev);
    3983           0 :         amdgpu_device_ip_fini(adev);
    3984           0 :         release_firmware(adev->firmware.gpu_info_fw);
    3985           0 :         adev->firmware.gpu_info_fw = NULL;
    3986           0 :         adev->accel_working = false;
    3987             : 
    3988           0 :         amdgpu_reset_fini(adev);
    3989             : 
    3990             :         /* free i2c buses */
    3991           0 :         if (!amdgpu_device_has_dc_support(adev))
    3992           0 :                 amdgpu_i2c_fini(adev);
    3993             : 
    3994           0 :         if (amdgpu_emu_mode != 1)
    3995           0 :                 amdgpu_atombios_fini(adev);
    3996             : 
    3997           0 :         kfree(adev->bios);
    3998           0 :         adev->bios = NULL;
    3999           0 :         if (amdgpu_device_supports_px(adev_to_drm(adev))) {
    4000             :                 vga_switcheroo_unregister_client(adev->pdev);
    4001             :                 vga_switcheroo_fini_domain_pm_ops(adev->dev);
    4002             :         }
    4003           0 :         if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
    4004           0 :                 vga_client_unregister(adev->pdev);
    4005             : 
    4006           0 :         if (drm_dev_enter(adev_to_drm(adev), &idx)) {
    4007             : 
    4008           0 :                 iounmap(adev->rmmio);
    4009           0 :                 adev->rmmio = NULL;
    4010           0 :                 amdgpu_device_doorbell_fini(adev);
    4011           0 :                 drm_dev_exit(idx);
    4012             :         }
    4013             : 
    4014             :         if (IS_ENABLED(CONFIG_PERF_EVENTS))
    4015             :                 amdgpu_pmu_fini(adev);
    4016           0 :         if (adev->mman.discovery_bin)
    4017           0 :                 amdgpu_discovery_fini(adev);
    4018             : 
    4019           0 :         amdgpu_reset_put_reset_domain(adev->reset_domain);
    4020           0 :         adev->reset_domain = NULL;
    4021             : 
    4022           0 :         kfree(adev->pci_state);
    4023             : 
    4024           0 : }
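
/*
 * Editor's sketch (not part of the original source): teardown is split into
 * a hardware phase -- amdgpu_device_fini_hw() above quiesces the device and
 * unmaps MMIO, so it is safe even when the PCI device has already gone away --
 * and this software phase, which frees the remaining driver state. A caller
 * is expected to run them in that order; the helper name below and the elided
 * unload glue are assumptions for illustration only.
 */
static void example_device_unload(struct amdgpu_device *adev)
{
        amdgpu_device_fini_hw(adev);
        /* ... drm_dev_unplug() and the final drm_dev_put() elided ... */
        amdgpu_device_fini_sw(adev);
}
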
    4025             : 
    4026             : /**
    4027             :  * amdgpu_device_evict_resources - evict device resources
    4028             :  * @adev: amdgpu device object
    4029             :  *
     4030             :  * Evicts all TTM device resources (VRAM BOs, GART table) from the LRU list
     4031             :  * of the VRAM memory type. Mainly used for evicting device resources
    4032             :  * at suspend time.
    4033             :  *
    4034             :  */
    4035           0 : static void amdgpu_device_evict_resources(struct amdgpu_device *adev)
    4036             : {
    4037             :         /* No need to evict vram on APUs for suspend to ram or s2idle */
    4038           0 :         if ((adev->in_s3 || adev->in_s0ix) && (adev->flags & AMD_IS_APU))
    4039             :                 return;
    4040             : 
    4041           0 :         if (amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM))
    4042           0 :                 DRM_WARN("evicting device resources failed\n");
    4043             : 
    4044             : }
    4045             : 
    4046             : /*
    4047             :  * Suspend & resume.
    4048             :  */
    4049             : /**
    4050             :  * amdgpu_device_suspend - initiate device suspend
    4051             :  *
    4052             :  * @dev: drm dev pointer
     4053             :  * @fbcon: notify the fbdev of suspend
    4054             :  *
    4055             :  * Puts the hw in the suspend state (all asics).
    4056             :  * Returns 0 for success or an error on failure.
    4057             :  * Called at driver suspend.
    4058             :  */
    4059           0 : int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
    4060             : {
    4061           0 :         struct amdgpu_device *adev = drm_to_adev(dev);
    4062             : 
    4063           0 :         if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
    4064             :                 return 0;
    4065             : 
    4066           0 :         adev->in_suspend = true;
    4067             : 
    4068           0 :         if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D3))
    4069             :                 DRM_WARN("smart shift update failed\n");
    4070             : 
    4071           0 :         drm_kms_helper_poll_disable(dev);
    4072             : 
    4073             :         if (fbcon)
    4074             :                 drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, true);
    4075             : 
    4076           0 :         cancel_delayed_work_sync(&adev->delayed_init_work);
    4077             : 
    4078           0 :         amdgpu_ras_suspend(adev);
    4079             : 
    4080           0 :         amdgpu_device_ip_suspend_phase1(adev);
    4081             : 
    4082           0 :         if (!adev->in_s0ix)
    4083           0 :                 amdgpu_amdkfd_suspend(adev, adev->in_runpm);
    4084             : 
    4085           0 :         amdgpu_device_evict_resources(adev);
    4086             : 
    4087           0 :         amdgpu_fence_driver_hw_fini(adev);
    4088             : 
    4089           0 :         amdgpu_device_ip_suspend_phase2(adev);
    4090             : 
    4091           0 :         return 0;
    4092             : }
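
/*
 * Editor's sketch: how a system dev_pm_ops callback might drive
 * amdgpu_device_suspend(). The callback name and the drvdata wiring below
 * are assumptions for illustration, not the driver's actual pmops code.
 */
static int example_pmops_suspend(struct device *dev)
{
        struct drm_device *drm_dev = dev_get_drvdata(dev);

        /* fbcon = true so the fbdev helper is told about the suspend */
        return amdgpu_device_suspend(drm_dev, true);
}
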
    4093             : 
    4094             : /**
    4095             :  * amdgpu_device_resume - initiate device resume
    4096             :  *
    4097             :  * @dev: drm dev pointer
     4098             :  * @fbcon: notify the fbdev of resume
    4099             :  *
    4100             :  * Bring the hw back to operating state (all asics).
    4101             :  * Returns 0 for success or an error on failure.
    4102             :  * Called at driver resume.
    4103             :  */
    4104           0 : int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
    4105             : {
    4106           0 :         struct amdgpu_device *adev = drm_to_adev(dev);
    4107           0 :         int r = 0;
    4108             : 
    4109           0 :         if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
    4110             :                 return 0;
    4111             : 
    4112           0 :         if (adev->in_s0ix)
    4113           0 :                 amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D0Entry);
    4114             : 
    4115             :         /* post card */
    4116           0 :         if (amdgpu_device_need_post(adev)) {
    4117           0 :                 r = amdgpu_device_asic_init(adev);
    4118           0 :                 if (r)
    4119           0 :                         dev_err(adev->dev, "amdgpu asic init failed\n");
    4120             :         }
    4121             : 
    4122           0 :         r = amdgpu_device_ip_resume(adev);
    4123           0 :         if (r) {
    4124           0 :                 dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", r);
    4125           0 :                 return r;
    4126             :         }
    4127           0 :         amdgpu_fence_driver_hw_init(adev);
    4128             : 
    4129           0 :         r = amdgpu_device_ip_late_init(adev);
    4130           0 :         if (r)
    4131             :                 return r;
    4132             : 
    4133           0 :         queue_delayed_work(system_wq, &adev->delayed_init_work,
    4134             :                            msecs_to_jiffies(AMDGPU_RESUME_MS));
    4135             : 
    4136           0 :         if (!adev->in_s0ix) {
    4137           0 :                 r = amdgpu_amdkfd_resume(adev, adev->in_runpm);
    4138           0 :                 if (r)
    4139             :                         return r;
    4140             :         }
    4141             : 
     4142             :         /* Make sure IB tests are flushed */
    4143           0 :         flush_delayed_work(&adev->delayed_init_work);
    4144             : 
    4145             :         if (fbcon)
    4146             :                 drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, false);
    4147             : 
    4148           0 :         drm_kms_helper_poll_enable(dev);
    4149             : 
    4150           0 :         amdgpu_ras_resume(adev);
    4151             : 
    4152             :         /*
    4153             :          * Most of the connector probing functions try to acquire runtime pm
    4154             :          * refs to ensure that the GPU is powered on when connector polling is
    4155             :          * performed. Since we're calling this from a runtime PM callback,
    4156             :          * trying to acquire rpm refs will cause us to deadlock.
    4157             :          *
    4158             :          * Since we're guaranteed to be holding the rpm lock, it's safe to
    4159             :          * temporarily disable the rpm helpers so this doesn't deadlock us.
    4160             :          */
    4161             : #ifdef CONFIG_PM
    4162           0 :         dev->dev->power.disable_depth++;
    4163             : #endif
    4164           0 :         if (!amdgpu_device_has_dc_support(adev))
    4165           0 :                 drm_helper_hpd_irq_event(dev);
    4166             :         else
    4167           0 :                 drm_kms_helper_hotplug_event(dev);
    4168             : #ifdef CONFIG_PM
    4169           0 :         dev->dev->power.disable_depth--;
    4170             : #endif
    4171           0 :         adev->in_suspend = false;
    4172             : 
    4173           0 :         if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D0))
    4174             :                 DRM_WARN("smart shift update failed\n");
    4175             : 
    4176           0 :         return 0;
    4177             : }
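
/*
 * Editor's sketch: the matching resume-side callback, under the same
 * assumptions as the suspend sketch above.
 */
static int example_pmops_resume(struct device *dev)
{
        struct drm_device *drm_dev = dev_get_drvdata(dev);

        return amdgpu_device_resume(drm_dev, true);
}
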
    4178             : 
    4179             : /**
    4180             :  * amdgpu_device_ip_check_soft_reset - did soft reset succeed
    4181             :  *
    4182             :  * @adev: amdgpu_device pointer
    4183             :  *
    4184             :  * The list of all the hardware IPs that make up the asic is walked and
    4185             :  * the check_soft_reset callbacks are run.  check_soft_reset determines
    4186             :  * if the asic is still hung or not.
    4187             :  * Returns true if any of the IPs are still in a hung state, false if not.
    4188             :  */
    4189           0 : static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
    4190             : {
    4191             :         int i;
    4192           0 :         bool asic_hang = false;
    4193             : 
    4194           0 :         if (amdgpu_sriov_vf(adev))
    4195             :                 return true;
    4196             : 
    4197           0 :         if (amdgpu_asic_need_full_reset(adev))
    4198             :                 return true;
    4199             : 
    4200           0 :         for (i = 0; i < adev->num_ip_blocks; i++) {
    4201           0 :                 if (!adev->ip_blocks[i].status.valid)
    4202           0 :                         continue;
    4203           0 :                 if (adev->ip_blocks[i].version->funcs->check_soft_reset)
    4204           0 :                         adev->ip_blocks[i].status.hang =
    4205           0 :                                 adev->ip_blocks[i].version->funcs->check_soft_reset(adev);
    4206           0 :                 if (adev->ip_blocks[i].status.hang) {
    4207           0 :                         dev_info(adev->dev, "IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
    4208           0 :                         asic_hang = true;
    4209             :                 }
    4210             :         }
    4211             :         return asic_hang;
    4212             : }
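
/*
 * Editor's sketch: what an IP block plugs into the walk above. The
 * callback below is hypothetical, but mirrors the optional
 * ->check_soft_reset hook this loop invokes: sample the block's status
 * registers and report whether the block is still hung.
 */
static bool example_ip_check_soft_reset(void *handle)
{
        /* a real hook would read block-specific busy/hang status here */
        bool hung = false;

        return hung;
}
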
    4213             : 
    4214             : /**
    4215             :  * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
    4216             :  *
    4217             :  * @adev: amdgpu_device pointer
    4218             :  *
    4219             :  * The list of all the hardware IPs that make up the asic is walked and the
    4220             :  * pre_soft_reset callbacks are run if the block is hung.  pre_soft_reset
    4221             :  * handles any IP specific hardware or software state changes that are
    4222             :  * necessary for a soft reset to succeed.
    4223             :  * Returns 0 on success, negative error code on failure.
    4224             :  */
    4225           0 : static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
    4226             : {
    4227           0 :         int i, r = 0;
    4228             : 
    4229           0 :         for (i = 0; i < adev->num_ip_blocks; i++) {
    4230           0 :                 if (!adev->ip_blocks[i].status.valid)
    4231           0 :                         continue;
    4232           0 :                 if (adev->ip_blocks[i].status.hang &&
    4233           0 :                     adev->ip_blocks[i].version->funcs->pre_soft_reset) {
    4234           0 :                         r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev);
    4235           0 :                         if (r)
    4236             :                                 return r;
    4237             :                 }
    4238             :         }
    4239             : 
    4240             :         return 0;
    4241             : }
    4242             : 
    4243             : /**
    4244             :  * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
    4245             :  *
    4246             :  * @adev: amdgpu_device pointer
    4247             :  *
    4248             :  * Some hardware IPs cannot be soft reset.  If they are hung, a full gpu
    4249             :  * reset is necessary to recover.
    4250             :  * Returns true if a full asic reset is required, false if not.
    4251             :  */
    4252           0 : static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
    4253             : {
    4254             :         int i;
    4255             : 
    4256           0 :         if (amdgpu_asic_need_full_reset(adev))
    4257             :                 return true;
    4258             : 
    4259           0 :         for (i = 0; i < adev->num_ip_blocks; i++) {
    4260           0 :                 if (!adev->ip_blocks[i].status.valid)
    4261           0 :                         continue;
    4262           0 :                 if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
    4263           0 :                     (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
    4264           0 :                     (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
    4265           0 :                     (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
    4266             :                      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
    4267           0 :                         if (adev->ip_blocks[i].status.hang) {
     4268           0 :                                 dev_info(adev->dev, "Some blocks need a full reset!\n");
    4269           0 :                                 return true;
    4270             :                         }
    4271             :                 }
    4272             :         }
    4273             :         return false;
    4274             : }
    4275             : 
    4276             : /**
    4277             :  * amdgpu_device_ip_soft_reset - do a soft reset
    4278             :  *
    4279             :  * @adev: amdgpu_device pointer
    4280             :  *
    4281             :  * The list of all the hardware IPs that make up the asic is walked and the
    4282             :  * soft_reset callbacks are run if the block is hung.  soft_reset handles any
    4283             :  * IP specific hardware or software state changes that are necessary to soft
    4284             :  * reset the IP.
    4285             :  * Returns 0 on success, negative error code on failure.
    4286             :  */
    4287           0 : static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
    4288             : {
    4289           0 :         int i, r = 0;
    4290             : 
    4291           0 :         for (i = 0; i < adev->num_ip_blocks; i++) {
    4292           0 :                 if (!adev->ip_blocks[i].status.valid)
    4293           0 :                         continue;
    4294           0 :                 if (adev->ip_blocks[i].status.hang &&
    4295           0 :                     adev->ip_blocks[i].version->funcs->soft_reset) {
    4296           0 :                         r = adev->ip_blocks[i].version->funcs->soft_reset(adev);
    4297           0 :                         if (r)
    4298             :                                 return r;
    4299             :                 }
    4300             :         }
    4301             : 
    4302             :         return 0;
    4303             : }
    4304             : 
    4305             : /**
    4306             :  * amdgpu_device_ip_post_soft_reset - clean up from soft reset
    4307             :  *
    4308             :  * @adev: amdgpu_device pointer
    4309             :  *
    4310             :  * The list of all the hardware IPs that make up the asic is walked and the
    4311             :  * post_soft_reset callbacks are run if the asic was hung.  post_soft_reset
    4312             :  * handles any IP specific hardware or software state changes that are
    4313             :  * necessary after the IP has been soft reset.
    4314             :  * Returns 0 on success, negative error code on failure.
    4315             :  */
    4316           0 : static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
    4317             : {
    4318           0 :         int i, r = 0;
    4319             : 
    4320           0 :         for (i = 0; i < adev->num_ip_blocks; i++) {
    4321           0 :                 if (!adev->ip_blocks[i].status.valid)
    4322           0 :                         continue;
    4323           0 :                 if (adev->ip_blocks[i].status.hang &&
    4324           0 :                     adev->ip_blocks[i].version->funcs->post_soft_reset)
    4325           0 :                         r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev);
    4326           0 :                 if (r)
    4327             :                         return r;
    4328             :         }
    4329             : 
    4330             :         return 0;
    4331             : }
    4332             : 
    4333             : /**
    4334             :  * amdgpu_device_recover_vram - Recover some VRAM contents
    4335             :  *
    4336             :  * @adev: amdgpu_device pointer
    4337             :  *
    4338             :  * Restores the contents of VRAM buffers from the shadows in GTT.  Used to
    4339             :  * restore things like GPUVM page tables after a GPU reset where
    4340             :  * the contents of VRAM might be lost.
    4341             :  *
    4342             :  * Returns:
    4343             :  * 0 on success, negative error code on failure.
    4344             :  */
    4345           0 : static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
    4346             : {
    4347           0 :         struct dma_fence *fence = NULL, *next = NULL;
    4348             :         struct amdgpu_bo *shadow;
    4349             :         struct amdgpu_bo_vm *vmbo;
    4350           0 :         long r = 1, tmo;
    4351             : 
    4352           0 :         if (amdgpu_sriov_runtime(adev))
    4353             :                 tmo = msecs_to_jiffies(8000);
    4354             :         else
    4355           0 :                 tmo = msecs_to_jiffies(100);
    4356             : 
    4357           0 :         dev_info(adev->dev, "recover vram bo from shadow start\n");
    4358           0 :         mutex_lock(&adev->shadow_list_lock);
    4359           0 :         list_for_each_entry(vmbo, &adev->shadow_list, shadow_list) {
    4360           0 :                 shadow = &vmbo->bo;
    4361             :                 /* No need to recover an evicted BO */
    4362           0 :                 if (shadow->tbo.resource->mem_type != TTM_PL_TT ||
    4363           0 :                     shadow->tbo.resource->start == AMDGPU_BO_INVALID_OFFSET ||
    4364           0 :                     shadow->parent->tbo.resource->mem_type != TTM_PL_VRAM)
    4365           0 :                         continue;
    4366             : 
    4367           0 :                 r = amdgpu_bo_restore_shadow(shadow, &next);
    4368           0 :                 if (r)
    4369             :                         break;
    4370             : 
    4371           0 :                 if (fence) {
    4372           0 :                         tmo = dma_fence_wait_timeout(fence, false, tmo);
    4373           0 :                         dma_fence_put(fence);
    4374           0 :                         fence = next;
    4375           0 :                         if (tmo == 0) {
    4376             :                                 r = -ETIMEDOUT;
    4377             :                                 break;
    4378           0 :                         } else if (tmo < 0) {
    4379             :                                 r = tmo;
    4380             :                                 break;
    4381             :                         }
    4382             :                 } else {
    4383           0 :                         fence = next;
    4384             :                 }
    4385             :         }
    4386           0 :         mutex_unlock(&adev->shadow_list_lock);
    4387             : 
    4388           0 :         if (fence)
    4389           0 :                 tmo = dma_fence_wait_timeout(fence, false, tmo);
    4390           0 :         dma_fence_put(fence);
    4391             : 
    4392           0 :         if (r < 0 || tmo <= 0) {
    4393           0 :                 dev_err(adev->dev, "recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo);
    4394           0 :                 return -EIO;
    4395             :         }
    4396             : 
    4397           0 :         dev_info(adev->dev, "recover vram bo from shadow done\n");
    4398           0 :         return 0;
    4399             : }
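
/*
 * Editor's sketch: the restore loop above is pipelined -- it submits
 * copy N+1 before waiting on copy N's fence, and only waits on the last
 * fence after the loop. The same shape in isolation; the
 * example_submit_restore() helper is hypothetical, the dma_fence calls
 * are the real kernel API.
 */
extern struct dma_fence *example_submit_restore(int idx); /* hypothetical */

static long example_pipelined_restore(int count, long tmo)
{
        struct dma_fence *fence = NULL, *next = NULL;
        int i;

        for (i = 0; i < count; i++) {
                next = example_submit_restore(i);  /* kick off restore i */
                if (fence) {
                        /* overlap: wait on restore i-1 while i runs */
                        tmo = dma_fence_wait_timeout(fence, false, tmo);
                        dma_fence_put(fence);
                        fence = next;
                        if (tmo <= 0)
                                break;
                } else {
                        fence = next;
                }
        }
        if (fence && tmo > 0)
                tmo = dma_fence_wait_timeout(fence, false, tmo);
        dma_fence_put(fence);   /* dma_fence_put() is NULL-safe */
        return tmo;
}
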
    4400             : 
    4401             : 
    4402             : /**
    4403             :  * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
    4404             :  *
    4405             :  * @adev: amdgpu_device pointer
    4406             :  * @from_hypervisor: request from hypervisor
    4407             :  *
     4408             :  * Do a VF FLR and reinitialize the ASIC.
     4409             :  * Returns 0 on success, negative error code on failure.
    4410             :  */
    4411           0 : static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
    4412             :                                      bool from_hypervisor)
    4413             : {
    4414             :         int r;
    4415           0 :         struct amdgpu_hive_info *hive = NULL;
    4416           0 :         int retry_limit = 0;
    4417             : 
    4418             : retry:
    4419           0 :         amdgpu_amdkfd_pre_reset(adev);
    4420             : 
    4421           0 :         if (from_hypervisor)
    4422           0 :                 r = amdgpu_virt_request_full_gpu(adev, true);
    4423             :         else
    4424           0 :                 r = amdgpu_virt_reset_gpu(adev);
    4425           0 :         if (r)
    4426             :                 return r;
    4427             : 
    4428             :         /* Resume IP prior to SMC */
    4429           0 :         r = amdgpu_device_ip_reinit_early_sriov(adev);
    4430           0 :         if (r)
    4431             :                 goto error;
    4432             : 
    4433           0 :         amdgpu_virt_init_data_exchange(adev);
    4434             : 
    4435           0 :         r = amdgpu_device_fw_loading(adev);
    4436           0 :         if (r)
    4437             :                 return r;
    4438             : 
    4439             :         /* now we are okay to resume SMC/CP/SDMA */
    4440           0 :         r = amdgpu_device_ip_reinit_late_sriov(adev);
    4441           0 :         if (r)
    4442             :                 goto error;
    4443             : 
    4444           0 :         hive = amdgpu_get_xgmi_hive(adev);
    4445             :         /* Update PSP FW topology after reset */
    4446           0 :         if (hive && adev->gmc.xgmi.num_physical_nodes > 1)
    4447           0 :                 r = amdgpu_xgmi_update_topology(hive, adev);
    4448             : 
    4449           0 :         if (hive)
    4450           0 :                 amdgpu_put_xgmi_hive(hive);
    4451             : 
    4452           0 :         if (!r) {
    4453           0 :                 amdgpu_irq_gpu_reset_resume_helper(adev);
    4454           0 :                 r = amdgpu_ib_ring_tests(adev);
    4455             : 
    4456           0 :                 amdgpu_amdkfd_post_reset(adev);
    4457             :         }
    4458             : 
    4459             : error:
    4460           0 :         if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
    4461           0 :                 amdgpu_inc_vram_lost(adev);
    4462           0 :                 r = amdgpu_device_recover_vram(adev);
    4463             :         }
    4464           0 :         amdgpu_virt_release_full_gpu(adev, true);
    4465             : 
    4466           0 :         if (AMDGPU_RETRY_SRIOV_RESET(r)) {
    4467           0 :                 if (retry_limit < AMDGPU_MAX_RETRY_LIMIT) {
    4468           0 :                         retry_limit++;
    4469           0 :                         goto retry;
    4470             :                 } else
    4471           0 :                         DRM_ERROR("GPU reset retry is beyond the retry limit\n");
    4472             :         }
    4473             : 
    4474             :         return r;
    4475             : }
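
/*
 * Editor's sketch: the function above wraps the whole FLR sequence in a
 * goto-based retry loop bounded by AMDGPU_MAX_RETRY_LIMIT. The bare shape
 * of that control flow, with a hypothetical do_reset_once() standing in
 * for the full request/reinit sequence:
 */
extern int example_do_reset_once(struct amdgpu_device *adev); /* hypothetical */

static int example_reset_with_retries(struct amdgpu_device *adev)
{
        int retry_limit = 0;
        int r;

retry:
        r = example_do_reset_once(adev);
        if (AMDGPU_RETRY_SRIOV_RESET(r)) {
                if (retry_limit < AMDGPU_MAX_RETRY_LIMIT) {
                        retry_limit++;
                        goto retry;
                }
                DRM_ERROR("GPU reset retry is beyond the retry limit\n");
        }
        return r;
}
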
    4476             : 
    4477             : /**
     4478             :  * amdgpu_device_has_job_running - check if there is any job in the pending list
    4479             :  *
    4480             :  * @adev: amdgpu_device pointer
    4481             :  *
     4482             :  * check if there is any job in the pending list
    4483             :  */
    4484           0 : bool amdgpu_device_has_job_running(struct amdgpu_device *adev)
    4485             : {
    4486             :         int i;
    4487             :         struct drm_sched_job *job;
    4488             : 
    4489           0 :         for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
    4490           0 :                 struct amdgpu_ring *ring = adev->rings[i];
    4491             : 
    4492           0 :                 if (!ring || !ring->sched.thread)
    4493           0 :                         continue;
    4494             : 
    4495           0 :                 spin_lock(&ring->sched.job_list_lock);
    4496           0 :                 job = list_first_entry_or_null(&ring->sched.pending_list,
    4497             :                                                struct drm_sched_job, list);
    4498           0 :                 spin_unlock(&ring->sched.job_list_lock);
    4499           0 :                 if (job)
    4500             :                         return true;
    4501             :         }
    4502             :         return false;
    4503             : }
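
/*
 * Editor's sketch: the per-ring check above is a non-destructive peek --
 * take the scheduler's job-list lock, look at the first pending entry,
 * and drop the lock without dequeuing anything. The same idiom against a
 * generic list (the struct names here are illustrative only):
 */
struct example_queue {
        spinlock_t lock;
        struct list_head pending;
};

struct example_item {
        struct list_head node;
};

static bool example_queue_busy(struct example_queue *q)
{
        struct example_item *first;

        spin_lock(&q->lock);
        first = list_first_entry_or_null(&q->pending, struct example_item, node);
        spin_unlock(&q->lock);

        return first != NULL;
}
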
    4504             : 
    4505             : /**
    4506             :  * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
    4507             :  *
    4508             :  * @adev: amdgpu_device pointer
    4509             :  *
    4510             :  * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
    4511             :  * a hung GPU.
    4512             :  */
    4513           0 : bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
    4514             : {
    4515             : 
    4516           0 :         if (amdgpu_gpu_recovery == 0)
    4517             :                 goto disabled;
    4518             : 
    4519           0 :         if (!amdgpu_device_ip_check_soft_reset(adev)) {
     4520           0 :                 dev_info(adev->dev, "Timeout, but no hardware hang detected.\n");
    4521           0 :                 return false;
    4522             :         }
    4523             : 
    4524           0 :         if (amdgpu_sriov_vf(adev))
    4525             :                 return true;
    4526             : 
    4527           0 :         if (amdgpu_gpu_recovery == -1) {
    4528           0 :                 switch (adev->asic_type) {
    4529             : #ifdef CONFIG_DRM_AMDGPU_SI
    4530             :                 case CHIP_VERDE:
    4531             :                 case CHIP_TAHITI:
    4532             :                 case CHIP_PITCAIRN:
    4533             :                 case CHIP_OLAND:
    4534             :                 case CHIP_HAINAN:
    4535             : #endif
    4536             : #ifdef CONFIG_DRM_AMDGPU_CIK
    4537             :                 case CHIP_KAVERI:
    4538             :                 case CHIP_KABINI:
    4539             :                 case CHIP_MULLINS:
    4540             : #endif
    4541             :                 case CHIP_CARRIZO:
    4542             :                 case CHIP_STONEY:
    4543             :                 case CHIP_CYAN_SKILLFISH:
    4544             :                         goto disabled;
    4545             :                 default:
    4546             :                         break;
    4547             :                 }
    4548             :         }
    4549             : 
    4550             :         return true;
    4551             : 
    4552             : disabled:
     4553           0 :         dev_info(adev->dev, "GPU recovery disabled.\n");
     4554           0 :         return false;
    4555             : }
    4556             : 
    4557           0 : int amdgpu_device_mode1_reset(struct amdgpu_device *adev)
    4558             : {
    4559             :         u32 i;
    4560           0 :         int ret = 0;
    4561             : 
    4562           0 :         amdgpu_atombios_scratch_regs_engine_hung(adev, true);
    4563             : 
    4564           0 :         dev_info(adev->dev, "GPU mode1 reset\n");
    4565             : 
    4566             :         /* disable BM */
    4567           0 :         pci_clear_master(adev->pdev);
    4568             : 
    4569           0 :         amdgpu_device_cache_pci_state(adev->pdev);
    4570             : 
    4571           0 :         if (amdgpu_dpm_is_mode1_reset_supported(adev)) {
    4572           0 :                 dev_info(adev->dev, "GPU smu mode1 reset\n");
    4573           0 :                 ret = amdgpu_dpm_mode1_reset(adev);
    4574             :         } else {
    4575           0 :                 dev_info(adev->dev, "GPU psp mode1 reset\n");
    4576           0 :                 ret = psp_gpu_reset(adev);
    4577             :         }
    4578             : 
    4579           0 :         if (ret)
    4580           0 :                 dev_err(adev->dev, "GPU mode1 reset failed\n");
    4581             : 
    4582           0 :         amdgpu_device_load_pci_state(adev->pdev);
    4583             : 
    4584             :         /* wait for asic to come out of reset */
    4585           0 :         for (i = 0; i < adev->usec_timeout; i++) {
    4586           0 :                 u32 memsize = adev->nbio.funcs->get_memsize(adev);
    4587             : 
    4588           0 :                 if (memsize != 0xffffffff)
    4589             :                         break;
    4590           0 :                 udelay(1);
    4591             :         }
    4592             : 
    4593           0 :         amdgpu_atombios_scratch_regs_engine_hung(adev, false);
    4594           0 :         return ret;
    4595             : }
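
/*
 * Editor's sketch: the post-reset wait above is a bounded register poll --
 * keep reading until the value stops being the all-ones "device not
 * responding" pattern or the timeout expires. The same pattern in
 * isolation; the read callback is a stand-in for
 * adev->nbio.funcs->get_memsize():
 */
static int example_wait_for_reset_exit(struct amdgpu_device *adev,
                                       u32 (*read_status)(struct amdgpu_device *))
{
        u32 i;

        for (i = 0; i < adev->usec_timeout; i++) {
                if (read_status(adev) != 0xffffffff)
                        return 0;       /* ASIC is responding again */
                udelay(1);
        }
        return -ETIMEDOUT;
}
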
    4596             : 
    4597           0 : int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
    4598             :                                  struct amdgpu_reset_context *reset_context)
    4599             : {
    4600           0 :         int i, r = 0;
    4601           0 :         struct amdgpu_job *job = NULL;
    4602           0 :         bool need_full_reset =
    4603           0 :                 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
    4604             : 
    4605           0 :         if (reset_context->reset_req_dev == adev)
    4606           0 :                 job = reset_context->job;
    4607             : 
    4608           0 :         if (amdgpu_sriov_vf(adev)) {
    4609             :                 /* stop the data exchange thread */
    4610           0 :                 amdgpu_virt_fini_data_exchange(adev);
    4611             :         }
    4612             : 
    4613           0 :         amdgpu_fence_driver_isr_toggle(adev, true);
    4614             : 
    4615             :         /* block all schedulers and reset given job's ring */
    4616           0 :         for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
    4617           0 :                 struct amdgpu_ring *ring = adev->rings[i];
    4618             : 
    4619           0 :                 if (!ring || !ring->sched.thread)
    4620           0 :                         continue;
    4621             : 
     4622             :                 /* clear the job fences from the fence driver to avoid
     4623             :                  * force_completion on them; NULL and vm flush fences are left in place */
    4624           0 :                 amdgpu_fence_driver_clear_job_fences(ring);
    4625             : 
     4626             :                 /* after all hw jobs are reset, the hw fences are meaningless, so force_completion */
    4627           0 :                 amdgpu_fence_driver_force_completion(ring);
    4628             :         }
    4629             : 
    4630           0 :         amdgpu_fence_driver_isr_toggle(adev, false);
    4631             : 
    4632           0 :         if (job && job->vm)
    4633           0 :                 drm_sched_increase_karma(&job->base);
    4634             : 
    4635           0 :         r = amdgpu_reset_prepare_hwcontext(adev, reset_context);
    4636             :         /* If reset handler not implemented, continue; otherwise return */
    4637           0 :         if (r == -ENOSYS)
    4638           0 :                 r = 0;
    4639             :         else
    4640             :                 return r;
    4641             : 
    4642             :         /* Don't suspend on bare metal if we are not going to HW reset the ASIC */
    4643           0 :         if (!amdgpu_sriov_vf(adev)) {
    4644             : 
    4645           0 :                 if (!need_full_reset)
    4646           0 :                         need_full_reset = amdgpu_device_ip_need_full_reset(adev);
    4647             : 
    4648           0 :                 if (!need_full_reset && amdgpu_gpu_recovery) {
    4649           0 :                         amdgpu_device_ip_pre_soft_reset(adev);
    4650           0 :                         r = amdgpu_device_ip_soft_reset(adev);
    4651           0 :                         amdgpu_device_ip_post_soft_reset(adev);
    4652           0 :                         if (r || amdgpu_device_ip_check_soft_reset(adev)) {
    4653           0 :                                 dev_info(adev->dev, "soft reset failed, will fallback to full reset!\n");
    4654           0 :                                 need_full_reset = true;
    4655             :                         }
    4656             :                 }
    4657             : 
    4658           0 :                 if (need_full_reset)
    4659           0 :                         r = amdgpu_device_ip_suspend(adev);
    4660           0 :                 if (need_full_reset)
    4661           0 :                         set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
    4662             :                 else
    4663             :                         clear_bit(AMDGPU_NEED_FULL_RESET,
    4664           0 :                                   &reset_context->flags);
    4665             :         }
    4666             : 
    4667             :         return r;
    4668             : }
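
/*
 * Editor's sketch: both this function and amdgpu_do_asic_reset() treat
 * -ENOSYS from the pluggable reset handler as "no handler installed;
 * fall through to the default path", while any other return value ends
 * the sequence. The convention in isolation (the handler pointer is
 * hypothetical):
 */
static int example_try_handler_then_default(int (*handler)(void))
{
        int r = handler ? handler() : -ENOSYS;

        /* If reset handler not implemented, continue; otherwise return */
        if (r != -ENOSYS)
                return r;

        /* ... the default reset path would run here ... */
        return 0;
}
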
    4669             : 
    4670           0 : static int amdgpu_reset_reg_dumps(struct amdgpu_device *adev)
    4671             : {
    4672             :         int i;
    4673             : 
    4674             :         lockdep_assert_held(&adev->reset_domain->sem);
    4675             : 
    4676           0 :         for (i = 0; i < adev->num_regs; i++) {
    4677           0 :                 adev->reset_dump_reg_value[i] = RREG32(adev->reset_dump_reg_list[i]);
    4678           0 :                 trace_amdgpu_reset_reg_dumps(adev->reset_dump_reg_list[i],
    4679           0 :                                              adev->reset_dump_reg_value[i]);
    4680             :         }
    4681             : 
    4682           0 :         return 0;
    4683             : }
    4684             : 
    4685             : #ifdef CONFIG_DEV_COREDUMP
    4686             : static ssize_t amdgpu_devcoredump_read(char *buffer, loff_t offset,
    4687             :                 size_t count, void *data, size_t datalen)
    4688             : {
    4689             :         struct drm_printer p;
    4690             :         struct amdgpu_device *adev = data;
    4691             :         struct drm_print_iterator iter;
    4692             :         int i;
    4693             : 
    4694             :         iter.data = buffer;
    4695             :         iter.offset = 0;
    4696             :         iter.start = offset;
    4697             :         iter.remain = count;
    4698             : 
    4699             :         p = drm_coredump_printer(&iter);
    4700             : 
    4701             :         drm_printf(&p, "**** AMDGPU Device Coredump ****\n");
    4702             :         drm_printf(&p, "kernel: " UTS_RELEASE "\n");
    4703             :         drm_printf(&p, "module: " KBUILD_MODNAME "\n");
    4704             :         drm_printf(&p, "time: %lld.%09ld\n", adev->reset_time.tv_sec, adev->reset_time.tv_nsec);
    4705             :         if (adev->reset_task_info.pid)
    4706             :                 drm_printf(&p, "process_name: %s PID: %d\n",
    4707             :                            adev->reset_task_info.process_name,
    4708             :                            adev->reset_task_info.pid);
    4709             : 
    4710             :         if (adev->reset_vram_lost)
    4711             :                 drm_printf(&p, "VRAM is lost due to GPU reset!\n");
    4712             :         if (adev->num_regs) {
    4713             :                 drm_printf(&p, "AMDGPU register dumps:\nOffset:     Value:\n");
    4714             : 
    4715             :                 for (i = 0; i < adev->num_regs; i++)
    4716             :                         drm_printf(&p, "0x%08x: 0x%08x\n",
    4717             :                                    adev->reset_dump_reg_list[i],
    4718             :                                    adev->reset_dump_reg_value[i]);
    4719             :         }
    4720             : 
    4721             :         return count - iter.remain;
    4722             : }
    4723             : 
    4724             : static void amdgpu_devcoredump_free(void *data)
    4725             : {
    4726             : }
    4727             : 
    4728             : static void amdgpu_reset_capture_coredumpm(struct amdgpu_device *adev)
    4729             : {
    4730             :         struct drm_device *dev = adev_to_drm(adev);
    4731             : 
    4732             :         ktime_get_ts64(&adev->reset_time);
    4733             :         dev_coredumpm(dev->dev, THIS_MODULE, adev, 0, GFP_KERNEL,
    4734             :                       amdgpu_devcoredump_read, amdgpu_devcoredump_free);
    4735             : }
    4736             : #endif
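
/*
 * Editor's note: dev_coredumpm() above registers a lazily generated dump
 * with the devcoredump framework; userspace can then read it back from
 * /sys/class/devcoredump/devcd<N>/data, which invokes
 * amdgpu_devcoredump_read() on demand.
 */
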
    4737             : 
    4738           0 : int amdgpu_do_asic_reset(struct list_head *device_list_handle,
    4739             :                          struct amdgpu_reset_context *reset_context)
    4740             : {
    4741           0 :         struct amdgpu_device *tmp_adev = NULL;
    4742           0 :         bool need_full_reset, skip_hw_reset, vram_lost = false;
    4743           0 :         int r = 0;
    4744             : 
    4745             :         /* Try reset handler method first */
    4746           0 :         tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
    4747             :                                     reset_list);
    4748           0 :         amdgpu_reset_reg_dumps(tmp_adev);
    4749             : 
    4750           0 :         reset_context->reset_device_list = device_list_handle;
    4751           0 :         r = amdgpu_reset_perform_reset(tmp_adev, reset_context);
    4752             :         /* If reset handler not implemented, continue; otherwise return */
    4753           0 :         if (r == -ENOSYS)
    4754           0 :                 r = 0;
    4755             :         else
    4756             :                 return r;
    4757             : 
    4758             :         /* Reset handler not implemented, use the default method */
    4759           0 :         need_full_reset =
    4760           0 :                 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
    4761           0 :         skip_hw_reset = test_bit(AMDGPU_SKIP_HW_RESET, &reset_context->flags);
    4762             : 
    4763             :         /*
    4764             :          * ASIC reset has to be done on all XGMI hive nodes ASAP
     4765             :          * to allow proper link negotiation in FW (within 1 sec)
    4766             :          */
    4767           0 :         if (!skip_hw_reset && need_full_reset) {
    4768           0 :                 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
    4769             :                         /* For XGMI run all resets in parallel to speed up the process */
    4770           0 :                         if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
    4771           0 :                                 tmp_adev->gmc.xgmi.pending_reset = false;
    4772           0 :                                 if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work))
    4773           0 :                                         r = -EALREADY;
    4774             :                         } else
    4775           0 :                                 r = amdgpu_asic_reset(tmp_adev);
    4776             : 
    4777           0 :                         if (r) {
    4778           0 :                                 dev_err(tmp_adev->dev, "ASIC reset failed with error, %d for drm dev, %s",
    4779             :                                          r, adev_to_drm(tmp_adev)->unique);
    4780           0 :                                 break;
    4781             :                         }
    4782             :                 }
    4783             : 
    4784             :                 /* For XGMI wait for all resets to complete before proceed */
    4785           0 :                 if (!r) {
    4786           0 :                         list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
    4787           0 :                                 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
    4788           0 :                                         flush_work(&tmp_adev->xgmi_reset_work);
    4789           0 :                                         r = tmp_adev->asic_reset_res;
    4790           0 :                                         if (r)
    4791             :                                                 break;
    4792             :                                 }
    4793             :                         }
    4794             :                 }
    4795             :         }
    4796             : 
    4797           0 :         if (!r && amdgpu_ras_intr_triggered()) {
    4798           0 :                 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
    4799           0 :                         if (tmp_adev->mmhub.ras && tmp_adev->mmhub.ras->ras_block.hw_ops &&
    4800           0 :                             tmp_adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count)
    4801           0 :                                 tmp_adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count(tmp_adev);
    4802             :                 }
    4803             : 
    4804             :                 amdgpu_ras_intr_cleared();
    4805             :         }
    4806             : 
    4807           0 :         list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
    4808           0 :                 if (need_full_reset) {
    4809             :                         /* post card */
    4810           0 :                         r = amdgpu_device_asic_init(tmp_adev);
    4811           0 :                         if (r) {
    4812           0 :                                 dev_warn(tmp_adev->dev, "asic atom init failed!");
    4813             :                         } else {
    4814           0 :                                 dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
    4815           0 :                                 r = amdgpu_amdkfd_resume_iommu(tmp_adev);
    4816           0 :                                 if (r)
    4817             :                                         goto out;
    4818             : 
    4819           0 :                                 r = amdgpu_device_ip_resume_phase1(tmp_adev);
    4820           0 :                                 if (r)
    4821             :                                         goto out;
    4822             : 
    4823           0 :                                 vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
    4824             : #ifdef CONFIG_DEV_COREDUMP
    4825             :                                 tmp_adev->reset_vram_lost = vram_lost;
    4826             :                                 memset(&tmp_adev->reset_task_info, 0,
    4827             :                                                 sizeof(tmp_adev->reset_task_info));
    4828             :                                 if (reset_context->job && reset_context->job->vm)
    4829             :                                         tmp_adev->reset_task_info =
    4830             :                                                 reset_context->job->vm->task_info;
    4831             :                                 amdgpu_reset_capture_coredumpm(tmp_adev);
    4832             : #endif
    4833           0 :                                 if (vram_lost) {
    4834           0 :                                         DRM_INFO("VRAM is lost due to GPU reset!\n");
    4835           0 :                                         amdgpu_inc_vram_lost(tmp_adev);
    4836             :                                 }
    4837             : 
    4838           0 :                                 r = amdgpu_device_fw_loading(tmp_adev);
    4839           0 :                                 if (r)
    4840             :                                         return r;
    4841             : 
    4842           0 :                                 r = amdgpu_device_ip_resume_phase2(tmp_adev);
    4843           0 :                                 if (r)
    4844             :                                         goto out;
    4845             : 
    4846           0 :                                 if (vram_lost)
    4847             :                                         amdgpu_device_fill_reset_magic(tmp_adev);
    4848             : 
    4849             :                                 /*
     4850             :                                  * Add this ASIC as tracked, since the reset already
     4851             :                                  * completed successfully.
    4852             :                                  */
    4853           0 :                                 amdgpu_register_gpu_instance(tmp_adev);
    4854             : 
    4855           0 :                                 if (!reset_context->hive &&
    4856           0 :                                     tmp_adev->gmc.xgmi.num_physical_nodes > 1)
    4857           0 :                                         amdgpu_xgmi_add_device(tmp_adev);
    4858             : 
    4859           0 :                                 r = amdgpu_device_ip_late_init(tmp_adev);
    4860           0 :                                 if (r)
    4861             :                                         goto out;
    4862             : 
    4863           0 :                                 drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, false);
    4864             : 
    4865             :                                 /*
     4866             :                                  * The GPU enters a bad state once the number of
     4867             :                                  * faulty pages detected by ECC reaches the
     4868             :                                  * threshold, and RAS recovery is scheduled next.
     4869             :                                  * Check here to break recovery if the bad page
     4870             :                                  * threshold has indeed been exceeded, and remind
     4871             :                                  * the user to either retire this GPU or set a
     4872             :                                  * bigger bad_page_threshold value before probing
     4873             :                                  * the driver again.
    4874             :                                  */
    4875           0 :                                 if (!amdgpu_ras_eeprom_check_err_threshold(tmp_adev)) {
    4876             :                                         /* must succeed. */
    4877           0 :                                         amdgpu_ras_resume(tmp_adev);
    4878             :                                 } else {
    4879             :                                         r = -EINVAL;
    4880             :                                         goto out;
    4881             :                                 }
    4882             : 
    4883             :                                 /* Update PSP FW topology after reset */
    4884           0 :                                 if (reset_context->hive &&
    4885           0 :                                     tmp_adev->gmc.xgmi.num_physical_nodes > 1)
    4886           0 :                                         r = amdgpu_xgmi_update_topology(
    4887             :                                                 reset_context->hive, tmp_adev);
    4888             :                         }
    4889             :                 }
    4890             : 
    4891             : out:
    4892           0 :                 if (!r) {
    4893           0 :                         amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
    4894           0 :                         r = amdgpu_ib_ring_tests(tmp_adev);
    4895           0 :                         if (r) {
    4896           0 :                                 dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
    4897           0 :                                 need_full_reset = true;
    4898           0 :                                 r = -EAGAIN;
    4899           0 :                                 goto end;
    4900             :                         }
    4901             :                 }
    4902             : 
    4903           0 :                 if (!r)
    4904           0 :                         r = amdgpu_device_recover_vram(tmp_adev);
    4905             :                 else
    4906           0 :                         tmp_adev->asic_reset_res = r;
    4907             :         }
    4908             : 
    4909             : end:
    4910           0 :         if (need_full_reset)
    4911           0 :                 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
    4912             :         else
    4913           0 :                 clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
    4914             :         return r;
    4915             : }
    4916             : 
    4917             : static void amdgpu_device_set_mp1_state(struct amdgpu_device *adev)
    4918             : {
    4919             : 
    4920           0 :         switch (amdgpu_asic_reset_method(adev)) {
    4921             :         case AMD_RESET_METHOD_MODE1:
    4922           0 :                 adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
    4923             :                 break;
    4924             :         case AMD_RESET_METHOD_MODE2:
    4925           0 :                 adev->mp1_state = PP_MP1_STATE_RESET;
    4926             :                 break;
    4927             :         default:
    4928           0 :                 adev->mp1_state = PP_MP1_STATE_NONE;
    4929             :                 break;
    4930             :         }
    4931             : }
    4932             : 
    4933             : static void amdgpu_device_unset_mp1_state(struct amdgpu_device *adev)
    4934             : {
    4935           0 :         amdgpu_vf_error_trans_all(adev);
    4936           0 :         adev->mp1_state = PP_MP1_STATE_NONE;
    4937             : }
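                     :
                     : /*
                     :  * Illustrative sketch, not part of the driver: callers are expected to
                     :  * bracket a reset with the two MP1-state helpers above, so the SMU knows
                     :  * a mode1/mode2 reset is in flight. The do_reset() callback here is a
                     :  * hypothetical stand-in for the actual reset path.
                     :  */
                     : static int example_reset_with_mp1_bracket(struct amdgpu_device *adev,
                     :                 int (*do_reset)(struct amdgpu_device *))
                     : {
                     :         int r;
                     :
                     :         amdgpu_device_set_mp1_state(adev);    /* pick state per reset method */
                     :         r = do_reset(adev);                   /* hypothetical reset callback */
                     :         amdgpu_device_unset_mp1_state(adev);  /* back to PP_MP1_STATE_NONE */
                     :         return r;
                     : }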
    4938             : 
    4939           0 : static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
    4940             : {
    4941           0 :         struct pci_dev *p = NULL;
    4942             : 
    4943           0 :         p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
    4944           0 :                         adev->pdev->bus->number, 1);
    4945           0 :         if (p) {
    4946           0 :                 pm_runtime_enable(&(p->dev));
    4947           0 :                 pm_runtime_resume(&(p->dev));
    4948             :         }
    4949           0 : }
    4950             : 
    4951           0 : static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
    4952             : {
    4953             :         enum amd_reset_method reset_method;
    4954           0 :         struct pci_dev *p = NULL;
    4955             :         u64 expires;
    4956             : 
    4957             :         /*
    4958             :          * For now, only BACO and mode1 reset are confirmed to
    4959             :          * suffer from the audio issue when not properly suspended.
    4960             :          */
    4961           0 :         reset_method = amdgpu_asic_reset_method(adev);
    4962           0 :         if ((reset_method != AMD_RESET_METHOD_BACO) &&
    4963           0 :              (reset_method != AMD_RESET_METHOD_MODE1))
    4964             :                 return -EINVAL;
    4965             : 
    4966           0 :         p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
    4967           0 :                         adev->pdev->bus->number, 1);
    4968           0 :         if (!p)
    4969             :                 return -ENODEV;
    4970             : 
    4971           0 :         expires = pm_runtime_autosuspend_expiration(&(p->dev));
    4972           0 :         if (!expires)
    4973             :                 /*
    4974             :                  * If we cannot get the audio device autosuspend delay,
    4975             :                  * a fixed 4s interval is used. Since 3s is the audio
    4976             :                  * controller's default autosuspend delay setting, the
    4977             :                  * 4s used here is guaranteed to cover it.
    4978             :                  */
    4979           0 :                 expires = ktime_get_mono_fast_ns() + NSEC_PER_SEC * 4ULL;
    4980             : 
    4981           0 :         while (!pm_runtime_status_suspended(&(p->dev))) {
    4982           0 :                 if (!pm_runtime_suspend(&(p->dev)))
    4983             :                         break;
    4984             : 
    4985           0 :                 if (expires < ktime_get_mono_fast_ns()) {
    4986           0 :                         dev_warn(adev->dev, "failed to suspend display audio\n");
    4987             :                         /* TODO: abort the succeeding gpu reset? */
    4988           0 :                         return -ETIMEDOUT;
    4989             :                 }
    4990             :         }
    4991             : 
    4992           0 :         pm_runtime_disable(&(p->dev));
    4993             : 
    4994           0 :         return 0;
    4995             : }
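                     :
                     : /*
                     :  * Illustrative sketch, not part of the driver: the deadline-polling
                     :  * pattern used above, reduced to its core. Repeatedly try to
                     :  * runtime-suspend a device and give up once a monotonic deadline
                     :  * passes; the names mirror the real calls in
                     :  * amdgpu_device_suspend_display_audio().
                     :  */
                     : static int example_wait_runtime_suspended(struct device *dev, u64 deadline_ns)
                     : {
                     :         while (!pm_runtime_status_suspended(dev)) {
                     :                 if (!pm_runtime_suspend(dev))   /* 0 means suspended now */
                     :                         break;
                     :                 if (deadline_ns < ktime_get_mono_fast_ns())
                     :                         return -ETIMEDOUT;      /* same bail-out as above */
                     :         }
                     :         return 0;
                     : }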
    4996             : 
    4997           0 : static void amdgpu_device_recheck_guilty_jobs(
    4998             :         struct amdgpu_device *adev, struct list_head *device_list_handle,
    4999             :         struct amdgpu_reset_context *reset_context)
    5000             : {
    5001           0 :         int i, r = 0;
    5002             : 
    5003           0 :         for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
    5004           0 :                 struct amdgpu_ring *ring = adev->rings[i];
    5005           0 :                 int ret = 0;
    5006             :                 struct drm_sched_job *s_job;
    5007             : 
    5008           0 :                 if (!ring || !ring->sched.thread)
    5009           0 :                         continue;
    5010             : 
    5011           0 :                 s_job = list_first_entry_or_null(&ring->sched.pending_list,
    5012             :                                 struct drm_sched_job, list);
    5013           0 :                 if (s_job == NULL)
    5014           0 :                         continue;
    5015             : 
    5016             :                 /* clear the job's guilty flag and rely on the following step to decide the real one */
    5017           0 :                 drm_sched_reset_karma(s_job);
    5018           0 :                 drm_sched_resubmit_jobs_ext(&ring->sched, 1);
    5019             : 
    5020           0 :                 if (!s_job->s_fence->parent) {
    5021           0 :                         DRM_WARN("Failed to get a HW fence for job!");
    5022           0 :                         continue;
    5023             :                 }
    5024             : 
    5025           0 :                 ret = dma_fence_wait_timeout(s_job->s_fence->parent, false, ring->sched.timeout);
    5026           0 :                 if (ret == 0) { /* timeout */
    5027           0 :                         DRM_ERROR("Found the real bad job! ring:%s, job_id:%llx\n",
    5028             :                                                 ring->sched.name, s_job->id);
    5029             : 
    5030             : 
    5031           0 :                         amdgpu_fence_driver_isr_toggle(adev, true);
    5032             : 
    5033             :                         /* Clear this failed job from fence array */
    5034           0 :                         amdgpu_fence_driver_clear_job_fences(ring);
    5035             : 
    5036           0 :                         amdgpu_fence_driver_isr_toggle(adev, false);
    5037             : 
    5038             :                         /* Since the job won't signal and we go for
    5039             :                          * another resubmit, drop this parent pointer
    5040             :                          */
    5041           0 :                         dma_fence_put(s_job->s_fence->parent);
    5042           0 :                         s_job->s_fence->parent = NULL;
    5043             : 
    5044             :                         /* set guilty */
    5045           0 :                         drm_sched_increase_karma(s_job);
    5046           0 :                         amdgpu_reset_prepare_hwcontext(adev, reset_context);
    5047             : retry:
    5048             :                         /* do hw reset */
    5049           0 :                         if (amdgpu_sriov_vf(adev)) {
    5050           0 :                                 amdgpu_virt_fini_data_exchange(adev);
    5051           0 :                                 r = amdgpu_device_reset_sriov(adev, false);
    5052           0 :                                 if (r)
    5053           0 :                                         adev->asic_reset_res = r;
    5054             :                         } else {
    5055           0 :                                 clear_bit(AMDGPU_SKIP_HW_RESET,
    5056           0 :                                           &reset_context->flags);
    5057           0 :                                 r = amdgpu_do_asic_reset(device_list_handle,
    5058             :                                                          reset_context);
    5059           0 :                                 if (r && r == -EAGAIN)
    5060             :                                         goto retry;
    5061             :                         }
    5062             : 
    5063             :                         /*
    5064             :                          * add reset counter so that the following
    5065             :                          * resubmitted job could flush vmid
    5066             :                          */
    5067           0 :                         atomic_inc(&adev->gpu_reset_counter);
    5068           0 :                         continue;
    5069             :                 }
    5070             : 
    5071             :                 /* got the hw fence, signal finished fence */
    5072           0 :                 atomic_dec(ring->sched.score);
    5073           0 :                 dma_fence_get(&s_job->s_fence->finished);
    5074           0 :                 dma_fence_signal(&s_job->s_fence->finished);
    5075           0 :                 dma_fence_put(&s_job->s_fence->finished);
    5076             : 
    5077             :                 /* remove node from list and free the job */
    5078           0 :                 spin_lock(&ring->sched.job_list_lock);
    5079           0 :                 list_del_init(&s_job->list);
    5080           0 :                 spin_unlock(&ring->sched.job_list_lock);
    5081           0 :                 ring->sched.ops->free_job(s_job);
    5082             :         }
    5083           0 : }
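                     :
                     : /*
                     :  * Illustrative sketch, not part of the driver: the core of the recheck
                     :  * above is "resubmit one job, then wait on its HW fence with the ring's
                     :  * scheduler timeout". dma_fence_wait_timeout() returning 0 means the
                     :  * wait timed out, which is how the real guilty job is identified.
                     :  */
                     : static bool example_job_is_guilty(struct amdgpu_ring *ring,
                     :                 struct drm_sched_job *s_job)
                     : {
                     :         long ret;
                     :
                     :         if (!s_job->s_fence->parent)    /* no HW fence to wait on */
                     :                 return false;
                     :
                     :         ret = dma_fence_wait_timeout(s_job->s_fence->parent, false,
                     :                                      ring->sched.timeout);
                     :         return ret == 0;                /* timeout => the real bad job */
                     : }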
    5084             : 
    5085           0 : static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev)
    5086             : {
    5087           0 :         struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
    5088             : 
    5089             : #if defined(CONFIG_DEBUG_FS)
    5090             :         if (!amdgpu_sriov_vf(adev))
    5091             :                 cancel_work(&adev->reset_work);
    5092             : #endif
    5093             : 
    5094           0 :         if (adev->kfd.dev)
    5095           0 :                 cancel_work(&adev->kfd.reset_work);
    5096             : 
    5097           0 :         if (amdgpu_sriov_vf(adev))
    5098           0 :                 cancel_work(&adev->virt.flr_work);
    5099             : 
    5100           0 :         if (con && adev->ras_enabled)
    5101           0 :                 cancel_work(&con->recovery_work);
    5102             : 
    5103           0 : }
    5104             : 
    5105             : 
    5106             : /**
    5107             :  * amdgpu_device_gpu_recover - reset the asic and recover scheduler
    5108             :  *
    5109             :  * @adev: amdgpu_device pointer
    5110             :  * @job: which job triggered the hang
                     :  * @reset_context: context for the GPU reset
    5111             :  *
    5112             :  * Attempt to reset the GPU if it has hung (all ASICs).
    5113             :  * Attempt to do a soft reset or full reset and reinitialize the ASIC.
    5114             :  * Returns 0 for success or an error on failure.
    5115             :  */
    5116             : 
    5117           0 : int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
    5118             :                               struct amdgpu_job *job,
    5119             :                               struct amdgpu_reset_context *reset_context)
    5120             : {
    5121           0 :         struct list_head device_list, *device_list_handle =  NULL;
    5122           0 :         bool job_signaled = false;
    5123           0 :         struct amdgpu_hive_info *hive = NULL;
    5124           0 :         struct amdgpu_device *tmp_adev = NULL;
    5125           0 :         int i, r = 0;
    5126           0 :         bool need_emergency_restart = false;
    5127           0 :         bool audio_suspended = false;
    5128             :         int tmp_vram_lost_counter;
    5129             : 
    5130             :         /*
    5131             :          * Special case: RAS triggered and full reset isn't supported
    5132             :          */
    5133           0 :         need_emergency_restart = amdgpu_ras_need_emergency_restart(adev);
    5134             : 
    5135             :         /*
    5136             :          * Flush RAM to disk so that after reboot
    5137             :          * the user can read log and see why the system rebooted.
    5138             :          */
    5139           0 :         if (need_emergency_restart && amdgpu_ras_get_context(adev)->reboot) {
    5140           0 :                 DRM_WARN("Emergency reboot.");
    5141             : 
    5142           0 :                 ksys_sync_helper();
    5143           0 :                 emergency_restart();
    5144             :         }
    5145             : 
    5146           0 :         dev_info(adev->dev, "GPU %s begin!\n",
    5147             :                 need_emergency_restart ? "jobs stop":"reset");
    5148             : 
    5149           0 :         if (!amdgpu_sriov_vf(adev))
    5150           0 :                 hive = amdgpu_get_xgmi_hive(adev);
    5151           0 :         if (hive)
    5152           0 :                 mutex_lock(&hive->hive_lock);
    5153             : 
    5154           0 :         reset_context->job = job;
    5155           0 :         reset_context->hive = hive;
    5156             : 
    5157             :         /*
    5158             :          * Build list of devices to reset.
    5159             :          * In case we are in XGMI hive mode, re-sort the device list
    5160             :          * to put adev in the 1st position.
    5161             :          */
    5162           0 :         INIT_LIST_HEAD(&device_list);
    5163           0 :         if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > 1)) {
    5164           0 :                 list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head)
    5165           0 :                         list_add_tail(&tmp_adev->reset_list, &device_list);
    5166           0 :                 if (!list_is_first(&adev->reset_list, &device_list))
    5167           0 :                         list_rotate_to_front(&adev->reset_list, &device_list);
    5168             :                 device_list_handle = &device_list;
    5169             :         } else {
    5170           0 :                 list_add_tail(&adev->reset_list, &device_list);
    5171           0 :                 device_list_handle = &device_list;
    5172             :         }
    5173             : 
    5174             :         /* We need to lock reset domain only once both for XGMI and single device */
    5175           0 :         tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
    5176             :                                     reset_list);
    5177           0 :         amdgpu_device_lock_reset_domain(tmp_adev->reset_domain);
    5178             : 
    5179             :         /* block all schedulers and reset given job's ring */
    5180           0 :         list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
    5181             : 
    5182           0 :                 amdgpu_device_set_mp1_state(tmp_adev);
    5183             : 
    5184             :                 /*
    5185             :                  * Try to put the audio codec into suspend state
    5186             :                  * before the gpu reset starts.
    5187             :                  *
    5188             :                  * Because the power domain of the graphics device
    5189             :                  * is shared with the AZ power domain, without this
    5190             :                  * we may change the audio hardware from behind
    5191             :                  * the audio driver's back. That will trigger
    5192             :                  * some audio codec errors.
    5193             :                  */
    5194           0 :                 if (!amdgpu_device_suspend_display_audio(tmp_adev))
    5195           0 :                         audio_suspended = true;
    5196             : 
    5197           0 :                 amdgpu_ras_set_error_query_ready(tmp_adev, false);
    5198             : 
    5199           0 :                 cancel_delayed_work_sync(&tmp_adev->delayed_init_work);
    5200             : 
    5201           0 :                 if (!amdgpu_sriov_vf(tmp_adev))
    5202           0 :                         amdgpu_amdkfd_pre_reset(tmp_adev);
    5203             : 
    5204             :                 /*
    5205             :                  * Mark these ASICs to be reset as untracked first,
    5206             :                  * and add them back after the reset completes.
    5207             :                  */
    5208           0 :                 amdgpu_unregister_gpu_instance(tmp_adev);
    5209             : 
    5210           0 :                 drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, true);
    5211             : 
    5212             :                 /* disable ras on ALL IPs */
    5213           0 :                 if (!need_emergency_restart &&
    5214           0 :                       amdgpu_device_ip_need_full_reset(tmp_adev))
    5215           0 :                         amdgpu_ras_suspend(tmp_adev);
    5216             : 
    5217           0 :                 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
    5218           0 :                         struct amdgpu_ring *ring = tmp_adev->rings[i];
    5219             : 
    5220           0 :                         if (!ring || !ring->sched.thread)
    5221           0 :                                 continue;
    5222             : 
    5223           0 :                         drm_sched_stop(&ring->sched, job ? &job->base : NULL);
    5224             : 
    5225           0 :                         if (need_emergency_restart)
    5226           0 :                                 amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
    5227             :                 }
    5228           0 :                 atomic_inc(&tmp_adev->gpu_reset_counter);
    5229             :         }
    5230             : 
    5231           0 :         if (need_emergency_restart)
    5232             :                 goto skip_sched_resume;
    5233             : 
    5234             :         /*
    5235             :          * Must check guilty signal here since after this point all old
    5236             :          * HW fences are force signaled.
    5237             :          *
    5238             :          * job->base holds a reference to parent fence
    5239             :          */
    5240           0 :         if (job && dma_fence_is_signaled(&job->hw_fence)) {
    5241           0 :                 job_signaled = true;
    5242           0 :                 dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
    5243           0 :                 goto skip_hw_reset;
    5244             :         }
    5245             : 
    5246             : retry:  /* Rest of adevs pre asic reset from XGMI hive. */
    5247           0 :         list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
    5248           0 :                 r = amdgpu_device_pre_asic_reset(tmp_adev, reset_context);
    5249             :                 /* TODO: should we stop? */
    5250           0 :                 if (r) {
    5251           0 :                         dev_err(tmp_adev->dev, "GPU pre asic reset failed with err, %d for drm dev, %s ",
    5252             :                                   r, adev_to_drm(tmp_adev)->unique);
    5253           0 :                         tmp_adev->asic_reset_res = r;
    5254             :                 }
    5255             : 
    5256             :                 /*
    5257             :                  * Drop all pending non-scheduler resets. Scheduler resets
    5258             :                  * were already dropped during drm_sched_stop.
    5259             :                  */
    5260           0 :                 amdgpu_device_stop_pending_resets(tmp_adev);
    5261             :         }
    5262             : 
    5263           0 :         tmp_vram_lost_counter = atomic_read(&((adev)->vram_lost_counter));
    5264             :         /* Actual ASIC resets if needed.*/
    5265             :         /* Host driver will handle XGMI hive reset for SRIOV */
    5266           0 :         if (amdgpu_sriov_vf(adev)) {
    5267           0 :                 r = amdgpu_device_reset_sriov(adev, job ? false : true);
    5268           0 :                 if (r)
    5269           0 :                         adev->asic_reset_res = r;
    5270             : 
    5271             :         /* Aldebaran supports RAS in SRIOV, so we need to resume RAS during reset */
    5272           0 :                 if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
    5273           0 :                         amdgpu_ras_resume(adev);
    5274             :         } else {
    5275           0 :                 r = amdgpu_do_asic_reset(device_list_handle, reset_context);
    5276           0 :                 if (r && r == -EAGAIN) {
    5277           0 :                         set_bit(AMDGPU_SKIP_MODE2_RESET, &reset_context->flags);
    5278           0 :                         adev->asic_reset_res = 0;
    5279           0 :                         goto retry;
    5280             :                 }
    5281             :         }
    5282             : 
    5283             : skip_hw_reset:
    5284             : 
    5285             :         /* Post ASIC reset for all devs. */
    5286           0 :         list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
    5287             : 
    5288             :                 /*
    5289             :                  * Sometimes a later bad compute job can block a good gfx job, since
    5290             :                  * the gfx and compute rings share internal GC HW. We add an additional
    5291             :                  * guilty-job recheck step to find the real guilty job: it synchronously
    5292             :                  * resubmits and waits for the first job to be signaled. If that wait
    5293             :                  * times out, we identify the job as the real guilty one.
    5294             :                  */
    5295           0 :                 if (amdgpu_gpu_recovery == 2 &&
    5296           0 :                         !(tmp_vram_lost_counter < atomic_read(&adev->vram_lost_counter)))
    5297           0 :                         amdgpu_device_recheck_guilty_jobs(
    5298             :                                 tmp_adev, device_list_handle, reset_context);
    5299             : 
    5300           0 :                 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
    5301           0 :                         struct amdgpu_ring *ring = tmp_adev->rings[i];
    5302             : 
    5303           0 :                         if (!ring || !ring->sched.thread)
    5304           0 :                                 continue;
    5305             : 
    5306             :                         /* No point in resubmitting jobs if we didn't do a HW reset */
    5307           0 :                         if (!tmp_adev->asic_reset_res && !job_signaled)
    5308           0 :                                 drm_sched_resubmit_jobs(&ring->sched);
    5309             : 
    5310           0 :                         drm_sched_start(&ring->sched, !tmp_adev->asic_reset_res);
    5311             :                 }
    5312             : 
    5313           0 :                 if (tmp_adev->enable_mes)
    5314           0 :                         amdgpu_mes_self_test(tmp_adev);
    5315             : 
    5316           0 :                 if (!drm_drv_uses_atomic_modeset(adev_to_drm(tmp_adev)) && !job_signaled) {
    5317           0 :                         drm_helper_resume_force_mode(adev_to_drm(tmp_adev));
    5318             :                 }
    5319             : 
    5320           0 :                 if (tmp_adev->asic_reset_res)
    5321           0 :                         r = tmp_adev->asic_reset_res;
    5322             : 
    5323           0 :                 tmp_adev->asic_reset_res = 0;
    5324             : 
    5325           0 :                 if (r) {
    5326             :                         /* bad news, how to tell it to userspace ? */
    5327           0 :                         dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&tmp_adev->gpu_reset_counter));
    5328           0 :                         amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
    5329             :                 } else {
    5330           0 :                         dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter));
    5331           0 :                         if (amdgpu_acpi_smart_shift_update(adev_to_drm(tmp_adev), AMDGPU_SS_DEV_D0))
    5332             :                                 DRM_WARN("smart shift update failed\n");
    5333             :                 }
    5334             :         }
    5335             : 
    5336             : skip_sched_resume:
    5337           0 :         list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
    5338             :                 /* unlock kfd: SRIOV would do it separately */
    5339           0 :                 if (!need_emergency_restart && !amdgpu_sriov_vf(tmp_adev))
    5340           0 :                         amdgpu_amdkfd_post_reset(tmp_adev);
    5341             : 
    5342             :                 /* kfd_post_reset will do nothing if the kfd device is not initialized;
    5343             :                  * bring up kfd here if it was not initialized before
    5344             :                  */
    5345           0 :                 if (!tmp_adev->kfd.init_complete)
    5346           0 :                         amdgpu_amdkfd_device_init(tmp_adev);
    5347             : 
    5348           0 :                 if (audio_suspended)
    5349           0 :                         amdgpu_device_resume_display_audio(tmp_adev);
    5350             : 
    5351           0 :                 amdgpu_device_unset_mp1_state(tmp_adev);
    5352             :         }
    5353             : 
    5354           0 :         tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
    5355             :                                             reset_list);
    5356           0 :         amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain);
    5357             : 
    5358           0 :         if (hive) {
    5359           0 :                 mutex_unlock(&hive->hive_lock);
    5360           0 :                 amdgpu_put_xgmi_hive(hive);
    5361             :         }
    5362             : 
    5363           0 :         if (r)
    5364           0 :                 dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
    5365             : 
    5366           0 :         atomic_set(&adev->reset_domain->reset_res, r);
    5367           0 :         return r;
    5368             : }
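                     :
                     : /*
                     :  * Illustrative sketch, not part of the driver: a minimal caller of
                     :  * amdgpu_device_gpu_recover() as it might look from a job-timeout path.
                     :  * The reset_context setup mirrors what amdgpu_pci_slot_reset() below
                     :  * does; the surrounding handler is hypothetical.
                     :  */
                     : static int example_trigger_recovery(struct amdgpu_device *adev,
                     :                 struct amdgpu_job *bad_job)
                     : {
                     :         struct amdgpu_reset_context reset_context;
                     :
                     :         memset(&reset_context, 0, sizeof(reset_context));
                     :         reset_context.method = AMD_RESET_METHOD_NONE;  /* let the driver pick */
                     :         reset_context.reset_req_dev = adev;
                     :         set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
                     :
                     :         return amdgpu_device_gpu_recover(adev, bad_job, &reset_context);
                     : }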
    5369             : 
    5370             : /**
    5371             :  * amdgpu_device_get_pcie_info - fetch pcie info about the PCIE slot
    5372             :  *
    5373             :  * @adev: amdgpu_device pointer
    5374             :  *
    5375             :  * Fetches and stores in the driver the PCIE capabilities (gen speed
    5376             :  * and lanes) of the slot the device is in. Handles APUs and
    5377             :  * virtualized environments where PCIE config space may not be available.
    5378             :  */
    5379           0 : static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
    5380             : {
    5381             :         struct pci_dev *pdev;
    5382             :         enum pci_bus_speed speed_cap, platform_speed_cap;
    5383             :         enum pcie_link_width platform_link_width;
    5384             : 
    5385           0 :         if (amdgpu_pcie_gen_cap)
    5386           0 :                 adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
    5387             : 
    5388           0 :         if (amdgpu_pcie_lane_cap)
    5389           0 :                 adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
    5390             : 
    5391             :         /* covers APUs as well */
    5392           0 :         if (pci_is_root_bus(adev->pdev->bus)) {
    5393           0 :                 if (adev->pm.pcie_gen_mask == 0)
    5394           0 :                         adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
    5395           0 :                 if (adev->pm.pcie_mlw_mask == 0)
    5396           0 :                         adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
    5397           0 :                 return;
    5398             :         }
    5399             : 
    5400           0 :         if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
    5401             :                 return;
    5402             : 
    5403           0 :         pcie_bandwidth_available(adev->pdev, NULL,
    5404             :                                  &platform_speed_cap, &platform_link_width);
    5405             : 
    5406           0 :         if (adev->pm.pcie_gen_mask == 0) {
    5407             :                 /* asic caps */
    5408           0 :                 pdev = adev->pdev;
    5409           0 :                 speed_cap = pcie_get_speed_cap(pdev);
    5410           0 :                 if (speed_cap == PCI_SPEED_UNKNOWN) {
    5411           0 :                         adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
    5412             :                                                   CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
    5413             :                                                   CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
    5414             :                 } else {
    5415           0 :                         if (speed_cap == PCIE_SPEED_32_0GT)
    5416           0 :                                 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
    5417             :                                                           CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
    5418             :                                                           CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
    5419             :                                                           CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4 |
    5420             :                                                           CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN5);
    5421           0 :                         else if (speed_cap == PCIE_SPEED_16_0GT)
    5422           0 :                                 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
    5423             :                                                           CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
    5424             :                                                           CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
    5425             :                                                           CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
    5426           0 :                         else if (speed_cap == PCIE_SPEED_8_0GT)
    5427           0 :                                 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
    5428             :                                                           CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
    5429             :                                                           CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
    5430           0 :                         else if (speed_cap == PCIE_SPEED_5_0GT)
    5431           0 :                                 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
    5432             :                                                           CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
    5433             :                         else
    5434           0 :                                 adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
    5435             :                 }
    5436             :                 /* platform caps */
    5437           0 :                 if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
    5438           0 :                         adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
    5439             :                                                    CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
    5440             :                 } else {
    5441           0 :                         if (platform_speed_cap == PCIE_SPEED_32_0GT)
    5442           0 :                                 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
    5443             :                                                            CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
    5444             :                                                            CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
    5445             :                                                            CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4 |
    5446             :                                                            CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5);
    5447           0 :                         else if (platform_speed_cap == PCIE_SPEED_16_0GT)
    5448           0 :                                 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
    5449             :                                                            CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
    5450             :                                                            CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
    5451             :                                                            CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
    5452           0 :                         else if (platform_speed_cap == PCIE_SPEED_8_0GT)
    5453           0 :                                 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
    5454             :                                                            CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
    5455             :                                                            CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
    5456           0 :                         else if (platform_speed_cap == PCIE_SPEED_5_0GT)
    5457           0 :                                 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
    5458             :                                                            CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
    5459             :                         else
    5460           0 :                                 adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
    5461             : 
    5462             :                 }
    5463             :         }
    5464           0 :         if (adev->pm.pcie_mlw_mask == 0) {
    5465           0 :                 if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
    5466           0 :                         adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
    5467             :                 } else {
    5468           0 :                         switch (platform_link_width) {
    5469             :                         case PCIE_LNK_X32:
    5470           0 :                                 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
    5471             :                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
    5472             :                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
    5473             :                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
    5474             :                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
    5475             :                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
    5476             :                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
    5477           0 :                                 break;
    5478             :                         case PCIE_LNK_X16:
    5479           0 :                                 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
    5480             :                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
    5481             :                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
    5482             :                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
    5483             :                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
    5484             :                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
    5485           0 :                                 break;
    5486             :                         case PCIE_LNK_X12:
    5487           0 :                                 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
    5488             :                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
    5489             :                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
    5490             :                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
    5491             :                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
    5492           0 :                                 break;
    5493             :                         case PCIE_LNK_X8:
    5494           0 :                                 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
    5495             :                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
    5496             :                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
    5497             :                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
    5498           0 :                                 break;
    5499             :                         case PCIE_LNK_X4:
    5500           0 :                                 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
    5501             :                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
    5502             :                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
    5503           0 :                                 break;
    5504             :                         case PCIE_LNK_X2:
    5505           0 :                                 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
    5506             :                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
    5507           0 :                                 break;
    5508             :                         case PCIE_LNK_X1:
    5509           0 :                                 adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
    5510           0 :                                 break;
    5511             :                         default:
    5512             :                                 break;
    5513             :                         }
    5514             :                 }
    5515             :         }
    5516             : }
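                     :
                     : /*
                     :  * Illustrative sketch, not part of the driver: the masks built above are
                     :  * bitwise ORs of per-generation CAIL_*_LINK_SPEED_SUPPORT_GENn flags, so
                     :  * the highest supported gen can be recovered by testing from the top
                     :  * down. This helper is hypothetical and only shows how such a mask
                     :  * would be read.
                     :  */
                     : static int example_max_pcie_gen(u32 gen_mask)
                     : {
                     :         if (gen_mask & CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN5)
                     :                 return 5;
                     :         if (gen_mask & CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4)
                     :                 return 4;
                     :         if (gen_mask & CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3)
                     :                 return 3;
                     :         if (gen_mask & CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2)
                     :                 return 2;
                     :         return 1;       /* GEN1 is always set as a floor above */
                     : }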
    5517             : 
    5518             : /**
    5519             :  * amdgpu_device_is_peer_accessible - Check peer access through PCIe BAR
    5520             :  *
    5521             :  * @adev: amdgpu_device pointer
    5522             :  * @peer_adev: amdgpu_device pointer for peer device trying to access @adev
    5523             :  *
    5524             :  * Return true if @peer_adev can access (DMA) @adev through the PCIe
    5525             :  * BAR, i.e. @adev is "large BAR" and the BAR matches the DMA mask of
    5526             :  * @peer_adev.
    5527             :  */
    5528           0 : bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev,
    5529             :                                       struct amdgpu_device *peer_adev)
    5530             : {
    5531             : #ifdef CONFIG_HSA_AMD_P2P
    5532             :         uint64_t address_mask = peer_adev->dev->dma_mask ?
    5533             :                 ~*peer_adev->dev->dma_mask : ~((1ULL << 32) - 1);
    5534             :         resource_size_t aper_limit =
    5535             :                 adev->gmc.aper_base + adev->gmc.aper_size - 1;
    5536             :         bool p2p_access = !adev->gmc.xgmi.connected_to_cpu &&
    5537             :                           !(pci_p2pdma_distance_many(adev->pdev,
    5538             :                                         &peer_adev->dev, 1, true) < 0);
    5539             : 
    5540             :         return pcie_p2p && p2p_access && (adev->gmc.visible_vram_size &&
    5541             :                 adev->gmc.real_vram_size == adev->gmc.visible_vram_size &&
    5542             :                 !(adev->gmc.aper_base & address_mask ||
    5543             :                   aper_limit & address_mask));
    5544             : #else
    5545           0 :         return false;
    5546             : #endif
    5547             : }
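                     :
                     : /*
                     :  * Illustrative sketch, not part of the driver: the "large BAR" test
                     :  * above requires the whole VRAM aperture to sit below the peer's DMA
                     :  * mask. For a 32-bit peer mask, address_mask is ~0xffffffffULL, so an
                     :  * aperture based at 0x800000000 (above 4 GiB) fails. The helper and its
                     :  * values are made up for illustration.
                     :  */
                     : static bool example_aperture_dma_reachable(u64 aper_base, u64 aper_size,
                     :                 u64 dma_mask)
                     : {
                     :         u64 address_mask = ~dma_mask;           /* bits the peer cannot address */
                     :         u64 aper_limit = aper_base + aper_size - 1;
                     :
                     :         return !(aper_base & address_mask) && !(aper_limit & address_mask);
                     : }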
    5548             : 
    5549           0 : int amdgpu_device_baco_enter(struct drm_device *dev)
    5550             : {
    5551           0 :         struct amdgpu_device *adev = drm_to_adev(dev);
    5552           0 :         struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
    5553             : 
    5554           0 :         if (!amdgpu_device_supports_baco(adev_to_drm(adev)))
    5555             :                 return -ENOTSUPP;
    5556             : 
    5557           0 :         if (ras && adev->ras_enabled &&
    5558           0 :             adev->nbio.funcs->enable_doorbell_interrupt)
    5559           0 :                 adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
    5560             : 
    5561           0 :         return amdgpu_dpm_baco_enter(adev);
    5562             : }
    5563             : 
    5564           0 : int amdgpu_device_baco_exit(struct drm_device *dev)
    5565             : {
    5566           0 :         struct amdgpu_device *adev = drm_to_adev(dev);
    5567           0 :         struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
    5568           0 :         int ret = 0;
    5569             : 
    5570           0 :         if (!amdgpu_device_supports_baco(adev_to_drm(adev)))
    5571             :                 return -ENOTSUPP;
    5572             : 
    5573           0 :         ret = amdgpu_dpm_baco_exit(adev);
    5574           0 :         if (ret)
    5575             :                 return ret;
    5576             : 
    5577           0 :         if (ras && adev->ras_enabled &&
    5578           0 :             adev->nbio.funcs->enable_doorbell_interrupt)
    5579           0 :                 adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
    5580             : 
    5581           0 :         if (amdgpu_passthrough(adev) &&
    5582           0 :             adev->nbio.funcs->clear_doorbell_interrupt)
    5583           0 :                 adev->nbio.funcs->clear_doorbell_interrupt(adev);
    5584             : 
    5585             :         return 0;
    5586             : }
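                     :
                     : /*
                     :  * Illustrative sketch, not part of the driver: BACO (Bus Active, Chip
                     :  * Off) entry and exit are meant to be a matched pair around a low-power
                     :  * window, e.g. from a runtime-PM path. This wrapper is hypothetical;
                     :  * the enter/exit calls are the real functions defined above.
                     :  */
                     : static int example_baco_cycle(struct drm_device *dev)
                     : {
                     :         int r;
                     :
                     :         r = amdgpu_device_baco_enter(dev);      /* -ENOTSUPP if unsupported */
                     :         if (r)
                     :                 return r;
                     :
                     :         /* ... device sits in BACO: bus active, chip powered off ... */
                     :
                     :         return amdgpu_device_baco_exit(dev);
                     : }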
    5587             : 
    5588             : /**
    5589             :  * amdgpu_pci_error_detected - Called when a PCI error is detected.
    5590             :  * @pdev: PCI device struct
    5591             :  * @state: PCI channel state
    5592             :  *
    5593             :  * Description: Called when a PCI error is detected.
    5594             :  *
    5595             :  * Return: PCI_ERS_RESULT_NEED_RESET or PCI_ERS_RESULT_DISCONNECT.
    5596             :  */
    5597           0 : pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
    5598             : {
    5599           0 :         struct drm_device *dev = pci_get_drvdata(pdev);
    5600           0 :         struct amdgpu_device *adev = drm_to_adev(dev);
    5601             :         int i;
    5602             : 
    5603           0 :         DRM_INFO("PCI error: detected callback, state(%d)!!\n", state);
    5604             : 
    5605           0 :         if (adev->gmc.xgmi.num_physical_nodes > 1) {
    5606           0 :                 DRM_WARN("No support for XGMI hive yet...");
    5607           0 :                 return PCI_ERS_RESULT_DISCONNECT;
    5608             :         }
    5609             : 
    5610           0 :         adev->pci_channel_state = state;
    5611             : 
    5612           0 :         switch (state) {
    5613             :         case pci_channel_io_normal:
    5614             :                 return PCI_ERS_RESULT_CAN_RECOVER;
    5615             :         /* Fatal error, prepare for slot reset */
    5616             :         case pci_channel_io_frozen:
    5617             :                 /*
    5618             :                  * Locking adev->reset_domain->sem will prevent any external access
    5619             :                  * to GPU during PCI error recovery
    5620             :                  */
    5621           0 :                 amdgpu_device_lock_reset_domain(adev->reset_domain);
    5622             :                 amdgpu_device_set_mp1_state(adev);
    5623             : 
    5624             :                 /*
    5625             :                  * Block any work scheduling as we do for regular GPU reset
    5626             :                  * for the duration of the recovery
    5627             :                  */
    5628           0 :                 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
    5629           0 :                         struct amdgpu_ring *ring = adev->rings[i];
    5630             : 
    5631           0 :                         if (!ring || !ring->sched.thread)
    5632           0 :                                 continue;
    5633             : 
    5634           0 :                         drm_sched_stop(&ring->sched, NULL);
    5635             :                 }
    5636           0 :                 atomic_inc(&adev->gpu_reset_counter);
    5637           0 :                 return PCI_ERS_RESULT_NEED_RESET;
    5638             :         case pci_channel_io_perm_failure:
    5639             :                 /* Permanent error, prepare for device removal */
    5640           0 :                 return PCI_ERS_RESULT_DISCONNECT;
    5641             :         }
    5642             : 
    5643           0 :         return PCI_ERS_RESULT_NEED_RESET;
    5644             : }
    5645             : 
    5646             : /**
    5647             :  * amdgpu_pci_mmio_enabled - Enable MMIO and dump debug registers
    5648             :  * @pdev: pointer to PCI device
    5649             :  */
    5650           0 : pci_ers_result_t amdgpu_pci_mmio_enabled(struct pci_dev *pdev)
    5651             : {
    5652             : 
    5653           0 :         DRM_INFO("PCI error: mmio enabled callback!!\n");
    5654             : 
    5655             :         /* TODO - dump whatever for debugging purposes */
    5656             : 
    5657             :         /* This is called only if amdgpu_pci_error_detected returns
    5658             :          * PCI_ERS_RESULT_CAN_RECOVER. Read/write to the device still
    5659             :          * works, no need to reset slot.
    5660             :          */
    5661             : 
    5662           0 :         return PCI_ERS_RESULT_RECOVERED;
    5663             : }
    5664             : 
    5665             : /**
    5666             :  * amdgpu_pci_slot_reset - Called when PCI slot has been reset.
    5667             :  * @pdev: PCI device struct
    5668             :  *
    5669             :  * Description: This routine is called by the pci error recovery
    5670             :  * code after the PCI slot has been reset, just before we
    5671             :  * should resume normal operations.
    5672             :  */
    5673           0 : pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev)
    5674             : {
    5675           0 :         struct drm_device *dev = pci_get_drvdata(pdev);
    5676           0 :         struct amdgpu_device *adev = drm_to_adev(dev);
    5677             :         int r, i;
    5678             :         struct amdgpu_reset_context reset_context;
    5679             :         u32 memsize;
    5680             :         struct list_head device_list;
    5681             : 
    5682           0 :         DRM_INFO("PCI error: slot reset callback!!\n");
    5683             : 
    5684           0 :         memset(&reset_context, 0, sizeof(reset_context));
    5685             : 
    5686           0 :         INIT_LIST_HEAD(&device_list);
    5687           0 :         list_add_tail(&adev->reset_list, &device_list);
    5688             : 
    5689             :         /* wait for asic to come out of reset */
    5690           0 :         msleep(500);
    5691             : 
    5692             :         /* Restore PCI confspace */
    5693             :         /* Restore PCI config space */
    5694             : 
    5695             :         /* confirm ASIC came out of reset */
    5696           0 :         for (i = 0; i < adev->usec_timeout; i++) {
    5697           0 :                 memsize = amdgpu_asic_get_config_memsize(adev);
    5698             : 
    5699           0 :                 if (memsize != 0xffffffff)
    5700             :                         break;
    5701           0 :                 udelay(1);
    5702             :         }
    5703           0 :         if (memsize == 0xffffffff) {
    5704             :                 r = -ETIME;
    5705             :                 goto out;
    5706             :         }
    5707             : 
    5708           0 :         reset_context.method = AMD_RESET_METHOD_NONE;
    5709           0 :         reset_context.reset_req_dev = adev;
    5710           0 :         set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
    5711           0 :         set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags);
    5712           0 :         set_bit(AMDGPU_SKIP_MODE2_RESET, &reset_context.flags);
    5713             : 
    5714           0 :         adev->no_hw_access = true;
    5715           0 :         r = amdgpu_device_pre_asic_reset(adev, &reset_context);
    5716           0 :         adev->no_hw_access = false;
    5717           0 :         if (r)
    5718             :                 goto out;
    5719             : 
    5720           0 :         r = amdgpu_do_asic_reset(&device_list, &reset_context);
    5721             : 
    5722             : out:
    5723           0 :         if (!r) {
    5724           0 :                 if (amdgpu_device_cache_pci_state(adev->pdev))
    5725           0 :                         pci_restore_state(adev->pdev);
    5726             : 
    5727           0 :                 DRM_INFO("PCIe error recovery succeeded\n");
    5728             :         } else {
    5729           0 :                 DRM_ERROR("PCIe error recovery failed, err:%d", r);
    5730           0 :                 amdgpu_device_unset_mp1_state(adev);
    5731           0 :                 amdgpu_device_unlock_reset_domain(adev->reset_domain);
    5732             :         }
    5733             : 
    5734           0 :         return r ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
    5735             : }
    5736             : 
    5737             : /**
    5738             :  * amdgpu_pci_resume() - resume normal ops after PCI reset
    5739             :  * @pdev: pointer to PCI device
    5740             :  *
    5741             :  * Called when the error recovery driver tells us that it's
    5742             :  * OK to resume normal operation.
    5743             :  */
    5744           0 : void amdgpu_pci_resume(struct pci_dev *pdev)
    5745             : {
    5746           0 :         struct drm_device *dev = pci_get_drvdata(pdev);
    5747           0 :         struct amdgpu_device *adev = drm_to_adev(dev);
    5748             :         int i;
    5749             : 
    5750             : 
    5751           0 :         DRM_INFO("PCI error: resume callback!!\n");
    5752             : 
    5753             :         /* Only continue execution for the case of pci_channel_io_frozen */
    5754           0 :         if (adev->pci_channel_state != pci_channel_io_frozen)
    5755             :                 return;
    5756             : 
    5757           0 :         for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
    5758           0 :                 struct amdgpu_ring *ring = adev->rings[i];
    5759             : 
    5760           0 :                 if (!ring || !ring->sched.thread)
    5761           0 :                         continue;
    5762             : 
    5763             : 
    5764           0 :                 drm_sched_resubmit_jobs(&ring->sched);
    5765           0 :                 drm_sched_start(&ring->sched, true);
    5766             :         }
    5767             : 
    5768           0 :         amdgpu_device_unset_mp1_state(adev);
    5769           0 :         amdgpu_device_unlock_reset_domain(adev->reset_domain);
    5770             : }
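                     :
                     : /*
                     :  * Illustrative sketch, not part of the driver: the four PCI-error
                     :  * callbacks above are wired into the PCI core via a struct
                     :  * pci_error_handlers that the driver's struct pci_driver points at.
                     :  * The actual table lives in amdgpu_drv.c; this copy only shows the
                     :  * expected wiring.
                     :  */
                     : static const struct pci_error_handlers example_amdgpu_pci_err_handler = {
                     :         .error_detected = amdgpu_pci_error_detected, /* triage channel state */
                     :         .mmio_enabled   = amdgpu_pci_mmio_enabled,   /* CAN_RECOVER path */
                     :         .slot_reset     = amdgpu_pci_slot_reset,     /* re-init after slot reset */
                     :         .resume         = amdgpu_pci_resume,         /* restart the schedulers */
                     : };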
    5771             : 
    5772           0 : bool amdgpu_device_cache_pci_state(struct pci_dev *pdev)
    5773             : {
    5774           0 :         struct drm_device *dev = pci_get_drvdata(pdev);
    5775           0 :         struct amdgpu_device *adev = drm_to_adev(dev);
    5776             :         int r;
    5777             : 
    5778           0 :         r = pci_save_state(pdev);
    5779           0 :         if (!r) {
    5780           0 :                 kfree(adev->pci_state);
    5781             : 
    5782           0 :                 adev->pci_state = pci_store_saved_state(pdev);
    5783             : 
    5784           0 :                 if (!adev->pci_state) {
    5785           0 :                         DRM_ERROR("Failed to store PCI saved state\n");
    5786           0 :                         return false;
    5787             :                 }
    5788             :         } else {
    5789           0 :                 DRM_WARN("Failed to save PCI state, err:%d\n", r);
    5790           0 :                 return false;
    5791             :         }
    5792             : 
    5793             :         return true;
    5794             : }
    5795             : 
    5796           0 : bool amdgpu_device_load_pci_state(struct pci_dev *pdev)
    5797             : {
    5798           0 :         struct drm_device *dev = pci_get_drvdata(pdev);
    5799           0 :         struct amdgpu_device *adev = drm_to_adev(dev);
    5800             :         int r;
    5801             : 
    5802           0 :         if (!adev->pci_state)
    5803             :                 return false;
    5804             : 
    5805           0 :         r = pci_load_saved_state(pdev, adev->pci_state);
    5806             : 
    5807           0 :         if (!r) {
    5808           0 :                 pci_restore_state(pdev);
    5809             :         } else {
    5810           0 :                 DRM_WARN("Failed to load PCI state, err:%d\n", r);
    5811           0 :                 return false;
    5812             :         }
    5813             : 
    5814           0 :         return true;
    5815             : }
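
Taken together, the two helpers above implement a cache/restore round trip over
the PCI core's save/load API: pci_save_state() snapshots config space into the
device, pci_store_saved_state() detaches a kmalloc'd copy that survives resets,
and pci_load_saved_state() plus pci_restore_state() replay it. A minimal usage
sketch around a reset; do_disruptive_reset() is hypothetical:

    /* Hedged sketch: cache config space before a disruptive reset and
     * replay it afterwards. do_disruptive_reset() is not a real helper. */
    static int example_reset_with_pci_state(struct amdgpu_device *adev)
    {
            int r;

            if (!amdgpu_device_cache_pci_state(adev->pdev))
                    return -ENOMEM; /* nothing cached; don't reset blind */

            r = do_disruptive_reset(adev);          /* hypothetical */

            /* Replay the cached config space (BARs, MSI setup, ...). */
            if (!amdgpu_device_load_pci_state(adev->pdev))
                    DRM_WARN("config space not restored after reset\n");

            return r;
    }
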
    5816             : 
    5817           0 : void amdgpu_device_flush_hdp(struct amdgpu_device *adev,
    5818             :                 struct amdgpu_ring *ring)
    5819             : {
    5820             : #ifdef CONFIG_X86_64
    5821           0 :         if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
    5822             :                 return;
    5823             : #endif
    5824           0 :         if (adev->gmc.xgmi.connected_to_cpu)
    5825             :                 return;
    5826             : 
    5827           0 :         if (ring && ring->funcs->emit_hdp_flush)
    5828           0 :                 amdgpu_ring_emit_hdp_flush(ring);
    5829             :         else
    5830           0 :                 amdgpu_asic_flush_hdp(adev, ring);
    5831             : }
    5832             : 
    5833           0 : void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev,
    5834             :                 struct amdgpu_ring *ring)
    5835             : {
    5836             : #ifdef CONFIG_X86_64
    5837           0 :         if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
    5838             :                 return;
    5839             : #endif
    5840           0 :         if (adev->gmc.xgmi.connected_to_cpu)
    5841             :                 return;
    5842             : 
    5843           0 :         amdgpu_asic_invalidate_hdp(adev, ring);
    5844             : }
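
As the early-return checks above show, the HDP (Host Data Path) block only
matters when the CPU reaches VRAM through the PCIe BAR; coherent paths (APUs
outside passthrough, XGMI links to the CPU) skip it. Otherwise a flush
publishes CPU writes to the GPU and an invalidate drops stale data before CPU
reads. A hedged usage sketch; the VRAM pointer and helper names are
illustrative:

    /* Hedged sketch: hand a CPU-written value in VRAM over to the GPU.
     * vram_cpu_addr is a hypothetical ioremapped VRAM location. */
    static void example_publish_to_gpu(struct amdgpu_device *adev,
                                       u32 __iomem *vram_cpu_addr, u32 value)
    {
            writel(value, vram_cpu_addr);           /* CPU store via the BAR */
            amdgpu_device_flush_hdp(adev, NULL);    /* NULL ring: MMIO flush */
    }

    /* And the mirror direction, before the CPU reads GPU-written data. */
    static u32 example_read_from_gpu(struct amdgpu_device *adev,
                                     u32 __iomem *vram_cpu_addr)
    {
            amdgpu_device_invalidate_hdp(adev, NULL);
            return readl(vram_cpu_addr);
    }
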
    5845             : 
    5846           0 : int amdgpu_in_reset(struct amdgpu_device *adev)
    5847             : {
    5848           0 :         return atomic_read(&adev->reset_domain->in_gpu_reset);
    5849             : }
    5850             : 
    5851             : /**
    5852             :  * amdgpu_device_halt() - bring hardware to some kind of halt state
    5853             :  *
    5854             :  * @adev: amdgpu_device pointer
    5855             :  *
    5856             :  * Bring the hardware to some kind of halt state so that no one can
    5857             :  * touch it any more. This helps to preserve the error context when an
    5858             :  * error occurs. Compared to a simple hang, the system stays stable at
    5859             :  * least for SSH access, so it should be trivial to inspect the hardware
    5860             :  * state and see what's going on. Implemented as follows:
    5861             :  *
    5862             :  * 1. drm_dev_unplug() makes the device inaccessible to user space (IOCTLs,
    5863             :  *    etc.), clears all CPU mappings, and disallows remappings via page faults
    5864             :  * 2. amdgpu_irq_disable_all() disables all interrupts
    5865             :  * 3. amdgpu_fence_driver_hw_fini() signals all HW fences
    5866             :  * 4. set adev->no_hw_access to avoid potential crashes after step 5
    5867             :  * 5. amdgpu_device_unmap_mmio() clears all MMIO mappings
    5868             :  * 6. pci_disable_device() and pci_wait_for_pending_transaction()
    5869             :  *    flush any in-flight DMA operations
    5870             :  */
    5871           0 : void amdgpu_device_halt(struct amdgpu_device *adev)
    5872             : {
    5873           0 :         struct pci_dev *pdev = adev->pdev;
    5874           0 :         struct drm_device *ddev = adev_to_drm(adev);
    5875             : 
    5876           0 :         drm_dev_unplug(ddev);
    5877             : 
    5878           0 :         amdgpu_irq_disable_all(adev);
    5879             : 
    5880           0 :         amdgpu_fence_driver_hw_fini(adev);
    5881             : 
    5882           0 :         adev->no_hw_access = true;
    5883             : 
    5884           0 :         amdgpu_device_unmap_mmio(adev);
    5885             : 
    5886           0 :         pci_disable_device(pdev);
    5887           0 :         pci_wait_for_pending_transaction(pdev);
    5888           0 : }
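
A hedged sketch of the intended use: once a fatal, unrecoverable condition is
detected, halt the GPU in place rather than letting it wedge the machine, so
the error context stays inspectable over SSH. The call site and the
detect_fatal_hw_error() check below are illustrative, not real amdgpu code:

    /* Hypothetical fatal-error path; detect_fatal_hw_error() is
     * illustrative, not a real amdgpu helper. */
    static int example_handle_fatal_error(struct amdgpu_device *adev)
    {
            if (!detect_fatal_hw_error(adev))       /* hypothetical check */
                    return 0;

            dev_err(adev->dev,
                    "unrecoverable hardware state, halting device\n");
            amdgpu_device_halt(adev);

            /* The device is gone from user space at this point. */
            return -ENODEV;
    }
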
    5889             : 
    5890           0 : u32 amdgpu_device_pcie_port_rreg(struct amdgpu_device *adev,
    5891             :                                 u32 reg)
    5892             : {
    5893             :         unsigned long flags, address, data;
    5894             :         u32 r;
    5895             : 
    5896           0 :         address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
    5897           0 :         data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
    5898             : 
    5899           0 :         spin_lock_irqsave(&adev->pcie_idx_lock, flags);
    5900           0 :         WREG32(address, reg * 4);
    5901           0 :         (void)RREG32(address);
    5902           0 :         r = RREG32(data);
    5903           0 :         spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
    5904           0 :         return r;
    5905             : }
    5906             : 
    5907           0 : void amdgpu_device_pcie_port_wreg(struct amdgpu_device *adev,
    5908             :                                 u32 reg, u32 v)
    5909             : {
    5910             :         unsigned long flags, address, data;
    5911             : 
    5912           0 :         address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
    5913           0 :         data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
    5914             : 
    5915           0 :         spin_lock_irqsave(&adev->pcie_idx_lock, flags);
    5916           0 :         WREG32(address, reg * 4);
    5917           0 :         (void)RREG32(address);
    5918           0 :         WREG32(data, v);
    5919           0 :         (void)RREG32(data);
    5920           0 :         spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
    5921           0 : }
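
Both accessors implement the classic index/data pair: write the dword register
index to the NBIO port's index offset (hence reg * 4), read it back to post the
write, then access the data offset, all under pcie_idx_lock because the pair is
a single shared window. A hedged read-modify-write helper built on top of them;
the helper name and reg/mask parameters are illustrative:

    /* Hedged sketch: RMW on a PCIe port register via the two accessors
     * above. Each accessor serializes its own index/data pair; a real
     * helper might hold a lock across both calls to make the RMW atomic
     * with respect to other writers. */
    static void example_pcie_port_rmw(struct amdgpu_device *adev,
                                      u32 reg, u32 mask, u32 bits)
    {
            u32 v = amdgpu_device_pcie_port_rreg(adev, reg);

            v &= ~mask;
            v |= (bits & mask);
            amdgpu_device_pcie_port_wreg(adev, reg, v);
    }
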

Generated by: LCOV version 1.14