LCOV - code coverage report
Current view: top level - drivers/gpu/drm/amd/amdgpu - gfx_v11_0.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 0 3223 0.0 %
Date: 2022-12-09 01:23:36 Functions: 0 167 0.0 %

          Line data    Source code
       1             : /*
       2             :  * Copyright 2021 Advanced Micro Devices, Inc.
       3             :  *
       4             :  * Permission is hereby granted, free of charge, to any person obtaining a
       5             :  * copy of this software and associated documentation files (the "Software"),
       6             :  * to deal in the Software without restriction, including without limitation
       7             :  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
       8             :  * and/or sell copies of the Software, and to permit persons to whom the
       9             :  * Software is furnished to do so, subject to the following conditions:
      10             :  *
      11             :  * The above copyright notice and this permission notice shall be included in
      12             :  * all copies or substantial portions of the Software.
      13             :  *
      14             :  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
      15             :  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
      16             :  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
      17             :  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
      18             :  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
      19             :  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
      20             :  * OTHER DEALINGS IN THE SOFTWARE.
      21             :  *
      22             :  */
      23             : #include <linux/delay.h>
      24             : #include <linux/kernel.h>
      25             : #include <linux/firmware.h>
      26             : #include <linux/module.h>
      27             : #include <linux/pci.h>
      28             : #include "amdgpu.h"
      29             : #include "amdgpu_gfx.h"
      30             : #include "amdgpu_psp.h"
      31             : #include "amdgpu_smu.h"
      32             : #include "amdgpu_atomfirmware.h"
      33             : #include "imu_v11_0.h"
      34             : #include "soc21.h"
      35             : #include "nvd.h"
      36             : 
      37             : #include "gc/gc_11_0_0_offset.h"
      38             : #include "gc/gc_11_0_0_sh_mask.h"
      39             : #include "smuio/smuio_13_0_6_offset.h"
      40             : #include "smuio/smuio_13_0_6_sh_mask.h"
      41             : #include "navi10_enum.h"
      42             : #include "ivsrcid/gfx/irqsrcs_gfx_11_0_0.h"
      43             : 
      44             : #include "soc15.h"
      45             : #include "soc15d.h"
      46             : #include "clearstate_gfx11.h"
      47             : #include "v11_structs.h"
      48             : #include "gfx_v11_0.h"
      49             : #include "nbio_v4_3.h"
      50             : #include "mes_v11_0.h"
      51             : 
      52             : #define GFX11_NUM_GFX_RINGS             1
      53             : #define GFX11_MEC_HPD_SIZE      2048
      54             : 
      55             : #define RLCG_UCODE_LOADING_START_ADDRESS        0x00002000L
      56             : #define RLC_PG_DELAY_3_DEFAULT_GC_11_0_1        0x1388
      57             : 
      58             : #define regCGTT_WD_CLK_CTRL             0x5086
      59             : #define regCGTT_WD_CLK_CTRL_BASE_IDX    1
      60             : #define regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1   0x4e7e
      61             : #define regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1_BASE_IDX  1
      62             : 
      63             : MODULE_FIRMWARE("amdgpu/gc_11_0_0_pfp.bin");
      64             : MODULE_FIRMWARE("amdgpu/gc_11_0_0_me.bin");
      65             : MODULE_FIRMWARE("amdgpu/gc_11_0_0_mec.bin");
      66             : MODULE_FIRMWARE("amdgpu/gc_11_0_0_rlc.bin");
      67             : MODULE_FIRMWARE("amdgpu/gc_11_0_0_toc.bin");
      68             : MODULE_FIRMWARE("amdgpu/gc_11_0_1_pfp.bin");
      69             : MODULE_FIRMWARE("amdgpu/gc_11_0_1_me.bin");
      70             : MODULE_FIRMWARE("amdgpu/gc_11_0_1_mec.bin");
      71             : MODULE_FIRMWARE("amdgpu/gc_11_0_1_rlc.bin");
      72             : MODULE_FIRMWARE("amdgpu/gc_11_0_2_pfp.bin");
      73             : MODULE_FIRMWARE("amdgpu/gc_11_0_2_me.bin");
      74             : MODULE_FIRMWARE("amdgpu/gc_11_0_2_mec.bin");
      75             : MODULE_FIRMWARE("amdgpu/gc_11_0_2_rlc.bin");
      76             : MODULE_FIRMWARE("amdgpu/gc_11_0_3_pfp.bin");
      77             : MODULE_FIRMWARE("amdgpu/gc_11_0_3_me.bin");
      78             : MODULE_FIRMWARE("amdgpu/gc_11_0_3_mec.bin");
      79             : MODULE_FIRMWARE("amdgpu/gc_11_0_3_rlc.bin");
      80             : 
/*
 * Golden register overrides for GC 11.0.1, applied at init by
 * gfx_v11_0_init_golden_registers() through
 * soc15_program_register_sequence().  Each SOC15_REG_GOLDEN_VALUE entry
 * names a GC register plus two values (presumably an AND mask and OR
 * value for a read-modify-write -- confirm against the macro definition).
 */
static const struct soc15_reg_golden golden_settings_gc_11_0_1[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, regCGTT_GS_NGG_CLK_CTRL, 0x9fff8fff, 0x00000010),
	SOC15_REG_GOLDEN_VALUE(GC, 0, regCGTT_WD_CLK_CTRL, 0xffff8fff, 0x00000010),
	SOC15_REG_GOLDEN_VALUE(GC, 0, regCPF_GCR_CNTL, 0x0007ffff, 0x0000c200),
	SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_CTRL3, 0xffff001b, 0x00f01988),
	SOC15_REG_GOLDEN_VALUE(GC, 0, regPA_CL_ENHANCE, 0xf0ffffff, 0x00880007),
	SOC15_REG_GOLDEN_VALUE(GC, 0, regPA_SC_ENHANCE_3, 0xfffffffd, 0x00000008),
	SOC15_REG_GOLDEN_VALUE(GC, 0, regPA_SC_VRS_SURFACE_CNTL_1, 0xfff891ff, 0x55480100),
	SOC15_REG_GOLDEN_VALUE(GC, 0, regTA_CNTL_AUX, 0xf7f7ffff, 0x01030000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, regTCP_CNTL2, 0xfcffffff, 0x0000000a)
};
      93             : 
      94             : #define DEFAULT_SH_MEM_CONFIG \
      95             :         ((SH_MEM_ADDRESS_MODE_64 << SH_MEM_CONFIG__ADDRESS_MODE__SHIFT) | \
      96             :          (SH_MEM_ALIGNMENT_MODE_UNALIGNED << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | \
      97             :          (3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT))
      98             : 
      99             : static void gfx_v11_0_disable_gpa_mode(struct amdgpu_device *adev);
     100             : static void gfx_v11_0_set_ring_funcs(struct amdgpu_device *adev);
     101             : static void gfx_v11_0_set_irq_funcs(struct amdgpu_device *adev);
     102             : static void gfx_v11_0_set_gds_init(struct amdgpu_device *adev);
     103             : static void gfx_v11_0_set_rlc_funcs(struct amdgpu_device *adev);
     104             : static void gfx_v11_0_set_mqd_funcs(struct amdgpu_device *adev);
     105             : static void gfx_v11_0_set_imu_funcs(struct amdgpu_device *adev);
     106             : static int gfx_v11_0_get_cu_info(struct amdgpu_device *adev,
     107             :                                  struct amdgpu_cu_info *cu_info);
     108             : static uint64_t gfx_v11_0_get_gpu_clock_counter(struct amdgpu_device *adev);
     109             : static void gfx_v11_0_select_se_sh(struct amdgpu_device *adev, u32 se_num,
     110             :                                    u32 sh_num, u32 instance);
     111             : static u32 gfx_v11_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev);
     112             : 
     113             : static void gfx_v11_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume);
     114             : static void gfx_v11_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start, bool secure);
     115             : static void gfx_v11_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
     116             :                                      uint32_t val);
     117             : static int gfx_v11_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev);
     118             : static void gfx_v11_0_ring_invalidate_tlbs(struct amdgpu_ring *ring,
     119             :                                            uint16_t pasid, uint32_t flush_type,
     120             :                                            bool all_hub, uint8_t dst_sel);
     121             : static void gfx_v11_0_set_safe_mode(struct amdgpu_device *adev);
     122             : static void gfx_v11_0_unset_safe_mode(struct amdgpu_device *adev);
     123             : static void gfx_v11_0_update_perf_clk(struct amdgpu_device *adev,
     124             :                                       bool enable);
     125             : 
     126           0 : static void gfx11_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask)
     127             : {
     128           0 :         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
     129           0 :         amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) |
     130             :                           PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); /* vmid_mask:0 queue_type:0 (KIQ) */
     131           0 :         amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */
     132           0 :         amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */
     133           0 :         amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
     134           0 :         amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
     135           0 :         amdgpu_ring_write(kiq_ring, 0); /* oac mask */
     136           0 :         amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
     137           0 : }
     138             : 
     139           0 : static void gfx11_kiq_map_queues(struct amdgpu_ring *kiq_ring,
     140             :                                  struct amdgpu_ring *ring)
     141             : {
     142           0 :         uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
     143           0 :         uint64_t wptr_addr = ring->wptr_gpu_addr;
     144           0 :         uint32_t me = 0, eng_sel = 0;
     145             : 
     146           0 :         switch (ring->funcs->type) {
     147             :         case AMDGPU_RING_TYPE_COMPUTE:
     148             :                 me = 1;
     149             :                 eng_sel = 0;
     150             :                 break;
     151             :         case AMDGPU_RING_TYPE_GFX:
     152           0 :                 me = 0;
     153           0 :                 eng_sel = 4;
     154           0 :                 break;
     155             :         case AMDGPU_RING_TYPE_MES:
     156           0 :                 me = 2;
     157           0 :                 eng_sel = 5;
     158           0 :                 break;
     159             :         default:
     160           0 :                 WARN_ON(1);
     161             :         }
     162             : 
     163           0 :         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
     164             :         /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
     165           0 :         amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
     166             :                           PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
     167           0 :                           PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
     168           0 :                           PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
     169           0 :                           PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
     170           0 :                           PACKET3_MAP_QUEUES_ME((me)) |
     171           0 :                           PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
     172           0 :                           PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
     173           0 :                           PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
     174             :                           PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
     175           0 :         amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
     176           0 :         amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
     177           0 :         amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
     178           0 :         amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
     179           0 :         amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
     180           0 : }
     181             : 
     182           0 : static void gfx11_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
     183             :                                    struct amdgpu_ring *ring,
     184             :                                    enum amdgpu_unmap_queues_action action,
     185             :                                    u64 gpu_addr, u64 seq)
     186             : {
     187           0 :         struct amdgpu_device *adev = kiq_ring->adev;
     188           0 :         uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
     189             : 
     190           0 :         if (adev->enable_mes && !adev->gfx.kiq.ring.sched.ready) {
     191           0 :                 amdgpu_mes_unmap_legacy_queue(adev, ring, action, gpu_addr, seq);
     192           0 :                 return;
     193             :         }
     194             : 
     195           0 :         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
     196           0 :         amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
     197             :                           PACKET3_UNMAP_QUEUES_ACTION(action) |
     198           0 :                           PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
     199           0 :                           PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
     200             :                           PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
     201           0 :         amdgpu_ring_write(kiq_ring,
     202           0 :                   PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
     203             : 
     204           0 :         if (action == PREEMPT_QUEUES_NO_UNMAP) {
     205           0 :                 amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
     206           0 :                 amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
     207           0 :                 amdgpu_ring_write(kiq_ring, seq);
     208             :         } else {
     209           0 :                 amdgpu_ring_write(kiq_ring, 0);
     210           0 :                 amdgpu_ring_write(kiq_ring, 0);
     211           0 :                 amdgpu_ring_write(kiq_ring, 0);
     212             :         }
     213             : }
     214             : 
     215           0 : static void gfx11_kiq_query_status(struct amdgpu_ring *kiq_ring,
     216             :                                    struct amdgpu_ring *ring,
     217             :                                    u64 addr,
     218             :                                    u64 seq)
     219             : {
     220           0 :         uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
     221             : 
     222           0 :         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
     223           0 :         amdgpu_ring_write(kiq_ring,
     224             :                           PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
     225             :                           PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
     226             :                           PACKET3_QUERY_STATUS_COMMAND(2));
     227           0 :         amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
     228           0 :                           PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
     229           0 :                           PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
     230           0 :         amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
     231           0 :         amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
     232           0 :         amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
     233           0 :         amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
     234           0 : }
     235             : 
/*
 * KIQ hook for TLB invalidation: forward to the generic GFX11 ring
 * helper with dst_sel fixed to 1 (meaning of the selector value not
 * visible here -- see gfx_v11_0_ring_invalidate_tlbs()).
 */
static void gfx11_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
				uint16_t pasid, uint32_t flush_type,
				bool all_hub)
{
	gfx_v11_0_ring_invalidate_tlbs(kiq_ring, pasid, flush_type, all_hub, 1);
}
     242             : 
/*
 * KIQ PM4 packet builders for GFX11.  The *_size fields give the number
 * of ring dwords each builder emits (they match the amdgpu_ring_write()
 * counts in the helpers above; invalidate_tlbs_size cannot be verified
 * here since gfx_v11_0_ring_invalidate_tlbs() is defined elsewhere).
 */
static const struct kiq_pm4_funcs gfx_v11_0_kiq_pm4_funcs = {
	.kiq_set_resources = gfx11_kiq_set_resources,
	.kiq_map_queues = gfx11_kiq_map_queues,
	.kiq_unmap_queues = gfx11_kiq_unmap_queues,
	.kiq_query_status = gfx11_kiq_query_status,
	.kiq_invalidate_tlbs = gfx11_kiq_invalidate_tlbs,
	.set_resources_size = 8,
	.map_queues_size = 7,
	.unmap_queues_size = 6,
	.query_status_size = 7,
	.invalidate_tlbs_size = 2,
};
     255             : 
/* Install the GFX11 KIQ PM4 packet-builder table on the device. */
static void gfx_v11_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
{
	adev->gfx.kiq.pmf = &gfx_v11_0_kiq_pm4_funcs;
}
     260             : 
     261             : static void gfx_v11_0_init_golden_registers(struct amdgpu_device *adev)
     262             : {
     263           0 :         switch (adev->ip_versions[GC_HWIP][0]) {
     264             :         case IP_VERSION(11, 0, 1):
     265           0 :                 soc15_program_register_sequence(adev,
     266             :                                                 golden_settings_gc_11_0_1,
     267             :                                                 (const u32)ARRAY_SIZE(golden_settings_gc_11_0_1));
     268             :                 break;
     269             :         default:
     270             :                 break;
     271             :         }
     272             : }
     273             : 
     274           0 : static void gfx_v11_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
     275             :                                        bool wc, uint32_t reg, uint32_t val)
     276             : {
     277           0 :         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
     278           0 :         amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
     279           0 :                           WRITE_DATA_DST_SEL(0) | (wc ? WR_CONFIRM : 0));
     280           0 :         amdgpu_ring_write(ring, reg);
     281           0 :         amdgpu_ring_write(ring, 0);
     282           0 :         amdgpu_ring_write(ring, val);
     283           0 : }
     284             : 
     285           0 : static void gfx_v11_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
     286             :                                   int mem_space, int opt, uint32_t addr0,
     287             :                                   uint32_t addr1, uint32_t ref, uint32_t mask,
     288             :                                   uint32_t inv)
     289             : {
     290           0 :         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
     291           0 :         amdgpu_ring_write(ring,
     292             :                           /* memory (1) or register (0) */
     293           0 :                           (WAIT_REG_MEM_MEM_SPACE(mem_space) |
     294           0 :                            WAIT_REG_MEM_OPERATION(opt) | /* wait */
     295           0 :                            WAIT_REG_MEM_FUNCTION(3) |  /* equal */
     296           0 :                            WAIT_REG_MEM_ENGINE(eng_sel)));
     297             : 
     298           0 :         if (mem_space)
     299           0 :                 BUG_ON(addr0 & 0x3); /* Dword align */
     300           0 :         amdgpu_ring_write(ring, addr0);
     301           0 :         amdgpu_ring_write(ring, addr1);
     302           0 :         amdgpu_ring_write(ring, ref);
     303           0 :         amdgpu_ring_write(ring, mask);
     304           0 :         amdgpu_ring_write(ring, inv); /* poll interval */
     305           0 : }
     306             : 
     307           0 : static int gfx_v11_0_ring_test_ring(struct amdgpu_ring *ring)
     308             : {
     309           0 :         struct amdgpu_device *adev = ring->adev;
     310           0 :         uint32_t scratch = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0);
     311           0 :         uint32_t tmp = 0;
     312             :         unsigned i;
     313             :         int r;
     314             : 
     315           0 :         WREG32(scratch, 0xCAFEDEAD);
     316           0 :         r = amdgpu_ring_alloc(ring, 5);
     317           0 :         if (r) {
     318           0 :                 DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
     319             :                           ring->idx, r);
     320           0 :                 return r;
     321             :         }
     322             : 
     323           0 :         if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ) {
     324           0 :                 gfx_v11_0_ring_emit_wreg(ring, scratch, 0xDEADBEEF);
     325             :         } else {
     326           0 :                 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
     327           0 :                 amdgpu_ring_write(ring, scratch -
     328             :                                   PACKET3_SET_UCONFIG_REG_START);
     329           0 :                 amdgpu_ring_write(ring, 0xDEADBEEF);
     330             :         }
     331           0 :         amdgpu_ring_commit(ring);
     332             : 
     333           0 :         for (i = 0; i < adev->usec_timeout; i++) {
     334           0 :                 tmp = RREG32(scratch);
     335           0 :                 if (tmp == 0xDEADBEEF)
     336             :                         break;
     337           0 :                 if (amdgpu_emu_mode == 1)
     338           0 :                         msleep(1);
     339             :                 else
     340             :                         udelay(1);
     341             :         }
     342             : 
     343           0 :         if (i >= adev->usec_timeout)
     344           0 :                 r = -ETIMEDOUT;
     345             :         return r;
     346             : }
     347             : 
/*
 * Indirect-buffer smoke test: build a 5-dword IB whose WRITE_DATA packet
 * stores 0xDEADBEEF to a scratch location pre-seeded with 0xCAFEDEAD,
 * schedule it on @ring, wait (up to @timeout) for its fence, and verify
 * the value arrived.  Returns 0 on success, -ETIMEDOUT on fence timeout,
 * -EINVAL if the write never landed, or a negative error from setup.
 */
static int gfx_v11_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	unsigned index;
	uint64_t gpu_addr;
	volatile uint32_t *cpu_ptr;
	long r;

	/* MES KIQ fw hasn't indirect buffer support for now */
	if (adev->enable_mes_kiq &&
	    ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
		return 0;

	memset(&ib, 0, sizeof(ib));

	if (ring->is_mes_queue) {
		/* MES queues carve the IB and scratch slot out of the MES
		 * context buffer instead of the writeback page.
		 */
		uint32_t padding, offset;

		offset = amdgpu_mes_ctx_get_offs(ring, AMDGPU_MES_CTX_IB_OFFS);
		padding = amdgpu_mes_ctx_get_offs(ring,
						  AMDGPU_MES_CTX_PADDING_OFFS);

		ib.gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
		ib.ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);

		gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, padding);
		cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, padding);
		*cpu_ptr = cpu_to_le32(0xCAFEDEAD);
	} else {
		/* Non-MES path: one writeback slot is the scratch target.
		 * NOTE: 'index' is only initialized on this path; the err1
		 * label relies on the same !is_mes_queue guard before use.
		 */
		r = amdgpu_device_wb_get(adev, &index);
		if (r)
			return r;

		gpu_addr = adev->wb.gpu_addr + (index * 4);
		adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
		cpu_ptr = &adev->wb.wb[index];

		r = amdgpu_ib_get(adev, NULL, 16, AMDGPU_IB_POOL_DIRECT, &ib);
		if (r) {
			DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
			goto err1;
		}
	}

	/* WRITE_DATA of 0xDEADBEEF to gpu_addr, with write confirmation. */
	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
	ib.ptr[2] = lower_32_bits(gpu_addr);
	ib.ptr[3] = upper_32_bits(gpu_addr);
	ib.ptr[4] = 0xDEADBEEF;
	ib.length_dw = 5;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err2;

	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		goto err2;
	}

	if (le32_to_cpu(*cpu_ptr) == 0xDEADBEEF)
		r = 0;
	else
		r = -EINVAL;
err2:
	if (!ring->is_mes_queue)
		amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);
err1:
	if (!ring->is_mes_queue)
		amdgpu_device_wb_free(adev, index);
	return r;
}
     426             : 
     427           0 : static void gfx_v11_0_free_microcode(struct amdgpu_device *adev)
     428             : {
     429           0 :         release_firmware(adev->gfx.pfp_fw);
     430           0 :         adev->gfx.pfp_fw = NULL;
     431           0 :         release_firmware(adev->gfx.me_fw);
     432           0 :         adev->gfx.me_fw = NULL;
     433           0 :         release_firmware(adev->gfx.rlc_fw);
     434           0 :         adev->gfx.rlc_fw = NULL;
     435           0 :         release_firmware(adev->gfx.mec_fw);
     436           0 :         adev->gfx.mec_fw = NULL;
     437             : 
     438           0 :         kfree(adev->gfx.rlc.register_list_format);
     439           0 : }
     440             : 
/*
 * Parse the v2.1 extension of the RLC firmware header, caching the
 * save/restore list control (cntl), GPM and SRM blob pointers, sizes,
 * and ucode/feature versions into adev->gfx.  The blob pointers alias
 * into the firmware image (rlc_fw->data) at the header-given offsets,
 * so they stay valid only while rlc_fw is held.
 */
static void gfx_v11_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
{
	const struct rlc_firmware_header_v2_1 *rlc_hdr;

	rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
	/* save/restore list cntl */
	adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver);
	adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver);
	adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes);
	adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes);
	/* save/restore list GPM */
	adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver);
	adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver);
	adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes);
	adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes);
	/* save/restore list SRM */
	adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver);
	adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver);
	adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes);
	adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes);
	adev->gfx.rlc.reg_list_format_direct_reg_list_length =
			le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
}
     461             : 
     462             : static void gfx_v11_0_init_rlc_iram_dram_microcode(struct amdgpu_device *adev)
     463             : {
     464             :         const struct rlc_firmware_header_v2_2 *rlc_hdr;
     465             : 
     466           0 :         rlc_hdr = (const struct rlc_firmware_header_v2_2 *)adev->gfx.rlc_fw->data;
     467           0 :         adev->gfx.rlc.rlc_iram_ucode_size_bytes = le32_to_cpu(rlc_hdr->rlc_iram_ucode_size_bytes);
     468           0 :         adev->gfx.rlc.rlc_iram_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->rlc_iram_ucode_offset_bytes);
     469           0 :         adev->gfx.rlc.rlc_dram_ucode_size_bytes = le32_to_cpu(rlc_hdr->rlc_dram_ucode_size_bytes);
     470           0 :         adev->gfx.rlc.rlc_dram_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->rlc_dram_ucode_offset_bytes);
     471             : }
     472             : 
     473             : static void gfx_v11_0_init_rlcp_rlcv_microcode(struct amdgpu_device *adev)
     474             : {
     475             :         const struct rlc_firmware_header_v2_3 *rlc_hdr;
     476             : 
     477           0 :         rlc_hdr = (const struct rlc_firmware_header_v2_3 *)adev->gfx.rlc_fw->data;
     478           0 :         adev->gfx.rlc.rlcp_ucode_size_bytes = le32_to_cpu(rlc_hdr->rlcp_ucode_size_bytes);
     479           0 :         adev->gfx.rlc.rlcp_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->rlcp_ucode_offset_bytes);
     480           0 :         adev->gfx.rlc.rlcv_ucode_size_bytes = le32_to_cpu(rlc_hdr->rlcv_ucode_size_bytes);
     481           0 :         adev->gfx.rlc.rlcv_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->rlcv_ucode_offset_bytes);
     482             : }
     483             : 
     484           0 : static int gfx_v11_0_init_microcode(struct amdgpu_device *adev)
     485             : {
     486             :         char fw_name[40];
     487             :         char ucode_prefix[30];
     488             :         int err;
     489           0 :         struct amdgpu_firmware_info *info = NULL;
     490           0 :         const struct common_firmware_header *header = NULL;
     491             :         const struct gfx_firmware_header_v1_0 *cp_hdr;
     492             :         const struct gfx_firmware_header_v2_0 *cp_hdr_v2_0;
     493             :         const struct rlc_firmware_header_v2_0 *rlc_hdr;
     494           0 :         unsigned int *tmp = NULL;
     495           0 :         unsigned int i = 0;
     496             :         uint16_t version_major;
     497             :         uint16_t version_minor;
     498             : 
     499           0 :         DRM_DEBUG("\n");
     500             : 
     501           0 :         amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix));
     502             : 
     503           0 :         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", ucode_prefix);
     504           0 :         err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
     505           0 :         if (err)
     506             :                 goto out;
     507           0 :         err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
     508           0 :         if (err)
     509             :                 goto out;
     510             :         /* check pfp fw hdr version to decide if enable rs64 for gfx11.*/
     511           0 :         adev->gfx.rs64_enable = amdgpu_ucode_hdr_version(
     512             :                                 (union amdgpu_firmware_header *)
     513           0 :                                 adev->gfx.pfp_fw->data, 2, 0);
     514           0 :         if (adev->gfx.rs64_enable) {
     515           0 :                 dev_info(adev->dev, "CP RS64 enable\n");
     516           0 :                 cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)adev->gfx.pfp_fw->data;
     517           0 :                 adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr_v2_0->header.ucode_version);
     518           0 :                 adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr_v2_0->ucode_feature_version);
     519             :                 
     520             :         } else {
     521           0 :                 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
     522           0 :                 adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
     523           0 :                 adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
     524             :         }
     525             : 
     526           0 :         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", ucode_prefix);
     527           0 :         err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
     528           0 :         if (err)
     529             :                 goto out;
     530           0 :         err = amdgpu_ucode_validate(adev->gfx.me_fw);
     531           0 :         if (err)
     532             :                 goto out;
     533           0 :         if (adev->gfx.rs64_enable) {
     534           0 :                 cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)adev->gfx.me_fw->data;
     535           0 :                 adev->gfx.me_fw_version = le32_to_cpu(cp_hdr_v2_0->header.ucode_version);
     536           0 :                 adev->gfx.me_feature_version = le32_to_cpu(cp_hdr_v2_0->ucode_feature_version);
     537             :                 
     538             :         } else {
     539           0 :                 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
     540           0 :                 adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
     541           0 :                 adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
     542             :         }
     543             : 
     544           0 :         if (!amdgpu_sriov_vf(adev)) {
     545           0 :                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", ucode_prefix);
     546           0 :                 err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
     547           0 :                 if (err)
     548             :                         goto out;
     549           0 :                 err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
     550           0 :                 rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
     551           0 :                 version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
     552           0 :                 version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
     553             : 
     554           0 :                 adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
     555           0 :                 adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
     556           0 :                 adev->gfx.rlc.save_and_restore_offset =
     557           0 :                         le32_to_cpu(rlc_hdr->save_and_restore_offset);
     558           0 :                 adev->gfx.rlc.clear_state_descriptor_offset =
     559           0 :                         le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
     560           0 :                 adev->gfx.rlc.avail_scratch_ram_locations =
     561           0 :                         le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
     562           0 :                 adev->gfx.rlc.reg_restore_list_size =
     563           0 :                         le32_to_cpu(rlc_hdr->reg_restore_list_size);
     564           0 :                 adev->gfx.rlc.reg_list_format_start =
     565           0 :                         le32_to_cpu(rlc_hdr->reg_list_format_start);
     566           0 :                 adev->gfx.rlc.reg_list_format_separate_start =
     567           0 :                         le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
     568           0 :                 adev->gfx.rlc.starting_offsets_start =
     569           0 :                         le32_to_cpu(rlc_hdr->starting_offsets_start);
     570           0 :                 adev->gfx.rlc.reg_list_format_size_bytes =
     571           0 :                         le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
     572           0 :                 adev->gfx.rlc.reg_list_size_bytes =
     573           0 :                         le32_to_cpu(rlc_hdr->reg_list_size_bytes);
     574           0 :                 adev->gfx.rlc.register_list_format =
     575           0 :                         kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
     576             :                                         adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
     577           0 :                 if (!adev->gfx.rlc.register_list_format) {
     578             :                         err = -ENOMEM;
     579             :                         goto out;
     580             :                 }
     581             : 
     582           0 :                 tmp = (unsigned int *)((uintptr_t)rlc_hdr +
     583           0 :                                                            le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
     584           0 :                 for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
     585           0 :                         adev->gfx.rlc.register_list_format[i] =      le32_to_cpu(tmp[i]);
     586             : 
     587           0 :                 adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
     588             : 
     589           0 :                 tmp = (unsigned int *)((uintptr_t)rlc_hdr +
     590           0 :                                                            le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
     591           0 :                 for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
     592           0 :                         adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
     593             : 
     594           0 :                 if (version_major == 2) {
     595           0 :                         if (version_minor >= 1)
     596           0 :                                 gfx_v11_0_init_rlc_ext_microcode(adev);
     597           0 :                         if (version_minor >= 2)
     598             :                                 gfx_v11_0_init_rlc_iram_dram_microcode(adev);
     599           0 :                         if (version_minor == 3)
     600             :                                 gfx_v11_0_init_rlcp_rlcv_microcode(adev);
     601             :                 }
     602             :         }
     603             : 
     604           0 :         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", ucode_prefix);
     605           0 :         err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
     606           0 :         if (err)
     607             :                 goto out;
     608           0 :         err = amdgpu_ucode_validate(adev->gfx.mec_fw);
     609           0 :         if (err)
     610             :                 goto out;
     611           0 :         if (adev->gfx.rs64_enable) {
     612           0 :                 cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)adev->gfx.mec_fw->data;
     613           0 :                 adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr_v2_0->header.ucode_version);
     614           0 :                 adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr_v2_0->ucode_feature_version);
     615             :                 
     616             :         } else {
     617           0 :                 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
     618           0 :                 adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
     619           0 :                 adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
     620             :         }
     621             : 
     622             :         /* only one MEC for gfx 11.0.0. */
     623           0 :         adev->gfx.mec2_fw = NULL;
     624             : 
     625           0 :         if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
     626           0 :                 if (adev->gfx.rs64_enable) {
     627           0 :                         cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)adev->gfx.pfp_fw->data;
     628           0 :                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_RS64_PFP];
     629           0 :                         info->ucode_id = AMDGPU_UCODE_ID_CP_RS64_PFP;
     630           0 :                         info->fw = adev->gfx.pfp_fw;
     631           0 :                         header = (const struct common_firmware_header *)info->fw->data;
     632           0 :                         adev->firmware.fw_size +=
     633           0 :                                 ALIGN(le32_to_cpu(cp_hdr_v2_0->ucode_size_bytes), PAGE_SIZE);
     634             : 
     635           0 :                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_RS64_PFP_P0_STACK];
     636           0 :                         info->ucode_id = AMDGPU_UCODE_ID_CP_RS64_PFP_P0_STACK;
     637           0 :                         info->fw = adev->gfx.pfp_fw;
     638           0 :                         header = (const struct common_firmware_header *)info->fw->data;
     639           0 :                         adev->firmware.fw_size +=
     640           0 :                                 ALIGN(le32_to_cpu(cp_hdr_v2_0->data_size_bytes), PAGE_SIZE);
     641             : 
     642           0 :                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_RS64_PFP_P1_STACK];
     643           0 :                         info->ucode_id = AMDGPU_UCODE_ID_CP_RS64_PFP_P1_STACK;
     644           0 :                         info->fw = adev->gfx.pfp_fw;
     645           0 :                         header = (const struct common_firmware_header *)info->fw->data;
     646           0 :                         adev->firmware.fw_size +=
     647           0 :                                 ALIGN(le32_to_cpu(cp_hdr_v2_0->data_size_bytes), PAGE_SIZE);
     648             : 
     649           0 :                         cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)adev->gfx.me_fw->data;
     650           0 :                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_RS64_ME];
     651           0 :                         info->ucode_id = AMDGPU_UCODE_ID_CP_RS64_ME;
     652           0 :                         info->fw = adev->gfx.me_fw;
     653           0 :                         header = (const struct common_firmware_header *)info->fw->data;
     654           0 :                         adev->firmware.fw_size +=
     655           0 :                                 ALIGN(le32_to_cpu(cp_hdr_v2_0->ucode_size_bytes), PAGE_SIZE);
     656             : 
     657           0 :                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_RS64_ME_P0_STACK];
     658           0 :                         info->ucode_id = AMDGPU_UCODE_ID_CP_RS64_ME_P0_STACK;
     659           0 :                         info->fw = adev->gfx.me_fw;
     660           0 :                         header = (const struct common_firmware_header *)info->fw->data;
     661           0 :                         adev->firmware.fw_size +=
     662           0 :                                 ALIGN(le32_to_cpu(cp_hdr_v2_0->data_size_bytes), PAGE_SIZE);
     663             : 
     664           0 :                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_RS64_ME_P1_STACK];
     665           0 :                         info->ucode_id = AMDGPU_UCODE_ID_CP_RS64_ME_P1_STACK;
     666           0 :                         info->fw = adev->gfx.me_fw;
     667           0 :                         header = (const struct common_firmware_header *)info->fw->data;
     668           0 :                         adev->firmware.fw_size +=
     669           0 :                                 ALIGN(le32_to_cpu(cp_hdr_v2_0->data_size_bytes), PAGE_SIZE);
     670             : 
     671           0 :                         cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)adev->gfx.mec_fw->data;
     672           0 :                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_RS64_MEC];
     673           0 :                         info->ucode_id = AMDGPU_UCODE_ID_CP_RS64_MEC;
     674           0 :                         info->fw = adev->gfx.mec_fw;
     675           0 :                         header = (const struct common_firmware_header *)info->fw->data;
     676           0 :                         adev->firmware.fw_size +=
     677           0 :                                 ALIGN(le32_to_cpu(cp_hdr_v2_0->ucode_size_bytes), PAGE_SIZE);
     678             : 
     679           0 :                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_RS64_MEC_P0_STACK];
     680           0 :                         info->ucode_id = AMDGPU_UCODE_ID_CP_RS64_MEC_P0_STACK;
     681           0 :                         info->fw = adev->gfx.mec_fw;
     682           0 :                         header = (const struct common_firmware_header *)info->fw->data;
     683           0 :                         adev->firmware.fw_size +=
     684           0 :                                 ALIGN(le32_to_cpu(cp_hdr_v2_0->data_size_bytes), PAGE_SIZE);
     685             : 
     686           0 :                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_RS64_MEC_P1_STACK];
     687           0 :                         info->ucode_id = AMDGPU_UCODE_ID_CP_RS64_MEC_P1_STACK;
     688           0 :                         info->fw = adev->gfx.mec_fw;
     689           0 :                         header = (const struct common_firmware_header *)info->fw->data;
     690           0 :                         adev->firmware.fw_size +=
     691           0 :                                 ALIGN(le32_to_cpu(cp_hdr_v2_0->data_size_bytes), PAGE_SIZE);
     692             : 
     693           0 :                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_RS64_MEC_P2_STACK];
     694           0 :                         info->ucode_id = AMDGPU_UCODE_ID_CP_RS64_MEC_P2_STACK;
     695           0 :                         info->fw = adev->gfx.mec_fw;
     696           0 :                         header = (const struct common_firmware_header *)info->fw->data;
     697           0 :                         adev->firmware.fw_size +=
     698           0 :                                 ALIGN(le32_to_cpu(cp_hdr_v2_0->data_size_bytes), PAGE_SIZE);
     699             : 
     700           0 :                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_RS64_MEC_P3_STACK];
     701           0 :                         info->ucode_id = AMDGPU_UCODE_ID_CP_RS64_MEC_P3_STACK;
     702           0 :                         info->fw = adev->gfx.mec_fw;
     703           0 :                         header = (const struct common_firmware_header *)info->fw->data;
     704           0 :                         adev->firmware.fw_size +=
     705           0 :                                 ALIGN(le32_to_cpu(cp_hdr_v2_0->data_size_bytes), PAGE_SIZE);
     706             :                 } else {
     707           0 :                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
     708           0 :                         info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
     709           0 :                         info->fw = adev->gfx.pfp_fw;
     710           0 :                         header = (const struct common_firmware_header *)info->fw->data;
     711           0 :                         adev->firmware.fw_size +=
     712           0 :                                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
     713             : 
     714           0 :                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
     715           0 :                         info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
     716           0 :                         info->fw = adev->gfx.me_fw;
     717           0 :                         header = (const struct common_firmware_header *)info->fw->data;
     718           0 :                         adev->firmware.fw_size +=
     719           0 :                                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
     720             : 
     721           0 :                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
     722           0 :                         info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
     723           0 :                         info->fw = adev->gfx.mec_fw;
     724           0 :                         header = (const struct common_firmware_header *)info->fw->data;
     725           0 :                         cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
     726           0 :                         adev->firmware.fw_size +=
     727           0 :                                 ALIGN(le32_to_cpu(header->ucode_size_bytes) -
     728             :                                       le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
     729             : 
     730           0 :                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT];
     731           0 :                         info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT;
     732           0 :                         info->fw = adev->gfx.mec_fw;
     733           0 :                         adev->firmware.fw_size +=
     734           0 :                                 ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
     735             :                 }
     736             : 
     737           0 :                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
     738           0 :                 info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
     739           0 :                 info->fw = adev->gfx.rlc_fw;
     740           0 :                 if (info->fw) {
     741           0 :                         header = (const struct common_firmware_header *)info->fw->data;
     742           0 :                         adev->firmware.fw_size +=
     743           0 :                                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
     744             :                 }
     745           0 :                 if (adev->gfx.rlc.save_restore_list_gpm_size_bytes &&
     746           0 :                     adev->gfx.rlc.save_restore_list_srm_size_bytes) {
     747           0 :                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM];
     748           0 :                         info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM;
     749           0 :                         info->fw = adev->gfx.rlc_fw;
     750           0 :                         adev->firmware.fw_size +=
     751           0 :                                 ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE);
     752             : 
     753           0 :                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM];
     754           0 :                         info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM;
     755           0 :                         info->fw = adev->gfx.rlc_fw;
     756           0 :                         adev->firmware.fw_size +=
     757           0 :                                 ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE);
     758             :                 }
     759             : 
     760           0 :                 if (adev->gfx.rlc.rlc_iram_ucode_size_bytes &&
     761           0 :                     adev->gfx.rlc.rlc_dram_ucode_size_bytes) {
     762           0 :                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_IRAM];
     763           0 :                         info->ucode_id = AMDGPU_UCODE_ID_RLC_IRAM;
     764           0 :                         info->fw = adev->gfx.rlc_fw;
     765           0 :                         adev->firmware.fw_size +=
     766           0 :                                 ALIGN(adev->gfx.rlc.rlc_iram_ucode_size_bytes, PAGE_SIZE);
     767             : 
     768           0 :                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_DRAM];
     769           0 :                         info->ucode_id = AMDGPU_UCODE_ID_RLC_DRAM;
     770           0 :                         info->fw = adev->gfx.rlc_fw;
     771           0 :                         adev->firmware.fw_size +=
     772           0 :                                 ALIGN(adev->gfx.rlc.rlc_dram_ucode_size_bytes, PAGE_SIZE);
     773             :                 }
     774             : 
     775           0 :                 if (adev->gfx.rlc.rlcp_ucode_size_bytes) {
     776           0 :                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_P];
     777           0 :                         info->ucode_id = AMDGPU_UCODE_ID_RLC_P;
     778           0 :                         info->fw = adev->gfx.rlc_fw;
     779           0 :                         adev->firmware.fw_size +=
     780           0 :                                 ALIGN(adev->gfx.rlc.rlcp_ucode_size_bytes, PAGE_SIZE);
     781             :                 }
     782             : 
     783           0 :                 if (adev->gfx.rlc.rlcv_ucode_size_bytes) {
     784           0 :                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_V];
     785           0 :                         info->ucode_id = AMDGPU_UCODE_ID_RLC_V;
     786           0 :                         info->fw = adev->gfx.rlc_fw;
     787           0 :                         adev->firmware.fw_size +=
     788           0 :                                 ALIGN(adev->gfx.rlc.rlcv_ucode_size_bytes, PAGE_SIZE);
     789             :                 }
     790             :         }
     791             : 
     792             : out:
     793           0 :         if (err) {
     794           0 :                 dev_err(adev->dev,
     795             :                         "gfx11: Failed to load firmware \"%s\"\n",
     796             :                         fw_name);
     797           0 :                 release_firmware(adev->gfx.pfp_fw);
     798           0 :                 adev->gfx.pfp_fw = NULL;
     799           0 :                 release_firmware(adev->gfx.me_fw);
     800           0 :                 adev->gfx.me_fw = NULL;
     801           0 :                 release_firmware(adev->gfx.rlc_fw);
     802           0 :                 adev->gfx.rlc_fw = NULL;
     803           0 :                 release_firmware(adev->gfx.mec_fw);
     804           0 :                 adev->gfx.mec_fw = NULL;
     805             :         }
     806             : 
     807           0 :         return err;
     808             : }
     809             : 
     810           0 : static int gfx_v11_0_init_toc_microcode(struct amdgpu_device *adev)
     811             : {
     812             :         const struct psp_firmware_header_v1_0 *toc_hdr;
     813           0 :         int err = 0;
     814             :         char fw_name[40];
     815             :         char ucode_prefix[30];
     816             : 
     817           0 :         amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix));
     818             : 
     819           0 :         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_toc.bin", ucode_prefix);
     820           0 :         err = request_firmware(&adev->psp.toc_fw, fw_name, adev->dev);
     821           0 :         if (err)
     822             :                 goto out;
     823             : 
     824           0 :         err = amdgpu_ucode_validate(adev->psp.toc_fw);
     825           0 :         if (err)
     826             :                 goto out;
     827             : 
     828           0 :         toc_hdr = (const struct psp_firmware_header_v1_0 *)adev->psp.toc_fw->data;
     829           0 :         adev->psp.toc.fw_version = le32_to_cpu(toc_hdr->header.ucode_version);
     830           0 :         adev->psp.toc.feature_version = le32_to_cpu(toc_hdr->sos.fw_version);
     831           0 :         adev->psp.toc.size_bytes = le32_to_cpu(toc_hdr->header.ucode_size_bytes);
     832           0 :         adev->psp.toc.start_addr = (uint8_t *)toc_hdr +
     833           0 :                                 le32_to_cpu(toc_hdr->header.ucode_array_offset_bytes);
     834           0 :         return 0;
     835             : out:
     836           0 :         dev_err(adev->dev, "Failed to load TOC microcode\n");
     837           0 :         release_firmware(adev->psp.toc_fw);
     838           0 :         adev->psp.toc_fw = NULL;
     839           0 :         return err;
     840             : }
     841             : 
     842           0 : static u32 gfx_v11_0_get_csb_size(struct amdgpu_device *adev)
     843             : {
     844           0 :         u32 count = 0;
     845           0 :         const struct cs_section_def *sect = NULL;
     846           0 :         const struct cs_extent_def *ext = NULL;
     847             : 
     848             :         /* begin clear state */
     849           0 :         count += 2;
     850             :         /* context control state */
     851           0 :         count += 3;
     852             : 
     853           0 :         for (sect = gfx11_cs_data; sect->section != NULL; ++sect) {
     854           0 :                 for (ext = sect->section; ext->extent != NULL; ++ext) {
     855           0 :                         if (sect->id == SECT_CONTEXT)
     856           0 :                                 count += 2 + ext->reg_count;
     857             :                         else
     858             :                                 return 0;
     859             :                 }
     860             :         }
     861             : 
     862             :         /* set PA_SC_TILE_STEERING_OVERRIDE */
     863           0 :         count += 3;
     864             :         /* end clear state */
     865           0 :         count += 2;
     866             :         /* clear state */
     867           0 :         count += 2;
     868             : 
     869           0 :         return count;
     870             : }
     871             : 
/*
 * gfx_v11_0_get_csb_buffer - build the clear-state PM4 packet stream
 * @adev: amdgpu device pointer
 * @buffer: destination buffer (little-endian dwords); must hold at least
 *          gfx_v11_0_get_csb_size() dwords
 *
 * Emits the clear-state indirect buffer consumed by the CP: preamble
 * begin, context control, one SET_CONTEXT_REG packet per extent in
 * adev->gfx.rlc.cs_data, the PA_SC_TILE_STEERING_OVERRIDE write,
 * preamble end, and a final CLEAR_STATE packet.  The dword layout must
 * stay in lockstep with the count computed by gfx_v11_0_get_csb_size().
 * Silently returns if cs_data or buffer is NULL, or (mid-stream) if a
 * non-SECT_CONTEXT section is encountered.
 */
static void gfx_v11_0_get_csb_buffer(struct amdgpu_device *adev,
                                    volatile u32 *buffer)
{
        u32 count = 0, i;
        const struct cs_section_def *sect = NULL;
        const struct cs_extent_def *ext = NULL;
        int ctx_reg_offset;

        if (adev->gfx.rlc.cs_data == NULL)
                return;
        if (buffer == NULL)
                return;

        /* begin clear state */
        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
        buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

        /* context control: load enable bits set in both dwords */
        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
        buffer[count++] = cpu_to_le32(0x80000000);
        buffer[count++] = cpu_to_le32(0x80000000);

        /* one SET_CONTEXT_REG packet per extent; register offsets are
         * relative to PACKET3_SET_CONTEXT_REG_START
         */
        for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
                for (ext = sect->section; ext->extent != NULL; ++ext) {
                        if (sect->id == SECT_CONTEXT) {
                                buffer[count++] =
                                        cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
                                buffer[count++] = cpu_to_le32(ext->reg_index -
                                                PACKET3_SET_CONTEXT_REG_START);
                                for (i = 0; i < ext->reg_count; i++)
                                        buffer[count++] = cpu_to_le32(ext->extent[i]);
                        } else {
                                /* unexpected section type: abort the stream */
                                return;
                        }
                }
        }

        /* set PA_SC_TILE_STEERING_OVERRIDE */
        ctx_reg_offset =
                SOC15_REG_OFFSET(GC, 0, regPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START;
        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1));
        buffer[count++] = cpu_to_le32(ctx_reg_offset);
        buffer[count++] = cpu_to_le32(adev->gfx.config.pa_sc_tile_steering_override);

        /* end clear state */
        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
        buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

        /* clear state */
        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
        buffer[count++] = cpu_to_le32(0);
}
     919             : 
/* gfx_v11_0_rlc_fini - release the RLC buffer objects
 *
 * Frees the clear state BO and the CP jump table BO, clearing their
 * GPU addresses and CPU mappings along with the BO handles.
 */
static void gfx_v11_0_rlc_fini(struct amdgpu_device *adev)
{
        /* clear state block */
        amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
                        &adev->gfx.rlc.clear_state_gpu_addr,
                        (void **)&adev->gfx.rlc.cs_ptr);

        /* jump table block */
        amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
                        &adev->gfx.rlc.cp_table_gpu_addr,
                        (void **)&adev->gfx.rlc.cp_table_ptr);
}
     932             : 
/* Record the GC register offsets the RLCG register-access path uses
 * (four scratch registers, GRBM control/index and the RLC spare
 * interrupt), then advertise RLCG register access as supported.
 */
static void gfx_v11_0_init_rlcg_reg_access_ctrl(struct amdgpu_device *adev)
{
        struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl;

        reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl;
        reg_access_ctrl->scratch_reg0 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0);
        reg_access_ctrl->scratch_reg1 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG1);
        reg_access_ctrl->scratch_reg2 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG2);
        reg_access_ctrl->scratch_reg3 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG3);
        reg_access_ctrl->grbm_cntl = SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_CNTL);
        reg_access_ctrl->grbm_idx = SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_INDEX);
        reg_access_ctrl->spare_int = SOC15_REG_OFFSET(GC, 0, regRLC_SPARE_INT_0);
        adev->gfx.rlc.rlcg_reg_access_supported = true;
}
     947             : 
     948           0 : static int gfx_v11_0_rlc_init(struct amdgpu_device *adev)
     949             : {
     950             :         const struct cs_section_def *cs_data;
     951             :         int r;
     952             : 
     953           0 :         adev->gfx.rlc.cs_data = gfx11_cs_data;
     954             : 
     955           0 :         cs_data = adev->gfx.rlc.cs_data;
     956             : 
     957             :         if (cs_data) {
     958             :                 /* init clear state block */
     959           0 :                 r = amdgpu_gfx_rlc_init_csb(adev);
     960           0 :                 if (r)
     961             :                         return r;
     962             :         }
     963             : 
     964             :         /* init spm vmid with 0xf */
     965           0 :         if (adev->gfx.rlc.funcs->update_spm_vmid)
     966           0 :                 adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf);
     967             : 
     968             :         return 0;
     969             : }
     970             : 
/* Free the MEC buffer objects: the HPD EOP buffer and the MEC firmware
 * instruction and data BOs.
 */
static void gfx_v11_0_mec_fini(struct amdgpu_device *adev)
{
        amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
        amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
        amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_data_obj, NULL, NULL);
}
     977             : 
/* gfx_v11_0_me_init - one-time graphics micro-engine setup
 *
 * Clears the gfx queue ownership bitmap, acquires the graphics queues,
 * then loads the gfx microcode.
 *
 * Returns 0 on success or the microcode-load error.
 */
static int gfx_v11_0_me_init(struct amdgpu_device *adev)
{
        int r;

        bitmap_zero(adev->gfx.me.queue_bitmap, AMDGPU_MAX_GFX_QUEUES);

        amdgpu_gfx_graphics_queue_acquire(adev);

        r = gfx_v11_0_init_microcode(adev);
        if (r)
                DRM_ERROR("Failed to load gfx firmware!\n");

        return r;
}
     992             : 
     993           0 : static int gfx_v11_0_mec_init(struct amdgpu_device *adev)
     994             : {
     995             :         int r;
     996             :         u32 *hpd;
     997             :         size_t mec_hpd_size;
     998             : 
     999           0 :         bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
    1000             : 
    1001             :         /* take ownership of the relevant compute queues */
    1002           0 :         amdgpu_gfx_compute_queue_acquire(adev);
    1003           0 :         mec_hpd_size = adev->gfx.num_compute_rings * GFX11_MEC_HPD_SIZE;
    1004             : 
    1005           0 :         if (mec_hpd_size) {
    1006           0 :                 r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
    1007             :                                               AMDGPU_GEM_DOMAIN_GTT,
    1008             :                                               &adev->gfx.mec.hpd_eop_obj,
    1009             :                                               &adev->gfx.mec.hpd_eop_gpu_addr,
    1010             :                                               (void **)&hpd);
    1011           0 :                 if (r) {
    1012           0 :                         dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
    1013           0 :                         gfx_v11_0_mec_fini(adev);
    1014           0 :                         return r;
    1015             :                 }
    1016             : 
    1017           0 :                 memset(hpd, 0, mec_hpd_size);
    1018             : 
    1019           0 :                 amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
    1020           0 :                 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
    1021             :         }
    1022             : 
    1023             :         return 0;
    1024             : }
    1025             : 
/* Read one wave-indexed register: program SQ_IND_INDEX with the wave id
 * and register address, then fetch the value from SQ_IND_DATA.
 */
static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t wave, uint32_t address)
{
        WREG32_SOC15(GC, 0, regSQ_IND_INDEX,
                (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
                (address << SQ_IND_INDEX__INDEX__SHIFT));
        return RREG32_SOC15(GC, 0, regSQ_IND_DATA);
}
    1033             : 
    1034           0 : static void wave_read_regs(struct amdgpu_device *adev, uint32_t wave,
    1035             :                            uint32_t thread, uint32_t regno,
    1036             :                            uint32_t num, uint32_t *out)
    1037             : {
    1038           0 :         WREG32_SOC15(GC, 0, regSQ_IND_INDEX,
    1039             :                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
    1040             :                 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
    1041             :                 (thread << SQ_IND_INDEX__WORKITEM_ID__SHIFT) |
    1042             :                 (SQ_IND_INDEX__AUTO_INCR_MASK));
    1043           0 :         while (num--)
    1044           0 :                 *(out++) = RREG32_SOC15(GC, 0, regSQ_IND_DATA);
    1045           0 : }
    1046             : 
/* Dump the status registers of one wave into @dst, advancing *no_fields
 * by one per value written.  The first entry (2) tags the record as
 * "type 2" wave data for the consumer.
 */
static void gfx_v11_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
{
        /* in gfx11 the SIMD_ID is specified as part of the INSTANCE
         * field when performing a select_se_sh so it should be
         * zero here */
        WARN_ON(simd != 0);

        /* type 2 wave data */
        dst[(*no_fields)++] = 2;
        dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_STATUS);
        dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_PC_LO);
        dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_PC_HI);
        dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXEC_LO);
        dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXEC_HI);
        dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_HW_ID1);
        dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_HW_ID2);
        dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_GPR_ALLOC);
        dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_LDS_ALLOC);
        dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_TRAPSTS);
        dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_STS);
        dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_STS2);
        dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_DBG1);
        dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_M0);
        dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_MODE);
}
    1072             : 
/* Read @size SGPRs of @wave starting at @start into @dst.  On gfx11 the
 * SIMD is selected via the instance field, so @simd must be 0 here.
 */
static void gfx_v11_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
                                     uint32_t wave, uint32_t start,
                                     uint32_t size, uint32_t *dst)
{
        WARN_ON(simd != 0);

        wave_read_regs(
                adev, wave, 0, start + SQIND_WAVE_SGPRS_OFFSET, size,
                dst);
}
    1083             : 
    1084           0 : static void gfx_v11_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
    1085             :                                       uint32_t wave, uint32_t thread,
    1086             :                                       uint32_t start, uint32_t size,
    1087             :                                       uint32_t *dst)
    1088             : {
    1089           0 :         wave_read_regs(
    1090             :                 adev, wave, thread,
    1091             :                 start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
    1092           0 : }
    1093             : 
/* Thin wrapper routing ME/pipe/queue/VMID selection to the common SOC21
 * GRBM select helper.
 */
static void gfx_v11_0_select_me_pipe_q(struct amdgpu_device *adev,
                                                                          u32 me, u32 pipe, u32 q, u32 vm)
{
        soc21_grbm_select(adev, me, pipe, q, vm);
}
    1099             : 
/* GFX callback table installed on adev->gfx.funcs in
 * gfx_v11_0_gpu_early_init(); wires up clock readout, SE/SH and
 * ME/pipe/queue selection, wave debugging, and perfmon clock gating.
 */
static const struct amdgpu_gfx_funcs gfx_v11_0_gfx_funcs = {
        .get_gpu_clock_counter = &gfx_v11_0_get_gpu_clock_counter,
        .select_se_sh = &gfx_v11_0_select_se_sh,
        .read_wave_data = &gfx_v11_0_read_wave_data,
        .read_wave_sgprs = &gfx_v11_0_read_wave_sgprs,
        .read_wave_vgprs = &gfx_v11_0_read_wave_vgprs,
        .select_me_pipe_q = &gfx_v11_0_select_me_pipe_q,
        .update_perfmon_mgcg = &gfx_v11_0_update_perf_clk,
};
    1109             : 
    1110           0 : static int gfx_v11_0_gpu_early_init(struct amdgpu_device *adev)
    1111             : {
    1112           0 :         adev->gfx.funcs = &gfx_v11_0_gfx_funcs;
    1113             : 
    1114           0 :         switch (adev->ip_versions[GC_HWIP][0]) {
    1115             :         case IP_VERSION(11, 0, 0):
    1116             :         case IP_VERSION(11, 0, 2):
    1117             :         case IP_VERSION(11, 0, 3):
    1118           0 :                 adev->gfx.config.max_hw_contexts = 8;
    1119           0 :                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
    1120           0 :                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
    1121           0 :                 adev->gfx.config.sc_hiz_tile_fifo_size = 0;
    1122           0 :                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
    1123           0 :                 break;
    1124             :         case IP_VERSION(11, 0, 1):
    1125           0 :                 adev->gfx.config.max_hw_contexts = 8;
    1126           0 :                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
    1127           0 :                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
    1128           0 :                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x80;
    1129           0 :                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x300;
    1130           0 :                 break;
    1131             :         default:
    1132           0 :                 BUG();
    1133             :                 break;
    1134             :         }
    1135             : 
    1136           0 :         return 0;
    1137             : }
    1138             : 
    1139           0 : static int gfx_v11_0_gfx_ring_init(struct amdgpu_device *adev, int ring_id,
    1140             :                                    int me, int pipe, int queue)
    1141             : {
    1142             :         int r;
    1143             :         struct amdgpu_ring *ring;
    1144             :         unsigned int irq_type;
    1145             : 
    1146           0 :         ring = &adev->gfx.gfx_ring[ring_id];
    1147             : 
    1148           0 :         ring->me = me;
    1149           0 :         ring->pipe = pipe;
    1150           0 :         ring->queue = queue;
    1151             : 
    1152           0 :         ring->ring_obj = NULL;
    1153           0 :         ring->use_doorbell = true;
    1154             : 
    1155           0 :         if (!ring_id)
    1156           0 :                 ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
    1157             :         else
    1158           0 :                 ring->doorbell_index = adev->doorbell_index.gfx_ring1 << 1;
    1159           0 :         sprintf(ring->name, "gfx_%d.%d.%d", ring->me, ring->pipe, ring->queue);
    1160             : 
    1161           0 :         irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + ring->pipe;
    1162           0 :         r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
    1163             :                              AMDGPU_RING_PRIO_DEFAULT, NULL);
    1164           0 :         if (r)
    1165             :                 return r;
    1166           0 :         return 0;
    1167             : }
    1168             : 
    1169           0 : static int gfx_v11_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
    1170             :                                        int mec, int pipe, int queue)
    1171             : {
    1172             :         int r;
    1173             :         unsigned irq_type;
    1174             :         struct amdgpu_ring *ring;
    1175             :         unsigned int hw_prio;
    1176             : 
    1177           0 :         ring = &adev->gfx.compute_ring[ring_id];
    1178             : 
    1179             :         /* mec0 is me1 */
    1180           0 :         ring->me = mec + 1;
    1181           0 :         ring->pipe = pipe;
    1182           0 :         ring->queue = queue;
    1183             : 
    1184           0 :         ring->ring_obj = NULL;
    1185           0 :         ring->use_doorbell = true;
    1186           0 :         ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
    1187           0 :         ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
    1188           0 :                                 + (ring_id * GFX11_MEC_HPD_SIZE);
    1189           0 :         sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
    1190             : 
    1191           0 :         irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
    1192           0 :                 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
    1193           0 :                 + ring->pipe;
    1194           0 :         hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ?
    1195           0 :                         AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL;
    1196             :         /* type-2 packets are deprecated on MEC, use type-3 instead */
    1197           0 :         r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
    1198             :                              hw_prio, NULL);
    1199           0 :         if (r)
    1200             :                 return r;
    1201             : 
    1202           0 :         return 0;
    1203             : }
    1204             : 
/* Per-firmware-ID layout of the RLC autoload buffer, populated from the
 * PSP TOC by gfx_v11_0_parse_rlc_toc().  offset and size are in bytes
 * (converted from the TOC's dword units).
 */
static struct {
        SOC21_FIRMWARE_ID       id;
        unsigned int            offset;
        unsigned int            size;
} rlc_autoload_info[SOC21_FIRMWARE_ID_MAX];
    1210             : 
    1211           0 : static void gfx_v11_0_parse_rlc_toc(struct amdgpu_device *adev, void *rlc_toc)
    1212             : {
    1213           0 :         RLC_TABLE_OF_CONTENT *ucode = rlc_toc;
    1214             : 
    1215           0 :         while (ucode && (ucode->id > SOC21_FIRMWARE_ID_INVALID) &&
    1216             :                         (ucode->id < SOC21_FIRMWARE_ID_MAX)) {
    1217           0 :                 rlc_autoload_info[ucode->id].id = ucode->id;
    1218           0 :                 rlc_autoload_info[ucode->id].offset = ucode->offset * 4;
    1219           0 :                 rlc_autoload_info[ucode->id].size = ucode->size * 4;
    1220             : 
    1221           0 :                 ucode++;
    1222             :         }
    1223           0 : }
    1224             : 
    1225           0 : static uint32_t gfx_v11_0_calc_toc_total_size(struct amdgpu_device *adev)
    1226             : {
    1227           0 :         uint32_t total_size = 0;
    1228             :         SOC21_FIRMWARE_ID id;
    1229             : 
    1230           0 :         gfx_v11_0_parse_rlc_toc(adev, adev->psp.toc.start_addr);
    1231             : 
    1232           0 :         for (id = SOC21_FIRMWARE_ID_RLC_G_UCODE; id < SOC21_FIRMWARE_ID_MAX; id++)
    1233           0 :                 total_size += rlc_autoload_info[id].size;
    1234             : 
    1235             :         /* In case the offset in rlc toc ucode is aligned */
    1236           0 :         if (total_size < rlc_autoload_info[SOC21_FIRMWARE_ID_MAX-1].offset)
    1237           0 :                 total_size = rlc_autoload_info[SOC21_FIRMWARE_ID_MAX-1].offset +
    1238           0 :                         rlc_autoload_info[SOC21_FIRMWARE_ID_MAX-1].size;
    1239             : 
    1240           0 :         return total_size;
    1241             : }
    1242             : 
    1243           0 : static int gfx_v11_0_rlc_autoload_buffer_init(struct amdgpu_device *adev)
    1244             : {
    1245             :         int r;
    1246             :         uint32_t total_size;
    1247             : 
    1248           0 :         total_size = gfx_v11_0_calc_toc_total_size(adev);
    1249             : 
    1250           0 :         r = amdgpu_bo_create_reserved(adev, total_size, 64 * 1024,
    1251             :                         AMDGPU_GEM_DOMAIN_VRAM,
    1252             :                         &adev->gfx.rlc.rlc_autoload_bo,
    1253             :                         &adev->gfx.rlc.rlc_autoload_gpu_addr,
    1254             :                         (void **)&adev->gfx.rlc.rlc_autoload_ptr);
    1255             : 
    1256           0 :         if (r) {
    1257           0 :                 dev_err(adev->dev, "(%d) failed to create fw autoload bo\n", r);
    1258           0 :                 return r;
    1259             :         }
    1260             : 
    1261             :         return 0;
    1262             : }
    1263             : 
    1264           0 : static void gfx_v11_0_rlc_backdoor_autoload_copy_ucode(struct amdgpu_device *adev,
    1265             :                                               SOC21_FIRMWARE_ID id,
    1266             :                                               const void *fw_data,
    1267             :                                               uint32_t fw_size,
    1268             :                                               uint32_t *fw_autoload_mask)
    1269             : {
    1270             :         uint32_t toc_offset;
    1271             :         uint32_t toc_fw_size;
    1272           0 :         char *ptr = adev->gfx.rlc.rlc_autoload_ptr;
    1273             : 
    1274           0 :         if (id <= SOC21_FIRMWARE_ID_INVALID || id >= SOC21_FIRMWARE_ID_MAX)
    1275             :                 return;
    1276             : 
    1277           0 :         toc_offset = rlc_autoload_info[id].offset;
    1278           0 :         toc_fw_size = rlc_autoload_info[id].size;
    1279             : 
    1280           0 :         if (fw_size == 0)
    1281           0 :                 fw_size = toc_fw_size;
    1282             : 
    1283           0 :         if (fw_size > toc_fw_size)
    1284           0 :                 fw_size = toc_fw_size;
    1285             : 
    1286           0 :         memcpy(ptr + toc_offset, fw_data, fw_size);
    1287             : 
    1288           0 :         if (fw_size < toc_fw_size)
    1289           0 :                 memset(ptr + toc_offset + fw_size, 0, toc_fw_size - fw_size);
    1290             : 
    1291           0 :         if ((id != SOC21_FIRMWARE_ID_RS64_PFP) && (id != SOC21_FIRMWARE_ID_RS64_ME))
    1292           0 :                 *(uint64_t *)fw_autoload_mask |= 1ULL << id;
    1293             : }
    1294             : 
/* Copy the TOC itself into the autoload buffer.
 *
 * Bit 0 of the 64-bit enable mask marks the TOC; the full mask is then
 * patched into the last 8 bytes of the TOC image in place before the
 * image is copied into its autoload slot.
 */
static void gfx_v11_0_rlc_backdoor_autoload_copy_toc_ucode(struct amdgpu_device *adev,
                                                        uint32_t *fw_autoload_mask)
{
        void *data;
        uint32_t size;
        uint64_t *toc_ptr;

        *(uint64_t *)fw_autoload_mask |= 0x1;

        DRM_DEBUG("rlc autoload enabled fw: 0x%llx\n", *(uint64_t *)fw_autoload_mask);

        data = adev->psp.toc.start_addr;
        size = rlc_autoload_info[SOC21_FIRMWARE_ID_RLC_TOC].size;

        /* last 64-bit word of the TOC image carries the enable mask */
        toc_ptr = (uint64_t *)data + size / 8 - 1;
        *toc_ptr = *(uint64_t *)fw_autoload_mask;

        gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLC_TOC,
                                        data, size, fw_autoload_mask);
}
    1315             : 
    1316           0 : static void gfx_v11_0_rlc_backdoor_autoload_copy_gfx_ucode(struct amdgpu_device *adev,
    1317             :                                                         uint32_t *fw_autoload_mask)
    1318             : {
    1319             :         const __le32 *fw_data;
    1320             :         uint32_t fw_size;
    1321             :         const struct gfx_firmware_header_v1_0 *cp_hdr;
    1322             :         const struct gfx_firmware_header_v2_0 *cpv2_hdr;
    1323             :         const struct rlc_firmware_header_v2_0 *rlc_hdr;
    1324             :         const struct rlc_firmware_header_v2_2 *rlcv22_hdr;
    1325             :         uint16_t version_major, version_minor;
    1326             : 
    1327           0 :         if (adev->gfx.rs64_enable) {
    1328             :                 /* pfp ucode */
    1329           0 :                 cpv2_hdr = (const struct gfx_firmware_header_v2_0 *)
    1330           0 :                         adev->gfx.pfp_fw->data;
    1331             :                 /* instruction */
    1332           0 :                 fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
    1333           0 :                         le32_to_cpu(cpv2_hdr->ucode_offset_bytes));
    1334           0 :                 fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
    1335           0 :                 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_PFP,
    1336             :                                                 fw_data, fw_size, fw_autoload_mask);
    1337             :                 /* data */
    1338           0 :                 fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
    1339           0 :                         le32_to_cpu(cpv2_hdr->data_offset_bytes));
    1340           0 :                 fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
    1341           0 :                 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_PFP_P0_STACK,
    1342             :                                                 fw_data, fw_size, fw_autoload_mask);
    1343           0 :                 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_PFP_P1_STACK,
    1344             :                                                 fw_data, fw_size, fw_autoload_mask);
    1345             :                 /* me ucode */
    1346           0 :                 cpv2_hdr = (const struct gfx_firmware_header_v2_0 *)
    1347           0 :                         adev->gfx.me_fw->data;
    1348             :                 /* instruction */
    1349           0 :                 fw_data = (const __le32 *)(adev->gfx.me_fw->data +
    1350           0 :                         le32_to_cpu(cpv2_hdr->ucode_offset_bytes));
    1351           0 :                 fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
    1352           0 :                 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_ME,
    1353             :                                                 fw_data, fw_size, fw_autoload_mask);
    1354             :                 /* data */
    1355           0 :                 fw_data = (const __le32 *)(adev->gfx.me_fw->data +
    1356           0 :                         le32_to_cpu(cpv2_hdr->data_offset_bytes));
    1357           0 :                 fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
    1358           0 :                 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_ME_P0_STACK,
    1359             :                                                 fw_data, fw_size, fw_autoload_mask);
    1360           0 :                 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_ME_P1_STACK,
    1361             :                                                 fw_data, fw_size, fw_autoload_mask);
    1362             :                 /* mec ucode */
    1363           0 :                 cpv2_hdr = (const struct gfx_firmware_header_v2_0 *)
    1364           0 :                         adev->gfx.mec_fw->data;
    1365             :                 /* instruction */
    1366           0 :                 fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
    1367           0 :                         le32_to_cpu(cpv2_hdr->ucode_offset_bytes));
    1368           0 :                 fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
    1369           0 :                 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC,
    1370             :                                                 fw_data, fw_size, fw_autoload_mask);
    1371             :                 /* data */
    1372           0 :                 fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
    1373           0 :                         le32_to_cpu(cpv2_hdr->data_offset_bytes));
    1374           0 :                 fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
    1375           0 :                 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P0_STACK,
    1376             :                                                 fw_data, fw_size, fw_autoload_mask);
    1377           0 :                 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P1_STACK,
    1378             :                                                 fw_data, fw_size, fw_autoload_mask);
    1379           0 :                 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P2_STACK,
    1380             :                                                 fw_data, fw_size, fw_autoload_mask);
    1381           0 :                 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P3_STACK,
    1382             :                                                 fw_data, fw_size, fw_autoload_mask);
    1383             :         } else {
    1384             :                 /* pfp ucode */
    1385           0 :                 cp_hdr = (const struct gfx_firmware_header_v1_0 *)
    1386           0 :                         adev->gfx.pfp_fw->data;
    1387           0 :                 fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
    1388           0 :                                 le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes));
    1389           0 :                 fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes);
    1390           0 :                 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_CP_PFP,
    1391             :                                                 fw_data, fw_size, fw_autoload_mask);
    1392             : 
    1393             :                 /* me ucode */
    1394           0 :                 cp_hdr = (const struct gfx_firmware_header_v1_0 *)
    1395           0 :                         adev->gfx.me_fw->data;
    1396           0 :                 fw_data = (const __le32 *)(adev->gfx.me_fw->data +
    1397           0 :                                 le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes));
    1398           0 :                 fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes);
    1399           0 :                 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_CP_ME,
    1400             :                                                 fw_data, fw_size, fw_autoload_mask);
    1401             : 
    1402             :                 /* mec ucode */
    1403           0 :                 cp_hdr = (const struct gfx_firmware_header_v1_0 *)
    1404           0 :                         adev->gfx.mec_fw->data;
    1405           0 :                 fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
    1406           0 :                                 le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes));
    1407           0 :                 fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes) -
    1408           0 :                         cp_hdr->jt_size * 4;
    1409           0 :                 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_CP_MEC,
    1410             :                                                 fw_data, fw_size, fw_autoload_mask);
    1411             :         }
    1412             : 
    1413             :         /* rlc ucode */
    1414           0 :         rlc_hdr = (const struct rlc_firmware_header_v2_0 *)
    1415           0 :                 adev->gfx.rlc_fw->data;
    1416           0 :         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
    1417           0 :                         le32_to_cpu(rlc_hdr->header.ucode_array_offset_bytes));
    1418           0 :         fw_size = le32_to_cpu(rlc_hdr->header.ucode_size_bytes);
    1419           0 :         gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLC_G_UCODE,
    1420             :                                         fw_data, fw_size, fw_autoload_mask);
    1421             : 
    1422           0 :         version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
    1423           0 :         version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
    1424           0 :         if (version_major == 2) {
    1425           0 :                 if (version_minor >= 2) {
    1426           0 :                         rlcv22_hdr = (const struct rlc_firmware_header_v2_2 *)adev->gfx.rlc_fw->data;
    1427             : 
    1428           0 :                         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
    1429           0 :                                         le32_to_cpu(rlcv22_hdr->rlc_iram_ucode_offset_bytes));
    1430           0 :                         fw_size = le32_to_cpu(rlcv22_hdr->rlc_iram_ucode_size_bytes);
    1431           0 :                         gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLX6_UCODE,
    1432             :                                         fw_data, fw_size, fw_autoload_mask);
    1433             : 
    1434           0 :                         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
    1435           0 :                                         le32_to_cpu(rlcv22_hdr->rlc_dram_ucode_offset_bytes));
    1436           0 :                         fw_size = le32_to_cpu(rlcv22_hdr->rlc_dram_ucode_size_bytes);
    1437           0 :                         gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLX6_DRAM_BOOT,
    1438             :                                         fw_data, fw_size, fw_autoload_mask);
    1439             :                 }
    1440             :         }
    1441           0 : }
    1442             : 
    1443           0 : static void gfx_v11_0_rlc_backdoor_autoload_copy_sdma_ucode(struct amdgpu_device *adev,
    1444             :                                                         uint32_t *fw_autoload_mask)
    1445             : {
    1446             :         const __le32 *fw_data;
    1447             :         uint32_t fw_size;
    1448             :         const struct sdma_firmware_header_v2_0 *sdma_hdr;
    1449             : 
    1450           0 :         sdma_hdr = (const struct sdma_firmware_header_v2_0 *)
    1451           0 :                 adev->sdma.instance[0].fw->data;
    1452           0 :         fw_data = (const __le32 *) (adev->sdma.instance[0].fw->data +
    1453           0 :                         le32_to_cpu(sdma_hdr->header.ucode_array_offset_bytes));
    1454           0 :         fw_size = le32_to_cpu(sdma_hdr->ctx_ucode_size_bytes);
    1455             : 
    1456           0 :         gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev,
    1457             :                         SOC21_FIRMWARE_ID_SDMA_UCODE_TH0, fw_data, fw_size, fw_autoload_mask);
    1458             : 
    1459           0 :         fw_data = (const __le32 *) (adev->sdma.instance[0].fw->data +
    1460           0 :                         le32_to_cpu(sdma_hdr->ctl_ucode_offset));
    1461           0 :         fw_size = le32_to_cpu(sdma_hdr->ctl_ucode_size_bytes);
    1462             : 
    1463           0 :         gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev,
    1464             :                         SOC21_FIRMWARE_ID_SDMA_UCODE_TH1, fw_data, fw_size, fw_autoload_mask);
    1465           0 : }
    1466             : 
    1467           0 : static void gfx_v11_0_rlc_backdoor_autoload_copy_mes_ucode(struct amdgpu_device *adev,
    1468             :                                                         uint32_t *fw_autoload_mask)
    1469             : {
    1470             :         const __le32 *fw_data;
    1471             :         unsigned fw_size;
    1472             :         const struct mes_firmware_header_v1_0 *mes_hdr;
    1473             :         int pipe, ucode_id, data_id;
    1474             : 
    1475           0 :         for (pipe = 0; pipe < 2; pipe++) {
    1476           0 :                 if (pipe==0) {
    1477             :                         ucode_id = SOC21_FIRMWARE_ID_RS64_MES_P0;
    1478             :                         data_id  = SOC21_FIRMWARE_ID_RS64_MES_P0_STACK;
    1479             :                 } else {
    1480           0 :                         ucode_id = SOC21_FIRMWARE_ID_RS64_MES_P1;
    1481           0 :                         data_id  = SOC21_FIRMWARE_ID_RS64_MES_P1_STACK;
    1482             :                 }
    1483             : 
    1484           0 :                 mes_hdr = (const struct mes_firmware_header_v1_0 *)
    1485           0 :                         adev->mes.fw[pipe]->data;
    1486             : 
    1487           0 :                 fw_data = (const __le32 *)(adev->mes.fw[pipe]->data +
    1488           0 :                                 le32_to_cpu(mes_hdr->mes_ucode_offset_bytes));
    1489           0 :                 fw_size = le32_to_cpu(mes_hdr->mes_ucode_size_bytes);
    1490             : 
    1491           0 :                 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev,
    1492             :                                 ucode_id, fw_data, fw_size, fw_autoload_mask);
    1493             : 
    1494           0 :                 fw_data = (const __le32 *)(adev->mes.fw[pipe]->data +
    1495           0 :                                 le32_to_cpu(mes_hdr->mes_ucode_data_offset_bytes));
    1496           0 :                 fw_size = le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes);
    1497             : 
    1498           0 :                 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev,
    1499             :                                 data_id, fw_data, fw_size, fw_autoload_mask);
    1500             :         }
    1501           0 : }
    1502             : 
    1503           0 : static int gfx_v11_0_rlc_backdoor_autoload_enable(struct amdgpu_device *adev)
    1504             : {
    1505             :         uint32_t rlc_g_offset, rlc_g_size;
    1506             :         uint64_t gpu_addr;
    1507             :         uint32_t autoload_fw_id[2];
    1508             : 
    1509           0 :         memset(autoload_fw_id, 0, sizeof(uint32_t) * 2);
    1510             : 
    1511             :         /* RLC autoload sequence 2: copy ucode */
    1512           0 :         gfx_v11_0_rlc_backdoor_autoload_copy_sdma_ucode(adev, autoload_fw_id);
    1513           0 :         gfx_v11_0_rlc_backdoor_autoload_copy_gfx_ucode(adev, autoload_fw_id);
    1514           0 :         gfx_v11_0_rlc_backdoor_autoload_copy_mes_ucode(adev, autoload_fw_id);
    1515           0 :         gfx_v11_0_rlc_backdoor_autoload_copy_toc_ucode(adev, autoload_fw_id);
    1516             : 
    1517           0 :         rlc_g_offset = rlc_autoload_info[SOC21_FIRMWARE_ID_RLC_G_UCODE].offset;
    1518           0 :         rlc_g_size = rlc_autoload_info[SOC21_FIRMWARE_ID_RLC_G_UCODE].size;
    1519           0 :         gpu_addr = adev->gfx.rlc.rlc_autoload_gpu_addr + rlc_g_offset;
    1520             : 
    1521           0 :         WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_ADDR_HI, upper_32_bits(gpu_addr));
    1522           0 :         WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_ADDR_LO, lower_32_bits(gpu_addr));
    1523             : 
    1524           0 :         WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_SIZE, rlc_g_size);
    1525             : 
    1526             :         /* RLC autoload sequence 3: load IMU fw */
    1527           0 :         if (adev->gfx.imu.funcs->load_microcode)
    1528           0 :                 adev->gfx.imu.funcs->load_microcode(adev);
    1529             :         /* RLC autoload sequence 4 init IMU fw */
    1530           0 :         if (adev->gfx.imu.funcs->setup_imu)
    1531           0 :                 adev->gfx.imu.funcs->setup_imu(adev);
    1532           0 :         if (adev->gfx.imu.funcs->start_imu)
    1533           0 :                 adev->gfx.imu.funcs->start_imu(adev);
    1534             : 
    1535             :         /* RLC autoload sequence 5 disable gpa mode */
    1536           0 :         gfx_v11_0_disable_gpa_mode(adev);
    1537             : 
    1538           0 :         return 0;
    1539             : }
    1540             : 
/*
 * IP-block sw_init hook: derive the ME/MEC topology for this GC IP
 * revision, register CP interrupt sources, initialize microcode and
 * software objects (rings, KIQ, MQD backup, RLC autoload buffer), and
 * run early GPU configuration.  Returns 0 on success or a negative
 * errno from the first failing step.
 */
static int gfx_v11_0_sw_init(void *handle)
{
	int i, j, k, r, ring_id = 0;
	struct amdgpu_kiq *kiq;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	adev->gfxhub.funcs->init(adev);

	/* pipe/queue topology is fixed per GC IP revision */
	switch (adev->ip_versions[GC_HWIP][0]) {
	case IP_VERSION(11, 0, 0):
	case IP_VERSION(11, 0, 1):
	case IP_VERSION(11, 0, 2):
	case IP_VERSION(11, 0, 3):
		adev->gfx.me.num_me = 1;
		adev->gfx.me.num_pipe_per_me = 1;
		adev->gfx.me.num_queue_per_pipe = 1;
		adev->gfx.mec.num_mec = 2;
		adev->gfx.mec.num_pipe_per_mec = 4;
		adev->gfx.mec.num_queue_per_pipe = 4;
		break;
	default:
		adev->gfx.me.num_me = 1;
		adev->gfx.me.num_pipe_per_me = 1;
		adev->gfx.me.num_queue_per_pipe = 1;
		adev->gfx.mec.num_mec = 1;
		adev->gfx.mec.num_pipe_per_mec = 4;
		adev->gfx.mec.num_queue_per_pipe = 8;
		break;
	}

	/* EOP Event */
	r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
			      GFX_11_0_0__SRCID__CP_EOP_INTERRUPT,
			      &adev->gfx.eop_irq);
	if (r)
		return r;

	/* Privileged reg */
	r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
			      GFX_11_0_0__SRCID__CP_PRIV_REG_FAULT,
			      &adev->gfx.priv_reg_irq);
	if (r)
		return r;

	/* Privileged inst */
	r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
			      GFX_11_0_0__SRCID__CP_PRIV_INSTR_FAULT,
			      &adev->gfx.priv_inst_irq);
	if (r)
		return r;

	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;

	/* IMU microcode load failure is deliberately non-fatal here:
	 * only logged, init continues */
	if (adev->gfx.imu.funcs) {
		if (adev->gfx.imu.funcs->init_microcode) {
			r = adev->gfx.imu.funcs->init_microcode(adev);
			if (r)
				DRM_ERROR("Failed to load imu firmware!\n");
		}
	}

	r = gfx_v11_0_me_init(adev);
	if (r)
		return r;

	r = gfx_v11_0_rlc_init(adev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	r = gfx_v11_0_mec_init(adev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	/* set up the gfx ring */
	for (i = 0; i < adev->gfx.me.num_me; i++) {
		for (j = 0; j < adev->gfx.me.num_queue_per_pipe; j++) {
			for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) {
				if (!amdgpu_gfx_is_me_queue_enabled(adev, i, k, j))
					continue;

				r = gfx_v11_0_gfx_ring_init(adev, ring_id,
							    i, k, j);
				if (r)
					return r;
				ring_id++;
			}
		}
	}

	ring_id = 0;
	/* set up the compute queues - allocate horizontally across pipes */
	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
				if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k,
								     j))
					continue;

				r = gfx_v11_0_compute_ring_init(adev, ring_id,
								i, k, j);
				if (r)
					return r;

				ring_id++;
			}
		}
	}

	/* KIQ is only needed when MES does not own the kernel queues */
	if (!adev->enable_mes_kiq) {
		r = amdgpu_gfx_kiq_init(adev, GFX11_MEC_HPD_SIZE);
		if (r) {
			DRM_ERROR("Failed to init KIQ BOs!\n");
			return r;
		}

		kiq = &adev->gfx.kiq;
		r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
		if (r)
			return r;
	}

	r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v11_compute_mqd));
	if (r)
		return r;

	/* allocate visible FB for rlc auto-loading fw */
	if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
		/* TOC load failure is only logged; the autoload buffer is
		 * still allocated */
		r = gfx_v11_0_init_toc_microcode(adev);
		if (r)
			dev_err(adev->dev, "Failed to load toc firmware!\n");
		r = gfx_v11_0_rlc_autoload_buffer_init(adev);
		if (r)
			return r;
	}

	r = gfx_v11_0_gpu_early_init(adev);
	if (r)
		return r;

	return 0;
}
    1686             : 
    1687           0 : static void gfx_v11_0_pfp_fini(struct amdgpu_device *adev)
    1688             : {
    1689           0 :         amdgpu_bo_free_kernel(&adev->gfx.pfp.pfp_fw_obj,
    1690           0 :                               &adev->gfx.pfp.pfp_fw_gpu_addr,
    1691           0 :                               (void **)&adev->gfx.pfp.pfp_fw_ptr);
    1692             : 
    1693           0 :         amdgpu_bo_free_kernel(&adev->gfx.pfp.pfp_fw_data_obj,
    1694           0 :                               &adev->gfx.pfp.pfp_fw_data_gpu_addr,
    1695           0 :                               (void **)&adev->gfx.pfp.pfp_fw_data_ptr);
    1696           0 : }
    1697             : 
    1698           0 : static void gfx_v11_0_me_fini(struct amdgpu_device *adev)
    1699             : {
    1700           0 :         amdgpu_bo_free_kernel(&adev->gfx.me.me_fw_obj,
    1701           0 :                               &adev->gfx.me.me_fw_gpu_addr,
    1702           0 :                               (void **)&adev->gfx.me.me_fw_ptr);
    1703             : 
    1704           0 :         amdgpu_bo_free_kernel(&adev->gfx.me.me_fw_data_obj,
    1705           0 :                                &adev->gfx.me.me_fw_data_gpu_addr,
    1706           0 :                                (void **)&adev->gfx.me.me_fw_data_ptr);
    1707           0 : }
    1708             : 
    1709             : static void gfx_v11_0_rlc_autoload_buffer_fini(struct amdgpu_device *adev)
    1710             : {
    1711           0 :         amdgpu_bo_free_kernel(&adev->gfx.rlc.rlc_autoload_bo,
    1712             :                         &adev->gfx.rlc.rlc_autoload_gpu_addr,
    1713             :                         (void **)&adev->gfx.rlc.rlc_autoload_ptr);
    1714             : }
    1715             : 
/*
 * IP-block sw_fini hook: tear down, in reverse of sw_init, every software
 * object sw_init created — rings, MQD backups, KIQ, firmware BOs, RLC/MEC
 * objects, the autoload buffer, and the loaded microcode blobs.
 * Always returns 0.
 */
static int gfx_v11_0_sw_fini(void *handle)
{
	int i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* rings first, before the objects they reference */
	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);

	amdgpu_gfx_mqd_sw_fini(adev);

	/* KIQ only exists when MES is not managing the kernel queues
	 * (mirrors the condition in sw_init) */
	if (!adev->enable_mes_kiq) {
		amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring);
		amdgpu_gfx_kiq_fini(adev);
	}

	gfx_v11_0_pfp_fini(adev);
	gfx_v11_0_me_fini(adev);
	gfx_v11_0_rlc_fini(adev);
	gfx_v11_0_mec_fini(adev);

	/* autoload buffer was only allocated for backdoor RLC loading */
	if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO)
		gfx_v11_0_rlc_autoload_buffer_fini(adev);

	gfx_v11_0_free_microcode(adev);

	return 0;
}
    1745             : 
    1746           0 : static void gfx_v11_0_select_se_sh(struct amdgpu_device *adev, u32 se_num,
    1747             :                                    u32 sh_num, u32 instance)
    1748             : {
    1749             :         u32 data;
    1750             : 
    1751           0 :         if (instance == 0xffffffff)
    1752             :                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX,
    1753             :                                      INSTANCE_BROADCAST_WRITES, 1);
    1754             :         else
    1755           0 :                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX,
    1756             :                                      instance);
    1757             : 
    1758           0 :         if (se_num == 0xffffffff)
    1759           0 :                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES,
    1760             :                                      1);
    1761             :         else
    1762           0 :                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
    1763             : 
    1764           0 :         if (sh_num == 0xffffffff)
    1765           0 :                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SA_BROADCAST_WRITES,
    1766             :                                      1);
    1767             :         else
    1768           0 :                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SA_INDEX, sh_num);
    1769             : 
    1770           0 :         WREG32_SOC15(GC, 0, regGRBM_GFX_INDEX, data);
    1771           0 : }
    1772             : 
    1773           0 : static u32 gfx_v11_0_get_rb_active_bitmap(struct amdgpu_device *adev)
    1774             : {
    1775             :         u32 data, mask;
    1776             : 
    1777           0 :         data = RREG32_SOC15(GC, 0, regCC_RB_BACKEND_DISABLE);
    1778           0 :         data |= RREG32_SOC15(GC, 0, regGC_USER_RB_BACKEND_DISABLE);
    1779             : 
    1780           0 :         data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
    1781           0 :         data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
    1782             : 
    1783           0 :         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
    1784           0 :                                          adev->gfx.config.max_sh_per_se);
    1785             : 
    1786           0 :         return (~data) & mask;
    1787             : }
    1788             : 
    1789           0 : static void gfx_v11_0_setup_rb(struct amdgpu_device *adev)
    1790             : {
    1791             :         int i, j;
    1792             :         u32 data;
    1793           0 :         u32 active_rbs = 0;
    1794           0 :         u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
    1795           0 :                                         adev->gfx.config.max_sh_per_se;
    1796             : 
    1797           0 :         mutex_lock(&adev->grbm_idx_mutex);
    1798           0 :         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
    1799           0 :                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
    1800           0 :                         gfx_v11_0_select_se_sh(adev, i, j, 0xffffffff);
    1801           0 :                         data = gfx_v11_0_get_rb_active_bitmap(adev);
    1802           0 :                         active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
    1803             :                                                rb_bitmap_width_per_sh);
    1804             :                 }
    1805             :         }
    1806           0 :         gfx_v11_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
    1807           0 :         mutex_unlock(&adev->grbm_idx_mutex);
    1808             : 
    1809           0 :         adev->gfx.config.backend_enable_mask = active_rbs;
    1810           0 :         adev->gfx.config.num_rbs = hweight32(active_rbs);
    1811           0 : }
    1812             : 
    1813             : #define DEFAULT_SH_MEM_BASES    (0x6000)
    1814             : #define LDS_APP_BASE           0x1
    1815             : #define SCRATCH_APP_BASE       0x2
    1816             : 
    1817           0 : static void gfx_v11_0_init_compute_vmid(struct amdgpu_device *adev)
    1818             : {
    1819             :         int i;
    1820             :         uint32_t sh_mem_bases;
    1821             :         uint32_t data;
    1822             : 
    1823             :         /*
    1824             :          * Configure apertures:
    1825             :          * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
    1826             :          * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
    1827             :          * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
    1828             :          */
    1829           0 :         sh_mem_bases = (LDS_APP_BASE << SH_MEM_BASES__SHARED_BASE__SHIFT) |
    1830             :                         SCRATCH_APP_BASE;
    1831             : 
    1832           0 :         mutex_lock(&adev->srbm_mutex);
    1833           0 :         for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
    1834           0 :                 soc21_grbm_select(adev, 0, 0, 0, i);
    1835             :                 /* CP and shaders */
    1836           0 :                 WREG32_SOC15(GC, 0, regSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG);
    1837           0 :                 WREG32_SOC15(GC, 0, regSH_MEM_BASES, sh_mem_bases);
    1838             : 
    1839             :                 /* Enable trap for each kfd vmid. */
    1840           0 :                 data = RREG32(SOC15_REG_OFFSET(GC, 0, regSPI_GDBG_PER_VMID_CNTL));
    1841           0 :                 data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1);
    1842             :         }
    1843           0 :         soc21_grbm_select(adev, 0, 0, 0, 0);
    1844           0 :         mutex_unlock(&adev->srbm_mutex);
    1845             : 
    1846             :         /* Initialize all compute VMIDs to have no GDS, GWS, or OA
    1847             :            acccess. These should be enabled by FW for target VMIDs. */
    1848           0 :         for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
    1849           0 :                 WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_BASE, 2 * i, 0);
    1850           0 :                 WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_SIZE, 2 * i, 0);
    1851           0 :                 WREG32_SOC15_OFFSET(GC, 0, regGDS_GWS_VMID0, i, 0);
    1852           0 :                 WREG32_SOC15_OFFSET(GC, 0, regGDS_OA_VMID0, i, 0);
    1853             :         }
    1854           0 : }
    1855             : 
    1856           0 : static void gfx_v11_0_init_gds_vmid(struct amdgpu_device *adev)
    1857             : {
    1858             :         int vmid;
    1859             : 
    1860             :         /*
    1861             :          * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
    1862             :          * access. Compute VMIDs should be enabled by FW for target VMIDs,
    1863             :          * the driver can enable them for graphics. VMID0 should maintain
    1864             :          * access so that HWS firmware can save/restore entries.
    1865             :          */
    1866           0 :         for (vmid = 1; vmid < 16; vmid++) {
    1867           0 :                 WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_BASE, 2 * vmid, 0);
    1868           0 :                 WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_SIZE, 2 * vmid, 0);
    1869           0 :                 WREG32_SOC15_OFFSET(GC, 0, regGDS_GWS_VMID0, vmid, 0);
    1870           0 :                 WREG32_SOC15_OFFSET(GC, 0, regGDS_OA_VMID0, vmid, 0);
    1871             :         }
    1872           0 : }
    1873             : 
/* Intentionally empty stub: TCP harvesting is not implemented for gfx v11. */
static void gfx_v11_0_tcp_harvest(struct amdgpu_device *adev)
{
	/* TODO: harvest feature to be added later. */
}
    1878             : 
    1879           0 : static void gfx_v11_0_get_tcc_info(struct amdgpu_device *adev)
    1880             : {
    1881             :         /* TCCs are global (not instanced). */
    1882           0 :         uint32_t tcc_disable = RREG32_SOC15(GC, 0, regCGTS_TCC_DISABLE) |
    1883           0 :                                RREG32_SOC15(GC, 0, regCGTS_USER_TCC_DISABLE);
    1884             : 
    1885           0 :         adev->gfx.config.tcc_disabled_mask =
    1886           0 :                 REG_GET_FIELD(tcc_disable, CGTS_TCC_DISABLE, TCC_DISABLE) |
    1887           0 :                 (REG_GET_FIELD(tcc_disable, CGTS_TCC_DISABLE, HI_TCC_DISABLE) << 16);
    1888           0 : }
    1889             : 
/*
 * One-time GRBM/shader-memory constant setup run during hw init:
 * GRBM read timeout, RB/CU/TCC discovery, and per-VMID SH_MEM aperture
 * programming, followed by compute-VMID and GDS-VMID initialization.
 */
static void gfx_v11_0_constants_init(struct amdgpu_device *adev)
{
	u32 tmp;
	int i;

	WREG32_FIELD15_PREREG(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);

	/* discover active RBs/CUs/TCCs before any dependent config */
	gfx_v11_0_setup_rb(adev);
	gfx_v11_0_get_cu_info(adev, &adev->gfx.cu_info);
	gfx_v11_0_get_tcc_info(adev);
	adev->gfx.config.pa_sc_tile_steering_override = 0;

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) {
		soc21_grbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32_SOC15(GC, 0, regSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG);
		/* VMID 0 keeps the default (zero) SH_MEM_BASES */
		if (i != 0) {
			tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
				(adev->gmc.private_aperture_start >> 48));
			tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
				(adev->gmc.shared_aperture_start >> 48));
			WREG32_SOC15(GC, 0, regSH_MEM_BASES, tmp);
		}
	}
	/* restore VMID 0 selection before releasing the srbm mutex */
	soc21_grbm_select(adev, 0, 0, 0, 0);

	mutex_unlock(&adev->srbm_mutex);

	gfx_v11_0_init_compute_vmid(adev);
	gfx_v11_0_init_gds_vmid(adev);
}
    1924             : 
    1925           0 : static void gfx_v11_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
    1926             :                                                bool enable)
    1927             : {
    1928             :         u32 tmp;
    1929             : 
    1930           0 :         if (amdgpu_sriov_vf(adev))
    1931             :                 return;
    1932             : 
    1933           0 :         tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL_RING0);
    1934             : 
    1935           0 :         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE,
    1936             :                             enable ? 1 : 0);
    1937           0 :         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE,
    1938             :                             enable ? 1 : 0);
    1939           0 :         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE,
    1940             :                             enable ? 1 : 0);
    1941           0 :         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE,
    1942             :                             enable ? 1 : 0);
    1943             : 
    1944           0 :         WREG32_SOC15(GC, 0, regCP_INT_CNTL_RING0, tmp);
    1945             : }
    1946             : 
/*
 * Point the RLC at the clear-state buffer (CSB): regenerate its contents,
 * then program its GPU address (hi word, then lo word masked to dword
 * alignment) and its length.  Always returns 0.
 */
static int gfx_v11_0_init_csb(struct amdgpu_device *adev)
{
	/* (re)build the clear-state contents before handing it to the RLC */
	adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);

	WREG32_SOC15(GC, 0, regRLC_CSIB_ADDR_HI,
			adev->gfx.rlc.clear_state_gpu_addr >> 32);
	WREG32_SOC15(GC, 0, regRLC_CSIB_ADDR_LO,
			adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
	WREG32_SOC15(GC, 0, regRLC_CSIB_LENGTH, adev->gfx.rlc.clear_state_size);

	return 0;
}
    1959             : 
    1960           0 : static void gfx_v11_0_rlc_stop(struct amdgpu_device *adev)
    1961             : {
    1962           0 :         u32 tmp = RREG32_SOC15(GC, 0, regRLC_CNTL);
    1963             : 
    1964           0 :         tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0);
    1965           0 :         WREG32_SOC15(GC, 0, regRLC_CNTL, tmp);
    1966           0 : }
    1967             : 
/* Pulse the RLC soft-reset bit in GRBM_SOFT_RESET: assert, wait 50us,
 * deassert, wait another 50us for the block to come out of reset. */
static void gfx_v11_0_rlc_reset(struct amdgpu_device *adev)
{
	WREG32_FIELD15_PREREG(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
	udelay(50);
	WREG32_FIELD15_PREREG(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
	udelay(50);
}
    1975             : 
    1976           0 : static void gfx_v11_0_rlc_smu_handshake_cntl(struct amdgpu_device *adev,
    1977             :                                              bool enable)
    1978             : {
    1979             :         uint32_t rlc_pg_cntl;
    1980             : 
    1981           0 :         rlc_pg_cntl = RREG32_SOC15(GC, 0, regRLC_PG_CNTL);
    1982             : 
    1983           0 :         if (!enable) {
    1984             :                 /* RLC_PG_CNTL[23] = 0 (default)
    1985             :                  * RLC will wait for handshake acks with SMU
    1986             :                  * GFXOFF will be enabled
    1987             :                  * RLC_PG_CNTL[23] = 1
    1988             :                  * RLC will not issue any message to SMU
    1989             :                  * hence no handshake between SMU & RLC
    1990             :                  * GFXOFF will be disabled
    1991             :                  */
    1992           0 :                 rlc_pg_cntl |= RLC_PG_CNTL__SMU_HANDSHAKE_DISABLE_MASK;
    1993             :         } else
    1994           0 :                 rlc_pg_cntl &= ~RLC_PG_CNTL__SMU_HANDSHAKE_DISABLE_MASK;
    1995           0 :         WREG32_SOC15(GC, 0, regRLC_PG_CNTL, rlc_pg_cntl);
    1996           0 : }
    1997             : 
/*
 * Start the RLC F32 core.  When GFXOFF is masked out of the pp feature
 * mask, first disable the RLC<->SMU handshake so the RLC never messages
 * the SMU.
 */
static void gfx_v11_0_rlc_start(struct amdgpu_device *adev)
{
	/* TODO: enable rlc & smu handshake until smu
	 * and gfxoff feature works as expected */
	if (!(amdgpu_pp_feature_mask & PP_GFXOFF_MASK))
		gfx_v11_0_rlc_smu_handshake_cntl(adev, false);

	/* set the F32 enable bit and give the core 50us to come up */
	WREG32_FIELD15_PREREG(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
	udelay(50);
}
    2008             : 
    2009           0 : static void gfx_v11_0_rlc_enable_srm(struct amdgpu_device *adev)
    2010             : {
    2011             :         uint32_t tmp;
    2012             : 
    2013             :         /* enable Save Restore Machine */
    2014           0 :         tmp = RREG32(SOC15_REG_OFFSET(GC, 0, regRLC_SRM_CNTL));
    2015           0 :         tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
    2016           0 :         tmp |= RLC_SRM_CNTL__SRM_ENABLE_MASK;
    2017           0 :         WREG32(SOC15_REG_OFFSET(GC, 0, regRLC_SRM_CNTL), tmp);
    2018           0 : }
    2019             : 
/*
 * Direct-load the RLCG ucode: stream it dword by dword into the RLC GPM
 * ucode memory through the ADDR/DATA register pair, then leave the
 * firmware version in the ADDR register (same convention as the other
 * direct-load helpers below).
 */
static void gfx_v11_0_load_rlcg_microcode(struct amdgpu_device *adev)
{
	const struct rlc_firmware_header_v2_0 *hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	/* payload location and size come from the v2.0 firmware header */
	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
			   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;

	WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_ADDR,
		     RLCG_UCODE_LOADING_START_ADDRESS);

	/* auto-incrementing DATA writes; address was set once above */
	for (i = 0; i < fw_size; i++)
		WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_DATA,
			     le32_to_cpup(fw_data++));

	WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
}
    2040             : 
/*
 * Direct-load the RLC LX6 IRAM and DRAM images (present from firmware
 * header v2.2), then release the LX6 core from reset with PDEBUG enabled.
 */
static void gfx_v11_0_load_rlc_iram_dram_microcode(struct amdgpu_device *adev)
{
	const struct rlc_firmware_header_v2_2 *hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;
	u32 tmp;

	hdr = (const struct rlc_firmware_header_v2_2 *)adev->gfx.rlc_fw->data;

	/* IRAM image first */
	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
			le32_to_cpu(hdr->rlc_iram_ucode_offset_bytes));
	fw_size = le32_to_cpu(hdr->rlc_iram_ucode_size_bytes) / 4;

	WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, 0);

	for (i = 0; i < fw_size; i++) {
		/* throttle on emulation: sleep after every 100 dwords */
		if ((amdgpu_emu_mode == 1) && (i % 100 == 99))
			msleep(1);
		WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_DATA,
				le32_to_cpup(fw_data++));
	}

	WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, adev->gfx.rlc_fw_version);

	/* then the DRAM image */
	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
			le32_to_cpu(hdr->rlc_dram_ucode_offset_bytes));
	fw_size = le32_to_cpu(hdr->rlc_dram_ucode_size_bytes) / 4;

	WREG32_SOC15(GC, 0, regRLC_LX6_DRAM_ADDR, 0);
	for (i = 0; i < fw_size; i++) {
		if ((amdgpu_emu_mode == 1) && (i % 100 == 99))
			msleep(1);
		WREG32_SOC15(GC, 0, regRLC_LX6_DRAM_DATA,
				le32_to_cpup(fw_data++));
	}

	/* NOTE(review): this writes regRLC_LX6_IRAM_ADDR a second time after
	 * the DRAM upload; verify against HW docs whether regRLC_LX6_DRAM_ADDR
	 * was intended here. */
	WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, adev->gfx.rlc_fw_version);

	/* enable PDEBUG and release the LX6 core from reset (BRESET = 0) */
	tmp = RREG32_SOC15(GC, 0, regRLC_LX6_CNTL);
	tmp = REG_SET_FIELD(tmp, RLC_LX6_CNTL, PDEBUG_ENABLE, 1);
	tmp = REG_SET_FIELD(tmp, RLC_LX6_CNTL, BRESET, 0);
	WREG32_SOC15(GC, 0, regRLC_LX6_CNTL, tmp);
}
    2084             : 
/*
 * Direct-load the RLCP (pace) and RLCV (GPU IOV) images (present from
 * firmware header v2.3), enabling GPM thread 1 after the RLCP upload and
 * the GPU IOV F32 core after the RLCV upload.
 */
static void gfx_v11_0_load_rlcp_rlcv_microcode(struct amdgpu_device *adev)
{
	const struct rlc_firmware_header_v2_3 *hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;
	u32 tmp;

	hdr = (const struct rlc_firmware_header_v2_3 *)adev->gfx.rlc_fw->data;

	/* RLCP (pace) image */
	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
			le32_to_cpu(hdr->rlcp_ucode_offset_bytes));
	fw_size = le32_to_cpu(hdr->rlcp_ucode_size_bytes) / 4;

	WREG32_SOC15(GC, 0, regRLC_PACE_UCODE_ADDR, 0);

	for (i = 0; i < fw_size; i++) {
		/* throttle on emulation: sleep after every 100 dwords */
		if ((amdgpu_emu_mode == 1) && (i % 100 == 99))
			msleep(1);
		WREG32_SOC15(GC, 0, regRLC_PACE_UCODE_DATA,
				le32_to_cpup(fw_data++));
	}

	WREG32_SOC15(GC, 0, regRLC_PACE_UCODE_ADDR, adev->gfx.rlc_fw_version);

	tmp = RREG32_SOC15(GC, 0, regRLC_GPM_THREAD_ENABLE);
	tmp = REG_SET_FIELD(tmp, RLC_GPM_THREAD_ENABLE, THREAD1_ENABLE, 1);
	WREG32_SOC15(GC, 0, regRLC_GPM_THREAD_ENABLE, tmp);

	/* RLCV (GPU IOV) image */
	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
			le32_to_cpu(hdr->rlcv_ucode_offset_bytes));
	fw_size = le32_to_cpu(hdr->rlcv_ucode_size_bytes) / 4;

	WREG32_SOC15(GC, 0, regRLC_GPU_IOV_UCODE_ADDR, 0);

	for (i = 0; i < fw_size; i++) {
		if ((amdgpu_emu_mode == 1) && (i % 100 == 99))
			msleep(1);
		WREG32_SOC15(GC, 0, regRLC_GPU_IOV_UCODE_DATA,
				le32_to_cpup(fw_data++));
	}

	WREG32_SOC15(GC, 0, regRLC_GPU_IOV_UCODE_ADDR, adev->gfx.rlc_fw_version);

	tmp = RREG32_SOC15(GC, 0, regRLC_GPU_IOV_F32_CNTL);
	tmp = REG_SET_FIELD(tmp, RLC_GPU_IOV_F32_CNTL, ENABLE, 1);
	WREG32_SOC15(GC, 0, regRLC_GPU_IOV_F32_CNTL, tmp);
}
    2132             : 
    2133           0 : static int gfx_v11_0_rlc_load_microcode(struct amdgpu_device *adev)
    2134             : {
    2135             :         const struct rlc_firmware_header_v2_0 *hdr;
    2136             :         uint16_t version_major;
    2137             :         uint16_t version_minor;
    2138             : 
    2139           0 :         if (!adev->gfx.rlc_fw)
    2140             :                 return -EINVAL;
    2141             : 
    2142           0 :         hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
    2143           0 :         amdgpu_ucode_print_rlc_hdr(&hdr->header);
    2144             : 
    2145           0 :         version_major = le16_to_cpu(hdr->header.header_version_major);
    2146           0 :         version_minor = le16_to_cpu(hdr->header.header_version_minor);
    2147             : 
    2148           0 :         if (version_major == 2) {
    2149           0 :                 gfx_v11_0_load_rlcg_microcode(adev);
    2150           0 :                 if (amdgpu_dpm == 1) {
    2151           0 :                         if (version_minor >= 2)
    2152           0 :                                 gfx_v11_0_load_rlc_iram_dram_microcode(adev);
    2153           0 :                         if (version_minor == 3)
    2154           0 :                                 gfx_v11_0_load_rlcp_rlcv_microcode(adev);
    2155             :                 }
    2156             :                 
    2157             :                 return 0;
    2158             :         }
    2159             : 
    2160             :         return -EINVAL;
    2161             : }
    2162             : 
/*
 * Bring up the RLC.  With PSP front-door loading the firmware is already
 * in place, so only the CSB (and, on bare metal, the SRM) is programmed.
 * Otherwise: stop the RLC, disable CG/PG, direct-load the microcode when
 * using AMDGPU_FW_LOAD_DIRECT, program the CSB and restart the RLC.
 *
 * Returns 0 on success or the error from the direct microcode load.
 */
static int gfx_v11_0_rlc_resume(struct amdgpu_device *adev)
{
	int r;

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		gfx_v11_0_init_csb(adev);

		if (!amdgpu_sriov_vf(adev)) /* enable RLC SRM */
			gfx_v11_0_rlc_enable_srm(adev);
	} else {
		/* under SR-IOV only the CSB is programmed by the guest */
		if (amdgpu_sriov_vf(adev)) {
			gfx_v11_0_init_csb(adev);
			return 0;
		}

		adev->gfx.rlc.funcs->stop(adev);

		/* disable CG */
		WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, 0);

		/* disable PG */
		WREG32_SOC15(GC, 0, regRLC_PG_CNTL, 0);

		if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
			/* legacy rlc firmware loading */
			r = gfx_v11_0_rlc_load_microcode(adev);
			if (r)
				return r;
		}

		gfx_v11_0_init_csb(adev);

		adev->gfx.rlc.funcs->start(adev);
	}
	return 0;
}
    2199             : 
/*
 * Configure the CP ME L1 instruction cache: trigger an invalidation,
 * poll for its completion (up to ~50ms), program the cache control
 * bits, then set the (4K-aligned) ucode base address.
 *
 * Returns 0 on success, -EINVAL if the invalidation times out.
 */
static int gfx_v11_0_config_me_cache(struct amdgpu_device *adev, uint64_t addr)
{
	uint32_t usec_timeout = 50000;	/* wait for 50ms */
	uint32_t tmp;
	int i;

	/* Trigger an invalidation of the L1 instruction caches */
	tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, INVALIDATE_CACHE, 1);
	WREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL, tmp);

	/* Wait for invalidation complete */
	for (i = 0; i < usec_timeout; i++) {
		tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
		if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
					INVALIDATE_CACHE_COMPLETE))
			break;
		udelay(1);
	}

	if (i >= usec_timeout) {
		dev_err(adev->dev, "failed to invalidate instruction cache\n");
		return -EINVAL;
	}

	/* flush HDP in emulation mode before programming the base */
	if (amdgpu_emu_mode == 1)
		adev->hdp.funcs->flush_hdp(adev, NULL);

	tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0);
	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0);
	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0);
	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, ADDRESS_CLAMP, 1);
	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL, tmp);

	/* Program me ucode address into instruction cache address register */
	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO,
			lower_32_bits(addr) & 0xFFFFF000);
	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_HI,
			upper_32_bits(addr));

	return 0;
}
    2243             : 
/*
 * Configure the CP PFP L1 instruction cache: trigger an invalidation,
 * poll for its completion (up to ~50ms), program the cache control
 * bits, then set the (4K-aligned) ucode base address.
 *
 * Returns 0 on success, -EINVAL if the invalidation times out.
 */
static int gfx_v11_0_config_pfp_cache(struct amdgpu_device *adev, uint64_t addr)
{
	uint32_t usec_timeout = 50000;	/* wait for 50ms */
	uint32_t tmp;
	int i;

	/* Trigger an invalidation of the L1 instruction caches */
	tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, INVALIDATE_CACHE, 1);
	WREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL, tmp);

	/* Wait for invalidation complete */
	for (i = 0; i < usec_timeout; i++) {
		tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
		if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
					INVALIDATE_CACHE_COMPLETE))
			break;
		udelay(1);
	}

	if (i >= usec_timeout) {
		dev_err(adev->dev, "failed to invalidate instruction cache\n");
		return -EINVAL;
	}

	/* flush HDP in emulation mode before programming the base */
	if (amdgpu_emu_mode == 1)
		adev->hdp.funcs->flush_hdp(adev, NULL);

	tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0);
	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 0);
	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0);
	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, ADDRESS_CLAMP, 1);
	WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL, tmp);

	/* Program pfp ucode address into instruction cache address register */
	WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO,
			lower_32_bits(addr) & 0xFFFFF000);
	WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_HI,
			upper_32_bits(addr));

	return 0;
}
    2287             : 
/*
 * Configure the CP MEC (CPC) L1 instruction cache: trigger an
 * invalidation, poll for its completion (up to ~50ms), program the cache
 * control bits, then set the (4K-aligned) mec1 ucode base address.
 *
 * Returns 0 on success, -EINVAL if the invalidation times out.
 */
static int gfx_v11_0_config_mec_cache(struct amdgpu_device *adev, uint64_t addr)
{
	uint32_t usec_timeout = 50000;	/* wait for 50ms */
	uint32_t tmp;
	int i;

	/* Trigger an invalidation of the L1 instruction caches */
	tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1);

	WREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL, tmp);

	/* Wait for invalidation complete */
	for (i = 0; i < usec_timeout; i++) {
		tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
		if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL,
					INVALIDATE_CACHE_COMPLETE))
			break;
		udelay(1);
	}

	if (i >= usec_timeout) {
		dev_err(adev->dev, "failed to invalidate instruction cache\n");
		return -EINVAL;
	}

	/* flush HDP in emulation mode before programming the base */
	if (amdgpu_emu_mode == 1)
		adev->hdp.funcs->flush_hdp(adev, NULL);

	tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0);
	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, ADDRESS_CLAMP, 1);
	WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL, tmp);

	/* Program mec1 ucode address into instruction cache address register */
	WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_LO,
			lower_32_bits(addr) & 0xFFFFF000);
	WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_HI,
			upper_32_bits(addr));

	return 0;
}
    2331             : 
/*
 * RS64-mode PFP setup: program the instruction-cache base (which itself
 * forces an L1 I$ invalidation), wait for invalidation, prime the cache,
 * set the ucode start PC and pulse the per-pipe reset for every PFP pipe,
 * program the per-pipe data-cache base, then invalidate the RS64 data
 * caches and wait for that to complete.
 *
 * @addr:  GPU address of the PFP instruction ucode
 * @addr2: GPU address of the PFP data ucode
 *
 * Returns 0 on success, -EINVAL on any invalidation/prime timeout.
 */
static int gfx_v11_0_config_pfp_cache_rs64(struct amdgpu_device *adev, uint64_t addr, uint64_t addr2)
{
	uint32_t usec_timeout = 50000;	/* wait for 50ms */
	uint32_t tmp;
	unsigned i, pipe_id;
	const struct gfx_firmware_header_v2_0 *pfp_hdr;

	pfp_hdr = (const struct gfx_firmware_header_v2_0 *)
		adev->gfx.pfp_fw->data;

	WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO,
		lower_32_bits(addr));
	WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_HI,
		upper_32_bits(addr));

	tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0);
	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 0);
	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0);
	WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL, tmp);

	/*
	 * Programming any of the CP_PFP_IC_BASE registers
	 * forces invalidation of the ME L1 I$. Wait for the
	 * invalidation complete
	 */
	for (i = 0; i < usec_timeout; i++) {
		tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
		if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
			INVALIDATE_CACHE_COMPLETE))
			break;
		udelay(1);
	}

	if (i >= usec_timeout) {
		dev_err(adev->dev, "failed to invalidate instruction cache\n");
		return -EINVAL;
	}

	/* Prime the L1 instruction caches */
	tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, PRIME_ICACHE, 1);
	WREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL, tmp);
	/* Wait for the cache to report primed */
	for (i = 0; i < usec_timeout; i++) {
		tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
		if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
			ICACHE_PRIMED))
			break;
		udelay(1);
	}

	if (i >= usec_timeout) {
		dev_err(adev->dev, "failed to prime instruction cache\n");
		return -EINVAL;
	}

	mutex_lock(&adev->srbm_mutex);
	for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) {
		/* select this PFP pipe for the per-pipe registers below */
		soc21_grbm_select(adev, 0, pipe_id, 0, 0);
		/* start PC packed as {addr_hi[1:0], addr_lo[31:2]} in the low
		 * register, remaining high bits in the HI register */
		WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START,
			(pfp_hdr->ucode_start_addr_hi << 30) |
			(pfp_hdr->ucode_start_addr_lo >> 2));
		WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START_HI,
			pfp_hdr->ucode_start_addr_hi >> 2);

		/*
		 * Program CP_ME_CNTL to reset given PIPE to take
		 * effect of CP_PFP_PRGRM_CNTR_START.
		 */
		tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
		if (pipe_id == 0)
			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
					PFP_PIPE0_RESET, 1);
		else
			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
					PFP_PIPE1_RESET, 1);
		WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);

		/* Clear pfp pipe0 reset bit. */
		if (pipe_id == 0)
			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
					PFP_PIPE0_RESET, 0);
		else
			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
					PFP_PIPE1_RESET, 0);
		WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);

		/* per-pipe RS64 data-cache base (ucode data image) */
		WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_LO,
			lower_32_bits(addr2));
		WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_HI,
			upper_32_bits(addr2));
	}
	/* restore default GRBM selection */
	soc21_grbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0);
	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0);
	WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp);

	/* Invalidate the data caches */
	tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
	WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp);

	for (i = 0; i < usec_timeout; i++) {
		tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
		if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL,
			INVALIDATE_DCACHE_COMPLETE))
			break;
		udelay(1);
	}

	if (i >= usec_timeout) {
		dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
		return -EINVAL;
	}

	return 0;
}
    2453             : 
/*
 * gfx_v11_0_config_me_cache_rs64 - configure the RS64 ME caches for
 * RLC backdoor autoload.
 * @adev:  amdgpu device handle
 * @addr:  GPU address of the RS64 ME ucode image (instruction cache base)
 * @addr2: GPU address of the ME pipe0 stack/data area (RS64 data cache base 1)
 *
 * Sequence: program the ME L1 I$ base (which implicitly invalidates it),
 * wait for the invalidation, prime the I$, then for every ME pipe set the
 * program-counter start address and pulse that pipe's reset bit so the new
 * start address is latched.  Finally program the data cache base registers
 * and invalidate the RS64 data cache.
 *
 * Returns 0 on success, -EINVAL if a cache invalidate/prime times out.
 */
static int gfx_v11_0_config_me_cache_rs64(struct amdgpu_device *adev, uint64_t addr, uint64_t addr2)
{
	uint32_t usec_timeout = 50000;  /* wait for 50ms */
	uint32_t tmp;
	unsigned i, pipe_id;
	const struct gfx_firmware_header_v2_0 *me_hdr;

	me_hdr = (const struct gfx_firmware_header_v2_0 *)
		adev->gfx.me_fw->data;

	/* Instruction cache base: split the 64-bit ucode VA into LO/HI. */
	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO,
		lower_32_bits(addr));
	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_HI,
		upper_32_bits(addr));

	tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0);
	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0);
	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0);
	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL, tmp);

	/*
	 * Programming any of the CP_ME_IC_BASE registers
	 * forces invalidation of the ME L1 I$. Wait for the
	 * invalidation complete
	 */
	for (i = 0; i < usec_timeout; i++) {
		tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
		if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
			INVALIDATE_CACHE_COMPLETE))
			break;
		udelay(1);
	}

	if (i >= usec_timeout) {
		dev_err(adev->dev, "failed to invalidate instruction cache\n");
		return -EINVAL;
	}

	/* Prime the instruction caches */
	tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, PRIME_ICACHE, 1);
	WREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL, tmp);

	/* Waiting for instruction cache primed */
	for (i = 0; i < usec_timeout; i++) {
		tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
		if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
			ICACHE_PRIMED))
			break;
		udelay(1);
	}

	if (i >= usec_timeout) {
		dev_err(adev->dev, "failed to prime instruction cache\n");
		return -EINVAL;
	}

	/* Per-pipe setup must happen under srbm_mutex while that pipe is
	 * GRBM-selected. */
	mutex_lock(&adev->srbm_mutex);
	for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) {
		soc21_grbm_select(adev, 0, pipe_id, 0, 0);
		/* Start PC: low dword folds the bottom 2 bits of the hi
		 * word into its top bits (dword-aligned start address). */
		WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START,
			(me_hdr->ucode_start_addr_hi << 30) |
			(me_hdr->ucode_start_addr_lo >> 2) );
		WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START_HI,
			me_hdr->ucode_start_addr_hi>>2);

		/*
		 * Program CP_ME_CNTL to reset given PIPE to take
		 * effect of CP_ME_PRGRM_CNTR_START.
		 */
		tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
		if (pipe_id == 0)
			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
					ME_PIPE0_RESET, 1);
		else
			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
					ME_PIPE1_RESET, 1);
		WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);

		/* Clear me pipe reset bit. */
		if (pipe_id == 0)
			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
					ME_PIPE0_RESET, 0);
		else
			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
					ME_PIPE1_RESET, 0);
		WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);

		/* Data cache base 1 points at the ME stack/data area. */
		WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_LO,
			lower_32_bits(addr2));
		WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_HI,
			upper_32_bits(addr2));
	}
	soc21_grbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0);
	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0);
	WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp);

	/* Invalidate the data caches */
	tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
	WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp);

	for (i = 0; i < usec_timeout; i++) {
		tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
		if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL,
			INVALIDATE_DCACHE_COMPLETE))
			break;
		udelay(1);
	}

	if (i >= usec_timeout) {
		dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
		return -EINVAL;
	}

	return 0;
}
    2576             : 
    2577           0 : static int gfx_v11_0_config_mec_cache_rs64(struct amdgpu_device *adev, uint64_t addr, uint64_t addr2)
    2578             : {
    2579           0 :         uint32_t usec_timeout = 50000;  /* wait for 50ms */
    2580             :         uint32_t tmp;
    2581             :         unsigned i;
    2582             :         const struct gfx_firmware_header_v2_0 *mec_hdr;
    2583             : 
    2584           0 :         mec_hdr = (const struct gfx_firmware_header_v2_0 *)
    2585           0 :                 adev->gfx.mec_fw->data;
    2586             : 
    2587           0 :         tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL);
    2588           0 :         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
    2589           0 :         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0);
    2590           0 :         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
    2591           0 :         WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL, tmp);
    2592             : 
    2593           0 :         tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL);
    2594           0 :         tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, VMID, 0);
    2595           0 :         tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, CACHE_POLICY, 0);
    2596           0 :         WREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL, tmp);
    2597             : 
    2598           0 :         mutex_lock(&adev->srbm_mutex);
    2599           0 :         for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
    2600           0 :                 soc21_grbm_select(adev, 1, i, 0, 0);
    2601             : 
    2602           0 :                 WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_LO, addr2);
    2603           0 :                 WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_HI,
    2604             :                      upper_32_bits(addr2));
    2605             : 
    2606           0 :                 WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START,
    2607             :                                         mec_hdr->ucode_start_addr_lo >> 2 |
    2608             :                                         mec_hdr->ucode_start_addr_hi << 30);
    2609           0 :                 WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START_HI,
    2610             :                                         mec_hdr->ucode_start_addr_hi >> 2);
    2611             : 
    2612           0 :                 WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_LO, addr);
    2613           0 :                 WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_HI,
    2614             :                      upper_32_bits(addr));
    2615             :         }
    2616           0 :         mutex_unlock(&adev->srbm_mutex);
    2617           0 :         soc21_grbm_select(adev, 0, 0, 0, 0);
    2618             : 
    2619             :         /* Trigger an invalidation of the L1 instruction caches */
    2620           0 :         tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL);
    2621           0 :         tmp = REG_SET_FIELD(tmp, CP_MEC_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
    2622           0 :         WREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL, tmp);
    2623             : 
    2624             :         /* Wait for invalidation complete */
    2625           0 :         for (i = 0; i < usec_timeout; i++) {
    2626           0 :                 tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL);
    2627           0 :                 if (1 == REG_GET_FIELD(tmp, CP_MEC_DC_OP_CNTL,
    2628             :                                        INVALIDATE_DCACHE_COMPLETE))
    2629             :                         break;
    2630           0 :                 udelay(1);
    2631             :         }
    2632             : 
    2633           0 :         if (i >= usec_timeout) {
    2634           0 :                 dev_err(adev->dev, "failed to invalidate instruction cache\n");
    2635           0 :                 return -EINVAL;
    2636             :         }
    2637             : 
    2638             :         /* Trigger an invalidation of the L1 instruction caches */
    2639           0 :         tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
    2640           0 :         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1);
    2641           0 :         WREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL, tmp);
    2642             : 
    2643             :         /* Wait for invalidation complete */
    2644           0 :         for (i = 0; i < usec_timeout; i++) {
    2645           0 :                 tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
    2646           0 :                 if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL,
    2647             :                                        INVALIDATE_CACHE_COMPLETE))
    2648             :                         break;
    2649           0 :                 udelay(1);
    2650             :         }
    2651             : 
    2652           0 :         if (i >= usec_timeout) {
    2653           0 :                 dev_err(adev->dev, "failed to invalidate instruction cache\n");
    2654           0 :                 return -EINVAL;
    2655             :         }
    2656             : 
    2657             :         return 0;
    2658             : }
    2659             : 
/*
 * gfx_v11_0_config_gfx_rs64 - program the RS64 program-counter start
 * addresses for PFP, ME, and MEC and pulse each pipe's reset so the new
 * start addresses take effect.
 * @adev: amdgpu device handle
 *
 * For each engine the same pattern is applied: GRBM-select each pipe,
 * write the PRGRM_CNTR_START LO/HI registers from the firmware header,
 * restore the default GRBM selection, then set and immediately clear the
 * per-pipe reset bits in the engine's CNTL register.
 *
 * NOTE(review): pipe counts are hard-coded here (2 PFP, 2 ME, 4 MEC
 * pipes) rather than read from adev->gfx.*.num_pipe_per_* as elsewhere
 * in this file — presumably all gfx11 parts have these counts; confirm
 * before reusing on new variants.  No srbm_mutex is taken around the
 * soc21_grbm_select() calls here, unlike the *_cache_rs64 helpers —
 * presumably callers guarantee exclusive access at this init stage.
 */
static void gfx_v11_0_config_gfx_rs64(struct amdgpu_device *adev)
{
	const struct gfx_firmware_header_v2_0 *pfp_hdr;
	const struct gfx_firmware_header_v2_0 *me_hdr;
	const struct gfx_firmware_header_v2_0 *mec_hdr;
	uint32_t pipe_id, tmp;

	mec_hdr = (const struct gfx_firmware_header_v2_0 *)
		adev->gfx.mec_fw->data;
	me_hdr = (const struct gfx_firmware_header_v2_0 *)
		adev->gfx.me_fw->data;
	pfp_hdr = (const struct gfx_firmware_header_v2_0 *)
		adev->gfx.pfp_fw->data;

	/* config pfp program start addr */
	for (pipe_id = 0; pipe_id < 2; pipe_id++) {
		soc21_grbm_select(adev, 0, pipe_id, 0, 0);
		/* Low dword folds the bottom 2 bits of the hi word into
		 * its top bits (dword-aligned start address). */
		WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START,
			(pfp_hdr->ucode_start_addr_hi << 30) |
			(pfp_hdr->ucode_start_addr_lo >> 2));
		WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START_HI,
			pfp_hdr->ucode_start_addr_hi >> 2);
	}
	soc21_grbm_select(adev, 0, 0, 0, 0);

	/* reset pfp pipe */
	tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE0_RESET, 1);
	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE1_RESET, 1);
	WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);

	/* clear pfp pipe reset */
	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE0_RESET, 0);
	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE1_RESET, 0);
	WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);

	/* config me program start addr */
	for (pipe_id = 0; pipe_id < 2; pipe_id++) {
		soc21_grbm_select(adev, 0, pipe_id, 0, 0);
		WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START,
			(me_hdr->ucode_start_addr_hi << 30) |
			(me_hdr->ucode_start_addr_lo >> 2) );
		WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START_HI,
			me_hdr->ucode_start_addr_hi>>2);
	}
	soc21_grbm_select(adev, 0, 0, 0, 0);

	/* reset me pipe */
	tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE0_RESET, 1);
	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE1_RESET, 1);
	WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);

	/* clear me pipe reset */
	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE0_RESET, 0);
	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE1_RESET, 0);
	WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);

	/* config mec program start addr; vmid 1 selects the compute engine */
	for (pipe_id = 0; pipe_id < 4; pipe_id++) {
		soc21_grbm_select(adev, 1, pipe_id, 0, 0);
		WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START,
					mec_hdr->ucode_start_addr_lo >> 2 |
					mec_hdr->ucode_start_addr_hi << 30);
		WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START_HI,
					mec_hdr->ucode_start_addr_hi >> 2);
	}
	soc21_grbm_select(adev, 0, 0, 0, 0);

	/* reset mec pipe */
	tmp = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET, 1);
	tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET, 1);
	tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET, 1);
	tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET, 1);
	WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, tmp);

	/* clear mec pipe reset */
	tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET, 0);
	tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET, 0);
	tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET, 0);
	tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET, 0);
	WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, tmp);
}
    2744             : 
/*
 * gfx_v11_0_wait_for_rlc_autoload_complete - wait for the RLC to finish
 * autoloading the GC firmware, then configure the CP caches.
 * @adev: amdgpu device handle
 *
 * Polls until CP_STAT reads 0 (CP idle) AND the RLC bootload-status
 * register reports BOOTLOAD_COMPLETE.  GC 11.0.1 exposes the status at a
 * different register offset, hence the IP-version branch.
 *
 * On RLC backdoor-autoload configurations, the ME/PFP/MEC caches are then
 * pointed at the firmware images inside the rlc_autoload buffer — the
 * RS64 path passes a second address (the per-engine P0 stack region) to
 * the *_rs64 helpers, while the legacy path takes only the ucode address.
 *
 * Returns 0 on success, -ETIMEDOUT if bootload never completes, or the
 * error from a cache-config helper.
 */
static int gfx_v11_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev)
{
	uint32_t cp_status;
	uint32_t bootload_status;
	int i, r;
	uint64_t addr, addr2;

	for (i = 0; i < adev->usec_timeout; i++) {
		cp_status = RREG32_SOC15(GC, 0, regCP_STAT);

		/* GC 11.0.1 moved the bootload status register. */
		if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(11, 0, 1))
			bootload_status = RREG32_SOC15(GC, 0,
					regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1);
		else
			bootload_status = RREG32_SOC15(GC, 0, regRLC_RLCS_BOOTLOAD_STATUS);

		if ((cp_status == 0) &&
		    (REG_GET_FIELD(bootload_status,
			RLC_RLCS_BOOTLOAD_STATUS, BOOTLOAD_COMPLETE) == 1)) {
			break;
		}
		udelay(1);
	}

	if (i >= adev->usec_timeout) {
		dev_err(adev->dev, "rlc autoload: gc ucode autoload timeout\n");
		return -ETIMEDOUT;
	}

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
		if (adev->gfx.rs64_enable) {
			/* RS64: each engine needs its ucode address plus
			 * its pipe0 stack address inside the autoload BO. */
			addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
				rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_ME].offset;
			addr2 = adev->gfx.rlc.rlc_autoload_gpu_addr +
				rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_ME_P0_STACK].offset;
			r = gfx_v11_0_config_me_cache_rs64(adev, addr, addr2);
			if (r)
				return r;
			addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
				rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_PFP].offset;
			addr2 = adev->gfx.rlc.rlc_autoload_gpu_addr +
				rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_PFP_P0_STACK].offset;
			r = gfx_v11_0_config_pfp_cache_rs64(adev, addr, addr2);
			if (r)
				return r;
			addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
				rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_MEC].offset;
			addr2 = adev->gfx.rlc.rlc_autoload_gpu_addr +
				rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_MEC_P0_STACK].offset;
			r = gfx_v11_0_config_mec_cache_rs64(adev, addr, addr2);
			if (r)
				return r;
		} else {
			/* Legacy (non-RS64) firmware: ucode address only. */
			addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
				rlc_autoload_info[SOC21_FIRMWARE_ID_CP_ME].offset;
			r = gfx_v11_0_config_me_cache(adev, addr);
			if (r)
				return r;
			addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
				rlc_autoload_info[SOC21_FIRMWARE_ID_CP_PFP].offset;
			r = gfx_v11_0_config_pfp_cache(adev, addr);
			if (r)
				return r;
			addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
				rlc_autoload_info[SOC21_FIRMWARE_ID_CP_MEC].offset;
			r = gfx_v11_0_config_mec_cache(adev, addr);
			if (r)
				return r;
		}
	}

	return 0;
}
    2818             : 
    2819           0 : static int gfx_v11_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
    2820             : {
    2821             :         int i;
    2822           0 :         u32 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
    2823             : 
    2824           0 :         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
    2825           0 :         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
    2826           0 :         WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
    2827             : 
    2828           0 :         for (i = 0; i < adev->usec_timeout; i++) {
    2829           0 :                 if (RREG32_SOC15(GC, 0, regCP_STAT) == 0)
    2830             :                         break;
    2831           0 :                 udelay(1);
    2832             :         }
    2833             : 
    2834           0 :         if (i >= adev->usec_timeout)
    2835           0 :                 DRM_ERROR("failed to %s cp gfx\n", enable ? "unhalt" : "halt");
    2836             : 
    2837           0 :         return 0;
    2838             : }
    2839             : 
/*
 * gfx_v11_0_cp_gfx_load_pfp_microcode - upload the legacy (v1) PFP
 * microcode: copy the ucode into a GTT BO, point the PFP cache at it,
 * and write the jump table through CP_HYP_PFP_UCODE_ADDR/DATA.
 * @adev: amdgpu device handle
 *
 * On BO-creation failure, gfx_v11_0_pfp_fini() is called to tear down
 * any partial PFP state before returning the error.
 *
 * Returns 0 on success or the amdgpu_bo_create_reserved() error code.
 */
static int gfx_v11_0_cp_gfx_load_pfp_microcode(struct amdgpu_device *adev)
{
	int r;
	const struct gfx_firmware_header_v1_0 *pfp_hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.pfp_fw->data;

	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);

	/* Ucode payload starts at the header's array offset. */
	fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
		le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes);

	/* Create a pinned, kernel-mapped GTT BO to hold the ucode. */
	r = amdgpu_bo_create_reserved(adev, pfp_hdr->header.ucode_size_bytes,
				      PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
				      &adev->gfx.pfp.pfp_fw_obj,
				      &adev->gfx.pfp.pfp_fw_gpu_addr,
				      (void **)&adev->gfx.pfp.pfp_fw_ptr);
	if (r) {
		dev_err(adev->dev, "(%d) failed to create pfp fw bo\n", r);
		gfx_v11_0_pfp_fini(adev);
		return r;
	}

	memcpy(adev->gfx.pfp.pfp_fw_ptr, fw_data, fw_size);

	/* CPU mapping/reservation no longer needed once the copy is done. */
	amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_obj);
	amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_obj);

	gfx_v11_0_config_pfp_cache(adev, adev->gfx.pfp.pfp_fw_gpu_addr);

	/* Write the jump table, then leave the fw version in the ADDR reg. */
	WREG32_SOC15(GC, 0, regCP_HYP_PFP_UCODE_ADDR, 0);

	for (i = 0; i < pfp_hdr->jt_size; i++)
		WREG32_SOC15(GC, 0, regCP_HYP_PFP_UCODE_DATA,
			     le32_to_cpup(fw_data + pfp_hdr->jt_offset + i));

	WREG32_SOC15(GC, 0, regCP_HYP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);

	return 0;
}
    2884             : 
    2885           0 : static int gfx_v11_0_cp_gfx_load_pfp_microcode_rs64(struct amdgpu_device *adev)
    2886             : {
    2887             :         int r;
    2888             :         const struct gfx_firmware_header_v2_0 *pfp_hdr;
    2889             :         const __le32 *fw_ucode, *fw_data;
    2890             :         unsigned i, pipe_id, fw_ucode_size, fw_data_size;
    2891             :         uint32_t tmp;
    2892           0 :         uint32_t usec_timeout = 50000;  /* wait for 50ms */
    2893             : 
    2894           0 :         pfp_hdr = (const struct gfx_firmware_header_v2_0 *)
    2895           0 :                 adev->gfx.pfp_fw->data;
    2896             : 
    2897           0 :         amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
    2898             : 
    2899             :         /* instruction */
    2900           0 :         fw_ucode = (const __le32 *)(adev->gfx.pfp_fw->data +
    2901           0 :                 le32_to_cpu(pfp_hdr->ucode_offset_bytes));
    2902           0 :         fw_ucode_size = le32_to_cpu(pfp_hdr->ucode_size_bytes);
    2903             :         /* data */
    2904           0 :         fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
    2905           0 :                 le32_to_cpu(pfp_hdr->data_offset_bytes));
    2906           0 :         fw_data_size = le32_to_cpu(pfp_hdr->data_size_bytes);
    2907             : 
    2908             :         /* 64kb align */
    2909           0 :         r = amdgpu_bo_create_reserved(adev, fw_ucode_size,
    2910             :                                       64 * 1024, AMDGPU_GEM_DOMAIN_VRAM,
    2911             :                                       &adev->gfx.pfp.pfp_fw_obj,
    2912           0 :                                       &adev->gfx.pfp.pfp_fw_gpu_addr,
    2913           0 :                                       (void **)&adev->gfx.pfp.pfp_fw_ptr);
    2914           0 :         if (r) {
    2915           0 :                 dev_err(adev->dev, "(%d) failed to create pfp ucode fw bo\n", r);
    2916           0 :                 gfx_v11_0_pfp_fini(adev);
    2917           0 :                 return r;
    2918             :         }
    2919             : 
    2920           0 :         r = amdgpu_bo_create_reserved(adev, fw_data_size,
    2921             :                                       64 * 1024, AMDGPU_GEM_DOMAIN_VRAM,
    2922             :                                       &adev->gfx.pfp.pfp_fw_data_obj,
    2923           0 :                                       &adev->gfx.pfp.pfp_fw_data_gpu_addr,
    2924           0 :                                       (void **)&adev->gfx.pfp.pfp_fw_data_ptr);
    2925           0 :         if (r) {
    2926           0 :                 dev_err(adev->dev, "(%d) failed to create pfp data fw bo\n", r);
    2927           0 :                 gfx_v11_0_pfp_fini(adev);
    2928           0 :                 return r;
    2929             :         }
    2930             : 
    2931           0 :         memcpy(adev->gfx.pfp.pfp_fw_ptr, fw_ucode, fw_ucode_size);
    2932           0 :         memcpy(adev->gfx.pfp.pfp_fw_data_ptr, fw_data, fw_data_size);
    2933             : 
    2934           0 :         amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_obj);
    2935           0 :         amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_data_obj);
    2936           0 :         amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_obj);
    2937           0 :         amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_data_obj);
    2938             : 
    2939           0 :         if (amdgpu_emu_mode == 1)
    2940           0 :                 adev->hdp.funcs->flush_hdp(adev, NULL);
    2941             : 
    2942           0 :         WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO,
    2943             :                 lower_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr));
    2944           0 :         WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_HI,
    2945             :                 upper_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr));
    2946             : 
    2947           0 :         tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL);
    2948           0 :         tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0);
    2949           0 :         tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 0);
    2950           0 :         tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0);
    2951           0 :         WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL, tmp);
    2952             : 
    2953             :         /*
    2954             :          * Programming any of the CP_PFP_IC_BASE registers
    2955             :          * forces invalidation of the ME L1 I$. Wait for the
    2956             :          * invalidation complete
    2957             :          */
    2958           0 :         for (i = 0; i < usec_timeout; i++) {
    2959           0 :                 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
    2960           0 :                 if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
    2961             :                         INVALIDATE_CACHE_COMPLETE))
    2962             :                         break;
    2963           0 :                 udelay(1);
    2964             :         }
    2965             : 
    2966           0 :         if (i >= usec_timeout) {
    2967           0 :                 dev_err(adev->dev, "failed to invalidate instruction cache\n");
    2968           0 :                 return -EINVAL;
    2969             :         }
    2970             : 
    2971             :         /* Prime the L1 instruction caches */
    2972           0 :         tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
    2973           0 :         tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, PRIME_ICACHE, 1);
    2974           0 :         WREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL, tmp);
    2975             :         /* Waiting for cache primed*/
    2976           0 :         for (i = 0; i < usec_timeout; i++) {
    2977           0 :                 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
    2978           0 :                 if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
    2979             :                         ICACHE_PRIMED))
    2980             :                         break;
    2981           0 :                 udelay(1);
    2982             :         }
    2983             : 
    2984           0 :         if (i >= usec_timeout) {
    2985           0 :                 dev_err(adev->dev, "failed to prime instruction cache\n");
    2986           0 :                 return -EINVAL;
    2987             :         }
    2988             : 
    2989           0 :         mutex_lock(&adev->srbm_mutex);
    2990           0 :         for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) {
    2991           0 :                 soc21_grbm_select(adev, 0, pipe_id, 0, 0);
    2992           0 :                 WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START,
    2993             :                         (pfp_hdr->ucode_start_addr_hi << 30) |
    2994             :                         (pfp_hdr->ucode_start_addr_lo >> 2) );
    2995           0 :                 WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START_HI,
    2996             :                         pfp_hdr->ucode_start_addr_hi>>2);
    2997             : 
    2998             :                 /*
    2999             :                  * Program CP_ME_CNTL to reset given PIPE to take
    3000             :                  * effect of CP_PFP_PRGRM_CNTR_START.
    3001             :                  */
    3002           0 :                 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
    3003           0 :                 if (pipe_id == 0)
    3004           0 :                         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
    3005             :                                         PFP_PIPE0_RESET, 1);
    3006             :                 else
    3007           0 :                         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
    3008             :                                         PFP_PIPE1_RESET, 1);
    3009           0 :                 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
    3010             : 
    3011             :                 /* Clear pfp pipe0 reset bit. */
    3012           0 :                 if (pipe_id == 0)
    3013           0 :                         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
    3014             :                                         PFP_PIPE0_RESET, 0);
    3015             :                 else
    3016           0 :                         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
    3017             :                                         PFP_PIPE1_RESET, 0);
    3018           0 :                 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
    3019             : 
    3020           0 :                 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_LO,
    3021             :                         lower_32_bits(adev->gfx.pfp.pfp_fw_data_gpu_addr));
    3022           0 :                 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_HI,
    3023             :                         upper_32_bits(adev->gfx.pfp.pfp_fw_data_gpu_addr));
    3024             :         }
    3025           0 :         soc21_grbm_select(adev, 0, 0, 0, 0);
    3026           0 :         mutex_unlock(&adev->srbm_mutex);
    3027             : 
    3028           0 :         tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL);
    3029           0 :         tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0);
    3030           0 :         tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0);
    3031           0 :         WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp);
    3032             : 
    3033             :         /* Invalidate the data caches */
    3034           0 :         tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
    3035           0 :         tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
    3036           0 :         WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp);
    3037             : 
    3038           0 :         for (i = 0; i < usec_timeout; i++) {
    3039           0 :                 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
    3040           0 :                 if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL,
    3041             :                         INVALIDATE_DCACHE_COMPLETE))
    3042             :                         break;
    3043           0 :                 udelay(1);
    3044             :         }
    3045             : 
    3046           0 :         if (i >= usec_timeout) {
    3047           0 :                 dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
    3048           0 :                 return -EINVAL;
    3049             :         }
    3050             : 
    3051             :         return 0;
    3052             : }
    3053             : 
/*
 * gfx_v11_0_cp_gfx_load_me_microcode - load legacy (v1 header) ME firmware
 *
 * Copies the ME (micro engine) ucode image from the firmware blob into a
 * page-aligned GTT BO, points the ME instruction cache at it, then writes
 * the firmware's jump table into CP_HYP_ME_UCODE_DATA one dword at a time.
 *
 * Returns 0 on success or a negative error code from BO creation.
 */
static int gfx_v11_0_cp_gfx_load_me_microcode(struct amdgpu_device *adev)
{
        int r;
        const struct gfx_firmware_header_v1_0 *me_hdr;
        const __le32 *fw_data;
        unsigned i, fw_size;

        /* v1 firmware header lives at the start of the loaded blob */
        me_hdr = (const struct gfx_firmware_header_v1_0 *)
                adev->gfx.me_fw->data;

        amdgpu_ucode_print_gfx_hdr(&me_hdr->header);

        /* ucode payload location/size are little-endian fields in the header */
        fw_data = (const __le32 *)(adev->gfx.me_fw->data +
                le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
        fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes);

        /* Pinned, CPU-mapped GTT BO to hold the ucode for the ME I$ */
        r = amdgpu_bo_create_reserved(adev, me_hdr->header.ucode_size_bytes,
                                      PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
                                      &adev->gfx.me.me_fw_obj,
                                      &adev->gfx.me.me_fw_gpu_addr,
                                      (void **)&adev->gfx.me.me_fw_ptr);
        if (r) {
                dev_err(adev->dev, "(%d) failed to create me fw bo\n", r);
                gfx_v11_0_me_fini(adev);
                return r;
        }

        memcpy(adev->gfx.me.me_fw_ptr, fw_data, fw_size);

        /* CPU mapping no longer needed once the image is in place */
        amdgpu_bo_kunmap(adev->gfx.me.me_fw_obj);
        amdgpu_bo_unreserve(adev->gfx.me.me_fw_obj);

        /* Point the ME instruction cache base at the new BO */
        gfx_v11_0_config_me_cache(adev, adev->gfx.me.me_fw_gpu_addr);

        /* Write the jump table starting at ucode address 0 */
        WREG32_SOC15(GC, 0, regCP_HYP_ME_UCODE_ADDR, 0);

        for (i = 0; i < me_hdr->jt_size; i++)
                WREG32_SOC15(GC, 0, regCP_HYP_ME_UCODE_DATA,
                             le32_to_cpup(fw_data + me_hdr->jt_offset + i));

        /* Trailing ADDR write records the fw version (follows other gfx IPs) */
        WREG32_SOC15(GC, 0, regCP_HYP_ME_UCODE_ADDR, adev->gfx.me_fw_version);

        return 0;
}
    3098             : 
    3099           0 : static int gfx_v11_0_cp_gfx_load_me_microcode_rs64(struct amdgpu_device *adev)
    3100             : {
    3101             :         int r;
    3102             :         const struct gfx_firmware_header_v2_0 *me_hdr;
    3103             :         const __le32 *fw_ucode, *fw_data;
    3104             :         unsigned i, pipe_id, fw_ucode_size, fw_data_size;
    3105             :         uint32_t tmp;
    3106           0 :         uint32_t usec_timeout = 50000;  /* wait for 50ms */
    3107             : 
    3108           0 :         me_hdr = (const struct gfx_firmware_header_v2_0 *)
    3109           0 :                 adev->gfx.me_fw->data;
    3110             : 
    3111           0 :         amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
    3112             : 
    3113             :         /* instruction */
    3114           0 :         fw_ucode = (const __le32 *)(adev->gfx.me_fw->data +
    3115           0 :                 le32_to_cpu(me_hdr->ucode_offset_bytes));
    3116           0 :         fw_ucode_size = le32_to_cpu(me_hdr->ucode_size_bytes);
    3117             :         /* data */
    3118           0 :         fw_data = (const __le32 *)(adev->gfx.me_fw->data +
    3119           0 :                 le32_to_cpu(me_hdr->data_offset_bytes));
    3120           0 :         fw_data_size = le32_to_cpu(me_hdr->data_size_bytes);
    3121             : 
    3122             :         /* 64kb align*/
    3123           0 :         r = amdgpu_bo_create_reserved(adev, fw_ucode_size,
    3124             :                                       64 * 1024, AMDGPU_GEM_DOMAIN_VRAM,
    3125             :                                       &adev->gfx.me.me_fw_obj,
    3126           0 :                                       &adev->gfx.me.me_fw_gpu_addr,
    3127           0 :                                       (void **)&adev->gfx.me.me_fw_ptr);
    3128           0 :         if (r) {
    3129           0 :                 dev_err(adev->dev, "(%d) failed to create me ucode bo\n", r);
    3130           0 :                 gfx_v11_0_me_fini(adev);
    3131           0 :                 return r;
    3132             :         }
    3133             : 
    3134           0 :         r = amdgpu_bo_create_reserved(adev, fw_data_size,
    3135             :                                       64 * 1024, AMDGPU_GEM_DOMAIN_VRAM,
    3136             :                                       &adev->gfx.me.me_fw_data_obj,
    3137           0 :                                       &adev->gfx.me.me_fw_data_gpu_addr,
    3138           0 :                                       (void **)&adev->gfx.me.me_fw_data_ptr);
    3139           0 :         if (r) {
    3140           0 :                 dev_err(adev->dev, "(%d) failed to create me data bo\n", r);
    3141           0 :                 gfx_v11_0_pfp_fini(adev);
    3142           0 :                 return r;
    3143             :         }
    3144             : 
    3145           0 :         memcpy(adev->gfx.me.me_fw_ptr, fw_ucode, fw_ucode_size);
    3146           0 :         memcpy(adev->gfx.me.me_fw_data_ptr, fw_data, fw_data_size);
    3147             : 
    3148           0 :         amdgpu_bo_kunmap(adev->gfx.me.me_fw_obj);
    3149           0 :         amdgpu_bo_kunmap(adev->gfx.me.me_fw_data_obj);
    3150           0 :         amdgpu_bo_unreserve(adev->gfx.me.me_fw_obj);
    3151           0 :         amdgpu_bo_unreserve(adev->gfx.me.me_fw_data_obj);
    3152             : 
    3153           0 :         if (amdgpu_emu_mode == 1)
    3154           0 :                 adev->hdp.funcs->flush_hdp(adev, NULL);
    3155             : 
    3156           0 :         WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO,
    3157             :                 lower_32_bits(adev->gfx.me.me_fw_gpu_addr));
    3158           0 :         WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_HI,
    3159             :                 upper_32_bits(adev->gfx.me.me_fw_gpu_addr));
    3160             : 
    3161           0 :         tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL);
    3162           0 :         tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0);
    3163           0 :         tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0);
    3164           0 :         tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0);
    3165           0 :         WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL, tmp);
    3166             : 
    3167             :         /*
    3168             :          * Programming any of the CP_ME_IC_BASE registers
    3169             :          * forces invalidation of the ME L1 I$. Wait for the
    3170             :          * invalidation complete
    3171             :          */
    3172           0 :         for (i = 0; i < usec_timeout; i++) {
    3173           0 :                 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
    3174           0 :                 if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
    3175             :                         INVALIDATE_CACHE_COMPLETE))
    3176             :                         break;
    3177           0 :                 udelay(1);
    3178             :         }
    3179             : 
    3180           0 :         if (i >= usec_timeout) {
    3181           0 :                 dev_err(adev->dev, "failed to invalidate instruction cache\n");
    3182           0 :                 return -EINVAL;
    3183             :         }
    3184             : 
    3185             :         /* Prime the instruction caches */
    3186           0 :         tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
    3187           0 :         tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, PRIME_ICACHE, 1);
    3188           0 :         WREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL, tmp);
    3189             : 
    3190             :         /* Waiting for instruction cache primed*/
    3191           0 :         for (i = 0; i < usec_timeout; i++) {
    3192           0 :                 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
    3193           0 :                 if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
    3194             :                         ICACHE_PRIMED))
    3195             :                         break;
    3196           0 :                 udelay(1);
    3197             :         }
    3198             : 
    3199           0 :         if (i >= usec_timeout) {
    3200           0 :                 dev_err(adev->dev, "failed to prime instruction cache\n");
    3201           0 :                 return -EINVAL;
    3202             :         }
    3203             : 
    3204           0 :         mutex_lock(&adev->srbm_mutex);
    3205           0 :         for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) {
    3206           0 :                 soc21_grbm_select(adev, 0, pipe_id, 0, 0);
    3207           0 :                 WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START,
    3208             :                         (me_hdr->ucode_start_addr_hi << 30) |
    3209             :                         (me_hdr->ucode_start_addr_lo >> 2) );
    3210           0 :                 WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START_HI,
    3211             :                         me_hdr->ucode_start_addr_hi>>2);
    3212             : 
    3213             :                 /*
    3214             :                  * Program CP_ME_CNTL to reset given PIPE to take
    3215             :                  * effect of CP_PFP_PRGRM_CNTR_START.
    3216             :                  */
    3217           0 :                 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
    3218           0 :                 if (pipe_id == 0)
    3219           0 :                         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
    3220             :                                         ME_PIPE0_RESET, 1);
    3221             :                 else
    3222           0 :                         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
    3223             :                                         ME_PIPE1_RESET, 1);
    3224           0 :                 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
    3225             : 
    3226             :                 /* Clear pfp pipe0 reset bit. */
    3227           0 :                 if (pipe_id == 0)
    3228           0 :                         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
    3229             :                                         ME_PIPE0_RESET, 0);
    3230             :                 else
    3231           0 :                         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
    3232             :                                         ME_PIPE1_RESET, 0);
    3233           0 :                 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
    3234             : 
    3235           0 :                 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_LO,
    3236             :                         lower_32_bits(adev->gfx.me.me_fw_data_gpu_addr));
    3237           0 :                 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_HI,
    3238             :                         upper_32_bits(adev->gfx.me.me_fw_data_gpu_addr));
    3239             :         }
    3240           0 :         soc21_grbm_select(adev, 0, 0, 0, 0);
    3241           0 :         mutex_unlock(&adev->srbm_mutex);
    3242             : 
    3243           0 :         tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL);
    3244           0 :         tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0);
    3245           0 :         tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0);
    3246           0 :         WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp);
    3247             : 
    3248             :         /* Invalidate the data caches */
    3249           0 :         tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
    3250           0 :         tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
    3251           0 :         WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp);
    3252             : 
    3253           0 :         for (i = 0; i < usec_timeout; i++) {
    3254           0 :                 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
    3255           0 :                 if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL,
    3256             :                         INVALIDATE_DCACHE_COMPLETE))
    3257             :                         break;
    3258           0 :                 udelay(1);
    3259             :         }
    3260             : 
    3261           0 :         if (i >= usec_timeout) {
    3262           0 :                 dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
    3263           0 :                 return -EINVAL;
    3264             :         }
    3265             : 
    3266             :         return 0;
    3267             : }
    3268             : 
    3269           0 : static int gfx_v11_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
    3270             : {
    3271             :         int r;
    3272             : 
    3273           0 :         if (!adev->gfx.me_fw || !adev->gfx.pfp_fw)
    3274             :                 return -EINVAL;
    3275             : 
    3276           0 :         gfx_v11_0_cp_gfx_enable(adev, false);
    3277             : 
    3278           0 :         if (adev->gfx.rs64_enable)
    3279           0 :                 r = gfx_v11_0_cp_gfx_load_pfp_microcode_rs64(adev);
    3280             :         else
    3281           0 :                 r = gfx_v11_0_cp_gfx_load_pfp_microcode(adev);
    3282           0 :         if (r) {
    3283           0 :                 dev_err(adev->dev, "(%d) failed to load pfp fw\n", r);
    3284           0 :                 return r;
    3285             :         }
    3286             : 
    3287           0 :         if (adev->gfx.rs64_enable)
    3288           0 :                 r = gfx_v11_0_cp_gfx_load_me_microcode_rs64(adev);
    3289             :         else
    3290           0 :                 r = gfx_v11_0_cp_gfx_load_me_microcode(adev);
    3291           0 :         if (r) {
    3292           0 :                 dev_err(adev->dev, "(%d) failed to load me fw\n", r);
    3293           0 :                 return r;
    3294             :         }
    3295             : 
    3296             :         return 0;
    3297             : }
    3298             : 
/*
 * gfx_v11_0_cp_gfx_start - initialize the CP and emit the clear-state preamble
 *
 * Programs basic CP registers, then submits a PM4 packet stream on gfx ring 0
 * that writes the golden context-register state (CSB) and issues CLEAR_STATE.
 * If a second gfx ring exists, it also gets a CLEAR_STATE to copy state 0 to
 * the next available state.
 *
 * Returns 0 on success or the error from amdgpu_ring_alloc().
 */
static int gfx_v11_0_cp_gfx_start(struct amdgpu_device *adev)
{
        struct amdgpu_ring *ring;
        const struct cs_section_def *sect = NULL;
        const struct cs_extent_def *ext = NULL;
        int r, i;
        int ctx_reg_offset;

        /* init the CP */
        WREG32_SOC15(GC, 0, regCP_MAX_CONTEXT,
                     adev->gfx.config.max_hw_contexts - 1);
        WREG32_SOC15(GC, 0, regCP_DEVICE_ID, 1);

        /* in async mode the CP is enabled later; otherwise un-halt it now */
        if (!amdgpu_async_gfx_ring)
                gfx_v11_0_cp_gfx_enable(adev, true);

        /* reserve exactly enough ring space for the clear-state buffer */
        ring = &adev->gfx.gfx_ring[0];
        r = amdgpu_ring_alloc(ring, gfx_v11_0_get_csb_size(adev));
        if (r) {
                DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
                return r;
        }

        amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
        amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

        amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
        amdgpu_ring_write(ring, 0x80000000);
        amdgpu_ring_write(ring, 0x80000000);

        /* emit every SECT_CONTEXT extent of the golden register state */
        for (sect = gfx11_cs_data; sect->section != NULL; ++sect) {
                for (ext = sect->section; ext->extent != NULL; ++ext) {
                        if (sect->id == SECT_CONTEXT) {
                                amdgpu_ring_write(ring,
                                                  PACKET3(PACKET3_SET_CONTEXT_REG,
                                                          ext->reg_count));
                                amdgpu_ring_write(ring, ext->reg_index -
                                                  PACKET3_SET_CONTEXT_REG_START);
                                for (i = 0; i < ext->reg_count; i++)
                                        amdgpu_ring_write(ring, ext->extent[i]);
                        }
                }
        }

        /* PA_SC_TILE_STEERING_OVERRIDE is per-chip, not part of the table */
        ctx_reg_offset =
                SOC15_REG_OFFSET(GC, 0, regPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START;
        amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
        amdgpu_ring_write(ring, ctx_reg_offset);
        amdgpu_ring_write(ring, adev->gfx.config.pa_sc_tile_steering_override);

        amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
        amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

        amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
        amdgpu_ring_write(ring, 0);

        amdgpu_ring_commit(ring);

        /* submit cs packet to copy state 0 to next available state */
        if (adev->gfx.num_gfx_rings > 1) {
                /* maximum supported gfx ring is 2 */
                ring = &adev->gfx.gfx_ring[1];
                r = amdgpu_ring_alloc(ring, 2);
                if (r) {
                        DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
                        return r;
                }

                amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
                amdgpu_ring_write(ring, 0);

                amdgpu_ring_commit(ring);
        }
        return 0;
}
    3374             : 
    3375           0 : static void gfx_v11_0_cp_gfx_switch_pipe(struct amdgpu_device *adev,
    3376             :                                          CP_PIPE_ID pipe)
    3377             : {
    3378             :         u32 tmp;
    3379             : 
    3380           0 :         tmp = RREG32_SOC15(GC, 0, regGRBM_GFX_CNTL);
    3381           0 :         tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, PIPEID, pipe);
    3382             : 
    3383           0 :         WREG32_SOC15(GC, 0, regGRBM_GFX_CNTL, tmp);
    3384           0 : }
    3385             : 
    3386           0 : static void gfx_v11_0_cp_gfx_set_doorbell(struct amdgpu_device *adev,
    3387             :                                           struct amdgpu_ring *ring)
    3388             : {
    3389             :         u32 tmp;
    3390             : 
    3391           0 :         tmp = RREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL);
    3392           0 :         if (ring->use_doorbell) {
    3393           0 :                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
    3394             :                                     DOORBELL_OFFSET, ring->doorbell_index);
    3395           0 :                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
    3396             :                                     DOORBELL_EN, 1);
    3397             :         } else {
    3398           0 :                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
    3399             :                                     DOORBELL_EN, 0);
    3400             :         }
    3401           0 :         WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL, tmp);
    3402             : 
    3403           0 :         tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
    3404             :                             DOORBELL_RANGE_LOWER, ring->doorbell_index);
    3405           0 :         WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_LOWER, tmp);
    3406             : 
    3407           0 :         WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_UPPER,
    3408             :                      CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
    3409           0 : }
    3410             : 
/*
 * gfx_v11_0_cp_gfx_resume - program and start the CP gfx ring buffer(s)
 * @adev: amdgpu device pointer
 *
 * Programs CP_RB0 (and CP_RB1 when a second gfx ring exists) with the ring
 * buffer size, rptr/wptr writeback addresses and ring base, sets up the
 * doorbell aperture per ring, then starts the CP gfx engine and marks the
 * rings' schedulers ready.  Each ring is programmed under srbm_mutex with
 * the matching pipe selected via gfx_v11_0_cp_gfx_switch_pipe().
 *
 * Returns 0 (there are no failure paths).
 */
static int gfx_v11_0_cp_gfx_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr, rptr_addr, wptr_gpu_addr;
	u32 i;

	/* Set the write pointer delay */
	WREG32_SOC15(GC, 0, regCP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32_SOC15(GC, 0, regCP_RB_VMID, 0);

	/* Init gfx ring 0 for pipe 0 */
	mutex_lock(&adev->srbm_mutex);
	gfx_v11_0_cp_gfx_switch_pipe(adev, PIPE_ID0);

	/* Set ring buffer size (log2 of the size in dwords) */
	ring = &adev->gfx.gfx_ring[0];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
	WREG32_SOC15(GC, 0, regCP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's write pointers */
	ring->wptr = 0;
	WREG32_SOC15(GC, 0, regCP_RB0_WPTR, lower_32_bits(ring->wptr));
	WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));

	/* set the wb address whether it's enabled or not */
	rptr_addr = ring->rptr_gpu_addr;
	WREG32_SOC15(GC, 0, regCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
	WREG32_SOC15(GC, 0, regCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) &
		     CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);

	wptr_gpu_addr = ring->wptr_gpu_addr;
	WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO,
		     lower_32_bits(wptr_gpu_addr));
	WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI,
		     upper_32_bits(wptr_gpu_addr));

	/*
	 * Re-write CNTL after a short delay, presumably so the pointer
	 * programming above has settled before the size/block fields take
	 * effect - the same pattern is repeated for ring 1 below.
	 */
	mdelay(1);
	WREG32_SOC15(GC, 0, regCP_RB0_CNTL, tmp);

	/* ring base is programmed in units of 256 bytes (addr >> 8) */
	rb_addr = ring->gpu_addr >> 8;
	WREG32_SOC15(GC, 0, regCP_RB0_BASE, rb_addr);
	WREG32_SOC15(GC, 0, regCP_RB0_BASE_HI, upper_32_bits(rb_addr));

	WREG32_SOC15(GC, 0, regCP_RB_ACTIVE, 1);

	gfx_v11_0_cp_gfx_set_doorbell(adev, ring);
	mutex_unlock(&adev->srbm_mutex);

	/* Init gfx ring 1 for pipe 1 */
	if (adev->gfx.num_gfx_rings > 1) {
		mutex_lock(&adev->srbm_mutex);
		gfx_v11_0_cp_gfx_switch_pipe(adev, PIPE_ID1);
		/* maximum supported gfx ring is 2 */
		ring = &adev->gfx.gfx_ring[1];
		rb_bufsz = order_base_2(ring->ring_size / 8);
		tmp = REG_SET_FIELD(0, CP_RB1_CNTL, RB_BUFSZ, rb_bufsz);
		tmp = REG_SET_FIELD(tmp, CP_RB1_CNTL, RB_BLKSZ, rb_bufsz - 2);
		WREG32_SOC15(GC, 0, regCP_RB1_CNTL, tmp);
		/* Initialize the ring buffer's write pointers */
		ring->wptr = 0;
		WREG32_SOC15(GC, 0, regCP_RB1_WPTR, lower_32_bits(ring->wptr));
		WREG32_SOC15(GC, 0, regCP_RB1_WPTR_HI, upper_32_bits(ring->wptr));
		/* Set the wb address whether it's enabled or not */
		rptr_addr = ring->rptr_gpu_addr;
		WREG32_SOC15(GC, 0, regCP_RB1_RPTR_ADDR, lower_32_bits(rptr_addr));
		WREG32_SOC15(GC, 0, regCP_RB1_RPTR_ADDR_HI, upper_32_bits(rptr_addr) &
			     CP_RB1_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
		wptr_gpu_addr = ring->wptr_gpu_addr;
		WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO,
			     lower_32_bits(wptr_gpu_addr));
		WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI,
			     upper_32_bits(wptr_gpu_addr));

		mdelay(1);
		WREG32_SOC15(GC, 0, regCP_RB1_CNTL, tmp);

		rb_addr = ring->gpu_addr >> 8;
		WREG32_SOC15(GC, 0, regCP_RB1_BASE, rb_addr);
		WREG32_SOC15(GC, 0, regCP_RB1_BASE_HI, upper_32_bits(rb_addr));
		WREG32_SOC15(GC, 0, regCP_RB1_ACTIVE, 1);

		gfx_v11_0_cp_gfx_set_doorbell(adev, ring);
		mutex_unlock(&adev->srbm_mutex);
	}
	/* Switch to pipe 0 */
	mutex_lock(&adev->srbm_mutex);
	gfx_v11_0_cp_gfx_switch_pipe(adev, PIPE_ID0);
	mutex_unlock(&adev->srbm_mutex);

	/* start the ring */
	gfx_v11_0_cp_gfx_start(adev);

	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
		ring = &adev->gfx.gfx_ring[i];
		ring->sched.ready = true;
	}

	return 0;
}
    3516             : 
/*
 * gfx_v11_0_cp_compute_enable - halt or release the compute micro engines
 * @adev: amdgpu device pointer
 * @enable: true to take the MEC(s) out of reset/halt, false to halt them
 *
 * RS64-capable parts are controlled through CP_MEC_RS64_CNTL (per-pipe
 * reset/active bits plus a global halt and icache-invalidate bit); legacy
 * parts use the per-ME halt bits in CP_MEC_CNTL.  The KIQ ring's scheduler
 * readiness is kept in sync with the enable state.
 */
static void gfx_v11_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
{
	u32 data;

	if (adev->gfx.rs64_enable) {
		/*
		 * enable: clear icache-invalidate, release all four pipe
		 * resets, mark all four pipes active and clear the halt.
		 * disable: the exact inverse of each field.
		 */
		data = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL);
		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_INVALIDATE_ICACHE,
							 enable ? 0 : 1);
		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET,
							 enable ? 0 : 1);
		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET,
							 enable ? 0 : 1);
		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET,
							 enable ? 0 : 1);
		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET,
							 enable ? 0 : 1);
		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE0_ACTIVE,
							 enable ? 1 : 0);
		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE1_ACTIVE,
							 enable ? 1 : 0);
		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE2_ACTIVE,
							 enable ? 1 : 0);
		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE3_ACTIVE,
							 enable ? 1 : 0);
		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_HALT,
							 enable ? 0 : 1);
		WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, data);
	} else {
		data = RREG32_SOC15(GC, 0, regCP_MEC_CNTL);

		if (enable) {
			data = REG_SET_FIELD(data, CP_MEC_CNTL, MEC_ME1_HALT, 0);
			/*
			 * Leave ME2 halted when the MES KIQ is enabled -
			 * presumably MES owns MEC2 in that mode; TODO confirm.
			 */
			if (!adev->enable_mes_kiq)
				data = REG_SET_FIELD(data, CP_MEC_CNTL,
						     MEC_ME2_HALT, 0);
		} else {
			data = REG_SET_FIELD(data, CP_MEC_CNTL, MEC_ME1_HALT, 1);
			data = REG_SET_FIELD(data, CP_MEC_CNTL, MEC_ME2_HALT, 1);
		}
		WREG32_SOC15(GC, 0, regCP_MEC_CNTL, data);
	}

	adev->gfx.kiq.ring.sched.ready = enable;

	/* brief settle delay after changing the engine state */
	udelay(50);
}
    3563             : 
/*
 * gfx_v11_0_cp_compute_load_microcode - load legacy (v1) MEC microcode
 * @adev: amdgpu device pointer
 *
 * Copies the v1 MEC firmware image into a GTT BO, points the MEC
 * instruction cache at it via gfx_v11_0_config_mec_cache(), and writes
 * the jump table through the MEC1 UCODE_ADDR/DATA register pair.
 *
 * Returns 0 on success, -EINVAL if no MEC firmware is loaded, or the
 * error from BO creation (after tearing down MEC state).
 */
static int gfx_v11_0_cp_compute_load_microcode(struct amdgpu_device *adev)
{
	const struct gfx_firmware_header_v1_0 *mec_hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;
	u32 *fw = NULL;
	int r;

	if (!adev->gfx.mec_fw)
		return -EINVAL;

	/* halt the MEC before replacing its microcode */
	gfx_v11_0_cp_compute_enable(adev, false);

	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);

	fw_data = (const __le32 *)
		(adev->gfx.mec_fw->data +
		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes);

	/* pinned + CPU-mapped BO holding the ucode for the MEC to fetch */
	r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
					  PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
					  &adev->gfx.mec.mec_fw_obj,
					  &adev->gfx.mec.mec_fw_gpu_addr,
					  (void **)&fw);
	if (r) {
		dev_err(adev->dev, "(%d) failed to create mec fw bo\n", r);
		gfx_v11_0_mec_fini(adev);
		return r;
	}

	memcpy(fw, fw_data, fw_size);

	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);

	gfx_v11_0_config_mec_cache(adev, adev->gfx.mec.mec_fw_gpu_addr);

	/* MEC1: stream the jump table through the indirect ADDR/DATA pair */
	WREG32_SOC15(GC, 0, regCP_MEC_ME1_UCODE_ADDR, 0);

	for (i = 0; i < mec_hdr->jt_size; i++)
		WREG32_SOC15(GC, 0, regCP_MEC_ME1_UCODE_DATA,
			     le32_to_cpup(fw_data + mec_hdr->jt_offset + i));

	/*
	 * NOTE(review): the fw version is written into UCODE_ADDR here,
	 * mirroring other amdgpu gfx blocks - confirm this marker usage.
	 */
	WREG32_SOC15(GC, 0, regCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);

	return 0;
}
    3614             : 
    3615           0 : static int gfx_v11_0_cp_compute_load_microcode_rs64(struct amdgpu_device *adev)
    3616             : {
    3617             :         const struct gfx_firmware_header_v2_0 *mec_hdr;
    3618             :         const __le32 *fw_ucode, *fw_data;
    3619             :         u32 tmp, fw_ucode_size, fw_data_size;
    3620           0 :         u32 i, usec_timeout = 50000; /* Wait for 50 ms */
    3621             :         u32 *fw_ucode_ptr, *fw_data_ptr;
    3622             :         int r;
    3623             : 
    3624           0 :         if (!adev->gfx.mec_fw)
    3625             :                 return -EINVAL;
    3626             : 
    3627           0 :         gfx_v11_0_cp_compute_enable(adev, false);
    3628             : 
    3629           0 :         mec_hdr = (const struct gfx_firmware_header_v2_0 *)adev->gfx.mec_fw->data;
    3630           0 :         amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
    3631             : 
    3632           0 :         fw_ucode = (const __le32 *) (adev->gfx.mec_fw->data +
    3633           0 :                                 le32_to_cpu(mec_hdr->ucode_offset_bytes));
    3634           0 :         fw_ucode_size = le32_to_cpu(mec_hdr->ucode_size_bytes);
    3635             : 
    3636           0 :         fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
    3637           0 :                                 le32_to_cpu(mec_hdr->data_offset_bytes));
    3638           0 :         fw_data_size = le32_to_cpu(mec_hdr->data_size_bytes);
    3639             : 
    3640           0 :         r = amdgpu_bo_create_reserved(adev, fw_ucode_size,
    3641             :                                       64 * 1024, AMDGPU_GEM_DOMAIN_VRAM,
    3642             :                                       &adev->gfx.mec.mec_fw_obj,
    3643             :                                       &adev->gfx.mec.mec_fw_gpu_addr,
    3644             :                                       (void **)&fw_ucode_ptr);
    3645           0 :         if (r) {
    3646           0 :                 dev_err(adev->dev, "(%d) failed to create mec fw ucode bo\n", r);
    3647           0 :                 gfx_v11_0_mec_fini(adev);
    3648           0 :                 return r;
    3649             :         }
    3650             : 
    3651           0 :         r = amdgpu_bo_create_reserved(adev, fw_data_size,
    3652             :                                       64 * 1024, AMDGPU_GEM_DOMAIN_VRAM,
    3653             :                                       &adev->gfx.mec.mec_fw_data_obj,
    3654             :                                       &adev->gfx.mec.mec_fw_data_gpu_addr,
    3655             :                                       (void **)&fw_data_ptr);
    3656           0 :         if (r) {
    3657           0 :                 dev_err(adev->dev, "(%d) failed to create mec fw ucode bo\n", r);
    3658           0 :                 gfx_v11_0_mec_fini(adev);
    3659           0 :                 return r;
    3660             :         }
    3661             : 
    3662           0 :         memcpy(fw_ucode_ptr, fw_ucode, fw_ucode_size);
    3663           0 :         memcpy(fw_data_ptr, fw_data, fw_data_size);
    3664             : 
    3665           0 :         amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
    3666           0 :         amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_data_obj);
    3667           0 :         amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
    3668           0 :         amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_data_obj);
    3669             : 
    3670           0 :         tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL);
    3671           0 :         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
    3672           0 :         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0);
    3673           0 :         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
    3674           0 :         WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL, tmp);
    3675             : 
    3676           0 :         tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL);
    3677           0 :         tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, VMID, 0);
    3678           0 :         tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, CACHE_POLICY, 0);
    3679           0 :         WREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL, tmp);
    3680             : 
    3681           0 :         mutex_lock(&adev->srbm_mutex);
    3682           0 :         for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
    3683           0 :                 soc21_grbm_select(adev, 1, i, 0, 0);
    3684             : 
    3685           0 :                 WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_LO, adev->gfx.mec.mec_fw_data_gpu_addr);
    3686           0 :                 WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_HI,
    3687             :                      upper_32_bits(adev->gfx.mec.mec_fw_data_gpu_addr));
    3688             : 
    3689           0 :                 WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START,
    3690             :                                         mec_hdr->ucode_start_addr_lo >> 2 |
    3691             :                                         mec_hdr->ucode_start_addr_hi << 30);
    3692           0 :                 WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START_HI,
    3693             :                                         mec_hdr->ucode_start_addr_hi >> 2);
    3694             : 
    3695           0 :                 WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_LO, adev->gfx.mec.mec_fw_gpu_addr);
    3696           0 :                 WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_HI,
    3697             :                      upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
    3698             :         }
    3699           0 :         mutex_unlock(&adev->srbm_mutex);
    3700           0 :         soc21_grbm_select(adev, 0, 0, 0, 0);
    3701             : 
    3702             :         /* Trigger an invalidation of the L1 instruction caches */
    3703           0 :         tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL);
    3704           0 :         tmp = REG_SET_FIELD(tmp, CP_MEC_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
    3705           0 :         WREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL, tmp);
    3706             : 
    3707             :         /* Wait for invalidation complete */
    3708           0 :         for (i = 0; i < usec_timeout; i++) {
    3709           0 :                 tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL);
    3710           0 :                 if (1 == REG_GET_FIELD(tmp, CP_MEC_DC_OP_CNTL,
    3711             :                                        INVALIDATE_DCACHE_COMPLETE))
    3712             :                         break;
    3713           0 :                 udelay(1);
    3714             :         }
    3715             : 
    3716           0 :         if (i >= usec_timeout) {
    3717           0 :                 dev_err(adev->dev, "failed to invalidate instruction cache\n");
    3718           0 :                 return -EINVAL;
    3719             :         }
    3720             : 
    3721             :         /* Trigger an invalidation of the L1 instruction caches */
    3722           0 :         tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
    3723           0 :         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1);
    3724           0 :         WREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL, tmp);
    3725             : 
    3726             :         /* Wait for invalidation complete */
    3727           0 :         for (i = 0; i < usec_timeout; i++) {
    3728           0 :                 tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
    3729           0 :                 if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL,
    3730             :                                        INVALIDATE_CACHE_COMPLETE))
    3731             :                         break;
    3732           0 :                 udelay(1);
    3733             :         }
    3734             : 
    3735           0 :         if (i >= usec_timeout) {
    3736           0 :                 dev_err(adev->dev, "failed to invalidate instruction cache\n");
    3737           0 :                 return -EINVAL;
    3738             :         }
    3739             : 
    3740             :         return 0;
    3741             : }
    3742             : 
/*
 * gfx_v11_0_kiq_setting - register the KIQ ring with the RLC
 * @ring: the KIQ ring
 *
 * Encodes the KIQ's me/pipe/queue into the low byte of
 * RLC_CP_SCHEDULERS, then writes the register a second time with bit 7
 * set - presumably the valid/enable flag for the encoded queue, written
 * separately so the selection lands first (TODO confirm against the
 * register spec).
 */
static void gfx_v11_0_kiq_setting(struct amdgpu_ring *ring)
{
	uint32_t tmp;
	struct amdgpu_device *adev = ring->adev;

	/* tell RLC which is KIQ queue */
	tmp = RREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS);
	tmp &= 0xffffff00;
	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
	WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp);
	tmp |= 0x80;
	WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp);
}
    3756             : 
/*
 * gfx_v11_0_cp_set_doorbell_range - program the CP doorbell apertures
 * @adev: amdgpu device pointer
 *
 * The gfx engine accepts doorbells from gfx_ring0 through the end of the
 * gfx user-queue space; the compute engine from the KIQ index through the
 * end of the compute user-queue space.  The "(index * 2) << 2" converts a
 * 64-bit doorbell slot index into the byte offset the registers expect.
 */
static void gfx_v11_0_cp_set_doorbell_range(struct amdgpu_device *adev)
{
	/* set graphics engine doorbell range */
	WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_LOWER,
		     (adev->doorbell_index.gfx_ring0 * 2) << 2);
	WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_UPPER,
		     (adev->doorbell_index.gfx_userqueue_end * 2) << 2);

	/* set compute engine doorbell range */
	WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_LOWER,
		     (adev->doorbell_index.kiq * 2) << 2);
	WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_UPPER,
		     (adev->doorbell_index.userqueue_end * 2) << 2);
}
    3771             : 
/*
 * gfx_v11_0_gfx_mqd_init - populate a gfx memory queue descriptor
 * @adev: amdgpu device pointer
 * @m: CPU pointer to the MQD to fill (struct v11_gfx_mqd)
 * @prop: queue properties (gpu addresses, size, doorbell settings)
 *
 * Fills the MQD fields that describe the gfx HQD: MQD base, ring (HQD)
 * base/size, rptr writeback and wptr poll addresses, priority, quantum
 * and doorbell control.  Several fields are derived from the current
 * register values read via RREG32_SOC15 with individual fields overridden.
 *
 * Returns 0 (no failure paths).
 */
static int gfx_v11_0_gfx_mqd_init(struct amdgpu_device *adev, void *m,
				  struct amdgpu_mqd_prop *prop)
{
	struct v11_gfx_mqd *mqd = m;
	uint64_t hqd_gpu_addr, wb_gpu_addr;
	uint32_t tmp;
	uint32_t rb_bufsz;

	/* set up gfx hqd wptr */
	mqd->cp_gfx_hqd_wptr = 0;
	mqd->cp_gfx_hqd_wptr_hi = 0;

	/* set the pointer to the MQD */
	mqd->cp_mqd_base_addr = prop->mqd_gpu_addr & 0xfffffffc;
	mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr);

	/* set up mqd control */
	tmp = RREG32_SOC15(GC, 0, regCP_GFX_MQD_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, VMID, 0);
	tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, PRIV_STATE, 1);
	tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, CACHE_POLICY, 0);
	mqd->cp_gfx_mqd_control = tmp;

	/* set up gfx_hqd_vimd with 0x0 to indicate the ring buffer's vmid */
	/* NOTE(review): tmp computed here is never stored; the mqd field is
	 * hard-wired to 0 - confirm whether the register read is needed. */
	tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_VMID);
	tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_VMID, VMID, 0);
	mqd->cp_gfx_hqd_vmid = 0;

	/* set up default queue priority level
	 * 0x0 = low priority, 0x1 = high priority */
	tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_QUEUE_PRIORITY);
	tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUEUE_PRIORITY, PRIORITY_LEVEL, 0);
	mqd->cp_gfx_hqd_queue_priority = tmp;

	/* set up time quantum */
	tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_QUANTUM);
	tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUANTUM, QUANTUM_EN, 1);
	mqd->cp_gfx_hqd_quantum = tmp;

	/* set up gfx hqd base. this is similar as CP_RB_BASE */
	hqd_gpu_addr = prop->hqd_base_gpu_addr >> 8;
	mqd->cp_gfx_hqd_base = hqd_gpu_addr;
	mqd->cp_gfx_hqd_base_hi = upper_32_bits(hqd_gpu_addr);

	/* set up hqd_rptr_addr/_hi, similar as CP_RB_RPTR */
	wb_gpu_addr = prop->rptr_gpu_addr;
	mqd->cp_gfx_hqd_rptr_addr = wb_gpu_addr & 0xfffffffc;
	mqd->cp_gfx_hqd_rptr_addr_hi =
		upper_32_bits(wb_gpu_addr) & 0xffff;

	/* set up rb_wptr_poll addr */
	wb_gpu_addr = prop->wptr_gpu_addr;
	mqd->cp_rb_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_rb_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;

	/* set up the gfx_hqd_control, similar as CP_RB0_CNTL */
	rb_bufsz = order_base_2(prop->queue_size / 4) - 1;
	tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BUFSZ, rb_bufsz);
	tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BLKSZ, rb_bufsz - 2);
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, BUF_SWAP, 1);
#endif
	mqd->cp_gfx_hqd_cntl = tmp;

	/* set up cp_doorbell_control */
	tmp = RREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL);
	if (prop->use_doorbell) {
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
				    DOORBELL_OFFSET, prop->doorbell_index);
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
				    DOORBELL_EN, 1);
	} else
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
				    DOORBELL_EN, 0);
	mqd->cp_rb_doorbell_control = tmp;

	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
	mqd->cp_gfx_hqd_rptr = RREG32_SOC15(GC, 0, regCP_GFX_HQD_RPTR);

	/* active the queue */
	mqd->cp_gfx_hqd_active = 1;

	return 0;
}
    3857             : 
#ifdef BRING_UP_DEBUG
/*
 * gfx_v11_0_gfx_queue_init_register - push an MQD into registers directly
 * @ring: the gfx ring whose MQD should be programmed
 *
 * Bring-up-only path: writes each field of the ring's MQD straight into
 * the corresponding CP/HQD registers instead of letting firmware consume
 * the MQD.  Compiled only with BRING_UP_DEBUG.
 *
 * Returns 0 (no failure paths).
 */
static int gfx_v11_0_gfx_queue_init_register(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct v11_gfx_mqd *mqd = ring->mqd_ptr;

	/* set mmCP_GFX_HQD_WPTR/_HI to 0 */
	WREG32_SOC15(GC, 0, regCP_GFX_HQD_WPTR, mqd->cp_gfx_hqd_wptr);
	WREG32_SOC15(GC, 0, regCP_GFX_HQD_WPTR_HI, mqd->cp_gfx_hqd_wptr_hi);

	/* set GFX_MQD_BASE */
	WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr);
	WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);

	/* set GFX_MQD_CONTROL */
	WREG32_SOC15(GC, 0, regCP_GFX_MQD_CONTROL, mqd->cp_gfx_mqd_control);

	/* set GFX_HQD_VMID to 0 */
	WREG32_SOC15(GC, 0, regCP_GFX_HQD_VMID, mqd->cp_gfx_hqd_vmid);

	WREG32_SOC15(GC, 0, regCP_GFX_HQD_QUEUE_PRIORITY,
			mqd->cp_gfx_hqd_queue_priority);
	WREG32_SOC15(GC, 0, regCP_GFX_HQD_QUANTUM, mqd->cp_gfx_hqd_quantum);

	/* set GFX_HQD_BASE, similar as CP_RB_BASE */
	WREG32_SOC15(GC, 0, regCP_GFX_HQD_BASE, mqd->cp_gfx_hqd_base);
	WREG32_SOC15(GC, 0, regCP_GFX_HQD_BASE_HI, mqd->cp_gfx_hqd_base_hi);

	/* set GFX_HQD_RPTR_ADDR, similar as CP_RB_RPTR */
	WREG32_SOC15(GC, 0, regCP_GFX_HQD_RPTR_ADDR, mqd->cp_gfx_hqd_rptr_addr);
	WREG32_SOC15(GC, 0, regCP_GFX_HQD_RPTR_ADDR_HI, mqd->cp_gfx_hqd_rptr_addr_hi);

	/* set GFX_HQD_CNTL, similar as CP_RB_CNTL */
	WREG32_SOC15(GC, 0, regCP_GFX_HQD_CNTL, mqd->cp_gfx_hqd_cntl);

	/* set RB_WPTR_POLL_ADDR */
	WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO, mqd->cp_rb_wptr_poll_addr_lo);
	WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI, mqd->cp_rb_wptr_poll_addr_hi);

	/* set RB_DOORBELL_CONTROL */
	WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL, mqd->cp_rb_doorbell_control);

	/* active the queue */
	WREG32_SOC15(GC, 0, regCP_GFX_HQD_ACTIVE, mqd->cp_gfx_hqd_active);

	return 0;
}
#endif
    3906             : 
/*
 * gfx_v11_0_gfx_init_queue - initialize (or restore) a gfx ring's MQD
 * @ring: the gfx ring
 *
 * Three cases:
 *  - first init (neither reset nor resume): zero the MQD, fill it via
 *    amdgpu_ring_init_mqd() with the ring selected in GRBM, and stash a
 *    backup copy for later restores;
 *  - GPU reset: restore the MQD from the backup, zero the ring pointers
 *    and clear the ring contents;
 *  - resume from suspend: only clear the ring (the MQD is retained).
 *
 * Returns 0 (no failure paths).
 */
static int gfx_v11_0_gfx_init_queue(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct v11_gfx_mqd *mqd = ring->mqd_ptr;
	/* index of this ring within gfx_ring[], used for the mqd backup slot */
	int mqd_idx = ring - &adev->gfx.gfx_ring[0];

	if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
		memset((void *)mqd, 0, sizeof(*mqd));
		mutex_lock(&adev->srbm_mutex);
		soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		amdgpu_ring_init_mqd(ring);
#ifdef BRING_UP_DEBUG
		gfx_v11_0_gfx_queue_init_register(ring);
#endif
		soc21_grbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);
		if (adev->gfx.me.mqd_backup[mqd_idx])
			memcpy(adev->gfx.me.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
	} else if (amdgpu_in_reset(adev)) {
		/* reset mqd with the backup copy */
		if (adev->gfx.me.mqd_backup[mqd_idx])
			memcpy(mqd, adev->gfx.me.mqd_backup[mqd_idx], sizeof(*mqd));
		/* reset the ring */
		ring->wptr = 0;
		*ring->wptr_cpu_addr = 0;
		amdgpu_ring_clear_ring(ring);
#ifdef BRING_UP_DEBUG
		mutex_lock(&adev->srbm_mutex);
		soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		gfx_v11_0_gfx_queue_init_register(ring);
		soc21_grbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);
#endif
	} else {
		amdgpu_ring_clear_ring(ring);
	}

	return 0;
}
    3946             : 
    3947             : #ifndef BRING_UP_DEBUG
    3948           0 : static int gfx_v11_0_kiq_enable_kgq(struct amdgpu_device *adev)
    3949             : {
    3950           0 :         struct amdgpu_kiq *kiq = &adev->gfx.kiq;
    3951           0 :         struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
    3952             :         int r, i;
    3953             : 
    3954           0 :         if (!kiq->pmf || !kiq->pmf->kiq_map_queues)
    3955             :                 return -EINVAL;
    3956             : 
    3957           0 :         r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size *
    3958           0 :                                         adev->gfx.num_gfx_rings);
    3959           0 :         if (r) {
    3960           0 :                 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
    3961           0 :                 return r;
    3962             :         }
    3963             : 
    3964           0 :         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
    3965           0 :                 kiq->pmf->kiq_map_queues(kiq_ring, &adev->gfx.gfx_ring[i]);
    3966             : 
    3967           0 :         return amdgpu_ring_test_helper(kiq_ring);
    3968             : }
    3969             : #endif
    3970             : 
/*
 * gfx_v11_0_cp_async_gfx_ring_resume - init and map all gfx rings
 *
 * For each gfx ring: reserve and CPU-map its MQD buffer object, fill in
 * the MQD (gfx_v11_0_gfx_init_queue), then unmap and unreserve again.
 * The queues are then handed to hardware via the KIQ (skipped in
 * BRING_UP_DEBUG builds, where registers are written directly during MQD
 * init) and the CP gfx engine is started.  Rings are only marked
 * sched.ready once everything above succeeded.
 *
 * Returns 0 on success, negative error code on the first failure.
 */
static int gfx_v11_0_cp_async_gfx_ring_resume(struct amdgpu_device *adev)
{
	int r, i;
	struct amdgpu_ring *ring;

	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
		ring = &adev->gfx.gfx_ring[i];

		r = amdgpu_bo_reserve(ring->mqd_obj, false);
		if (unlikely(r != 0))
			goto done;

		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
		if (!r) {
			r = gfx_v11_0_gfx_init_queue(ring);
			/* drop the CPU mapping again; the CP reads the MQD
			 * from GPU memory */
			amdgpu_bo_kunmap(ring->mqd_obj);
			ring->mqd_ptr = NULL;
		}
		amdgpu_bo_unreserve(ring->mqd_obj);
		if (r)
			goto done;
	}
#ifndef BRING_UP_DEBUG
	r = gfx_v11_0_kiq_enable_kgq(adev);
	if (r)
		goto done;
#endif
	r = gfx_v11_0_cp_gfx_start(adev);
	if (r)
		goto done;

	/* queues are mapped and the CP is running: rings may be used now */
	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
		ring = &adev->gfx.gfx_ring[i];
		ring->sched.ready = true;
	}
done:
	return r;
}
    4009             : 
/**
 * gfx_v11_0_compute_mqd_init - populate a v11 compute MQD image
 * @adev: amdgpu device pointer
 * @m: CPU pointer to the MQD to fill (a struct v11_compute_mqd)
 * @prop: queue properties (ring/EOP/wptr addresses, doorbell, priority)
 *
 * Build the memory queue descriptor the CP consumes when the queue is
 * mapped (by the KIQ, or by direct HQD register writes).  Only the MQD
 * image in memory is written here; no hardware register is programmed —
 * the RREG32 calls below just pick up current register defaults as a
 * base for the REG_SET_FIELD edits.
 *
 * Always returns 0.
 */
static int gfx_v11_0_compute_mqd_init(struct amdgpu_device *adev, void *m,
				      struct amdgpu_mqd_prop *prop)
{
	struct v11_compute_mqd *mqd = m;
	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
	uint32_t tmp;

	mqd->header = 0xC0310800;
	mqd->compute_pipelinestat_enable = 0x00000001;
	/* enable all CUs on every shader engine for this queue */
	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
	mqd->compute_misc_reserved = 0x00000007;

	/* EOP buffer base; registers hold the address shifted right by 8 */
	eop_base_addr = prop->eop_gpu_addr >> 8;
	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);

	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
	tmp = RREG32_SOC15(GC, 0, regCP_HQD_EOP_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
			(order_base_2(GFX11_MEC_HPD_SIZE / 4) - 1));

	mqd->cp_hqd_eop_control = tmp;

	/* enable doorbell? */
	tmp = RREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL);

	if (prop->use_doorbell) {
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_OFFSET, prop->doorbell_index);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_EN, 1);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_SOURCE, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_HIT, 0);
	} else {
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_EN, 0);
	}

	mqd->cp_hqd_pq_doorbell_control = tmp;

	/* disable the queue if it's active */
	mqd->cp_hqd_dequeue_request = 0;
	mqd->cp_hqd_pq_rptr = 0;
	mqd->cp_hqd_pq_wptr_lo = 0;
	mqd->cp_hqd_pq_wptr_hi = 0;

	/* set the pointer to the MQD */
	mqd->cp_mqd_base_addr_lo = prop->mqd_gpu_addr & 0xfffffffc;
	mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr);

	/* set MQD vmid to 0 */
	tmp = RREG32_SOC15(GC, 0, regCP_MQD_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
	mqd->cp_mqd_control = tmp;

	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
	hqd_gpu_addr = prop->hqd_base_gpu_addr >> 8;
	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);

	/* set up the HQD, this is similar to CP_RB0_CNTL */
	tmp = RREG32_SOC15(GC, 0, regCP_HQD_PQ_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
			    (order_base_2(prop->queue_size / 4) - 1));
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
			    (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1));
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
	mqd->cp_hqd_pq_control = tmp;

	/* set the wb address whether it's enabled or not */
	wb_gpu_addr = prop->rptr_gpu_addr;
	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_rptr_report_addr_hi =
		upper_32_bits(wb_gpu_addr) & 0xffff;

	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
	wb_gpu_addr = prop->wptr_gpu_addr;
	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;

	tmp = 0;
	/* enable the doorbell if requested */
	if (prop->use_doorbell) {
		tmp = RREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				DOORBELL_OFFSET, prop->doorbell_index);

		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_EN, 1);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_SOURCE, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_HIT, 0);
	}

	mqd->cp_hqd_pq_doorbell_control = tmp;

	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
	mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR);

	/* set the vmid for the queue */
	mqd->cp_hqd_vmid = 0;

	tmp = RREG32_SOC15(GC, 0, regCP_HQD_PERSISTENT_STATE);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x55);
	mqd->cp_hqd_persistent_state = tmp;

	/* set MIN_IB_AVAIL_SIZE */
	tmp = RREG32_SOC15(GC, 0, regCP_HQD_IB_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
	mqd->cp_hqd_ib_control = tmp;

	/* set static priority for a compute queue/ring */
	mqd->cp_hqd_pipe_priority = prop->hqd_pipe_priority;
	mqd->cp_hqd_queue_priority = prop->hqd_queue_priority;

	mqd->cp_hqd_active = prop->hqd_active;

	return 0;
}
    4138             : 
/*
 * gfx_v11_0_kiq_init_register - program the HQD registers from the MQD
 * @ring: the KIQ ring whose MQD image (ring->mqd_ptr) supplies the values
 *
 * Writes the hardware queue descriptor registers of the currently
 * GRBM-selected queue directly from the MQD image.  Callers hold
 * adev->srbm_mutex and have selected the right me/pipe/queue via
 * soc21_grbm_select() before calling (see gfx_v11_0_kiq_init_queue).
 * The register write order below mirrors the hardware bring-up
 * sequence and must not be reordered.
 *
 * Always returns 0.
 */
static int gfx_v11_0_kiq_init_register(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct v11_compute_mqd *mqd = ring->mqd_ptr;
	int j;

	/* inactivate the queue */
	if (amdgpu_sriov_vf(adev))
		WREG32_SOC15(GC, 0, regCP_HQD_ACTIVE, 0);

	/* disable wptr polling */
	WREG32_FIELD15_PREREG(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);

	/* write the EOP addr */
	WREG32_SOC15(GC, 0, regCP_HQD_EOP_BASE_ADDR,
	       mqd->cp_hqd_eop_base_addr_lo);
	WREG32_SOC15(GC, 0, regCP_HQD_EOP_BASE_ADDR_HI,
	       mqd->cp_hqd_eop_base_addr_hi);

	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
	WREG32_SOC15(GC, 0, regCP_HQD_EOP_CONTROL,
	       mqd->cp_hqd_eop_control);

	/* enable doorbell? */
	WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL,
	       mqd->cp_hqd_pq_doorbell_control);

	/* disable the queue if it's active */
	if (RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1) {
		WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 1);
		/* poll until the CP has drained the queue (bounded by
		 * adev->usec_timeout microseconds) */
		for (j = 0; j < adev->usec_timeout; j++) {
			if (!(RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1))
				break;
			udelay(1);
		}
		WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST,
		       mqd->cp_hqd_dequeue_request);
		WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR,
		       mqd->cp_hqd_pq_rptr);
		WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_LO,
		       mqd->cp_hqd_pq_wptr_lo);
		WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_HI,
		       mqd->cp_hqd_pq_wptr_hi);
	}

	/* set the pointer to the MQD */
	WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR,
	       mqd->cp_mqd_base_addr_lo);
	WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR_HI,
	       mqd->cp_mqd_base_addr_hi);

	/* set MQD vmid to 0 */
	WREG32_SOC15(GC, 0, regCP_MQD_CONTROL,
	       mqd->cp_mqd_control);

	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
	WREG32_SOC15(GC, 0, regCP_HQD_PQ_BASE,
	       mqd->cp_hqd_pq_base_lo);
	WREG32_SOC15(GC, 0, regCP_HQD_PQ_BASE_HI,
	       mqd->cp_hqd_pq_base_hi);

	/* set up the HQD, this is similar to CP_RB0_CNTL */
	WREG32_SOC15(GC, 0, regCP_HQD_PQ_CONTROL,
	       mqd->cp_hqd_pq_control);

	/* set the wb address whether it's enabled or not */
	WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR_REPORT_ADDR,
		mqd->cp_hqd_pq_rptr_report_addr_lo);
	WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
		mqd->cp_hqd_pq_rptr_report_addr_hi);

	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
	WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR,
	       mqd->cp_hqd_pq_wptr_poll_addr_lo);
	WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR_HI,
	       mqd->cp_hqd_pq_wptr_poll_addr_hi);

	/* enable the doorbell if requested */
	if (ring->use_doorbell) {
		/* open the MEC doorbell aperture from the KIQ doorbell up
		 * to the end of the user-queue range */
		WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_LOWER,
			(adev->doorbell_index.kiq * 2) << 2);
		WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_UPPER,
			(adev->doorbell_index.userqueue_end * 2) << 2);
	}

	WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL,
	       mqd->cp_hqd_pq_doorbell_control);

	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
	WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_LO,
	       mqd->cp_hqd_pq_wptr_lo);
	WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_HI,
	       mqd->cp_hqd_pq_wptr_hi);

	/* set the vmid for the queue */
	WREG32_SOC15(GC, 0, regCP_HQD_VMID, mqd->cp_hqd_vmid);

	WREG32_SOC15(GC, 0, regCP_HQD_PERSISTENT_STATE,
	       mqd->cp_hqd_persistent_state);

	/* activate the queue */
	WREG32_SOC15(GC, 0, regCP_HQD_ACTIVE,
	       mqd->cp_hqd_active);

	if (ring->use_doorbell)
		WREG32_FIELD15_PREREG(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);

	return 0;
}
    4248             : 
/*
 * gfx_v11_0_kiq_init_queue - initialize the KIQ's MQD and HQD registers
 * @ring: the KIQ ring (MQD currently mapped at ring->mqd_ptr)
 *
 * The KIQ cannot map itself through KIQ packets, so its HQD registers
 * are always written directly (gfx_v11_0_kiq_init_register) in both
 * branches below.  On GPU reset the MQD is restored from the backup
 * copy; otherwise (first init or resume) a fresh MQD is built and
 * backed up.
 *
 * Always returns 0.
 */
static int gfx_v11_0_kiq_init_queue(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct v11_compute_mqd *mqd = ring->mqd_ptr;
	/* the KIQ backup slot sits one past the compute-ring slots */
	int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;

	gfx_v11_0_kiq_setting(ring);

	if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
		/* reset MQD to a clean status */
		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));

		/* reset ring buffer */
		ring->wptr = 0;
		amdgpu_ring_clear_ring(ring);

		/* srbm_mutex guards the GRBM queue selection below */
		mutex_lock(&adev->srbm_mutex);
		soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		gfx_v11_0_kiq_init_register(ring);
		soc21_grbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);
	} else {
		memset((void *)mqd, 0, sizeof(*mqd));
		mutex_lock(&adev->srbm_mutex);
		soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		amdgpu_ring_init_mqd(ring);
		gfx_v11_0_kiq_init_register(ring);
		soc21_grbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		/* keep a backup so GPU reset can restore the MQD */
		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
	}

	return 0;
}
    4286             : 
/*
 * gfx_v11_0_kcq_init_queue - initialize the MQD for one compute (KCQ) ring
 * @ring: the compute ring whose MQD is currently mapped at ring->mqd_ptr
 *
 * Mirrors gfx_v11_0_gfx_init_queue for compute queues: build a fresh
 * MQD (and back it up) on first init, restore from backup on GPU reset,
 * or just clear the ring buffer when resuming from suspend.  Unlike the
 * KIQ path, no HQD registers are written here — the queue is mapped
 * later via the KIQ (amdgpu_gfx_enable_kcq).
 *
 * Always returns 0.
 */
static int gfx_v11_0_kcq_init_queue(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct v11_compute_mqd *mqd = ring->mqd_ptr;
	int mqd_idx = ring - &adev->gfx.compute_ring[0];

	if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
		memset((void *)mqd, 0, sizeof(*mqd));
		/* srbm_mutex serializes GRBM queue selection across users */
		mutex_lock(&adev->srbm_mutex);
		soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		amdgpu_ring_init_mqd(ring);
		soc21_grbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
	} else if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
		/* reset MQD to a clean status */
		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));

		/* reset ring buffer */
		ring->wptr = 0;
		atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0);
		amdgpu_ring_clear_ring(ring);
	} else {
		/* resume from suspend: MQD survives, just clear the ring */
		amdgpu_ring_clear_ring(ring);
	}

	return 0;
}
    4318             : 
    4319           0 : static int gfx_v11_0_kiq_resume(struct amdgpu_device *adev)
    4320             : {
    4321             :         struct amdgpu_ring *ring;
    4322             :         int r;
    4323             : 
    4324           0 :         ring = &adev->gfx.kiq.ring;
    4325             : 
    4326           0 :         r = amdgpu_bo_reserve(ring->mqd_obj, false);
    4327           0 :         if (unlikely(r != 0))
    4328             :                 return r;
    4329             : 
    4330           0 :         r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
    4331           0 :         if (unlikely(r != 0)) {
    4332           0 :                 amdgpu_bo_unreserve(ring->mqd_obj);
    4333           0 :                 return r;
    4334             :         }
    4335             : 
    4336           0 :         gfx_v11_0_kiq_init_queue(ring);
    4337           0 :         amdgpu_bo_kunmap(ring->mqd_obj);
    4338           0 :         ring->mqd_ptr = NULL;
    4339           0 :         amdgpu_bo_unreserve(ring->mqd_obj);
    4340           0 :         ring->sched.ready = true;
    4341           0 :         return 0;
    4342             : }
    4343             : 
    4344           0 : static int gfx_v11_0_kcq_resume(struct amdgpu_device *adev)
    4345             : {
    4346           0 :         struct amdgpu_ring *ring = NULL;
    4347           0 :         int r = 0, i;
    4348             : 
    4349           0 :         if (!amdgpu_async_gfx_ring)
    4350           0 :                 gfx_v11_0_cp_compute_enable(adev, true);
    4351             : 
    4352           0 :         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
    4353           0 :                 ring = &adev->gfx.compute_ring[i];
    4354             : 
    4355           0 :                 r = amdgpu_bo_reserve(ring->mqd_obj, false);
    4356           0 :                 if (unlikely(r != 0))
    4357             :                         goto done;
    4358           0 :                 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
    4359           0 :                 if (!r) {
    4360           0 :                         r = gfx_v11_0_kcq_init_queue(ring);
    4361           0 :                         amdgpu_bo_kunmap(ring->mqd_obj);
    4362           0 :                         ring->mqd_ptr = NULL;
    4363             :                 }
    4364           0 :                 amdgpu_bo_unreserve(ring->mqd_obj);
    4365           0 :                 if (r)
    4366             :                         goto done;
    4367             :         }
    4368             : 
    4369           0 :         r = amdgpu_gfx_enable_kcq(adev);
    4370             : done:
    4371           0 :         return r;
    4372             : }
    4373             : 
/*
 * gfx_v11_0_cp_resume - bring up all CP engines and queues
 *
 * Sequence: optionally load CP microcode (direct/legacy load only),
 * program the doorbell aperture, start the compute and gfx micro
 * engines, resume the KIQ (or the MES KIQ when MES is enabled), resume
 * the compute queues, then the gfx queues, and finally ring-test every
 * ring.  The order of these steps is the hardware bring-up contract.
 *
 * Returns 0 on success, negative error code on the first failure.
 */
static int gfx_v11_0_cp_resume(struct amdgpu_device *adev)
{
	int r, i;
	struct amdgpu_ring *ring;

	/* keep the GUI idle interrupt quiet while the CP is (re)started */
	if (!(adev->flags & AMD_IS_APU))
		gfx_v11_0_enable_gui_idle_interrupt(adev, false);

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
		/* legacy firmware loading */
		r = gfx_v11_0_cp_gfx_load_microcode(adev);
		if (r)
			return r;

		/* RS64 and F32 MEC firmware use different load paths */
		if (adev->gfx.rs64_enable)
			r = gfx_v11_0_cp_compute_load_microcode_rs64(adev);
		else
			r = gfx_v11_0_cp_compute_load_microcode(adev);
		if (r)
			return r;
	}

	gfx_v11_0_cp_set_doorbell_range(adev);

	if (amdgpu_async_gfx_ring) {
		gfx_v11_0_cp_compute_enable(adev, true);
		gfx_v11_0_cp_gfx_enable(adev, true);
	}

	/* with MES the MES KIQ takes over the regular KIQ duties */
	if (adev->enable_mes_kiq && adev->mes.kiq_hw_init)
		r = amdgpu_mes_kiq_hw_init(adev);
	else
		r = gfx_v11_0_kiq_resume(adev);
	if (r)
		return r;

	r = gfx_v11_0_kcq_resume(adev);
	if (r)
		return r;

	if (!amdgpu_async_gfx_ring) {
		r = gfx_v11_0_cp_gfx_resume(adev);
		if (r)
			return r;
	} else {
		r = gfx_v11_0_cp_async_gfx_ring_resume(adev);
		if (r)
			return r;
	}

	/* smoke-test every ring before declaring the CP up */
	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
		ring = &adev->gfx.gfx_ring[i];
		r = amdgpu_ring_test_helper(ring);
		if (r)
			return r;
	}

	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		ring = &adev->gfx.compute_ring[i];
		r = amdgpu_ring_test_helper(ring);
		if (r)
			return r;
	}

	return 0;
}
    4440             : 
/* Enable or disable both CP micro engines: gfx first, then compute. */
static void gfx_v11_0_cp_enable(struct amdgpu_device *adev, bool enable)
{
	gfx_v11_0_cp_gfx_enable(adev, enable);
	gfx_v11_0_cp_compute_enable(adev, enable);
}
    4446             : 
    4447           0 : static int gfx_v11_0_gfxhub_enable(struct amdgpu_device *adev)
    4448             : {
    4449             :         int r;
    4450             :         bool value;
    4451             : 
    4452           0 :         r = adev->gfxhub.funcs->gart_enable(adev);
    4453           0 :         if (r)
    4454             :                 return r;
    4455             : 
    4456           0 :         adev->hdp.funcs->flush_hdp(adev, NULL);
    4457             : 
    4458           0 :         value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ?
    4459           0 :                 false : true;
    4460             : 
    4461           0 :         adev->gfxhub.funcs->set_fault_enable_default(adev, value);
    4462           0 :         amdgpu_gmc_flush_gpu_tlb(adev, 0, AMDGPU_GFXHUB_0, 0);
    4463             : 
    4464           0 :         return 0;
    4465             : }
    4466             : 
/* When the RS64 CP microcode variant is in use, switch the gfx engine
 * (CP_GFX_CNTL.ENGINE_SEL) and the MEC instruction set
 * (CP_MEC_ISA_CNTL.ISA_MODE) over to the RS64 architecture before the
 * firmware starts running. */
static void gfx_v11_0_select_cp_fw_arch(struct amdgpu_device *adev)
{
	u32 tmp;

	/* select RS64 */
	if (adev->gfx.rs64_enable) {
		tmp = RREG32_SOC15(GC, 0, regCP_GFX_CNTL);
		tmp = REG_SET_FIELD(tmp, CP_GFX_CNTL, ENGINE_SEL, 1);
		WREG32_SOC15(GC, 0, regCP_GFX_CNTL, tmp);

		tmp = RREG32_SOC15(GC, 0, regCP_MEC_ISA_CNTL);
		tmp = REG_SET_FIELD(tmp, CP_MEC_ISA_CNTL, ISA_MODE, 1);
		WREG32_SOC15(GC, 0, regCP_MEC_ISA_CNTL, tmp);
	}

	/* emulation runs far slower; give the selection time to take effect */
	if (amdgpu_emu_mode == 1)
		msleep(100);
}
    4485             : 
    4486           0 : static int get_gb_addr_config(struct amdgpu_device * adev)
    4487             : {
    4488             :         u32 gb_addr_config;
    4489             : 
    4490           0 :         gb_addr_config = RREG32_SOC15(GC, 0, regGB_ADDR_CONFIG);
    4491           0 :         if (gb_addr_config == 0)
    4492             :                 return -EINVAL;
    4493             : 
    4494           0 :         adev->gfx.config.gb_addr_config_fields.num_pkrs =
    4495           0 :                 1 << REG_GET_FIELD(gb_addr_config, GB_ADDR_CONFIG, NUM_PKRS);
    4496             : 
    4497           0 :         adev->gfx.config.gb_addr_config = gb_addr_config;
    4498             : 
    4499           0 :         adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
    4500           0 :                         REG_GET_FIELD(adev->gfx.config.gb_addr_config,
    4501             :                                       GB_ADDR_CONFIG, NUM_PIPES);
    4502             : 
    4503           0 :         adev->gfx.config.max_tile_pipes =
    4504           0 :                 adev->gfx.config.gb_addr_config_fields.num_pipes;
    4505             : 
    4506           0 :         adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
    4507           0 :                         REG_GET_FIELD(adev->gfx.config.gb_addr_config,
    4508             :                                       GB_ADDR_CONFIG, MAX_COMPRESSED_FRAGS);
    4509           0 :         adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
    4510           0 :                         REG_GET_FIELD(adev->gfx.config.gb_addr_config,
    4511             :                                       GB_ADDR_CONFIG, NUM_RB_PER_SE);
    4512           0 :         adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
    4513           0 :                         REG_GET_FIELD(adev->gfx.config.gb_addr_config,
    4514             :                                       GB_ADDR_CONFIG, NUM_SHADER_ENGINES);
    4515           0 :         adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
    4516           0 :                         REG_GET_FIELD(adev->gfx.config.gb_addr_config,
    4517             :                                       GB_ADDR_CONFIG, PIPE_INTERLEAVE_SIZE));
    4518             : 
    4519           0 :         return 0;
    4520             : }
    4521             : 
    4522           0 : static void gfx_v11_0_disable_gpa_mode(struct amdgpu_device *adev)
    4523             : {
    4524             :         uint32_t data;
    4525             : 
    4526           0 :         data = RREG32_SOC15(GC, 0, regCPC_PSP_DEBUG);
    4527           0 :         data |= CPC_PSP_DEBUG__GPA_OVERRIDE_MASK;
    4528           0 :         WREG32_SOC15(GC, 0, regCPC_PSP_DEBUG, data);
    4529             : 
    4530           0 :         data = RREG32_SOC15(GC, 0, regCPG_PSP_DEBUG);
    4531           0 :         data |= CPG_PSP_DEBUG__GPA_OVERRIDE_MASK;
    4532           0 :         WREG32_SOC15(GC, 0, regCPG_PSP_DEBUG, data);
    4533           0 : }
    4534             : 
    4535           0 : static int gfx_v11_0_hw_init(void *handle)
    4536             : {
    4537             :         int r;
    4538           0 :         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
    4539             : 
    4540           0 :         if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
    4541           0 :                 if (adev->gfx.imu.funcs) {
    4542             :                         /* RLC autoload sequence 1: Program rlc ram */
    4543           0 :                         if (adev->gfx.imu.funcs->program_rlc_ram)
    4544           0 :                                 adev->gfx.imu.funcs->program_rlc_ram(adev);
    4545             :                 }
    4546             :                 /* rlc autoload firmware */
    4547           0 :                 r = gfx_v11_0_rlc_backdoor_autoload_enable(adev);
    4548           0 :                 if (r)
    4549             :                         return r;
    4550             :         } else {
    4551           0 :                 if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
    4552           0 :                         if (adev->gfx.imu.funcs && (amdgpu_dpm > 0)) {
    4553           0 :                                 if (adev->gfx.imu.funcs->load_microcode)
    4554           0 :                                         adev->gfx.imu.funcs->load_microcode(adev);
    4555           0 :                                 if (adev->gfx.imu.funcs->setup_imu)
    4556           0 :                                         adev->gfx.imu.funcs->setup_imu(adev);
    4557           0 :                                 if (adev->gfx.imu.funcs->start_imu)
    4558           0 :                                         adev->gfx.imu.funcs->start_imu(adev);
    4559             :                         }
    4560             : 
    4561             :                         /* disable gpa mode in backdoor loading */
    4562           0 :                         gfx_v11_0_disable_gpa_mode(adev);
    4563             :                 }
    4564             :         }
    4565             : 
    4566           0 :         if ((adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) ||
    4567             :             (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) {
    4568           0 :                 r = gfx_v11_0_wait_for_rlc_autoload_complete(adev);
    4569           0 :                 if (r) {
    4570           0 :                         dev_err(adev->dev, "(%d) failed to wait rlc autoload complete\n", r);
    4571           0 :                         return r;
    4572             :                 }
    4573             :         }
    4574             : 
    4575           0 :         adev->gfx.is_poweron = true;
    4576             : 
    4577           0 :         if(get_gb_addr_config(adev))
    4578           0 :                 DRM_WARN("Invalid gb_addr_config !\n");
    4579             : 
    4580           0 :         if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP &&
    4581           0 :             adev->gfx.rs64_enable)
    4582           0 :                 gfx_v11_0_config_gfx_rs64(adev);
    4583             : 
    4584           0 :         r = gfx_v11_0_gfxhub_enable(adev);
    4585           0 :         if (r)
    4586             :                 return r;
    4587             : 
    4588           0 :         if (!amdgpu_emu_mode)
    4589             :                 gfx_v11_0_init_golden_registers(adev);
    4590             : 
    4591           0 :         if ((adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) ||
    4592           0 :             (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO && amdgpu_dpm == 1)) {
    4593             :                 /**
    4594             :                  * For gfx 11, rlc firmware loading relies on smu firmware is
    4595             :                  * loaded firstly, so in direct type, it has to load smc ucode
    4596             :                  * here before rlc.
    4597             :                  */
    4598           0 :                 if (!(adev->flags & AMD_IS_APU)) {
    4599           0 :                         r = amdgpu_pm_load_smu_firmware(adev, NULL);
    4600           0 :                         if (r)
    4601             :                                 return r;
    4602             :                 }
    4603             :         }
    4604             : 
    4605           0 :         gfx_v11_0_constants_init(adev);
    4606             : 
    4607           0 :         if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
    4608           0 :                 gfx_v11_0_select_cp_fw_arch(adev);
    4609             : 
    4610           0 :         if (adev->nbio.funcs->gc_doorbell_init)
    4611           0 :                 adev->nbio.funcs->gc_doorbell_init(adev);
    4612             : 
    4613           0 :         r = gfx_v11_0_rlc_resume(adev);
    4614           0 :         if (r)
    4615             :                 return r;
    4616             : 
    4617             :         /*
    4618             :          * init golden registers and rlc resume may override some registers,
    4619             :          * reconfig them here
    4620             :          */
    4621           0 :         gfx_v11_0_tcp_harvest(adev);
    4622             : 
    4623           0 :         r = gfx_v11_0_cp_resume(adev);
    4624             :         if (r)
    4625             :                 return r;
    4626             : 
    4627             :         return r;
    4628             : }
    4629             : 
    4630             : #ifndef BRING_UP_DEBUG
    4631           0 : static int gfx_v11_0_kiq_disable_kgq(struct amdgpu_device *adev)
    4632             : {
    4633           0 :         struct amdgpu_kiq *kiq = &adev->gfx.kiq;
    4634           0 :         struct amdgpu_ring *kiq_ring = &kiq->ring;
    4635           0 :         int i, r = 0;
    4636             : 
    4637           0 :         if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
    4638             :                 return -EINVAL;
    4639             : 
    4640           0 :         if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size *
    4641           0 :                                         adev->gfx.num_gfx_rings))
    4642             :                 return -ENOMEM;
    4643             : 
    4644           0 :         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
    4645           0 :                 kiq->pmf->kiq_unmap_queues(kiq_ring, &adev->gfx.gfx_ring[i],
    4646             :                                            PREEMPT_QUEUES, 0, 0);
    4647             : 
    4648           0 :         if (adev->gfx.kiq.ring.sched.ready)
    4649           0 :                 r = amdgpu_ring_test_helper(kiq_ring);
    4650             : 
    4651             :         return r;
    4652             : }
    4653             : #endif
    4654             : 
    4655           0 : static int gfx_v11_0_hw_fini(void *handle)
    4656             : {
    4657           0 :         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
    4658             :         int r;
    4659             :         uint32_t tmp;
    4660             : 
    4661           0 :         amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
    4662           0 :         amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
    4663             : 
    4664           0 :         if (!adev->no_hw_access) {
    4665             : #ifndef BRING_UP_DEBUG
    4666           0 :                 if (amdgpu_async_gfx_ring) {
    4667           0 :                         r = gfx_v11_0_kiq_disable_kgq(adev);
    4668           0 :                         if (r)
    4669           0 :                                 DRM_ERROR("KGQ disable failed\n");
    4670             :                 }
    4671             : #endif
    4672           0 :                 if (amdgpu_gfx_disable_kcq(adev))
    4673           0 :                         DRM_ERROR("KCQ disable failed\n");
    4674             : 
    4675           0 :                 amdgpu_mes_kiq_hw_fini(adev);
    4676             :         }
    4677             : 
    4678           0 :         if (amdgpu_sriov_vf(adev)) {
    4679           0 :                 gfx_v11_0_cp_gfx_enable(adev, false);
    4680             :                 /* Program KIQ position of RLC_CP_SCHEDULERS during destroy */
    4681           0 :                 tmp = RREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS);
    4682           0 :                 tmp &= 0xffffff00;
    4683           0 :                 WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp);
    4684             : 
    4685             :                 return 0;
    4686             :         }
    4687           0 :         gfx_v11_0_cp_enable(adev, false);
    4688           0 :         gfx_v11_0_enable_gui_idle_interrupt(adev, false);
    4689             : 
    4690           0 :         adev->gfxhub.funcs->gart_disable(adev);
    4691             : 
    4692           0 :         adev->gfx.is_poweron = false;
    4693             : 
    4694           0 :         return 0;
    4695             : }
    4696             : 
/* IP suspend hook — hardware teardown is identical to hw_fini. */
static int gfx_v11_0_suspend(void *handle)
{
	return gfx_v11_0_hw_fini(handle);
}
    4701             : 
/* IP resume hook — hardware bring-up is identical to hw_init. */
static int gfx_v11_0_resume(void *handle)
{
	return gfx_v11_0_hw_init(handle);
}
    4706             : 
    4707           0 : static bool gfx_v11_0_is_idle(void *handle)
    4708             : {
    4709           0 :         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
    4710             : 
    4711           0 :         if (REG_GET_FIELD(RREG32_SOC15(GC, 0, regGRBM_STATUS),
    4712             :                                 GRBM_STATUS, GUI_ACTIVE))
    4713             :                 return false;
    4714             :         else
    4715           0 :                 return true;
    4716             : }
    4717             : 
    4718           0 : static int gfx_v11_0_wait_for_idle(void *handle)
    4719             : {
    4720             :         unsigned i;
    4721             :         u32 tmp;
    4722           0 :         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
    4723             : 
    4724           0 :         for (i = 0; i < adev->usec_timeout; i++) {
    4725             :                 /* read MC_STATUS */
    4726           0 :                 tmp = RREG32_SOC15(GC, 0, regGRBM_STATUS) &
    4727             :                         GRBM_STATUS__GUI_ACTIVE_MASK;
    4728             : 
    4729           0 :                 if (!REG_GET_FIELD(tmp, GRBM_STATUS, GUI_ACTIVE))
    4730             :                         return 0;
    4731           0 :                 udelay(1);
    4732             :         }
    4733             :         return -ETIMEDOUT;
    4734             : }
    4735             : 
/* Full GFX soft reset: mask CP interrupts, enter RLC safe mode, dequeue
 * every compute and gfx hardware queue, reset all non-kernel VMIDs,
 * pulse the GRBM soft-reset bits for the CP blocks, restart the micro
 * engines and finally resume the CP.  Returns 0 on success or -EINVAL
 * if a drain/ack times out.  The register sequence below is
 * order-sensitive; do not reorder statements. */
static int gfx_v11_0_soft_reset(void *handle)
{
	u32 grbm_soft_reset = 0;
	u32 tmp;
	int i, j, k;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* Mask CP busy/context/idle interrupts while the reset is in flight. */
	tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 0);
	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 0);
	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 0);
	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 0);
	WREG32_SOC15(GC, 0, regCP_INT_CNTL, tmp);

	gfx_v11_0_set_safe_mode(adev);

	/* Select each compute queue via GRBM_GFX_CNTL and request dequeue. */
	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
				tmp = RREG32_SOC15(GC, 0, regGRBM_GFX_CNTL);
				tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, MEID, i);
				tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, QUEUEID, j);
				tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, PIPEID, k);
				WREG32_SOC15(GC, 0, regGRBM_GFX_CNTL, tmp);

				WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 0x2);
				WREG32_SOC15(GC, 0, regSPI_COMPUTE_QUEUE_RESET, 0x1);
			}
		}
	}
	/* Same dequeue pass for every gfx (ME) queue. */
	for (i = 0; i < adev->gfx.me.num_me; ++i) {
		for (j = 0; j < adev->gfx.me.num_queue_per_pipe; j++) {
			for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) {
				tmp = RREG32_SOC15(GC, 0, regGRBM_GFX_CNTL);
				tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, MEID, i);
				tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, QUEUEID, j);
				tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, PIPEID, k);
				WREG32_SOC15(GC, 0, regGRBM_GFX_CNTL, tmp);

				WREG32_SOC15(GC, 0, regCP_GFX_HQD_DEQUEUE_REQUEST, 0x1);
			}
		}
	}

	/* Reset every VMID except VMID 0 (kernel/system). */
	WREG32_SOC15(GC, 0, regCP_VMID_RESET, 0xfffffffe);

	// Read CP_VMID_RESET register three times.
	// to get sufficient time for GFX_HQD_ACTIVE reach 0
	RREG32_SOC15(GC, 0, regCP_VMID_RESET);
	RREG32_SOC15(GC, 0, regCP_VMID_RESET);
	RREG32_SOC15(GC, 0, regCP_VMID_RESET);

	/* Wait for all compute and gfx HQDs to drain. */
	for (i = 0; i < adev->usec_timeout; i++) {
		if (!RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) &&
		    !RREG32_SOC15(GC, 0, regCP_GFX_HQD_ACTIVE))
			break;
		udelay(1);
	}
	if (i >= adev->usec_timeout) {
		printk("Failed to wait all pipes clean\n");
		return -EINVAL;
	}

	/**********  trigger soft reset  ***********/
	grbm_soft_reset = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET);
	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
					SOFT_RESET_CP, 1);
	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
					SOFT_RESET_GFX, 1);
	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
					SOFT_RESET_CPF, 1);
	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
					SOFT_RESET_CPC, 1);
	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
					SOFT_RESET_CPG, 1);
	WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, grbm_soft_reset);
	/**********  exit soft reset  ***********/
	grbm_soft_reset = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET);
	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
					SOFT_RESET_CP, 0);
	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
					SOFT_RESET_GFX, 0);
	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
					SOFT_RESET_CPF, 0);
	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
					SOFT_RESET_CPC, 0);
	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
					SOFT_RESET_CPG, 0);
	WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, grbm_soft_reset);

	tmp = RREG32_SOC15(GC, 0, regCP_SOFT_RESET_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_SOFT_RESET_CNTL, CMP_HQD_REG_RESET, 0x1);
	WREG32_SOC15(GC, 0, regCP_SOFT_RESET_CNTL, tmp);

	/* Release the micro-engine halt bits so the CP can run again. */
	WREG32_SOC15(GC, 0, regCP_ME_CNTL, 0x0);
	WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, 0x0);

	/* Wait for the hardware to acknowledge the VMID reset. */
	for (i = 0; i < adev->usec_timeout; i++) {
		if (!RREG32_SOC15(GC, 0, regCP_VMID_RESET))
			break;
		udelay(1);
	}
	if (i >= adev->usec_timeout) {
		printk("Failed to wait CP_VMID_RESET to 0\n");
		return -EINVAL;
	}

	/* Restore the CP interrupts that were masked on entry. */
	tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 1);
	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 1);
	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 1);
	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 1);
	WREG32_SOC15(GC, 0, regCP_INT_CNTL, tmp);

	gfx_v11_0_unset_safe_mode(adev);

	return gfx_v11_0_cp_resume(adev);
}
    4854             : 
    4855           0 : static bool gfx_v11_0_check_soft_reset(void *handle)
    4856             : {
    4857             :         int i, r;
    4858           0 :         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
    4859             :         struct amdgpu_ring *ring;
    4860             :         long tmo = msecs_to_jiffies(1000);
    4861             : 
    4862           0 :         for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
    4863           0 :                 ring = &adev->gfx.gfx_ring[i];
    4864           0 :                 r = amdgpu_ring_test_ib(ring, tmo);
    4865           0 :                 if (r)
    4866             :                         return true;
    4867             :         }
    4868             : 
    4869           0 :         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
    4870           0 :                 ring = &adev->gfx.compute_ring[i];
    4871           0 :                 r = amdgpu_ring_test_ib(ring, tmo);
    4872           0 :                 if (r)
    4873             :                         return true;
    4874             :         }
    4875             : 
    4876             :         return false;
    4877             : }
    4878             : 
    4879           0 : static uint64_t gfx_v11_0_get_gpu_clock_counter(struct amdgpu_device *adev)
    4880             : {
    4881             :         uint64_t clock;
    4882             : 
    4883           0 :         amdgpu_gfx_off_ctrl(adev, false);
    4884           0 :         mutex_lock(&adev->gfx.gpu_clock_mutex);
    4885           0 :         clock = (uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_LOWER) |
    4886           0 :                 ((uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_UPPER) << 32ULL);
    4887           0 :         mutex_unlock(&adev->gfx.gpu_clock_mutex);
    4888           0 :         amdgpu_gfx_off_ctrl(adev, true);
    4889           0 :         return clock;
    4890             : }
    4891             : 
/* Emit PM4 register writes that program the per-VMID GDS (global data
 * share) partition: the GDS memory window base/size, the GWS (global
 * wave sync) allocation, and the OA (ordered append) unit mask.
 * BASE/SIZE registers are paired per VMID, hence the 2 * vmid stride. */
static void gfx_v11_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
					   uint32_t vmid,
					   uint32_t gds_base, uint32_t gds_size,
					   uint32_t gws_base, uint32_t gws_size,
					   uint32_t oa_base, uint32_t oa_size)
{
	struct amdgpu_device *adev = ring->adev;

	/* GDS Base */
	gfx_v11_0_write_data_to_reg(ring, 0, false,
				    SOC15_REG_OFFSET(GC, 0, regGDS_VMID0_BASE) + 2 * vmid,
				    gds_base);

	/* GDS Size */
	gfx_v11_0_write_data_to_reg(ring, 0, false,
				    SOC15_REG_OFFSET(GC, 0, regGDS_VMID0_SIZE) + 2 * vmid,
				    gds_size);

	/* GWS: size in the high field, base in the low bits */
	gfx_v11_0_write_data_to_reg(ring, 0, false,
				    SOC15_REG_OFFSET(GC, 0, regGDS_GWS_VMID0) + vmid,
				    gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);

	/* OA: contiguous bitmask of oa_size bits starting at bit oa_base */
	gfx_v11_0_write_data_to_reg(ring, 0, false,
				    SOC15_REG_OFFSET(GC, 0, regGDS_OA_VMID0) + vmid,
				    (1 << (oa_size + oa_base)) - (1 << oa_base));
}
    4920             : 
/* Early IP init: record the gfx/compute ring counts and install every
 * per-IP function table (KIQ PM4, rings, IRQs, GDS, RLC, MQD, IMU) plus
 * the RLCG register-access control, all before any hardware is touched.
 * Always returns 0. */
static int gfx_v11_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	adev->gfx.num_gfx_rings = GFX11_NUM_GFX_RINGS;
	/* Compute ring count is the requested KCQ count, capped globally. */
	adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
					  AMDGPU_MAX_COMPUTE_RINGS);

	gfx_v11_0_set_kiq_pm4_funcs(adev);
	gfx_v11_0_set_ring_funcs(adev);
	gfx_v11_0_set_irq_funcs(adev);
	gfx_v11_0_set_gds_init(adev);
	gfx_v11_0_set_rlc_funcs(adev);
	gfx_v11_0_set_mqd_funcs(adev);
	gfx_v11_0_set_imu_funcs(adev);

	gfx_v11_0_init_rlcg_reg_access_ctrl(adev);

	return 0;
}
    4941             : 
    4942           0 : static int gfx_v11_0_late_init(void *handle)
    4943             : {
    4944           0 :         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
    4945             :         int r;
    4946             : 
    4947           0 :         r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
    4948           0 :         if (r)
    4949             :                 return r;
    4950             : 
    4951           0 :         r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
    4952           0 :         if (r)
    4953             :                 return r;
    4954             : 
    4955           0 :         return 0;
    4956             : }
    4957             : 
    4958           0 : static bool gfx_v11_0_is_rlc_enabled(struct amdgpu_device *adev)
    4959             : {
    4960             :         uint32_t rlc_cntl;
    4961             : 
    4962             :         /* if RLC is not enabled, do nothing */
    4963           0 :         rlc_cntl = RREG32_SOC15(GC, 0, regRLC_CNTL);
    4964           0 :         return (REG_GET_FIELD(rlc_cntl, RLC_CNTL, RLC_ENABLE_F32)) ? true : false;
    4965             : }
    4966             : 
/* Request RLC safe mode: write the CMD bit plus a MESSAGE of 1 to
 * RLC_SAFE_MODE, then busy-wait (up to usec_timeout iterations) for the
 * RLC to acknowledge by clearing the CMD field.  A timeout is silently
 * tolerated — callers proceed regardless. */
static void gfx_v11_0_set_safe_mode(struct amdgpu_device *adev)
{
	uint32_t data;
	unsigned i;

	data = RLC_SAFE_MODE__CMD_MASK;
	data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);

	WREG32_SOC15(GC, 0, regRLC_SAFE_MODE, data);

	/* wait for RLC_SAFE_MODE */
	for (i = 0; i < adev->usec_timeout; i++) {
		if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, regRLC_SAFE_MODE),
				   RLC_SAFE_MODE, CMD))
			break;
		udelay(1);
	}
}
    4985             : 
/* Exit RLC safe mode: issue the CMD bit with a zero MESSAGE field; no
 * acknowledgement polling is required on exit. */
static void gfx_v11_0_unset_safe_mode(struct amdgpu_device *adev)
{
	WREG32_SOC15(GC, 0, regRLC_SAFE_MODE, RLC_SAFE_MODE__CMD_MASK);
}
    4990             : 
    4991           0 : static void gfx_v11_0_update_perf_clk(struct amdgpu_device *adev,
    4992             :                                       bool enable)
    4993             : {
    4994             :         uint32_t def, data;
    4995             : 
    4996           0 :         if (!(adev->cg_flags & AMD_CG_SUPPORT_GFX_PERF_CLK))
    4997             :                 return;
    4998             : 
    4999           0 :         def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
    5000             : 
    5001           0 :         if (enable)
    5002           0 :                 data &= ~RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK;
    5003             :         else
    5004           0 :                 data |= RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK;
    5005             : 
    5006           0 :         if (def != data)
    5007           0 :                 WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
    5008             : }
    5009             : 
    5010           0 : static void gfx_v11_0_update_sram_fgcg(struct amdgpu_device *adev,
    5011             :                                        bool enable)
    5012             : {
    5013             :         uint32_t def, data;
    5014             : 
    5015           0 :         if (!(adev->cg_flags & AMD_CG_SUPPORT_GFX_FGCG))
    5016             :                 return;
    5017             : 
    5018           0 :         def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
    5019             : 
    5020           0 :         if (enable)
    5021           0 :                 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK;
    5022             :         else
    5023           0 :                 data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK;
    5024             : 
    5025           0 :         if (def != data)
    5026           0 :                 WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
    5027             : }
    5028             : 
    5029           0 : static void gfx_v11_0_update_repeater_fgcg(struct amdgpu_device *adev,
    5030             :                                            bool enable)
    5031             : {
    5032             :         uint32_t def, data;
    5033             : 
    5034           0 :         if (!(adev->cg_flags & AMD_CG_SUPPORT_REPEATER_FGCG))
    5035             :                 return;
    5036             : 
    5037           0 :         def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
    5038             : 
    5039           0 :         if (enable)
    5040           0 :                 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK;
    5041             :         else
    5042           0 :                 data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK;
    5043             : 
    5044           0 :         if (def != data)
    5045           0 :                 WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
    5046             : }
    5047             : 
    5048           0 : static void gfx_v11_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
    5049             :                                                        bool enable)
    5050             : {
    5051             :         uint32_t data, def;
    5052             : 
    5053           0 :         if (!(adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)))
    5054             :                 return;
    5055             : 
    5056             :         /* It is disabled by HW by default */
    5057           0 :         if (enable) {
    5058           0 :                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
    5059             :                         /* 1 - RLC_CGTT_MGCG_OVERRIDE */
    5060           0 :                         def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
    5061             : 
    5062           0 :                         data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
    5063             :                                   RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
    5064             :                                   RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK);
    5065             : 
    5066           0 :                         if (def != data)
    5067           0 :                                 WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
    5068             :                 }
    5069             :         } else {
    5070           0 :                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
    5071           0 :                         def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
    5072             : 
    5073           0 :                         data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
    5074             :                                  RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
    5075             :                                  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK);
    5076             : 
    5077           0 :                         if (def != data)
    5078           0 :                                 WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
    5079             :                 }
    5080             :         }
    5081             : }
    5082             : 
/*
 * Toggle coarse-grain clock gating: CGCG/CGLS and their 3D variants.
 * On enable, the override bits are released first, then the CGCG FSMs
 * are armed, then the CP idle-poll interval and SDMA CGCG interrupts
 * are programmed.  On disable only the FSM enables and SDMA interrupt
 * enables are cleared.
 *
 * NOTE(review): the programming order (overrides -> FSM -> CP/SDMA)
 * looks deliberate; do not reorder without hardware documentation.
 */
static void gfx_v11_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
						       bool enable)
{
	uint32_t def, data;

	/* Bail out unless at least one coarse-grain CG feature exists. */
	if (!(adev->cg_flags &
	      (AMD_CG_SUPPORT_GFX_CGCG |
	      AMD_CG_SUPPORT_GFX_CGLS |
	      AMD_CG_SUPPORT_GFX_3D_CGCG |
	      AMD_CG_SUPPORT_GFX_3D_CGLS)))
		return;

	if (enable) {
		def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);

		/* unset CGCG override */
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)
			data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
			data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
		/* a single override bit covers both 3D CGCG and 3D CGLS */
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG ||
		    adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
			data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;

		/* update CGCG override bits */
		if (def != data)
			WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);

		/* enable cgcg FSM(0x0000363F) */
		def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL);

		/* 0x36 = GFX idle threshold before the CGCG FSM engages */
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
			data &= ~RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD_MASK;
			data |= (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
				 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
		}

		/* 0xF = CGLS repeater compensation delay */
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY_MASK;
			data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
				 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
		}

		if (def != data)
			WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, data);

		/* Program RLC_CGCG_CGLS_CTRL_3D */
		def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D);

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
			data &= ~RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD_MASK;
			data |= (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
				 RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
		}

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
			data &= ~RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY_MASK;
			data |= (0xf << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
				 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
		}

		if (def != data)
			WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D, data);

		/* set IDLE_POLL_COUNT(0x00900100) */
		def = data = RREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_CNTL);

		data &= ~(CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY_MASK | CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK);
		data |= (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
			(0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);

		if (def != data)
			WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_CNTL, data);

		/* Enable the CP busy/idle interrupts that drive CG entry. */
		data = RREG32_SOC15(GC, 0, regCP_INT_CNTL);
		data = REG_SET_FIELD(data, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 1);
		data = REG_SET_FIELD(data, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 1);
		data = REG_SET_FIELD(data, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 1);
		data = REG_SET_FIELD(data, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 1);
		WREG32_SOC15(GC, 0, regCP_INT_CNTL, data);

		data = RREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL);
		data = REG_SET_FIELD(data, SDMA0_RLC_CGCG_CTRL, CGCG_INT_ENABLE, 1);
		WREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL, data);

		/* Some ASICs only have one SDMA instance, not need to configure SDMA1 */
		if (adev->sdma.num_instances > 1) {
			data = RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL);
			data = REG_SET_FIELD(data, SDMA1_RLC_CGCG_CTRL, CGCG_INT_ENABLE, 1);
			WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data);
		}
	} else {
		/* Program RLC_CGCG_CGLS_CTRL */
		def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL);

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)
			data &= ~RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;

		if (def != data)
			WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, data);

		/* Program RLC_CGCG_CGLS_CTRL_3D */
		def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D);

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)
			data &= ~RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
			data &= ~RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;

		if (def != data)
			WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D, data);

		/* Tear down the SDMA CGCG interrupt enables as well. */
		data = RREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL);
		data &= ~SDMA0_RLC_CGCG_CTRL__CGCG_INT_ENABLE_MASK;
		WREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL, data);

		/* Some ASICs only have one SDMA instance, not need to configure SDMA1 */
		if (adev->sdma.num_instances > 1) {
			data = RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL);
			data &= ~SDMA1_RLC_CGCG_CTRL__CGCG_INT_ENABLE_MASK;
			WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data);
		}
	}
}
    5210             : 
/*
 * Top-level GFX clock-gating toggle.  Applies each CG feature in a fixed
 * order while the RLC is held in safe mode.  Always returns 0.
 */
static int gfx_v11_0_update_gfx_clock_gating(struct amdgpu_device *adev,
					    bool enable)
{
	/* RLC must be in safe mode while CG registers are reprogrammed. */
	amdgpu_gfx_rlc_enter_safe_mode(adev);

	/* Coarse grain first, then medium grain, then the FGCG/perf bits. */
	gfx_v11_0_update_coarse_grain_clock_gating(adev, enable);

	gfx_v11_0_update_medium_grain_clock_gating(adev, enable);

	gfx_v11_0_update_repeater_fgcg(adev, enable);

	gfx_v11_0_update_sram_fgcg(adev, enable);

	gfx_v11_0_update_perf_clk(adev, enable);

	/* The GUI idle interrupt only matters when some GFX CG feature
	 * is present on this ASIC. */
	if (adev->cg_flags &
	    (AMD_CG_SUPPORT_GFX_MGCG |
	     AMD_CG_SUPPORT_GFX_CGLS |
	     AMD_CG_SUPPORT_GFX_CGCG |
	     AMD_CG_SUPPORT_GFX_3D_CGCG |
	     AMD_CG_SUPPORT_GFX_3D_CGLS))
		gfx_v11_0_enable_gui_idle_interrupt(adev, enable);

	amdgpu_gfx_rlc_exit_safe_mode(adev);

	return 0;
}
    5238             : 
    5239           0 : static void gfx_v11_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
    5240             : {
    5241             :         u32 reg, data;
    5242             : 
    5243           0 :         reg = SOC15_REG_OFFSET(GC, 0, regRLC_SPM_MC_CNTL);
    5244           0 :         if (amdgpu_sriov_is_pp_one_vf(adev))
    5245           0 :                 data = RREG32_NO_KIQ(reg);
    5246             :         else
    5247           0 :                 data = RREG32(reg);
    5248             : 
    5249           0 :         data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK;
    5250           0 :         data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT;
    5251             : 
    5252           0 :         if (amdgpu_sriov_is_pp_one_vf(adev))
    5253           0 :                 WREG32_SOC15_NO_KIQ(GC, 0, regRLC_SPM_MC_CNTL, data);
    5254             :         else
    5255           0 :                 WREG32_SOC15(GC, 0, regRLC_SPM_MC_CNTL, data);
    5256           0 : }
    5257             : 
/* RLC callback table for GC 11.0.x, installed into the device's RLC ops. */
static const struct amdgpu_rlc_funcs gfx_v11_0_rlc_funcs = {
	.is_rlc_enabled = gfx_v11_0_is_rlc_enabled,
	.set_safe_mode = gfx_v11_0_set_safe_mode,
	.unset_safe_mode = gfx_v11_0_unset_safe_mode,
	.init = gfx_v11_0_rlc_init,
	.get_csb_size = gfx_v11_0_get_csb_size,
	.get_csb_buffer = gfx_v11_0_get_csb_buffer,
	.resume = gfx_v11_0_rlc_resume,
	.stop = gfx_v11_0_rlc_stop,
	.reset = gfx_v11_0_rlc_reset,
	.start = gfx_v11_0_rlc_start,
	.update_spm_vmid = gfx_v11_0_update_spm_vmid,
};
    5271             : 
    5272           0 : static void gfx_v11_cntl_power_gating(struct amdgpu_device *adev, bool enable)
    5273             : {
    5274           0 :         u32 data = RREG32_SOC15(GC, 0, regRLC_PG_CNTL);
    5275             : 
    5276           0 :         if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG))
    5277           0 :                 data |= RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK;
    5278             :         else
    5279           0 :                 data &= ~RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK;
    5280             : 
    5281           0 :         WREG32_SOC15(GC, 0, regRLC_PG_CNTL, data);
    5282             : 
    5283             :         // Program RLC_PG_DELAY3 for CGPG hysteresis
    5284           0 :         if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)) {
    5285           0 :                 switch (adev->ip_versions[GC_HWIP][0]) {
    5286             :                 case IP_VERSION(11, 0, 1):
    5287           0 :                         WREG32_SOC15(GC, 0, regRLC_PG_DELAY_3, RLC_PG_DELAY_3_DEFAULT_GC_11_0_1);
    5288             :                         break;
    5289             :                 default:
    5290             :                         break;
    5291             :                 }
    5292             :         }
    5293           0 : }
    5294             : 
/*
 * Toggle GFX power gating with the RLC held in safe mode for the duration
 * of the register programming.
 */
static void gfx_v11_cntl_pg(struct amdgpu_device *adev, bool enable)
{
	amdgpu_gfx_rlc_enter_safe_mode(adev);

	gfx_v11_cntl_power_gating(adev, enable);

	amdgpu_gfx_rlc_exit_safe_mode(adev);
}
    5303             : 
    5304           0 : static int gfx_v11_0_set_powergating_state(void *handle,
    5305             :                                            enum amd_powergating_state state)
    5306             : {
    5307           0 :         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
    5308           0 :         bool enable = (state == AMD_PG_STATE_GATE);
    5309             : 
    5310           0 :         if (amdgpu_sriov_vf(adev))
    5311             :                 return 0;
    5312             : 
    5313           0 :         switch (adev->ip_versions[GC_HWIP][0]) {
    5314             :         case IP_VERSION(11, 0, 0):
    5315             :         case IP_VERSION(11, 0, 2):
    5316           0 :                 amdgpu_gfx_off_ctrl(adev, enable);
    5317           0 :                 break;
    5318             :         case IP_VERSION(11, 0, 1):
    5319           0 :                 gfx_v11_cntl_pg(adev, enable);
    5320           0 :                 amdgpu_gfx_off_ctrl(adev, enable);
    5321           0 :                 break;
    5322             :         default:
    5323             :                 break;
    5324             :         }
    5325             : 
    5326             :         return 0;
    5327             : }
    5328             : 
    5329           0 : static int gfx_v11_0_set_clockgating_state(void *handle,
    5330             :                                           enum amd_clockgating_state state)
    5331             : {
    5332           0 :         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
    5333             : 
    5334           0 :         if (amdgpu_sriov_vf(adev))
    5335             :                 return 0;
    5336             : 
    5337           0 :         switch (adev->ip_versions[GC_HWIP][0]) {
    5338             :         case IP_VERSION(11, 0, 0):
    5339             :         case IP_VERSION(11, 0, 1):
    5340             :         case IP_VERSION(11, 0, 2):
    5341           0 :                 gfx_v11_0_update_gfx_clock_gating(adev,
    5342             :                                 state ==  AMD_CG_STATE_GATE);
    5343           0 :                 break;
    5344             :         default:
    5345             :                 break;
    5346             :         }
    5347             : 
    5348             :         return 0;
    5349             : }
    5350             : 
/*
 * Report the currently active clock-gating features by decoding live
 * register state into AMD_CG_SUPPORT_* bits OR-ed into *flags.
 *
 * The MGCG override bits are active-low with respect to gating: a
 * cleared override means the feature is engaged (matching the enable
 * paths above, which clear these bits).  The CGCG/CGLS enable bits are
 * reported directly.
 */
static void gfx_v11_0_get_clockgating_state(void *handle, u64 *flags)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int data;

	/* AMD_CG_SUPPORT_GFX_MGCG */
	data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
	if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_MGCG;

	/* AMD_CG_SUPPORT_REPEATER_FGCG */
	if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK))
		*flags |= AMD_CG_SUPPORT_REPEATER_FGCG;

	/* AMD_CG_SUPPORT_GFX_FGCG */
	if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_FGCG;

	/* AMD_CG_SUPPORT_GFX_PERF_CLK */
	if (!(data & RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_PERF_CLK;

	/* AMD_CG_SUPPORT_GFX_CGCG */
	data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL);
	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CGCG;

	/* AMD_CG_SUPPORT_GFX_CGLS */
	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CGLS;

	/* AMD_CG_SUPPORT_GFX_3D_CGCG */
	data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D);
	if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;

	/* AMD_CG_SUPPORT_GFX_3D_CGLS */
	if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
}
    5391             : 
    5392           0 : static u64 gfx_v11_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
    5393             : {
    5394             :         /* gfx11 is 32bit rptr*/
    5395           0 :         return *(uint32_t *)ring->rptr_cpu_addr;
    5396             : }
    5397             : 
    5398           0 : static u64 gfx_v11_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
    5399             : {
    5400           0 :         struct amdgpu_device *adev = ring->adev;
    5401             :         u64 wptr;
    5402             : 
    5403             :         /* XXX check if swapping is necessary on BE */
    5404           0 :         if (ring->use_doorbell) {
    5405           0 :                 wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
    5406             :         } else {
    5407           0 :                 wptr = RREG32_SOC15(GC, 0, regCP_RB0_WPTR);
    5408           0 :                 wptr += (u64)RREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI) << 32;
    5409             :         }
    5410             : 
    5411           0 :         return wptr;
    5412             : }
    5413             : 
    5414           0 : static void gfx_v11_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
    5415             : {
    5416           0 :         struct amdgpu_device *adev = ring->adev;
    5417             :         uint32_t *wptr_saved;
    5418             :         uint32_t *is_queue_unmap;
    5419             :         uint64_t aggregated_db_index;
    5420           0 :         uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_GFX].mqd_size;
    5421             :         uint64_t wptr_tmp;
    5422             : 
    5423           0 :         if (ring->is_mes_queue) {
    5424           0 :                 wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size);
    5425           0 :                 is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size +
    5426             :                                               sizeof(uint32_t));
    5427           0 :                 aggregated_db_index =
    5428           0 :                         amdgpu_mes_get_aggregated_doorbell_index(adev,
    5429           0 :                                                                  ring->hw_prio);
    5430             : 
    5431           0 :                 wptr_tmp = ring->wptr & ring->buf_mask;
    5432           0 :                 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, wptr_tmp);
    5433           0 :                 *wptr_saved = wptr_tmp;
    5434             :                 /* assume doorbell always being used by mes mapped queue */
    5435           0 :                 if (*is_queue_unmap) {
    5436           0 :                         WDOORBELL64(aggregated_db_index, wptr_tmp);
    5437           0 :                         WDOORBELL64(ring->doorbell_index, wptr_tmp);
    5438             :                 } else {
    5439           0 :                         WDOORBELL64(ring->doorbell_index, wptr_tmp);
    5440             : 
    5441           0 :                         if (*is_queue_unmap)
    5442           0 :                                 WDOORBELL64(aggregated_db_index, wptr_tmp);
    5443             :                 }
    5444             :         } else {
    5445           0 :                 if (ring->use_doorbell) {
    5446             :                         /* XXX check if swapping is necessary on BE */
    5447           0 :                         atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
    5448           0 :                                      ring->wptr);
    5449           0 :                         WDOORBELL64(ring->doorbell_index, ring->wptr);
    5450             :                 } else {
    5451           0 :                         WREG32_SOC15(GC, 0, regCP_RB0_WPTR,
    5452             :                                      lower_32_bits(ring->wptr));
    5453           0 :                         WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI,
    5454             :                                      upper_32_bits(ring->wptr));
    5455             :                 }
    5456             :         }
    5457           0 : }
    5458             : 
    5459           0 : static u64 gfx_v11_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
    5460             : {
    5461             :         /* gfx11 hardware is 32bit rptr */
    5462           0 :         return *(uint32_t *)ring->rptr_cpu_addr;
    5463             : }
    5464             : 
    5465           0 : static u64 gfx_v11_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
    5466             : {
    5467             :         u64 wptr;
    5468             : 
    5469             :         /* XXX check if swapping is necessary on BE */
    5470           0 :         if (ring->use_doorbell)
    5471           0 :                 wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
    5472             :         else
    5473           0 :                 BUG();
    5474           0 :         return wptr;
    5475             : }
    5476             : 
    5477           0 : static void gfx_v11_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
    5478             : {
    5479           0 :         struct amdgpu_device *adev = ring->adev;
    5480             :         uint32_t *wptr_saved;
    5481             :         uint32_t *is_queue_unmap;
    5482             :         uint64_t aggregated_db_index;
    5483           0 :         uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_COMPUTE].mqd_size;
    5484             :         uint64_t wptr_tmp;
    5485             : 
    5486           0 :         if (ring->is_mes_queue) {
    5487           0 :                 wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size);
    5488           0 :                 is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size +
    5489             :                                               sizeof(uint32_t));
    5490           0 :                 aggregated_db_index =
    5491           0 :                         amdgpu_mes_get_aggregated_doorbell_index(adev,
    5492           0 :                                                                  ring->hw_prio);
    5493             : 
    5494           0 :                 wptr_tmp = ring->wptr & ring->buf_mask;
    5495           0 :                 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, wptr_tmp);
    5496           0 :                 *wptr_saved = wptr_tmp;
    5497             :                 /* assume doorbell always used by mes mapped queue */
    5498           0 :                 if (*is_queue_unmap) {
    5499           0 :                         WDOORBELL64(aggregated_db_index, wptr_tmp);
    5500           0 :                         WDOORBELL64(ring->doorbell_index, wptr_tmp);
    5501             :                 } else {
    5502           0 :                         WDOORBELL64(ring->doorbell_index, wptr_tmp);
    5503             : 
    5504           0 :                         if (*is_queue_unmap)
    5505           0 :                                 WDOORBELL64(aggregated_db_index, wptr_tmp);
    5506             :                 }
    5507             :         } else {
    5508             :                 /* XXX check if swapping is necessary on BE */
    5509           0 :                 if (ring->use_doorbell) {
    5510           0 :                         atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
    5511           0 :                                      ring->wptr);
    5512           0 :                         WDOORBELL64(ring->doorbell_index, ring->wptr);
    5513             :                 } else {
    5514           0 :                         BUG(); /* only DOORBELL method supported on gfx11 now */
    5515             :                 }
    5516             :         }
    5517           0 : }
    5518             : 
    5519           0 : static void gfx_v11_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
    5520             : {
    5521           0 :         struct amdgpu_device *adev = ring->adev;
    5522             :         u32 ref_and_mask, reg_mem_engine;
    5523           0 :         const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
    5524             : 
    5525           0 :         if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
    5526           0 :                 switch (ring->me) {
    5527             :                 case 1:
    5528           0 :                         ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
    5529           0 :                         break;
    5530             :                 case 2:
    5531           0 :                         ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
    5532           0 :                         break;
    5533             :                 default:
    5534             :                         return;
    5535             :                 }
    5536             :                 reg_mem_engine = 0;
    5537             :         } else {
    5538           0 :                 ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
    5539           0 :                 reg_mem_engine = 1; /* pfp */
    5540             :         }
    5541             : 
    5542           0 :         gfx_v11_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
    5543           0 :                                adev->nbio.funcs->get_hdp_flush_req_offset(adev),
    5544           0 :                                adev->nbio.funcs->get_hdp_flush_done_offset(adev),
    5545             :                                ref_and_mask, ref_and_mask, 0x20);
    5546             : }
    5547             : 
    5548           0 : static void gfx_v11_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
    5549             :                                        struct amdgpu_job *job,
    5550             :                                        struct amdgpu_ib *ib,
    5551             :                                        uint32_t flags)
    5552             : {
    5553           0 :         unsigned vmid = AMDGPU_JOB_GET_VMID(job);
    5554           0 :         u32 header, control = 0;
    5555             : 
    5556           0 :         BUG_ON(ib->flags & AMDGPU_IB_FLAG_CE);
    5557             : 
    5558           0 :         header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
    5559             : 
    5560           0 :         control |= ib->length_dw | (vmid << 24);
    5561             : 
    5562           0 :         if ((amdgpu_sriov_vf(ring->adev) || amdgpu_mcbp) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
    5563           0 :                 control |= INDIRECT_BUFFER_PRE_ENB(1);
    5564             : 
    5565           0 :                 if (flags & AMDGPU_IB_PREEMPTED)
    5566           0 :                         control |= INDIRECT_BUFFER_PRE_RESUME(1);
    5567             : 
    5568           0 :                 if (vmid)
    5569           0 :                         gfx_v11_0_ring_emit_de_meta(ring,
    5570           0 :                                     (!amdgpu_sriov_vf(ring->adev) && flags & AMDGPU_IB_PREEMPTED) ? true : false);
    5571             :         }
    5572             : 
    5573           0 :         if (ring->is_mes_queue)
    5574             :                 /* inherit vmid from mqd */
    5575           0 :                 control |= 0x400000;
    5576             : 
    5577           0 :         amdgpu_ring_write(ring, header);
    5578           0 :         BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
    5579           0 :         amdgpu_ring_write(ring,
    5580             : #ifdef __BIG_ENDIAN
    5581             :                 (2 << 0) |
    5582             : #endif
    5583             :                 lower_32_bits(ib->gpu_addr));
    5584           0 :         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
    5585           0 :         amdgpu_ring_write(ring, control);
    5586           0 : }
    5587             : 
/* Emit a compute IB via an INDIRECT_BUFFER packet, optionally resetting the
 * GDS wave-ID counters first (hardware workaround, see comment below). */
static void gfx_v11_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
					   struct amdgpu_job *job,
					   struct amdgpu_ib *ib,
					   uint32_t flags)
{
	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);

	if (ring->is_mes_queue)
		/* inherit vmid from mqd */
		control |= 0x40000000;

	/* Currently, there is a high possibility to get wave ID mismatch
	 * between ME and GDS, leading to a hw deadlock, because ME generates
	 * different wave IDs than the GDS expects. This situation happens
	 * randomly when at least 5 compute pipes use GDS ordered append.
	 * The wave IDs generated by ME are also wrong after suspend/resume.
	 * Those are probably bugs somewhere else in the kernel driver.
	 *
	 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
	 * GDS to 0 for this ring (me/pipe).
	 */
	if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
		amdgpu_ring_write(ring, regGDS_COMPUTE_MAX_WAVE_ID);
		amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
				(2 << 0) |
#endif
				lower_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, control);
}
    5626             : 
/* Emit a fence via RELEASE_MEM: flush/invalidate GPU caches, write @seq to
 * @addr, and optionally raise an interrupt (AMDGPU_FENCE_FLAG_INT). */
static void gfx_v11_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
				     u64 seq, unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* RELEASE_MEM - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
	amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_GCR_SEQ |
				 PACKET3_RELEASE_MEM_GCR_GL2_WB |
				 PACKET3_RELEASE_MEM_GCR_GL2_INV |
				 PACKET3_RELEASE_MEM_GCR_GL2_US |
				 PACKET3_RELEASE_MEM_GCR_GL1_INV |
				 PACKET3_RELEASE_MEM_GCR_GLV_INV |
				 PACKET3_RELEASE_MEM_GCR_GLM_INV |
				 PACKET3_RELEASE_MEM_GCR_GLM_WB |
				 PACKET3_RELEASE_MEM_CACHE_POLICY(3) |
				 PACKET3_RELEASE_MEM_EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 PACKET3_RELEASE_MEM_EVENT_INDEX(5)));
	/* data_sel: 2 = send 64bit data, 1 = send low 32 bits only;
	 * int_sel 2 = interrupt after the write confirms */
	amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_DATA_SEL(write64bit ? 2 : 1) |
				 PACKET3_RELEASE_MEM_INT_SEL(int_sel ? 2 : 0)));

	/*
	 * the address should be Qword aligned if 64bit write, Dword
	 * aligned if only send 32bit data low (discard data high)
	 */
	if (write64bit)
		BUG_ON(addr & 0x7);
	else
		BUG_ON(addr & 0x3);
	amdgpu_ring_write(ring, lower_32_bits(addr));
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));
	/* trailing dword: MES queues tag the fence with their hw queue id so
	 * the interrupt handler can route it; 0 otherwise */
	amdgpu_ring_write(ring, ring->is_mes_queue ?
			 (ring->hw_queue_id | AMDGPU_FENCE_MES_QUEUE_FLAG) : 0);
}
    5664             : 
    5665           0 : static void gfx_v11_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
    5666             : {
    5667           0 :         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
    5668           0 :         uint32_t seq = ring->fence_drv.sync_seq;
    5669           0 :         uint64_t addr = ring->fence_drv.gpu_addr;
    5670             : 
    5671           0 :         gfx_v11_0_wait_reg_mem(ring, usepfp, 1, 0, lower_32_bits(addr),
    5672           0 :                                upper_32_bits(addr), seq, 0xffffffff, 4);
    5673           0 : }
    5674             : 
    5675           0 : static void gfx_v11_0_ring_invalidate_tlbs(struct amdgpu_ring *ring,
    5676             :                                    uint16_t pasid, uint32_t flush_type,
    5677             :                                    bool all_hub, uint8_t dst_sel)
    5678             : {
    5679           0 :         amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
    5680           0 :         amdgpu_ring_write(ring,
    5681           0 :                           PACKET3_INVALIDATE_TLBS_DST_SEL(dst_sel) |
    5682           0 :                           PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
    5683           0 :                           PACKET3_INVALIDATE_TLBS_PASID(pasid) |
    5684           0 :                           PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
    5685           0 : }
    5686             : 
/* Flush the GPU TLB for @vmid / @pd_addr.  MES queues invalidate by PASID
 * via PM4; normal queues go through the GMC helper. */
static void gfx_v11_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
					 unsigned vmid, uint64_t pd_addr)
{
	if (ring->is_mes_queue)
		gfx_v11_0_ring_invalidate_tlbs(ring, 0, 0, false, 0);
	else
		amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);

	/* compute doesn't have PFP */
	if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		amdgpu_ring_write(ring, 0x0);
	}
}
    5702             : 
/* Emit a fence on the KIQ ring: WRITE_DATA of the low 32 bits of @seq to
 * @addr, then optionally poke CPC_INT_STATUS to raise an interrupt.  Only
 * 32-bit sequence numbers are supported on KIQ. */
static void gfx_v11_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
					  u64 seq, unsigned int flags)
{
	struct amdgpu_device *adev = ring->adev;

	/* we only allocate 32bit for each seq wb address */
	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

	/* write fence seq to the "addr" */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
	amdgpu_ring_write(ring, lower_32_bits(addr));
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));

	if (flags & AMDGPU_FENCE_FLAG_INT) {
		/* set register to trigger INT */
		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
		amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, regCPC_INT_STATUS));
		amdgpu_ring_write(ring, 0);
		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
	}
}
    5729             : 
    5730           0 : static void gfx_v11_0_ring_emit_cntxcntl(struct amdgpu_ring *ring,
    5731             :                                          uint32_t flags)
    5732             : {
    5733           0 :         uint32_t dw2 = 0;
    5734             : 
    5735           0 :         dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
    5736           0 :         if (flags & AMDGPU_HAVE_CTX_SWITCH) {
    5737             :                 /* set load_global_config & load_global_uconfig */
    5738           0 :                 dw2 |= 0x8001;
    5739             :                 /* set load_cs_sh_regs */
    5740           0 :                 dw2 |= 0x01000000;
    5741             :                 /* set load_per_context_state & load_gfx_sh_regs for GFX */
    5742           0 :                 dw2 |= 0x10002;
    5743             :         }
    5744             : 
    5745           0 :         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
    5746           0 :         amdgpu_ring_write(ring, dw2);
    5747           0 :         amdgpu_ring_write(ring, 0);
    5748           0 : }
    5749             : 
    5750           0 : static unsigned gfx_v11_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
    5751             : {
    5752             :         unsigned ret;
    5753             : 
    5754           0 :         amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
    5755           0 :         amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
    5756           0 :         amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
    5757           0 :         amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
    5758           0 :         ret = ring->wptr & ring->buf_mask;
    5759           0 :         amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
    5760             : 
    5761           0 :         return ret;
    5762             : }
    5763             : 
/* Patch the 0x55aa55aa placeholder written by
 * gfx_v11_0_ring_emit_init_cond_exec() with the number of dwords emitted
 * since, i.e. the size of the conditional section.  @offset is the ring
 * index of the placeholder. */
static void gfx_v11_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
{
	unsigned cur;
	BUG_ON(offset > ring->buf_mask);
	BUG_ON(ring->ring[offset] != 0x55aa55aa);

	cur = (ring->wptr - 1) & ring->buf_mask;
	if (likely(cur > offset))
		ring->ring[offset] = cur - offset;
	else
		/* wptr wrapped past the end of the ring buffer */
		ring->ring[offset] = (ring->buf_mask + 1) - offset + cur;
}
    5776             : 
/* Preempt the IB currently executing on @ring via the KIQ.
 *
 * Asserts the preemption cond_exec flag, asks the KIQ to unmap the queue in
 * PREEMPT_QUEUES_NO_UNMAP mode with a trailing fence, then busy-polls that
 * fence for up to adev->usec_timeout microseconds.
 *
 * Returns 0 on success, -EINVAL if KIQ unmap is unavailable or the trailing
 * fence never signals, -ENOMEM if KIQ ring space cannot be allocated.
 */
static int gfx_v11_0_ring_preempt_ib(struct amdgpu_ring *ring)
{
	int i, r = 0;
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
	struct amdgpu_ring *kiq_ring = &kiq->ring;
	unsigned long flags;

	if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
		return -EINVAL;

	/* KIQ ring is shared; serialize submissions against other users */
	spin_lock_irqsave(&kiq->ring_lock, flags);

	if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) {
		spin_unlock_irqrestore(&kiq->ring_lock, flags);
		return -ENOMEM;
	}

	/* assert preemption condition */
	amdgpu_ring_set_preempt_cond_exec(ring, false);

	/* assert IB preemption, emit the trailing fence */
	kiq->pmf->kiq_unmap_queues(kiq_ring, ring, PREEMPT_QUEUES_NO_UNMAP,
				   ring->trail_fence_gpu_addr,
				   ++ring->trail_seq);
	amdgpu_ring_commit(kiq_ring);

	spin_unlock_irqrestore(&kiq->ring_lock, flags);

	/* poll the trailing fence */
	for (i = 0; i < adev->usec_timeout; i++) {
		if (ring->trail_seq ==
		    le32_to_cpu(*(ring->trail_fence_cpu_addr)))
			break;
		udelay(1);
	}

	if (i >= adev->usec_timeout) {
		r = -EINVAL;
		DRM_ERROR("ring %d failed to preempt ib\n", ring->idx);
	}

	/* deassert preemption condition */
	amdgpu_ring_set_preempt_cond_exec(ring, true);
	return r;
}
    5823             : 
/* Emit the DE (draw engine) metadata payload used for mid-command-buffer
 * preemption.  If @resume, the previously saved payload is replayed from
 * the CSA/MES context; otherwise a fresh payload carrying the GDS backup
 * address is written. */
static void gfx_v11_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume)
{
	struct amdgpu_device *adev = ring->adev;
	struct v10_de_ib_state de_payload = {0};
	uint64_t offset, gds_addr, de_payload_gpu_addr;
	void *de_payload_cpu_addr;
	int cnt;

	if (ring->is_mes_queue) {
		/* MES queues keep the meta data inside the MES context BO */
		offset = offsetof(struct amdgpu_mes_ctx_meta_data,
				  gfx[0].gfx_meta_data) +
			offsetof(struct v10_gfx_meta_data, de_payload);
		de_payload_gpu_addr =
			amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
		de_payload_cpu_addr =
			amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);

		/* NOTE(review): the inner offsetof() here uses de_payload even
		 * though this computes the gds_backup address — looks copied
		 * from the block above; confirm this is intentional. */
		offset = offsetof(struct amdgpu_mes_ctx_meta_data,
				  gfx[0].gds_backup) +
			offsetof(struct v10_gfx_meta_data, de_payload);
		gds_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
	} else {
		/* regular queues use the per-VM CSA */
		offset = offsetof(struct v10_gfx_meta_data, de_payload);
		de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
		de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;

		/* GDS backup lives at the page-aligned tail of the CSA */
		gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) +
				 AMDGPU_CSA_SIZE - adev->gds.gds_size,
				 PAGE_SIZE);
	}

	de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
	de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);

	/* WRITE_DATA count: payload dwords + 4 header dwords - 2 */
	cnt = (sizeof(de_payload) >> 2) + 4 - 2;
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
				 WRITE_DATA_DST_SEL(8) |
				 WR_CONFIRM) |
				 WRITE_DATA_CACHE_POLICY(0));
	amdgpu_ring_write(ring, lower_32_bits(de_payload_gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(de_payload_gpu_addr));

	if (resume)
		/* replay the payload the hardware saved at preemption time */
		amdgpu_ring_write_multiple(ring, de_payload_cpu_addr,
					   sizeof(de_payload) >> 2);
	else
		amdgpu_ring_write_multiple(ring, (void *)&de_payload,
					   sizeof(de_payload) >> 2);
}
    5874             : 
    5875           0 : static void gfx_v11_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
    5876             :                                     bool secure)
    5877             : {
    5878           0 :         uint32_t v = secure ? FRAME_TMZ : 0;
    5879             : 
    5880           0 :         amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
    5881           0 :         amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
    5882           0 : }
    5883             : 
    5884           0 : static void gfx_v11_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
    5885             :                                      uint32_t reg_val_offs)
    5886             : {
    5887           0 :         struct amdgpu_device *adev = ring->adev;
    5888             : 
    5889           0 :         amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
    5890           0 :         amdgpu_ring_write(ring, 0 |     /* src: register*/
    5891             :                                 (5 << 8) |        /* dst: memory */
    5892             :                                 (1 << 20));       /* write confirm */
    5893           0 :         amdgpu_ring_write(ring, reg);
    5894           0 :         amdgpu_ring_write(ring, 0);
    5895           0 :         amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
    5896             :                                 reg_val_offs * 4));
    5897           0 :         amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
    5898             :                                 reg_val_offs * 4));
    5899           0 : }
    5900             : 
    5901           0 : static void gfx_v11_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
    5902             :                                    uint32_t val)
    5903             : {
    5904           0 :         uint32_t cmd = 0;
    5905             : 
    5906           0 :         switch (ring->funcs->type) {
    5907             :         case AMDGPU_RING_TYPE_GFX:
    5908             :                 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
    5909             :                 break;
    5910             :         case AMDGPU_RING_TYPE_KIQ:
    5911           0 :                 cmd = (1 << 16); /* no inc addr */
    5912           0 :                 break;
    5913             :         default:
    5914           0 :                 cmd = WR_CONFIRM;
    5915           0 :                 break;
    5916             :         }
    5917           0 :         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
    5918           0 :         amdgpu_ring_write(ring, cmd);
    5919           0 :         amdgpu_ring_write(ring, reg);
    5920           0 :         amdgpu_ring_write(ring, 0);
    5921           0 :         amdgpu_ring_write(ring, val);
    5922           0 : }
    5923             : 
/* Poll register @reg until (value & @mask) == @val, with a 0x20 poll
 * interval, using the ME engine. */
static void gfx_v11_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
					uint32_t val, uint32_t mask)
{
	gfx_v11_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
}
    5929             : 
    5930           0 : static void gfx_v11_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
    5931             :                                                    uint32_t reg0, uint32_t reg1,
    5932             :                                                    uint32_t ref, uint32_t mask)
    5933             : {
    5934           0 :         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
    5935             : 
    5936           0 :         gfx_v11_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
    5937             :                                ref, mask, 0x20);
    5938           0 : }
    5939             : 
    5940           0 : static void gfx_v11_0_ring_soft_recovery(struct amdgpu_ring *ring,
    5941             :                                          unsigned vmid)
    5942             : {
    5943           0 :         struct amdgpu_device *adev = ring->adev;
    5944           0 :         uint32_t value = 0;
    5945             : 
    5946           0 :         value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
    5947           0 :         value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
    5948           0 :         value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
    5949           0 :         value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
    5950           0 :         WREG32_SOC15(GC, 0, regSQ_CMD, value);
    5951           0 : }
    5952             : 
    5953             : static void
    5954           0 : gfx_v11_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
    5955             :                                       uint32_t me, uint32_t pipe,
    5956             :                                       enum amdgpu_interrupt_state state)
    5957             : {
    5958             :         uint32_t cp_int_cntl, cp_int_cntl_reg;
    5959             : 
    5960           0 :         if (!me) {
    5961           0 :                 switch (pipe) {
    5962             :                 case 0:
    5963           0 :                         cp_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING0);
    5964           0 :                         break;
    5965             :                 case 1:
    5966           0 :                         cp_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING1);
    5967           0 :                         break;
    5968             :                 default:
    5969           0 :                         DRM_DEBUG("invalid pipe %d\n", pipe);
    5970           0 :                         return;
    5971             :                 }
    5972             :         } else {
    5973           0 :                 DRM_DEBUG("invalid me %d\n", me);
    5974           0 :                 return;
    5975             :         }
    5976             : 
    5977           0 :         switch (state) {
    5978             :         case AMDGPU_IRQ_STATE_DISABLE:
    5979           0 :                 cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
    5980           0 :                 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
    5981             :                                             TIME_STAMP_INT_ENABLE, 0);
    5982           0 :                 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
    5983             :                                             GENERIC0_INT_ENABLE, 0);
    5984           0 :                 WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
    5985             :                 break;
    5986             :         case AMDGPU_IRQ_STATE_ENABLE:
    5987           0 :                 cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
    5988           0 :                 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
    5989             :                                             TIME_STAMP_INT_ENABLE, 1);
    5990           0 :                 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
    5991             :                                             GENERIC0_INT_ENABLE, 1);
    5992           0 :                 WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
    5993             :                 break;
    5994             :         default:
    5995             :                 break;
    5996             :         }
    5997             : }
    5998             : 
    5999           0 : static void gfx_v11_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
    6000             :                                                      int me, int pipe,
    6001             :                                                      enum amdgpu_interrupt_state state)
    6002             : {
    6003             :         u32 mec_int_cntl, mec_int_cntl_reg;
    6004             : 
    6005             :         /*
    6006             :          * amdgpu controls only the first MEC. That's why this function only
    6007             :          * handles the setting of interrupts for this specific MEC. All other
    6008             :          * pipes' interrupts are set by amdkfd.
    6009             :          */
    6010             : 
    6011           0 :         if (me == 1) {
    6012           0 :                 switch (pipe) {
    6013             :                 case 0:
    6014           0 :                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL);
    6015           0 :                         break;
    6016             :                 case 1:
    6017           0 :                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE1_INT_CNTL);
    6018           0 :                         break;
    6019             :                 case 2:
    6020           0 :                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE2_INT_CNTL);
    6021           0 :                         break;
    6022             :                 case 3:
    6023           0 :                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE3_INT_CNTL);
    6024           0 :                         break;
    6025             :                 default:
    6026           0 :                         DRM_DEBUG("invalid pipe %d\n", pipe);
    6027           0 :                         return;
    6028             :                 }
    6029             :         } else {
    6030           0 :                 DRM_DEBUG("invalid me %d\n", me);
    6031           0 :                 return;
    6032             :         }
    6033             : 
    6034           0 :         switch (state) {
    6035             :         case AMDGPU_IRQ_STATE_DISABLE:
    6036           0 :                 mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);
    6037           0 :                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
    6038             :                                              TIME_STAMP_INT_ENABLE, 0);
    6039           0 :                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
    6040             :                                              GENERIC0_INT_ENABLE, 0);
    6041           0 :                 WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
    6042             :                 break;
    6043             :         case AMDGPU_IRQ_STATE_ENABLE:
    6044           0 :                 mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);
    6045           0 :                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
    6046             :                                              TIME_STAMP_INT_ENABLE, 1);
    6047           0 :                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
    6048             :                                              GENERIC0_INT_ENABLE, 1);
    6049           0 :                 WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
    6050             :                 break;
    6051             :         default:
    6052             :                 break;
    6053             :         }
    6054             : }
    6055             : 
    6056           0 : static int gfx_v11_0_set_eop_interrupt_state(struct amdgpu_device *adev,
    6057             :                                             struct amdgpu_irq_src *src,
    6058             :                                             unsigned type,
    6059             :                                             enum amdgpu_interrupt_state state)
    6060             : {
    6061           0 :         switch (type) {
    6062             :         case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
    6063           0 :                 gfx_v11_0_set_gfx_eop_interrupt_state(adev, 0, 0, state);
    6064           0 :                 break;
    6065             :         case AMDGPU_CP_IRQ_GFX_ME0_PIPE1_EOP:
    6066           0 :                 gfx_v11_0_set_gfx_eop_interrupt_state(adev, 0, 1, state);
    6067           0 :                 break;
    6068             :         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
    6069           0 :                 gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
    6070           0 :                 break;
    6071             :         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
    6072           0 :                 gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
    6073           0 :                 break;
    6074             :         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
    6075           0 :                 gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
    6076           0 :                 break;
    6077             :         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
    6078           0 :                 gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
    6079           0 :                 break;
    6080             :         default:
    6081             :                 break;
    6082             :         }
    6083           0 :         return 0;
    6084             : }
    6085             : 
/**
 * gfx_v11_0_eop_irq - process a CP end-of-pipe interrupt
 * @adev: amdgpu device pointer
 * @source: interrupt source (unused)
 * @entry: decoded interrupt vector entry
 *
 * Signals fence completion on the ring that generated the EOP event.
 * Takes the MES path when MES is enabled and src_data[0] carries the MES
 * queue flag; otherwise decodes me/pipe/queue from ring_id and matches a
 * gfx or compute ring. Always returns 0.
 */
static int gfx_v11_0_eop_irq(struct amdgpu_device *adev,
                             struct amdgpu_irq_src *source,
                             struct amdgpu_iv_entry *entry)
{
        int i;
        u8 me_id, pipe_id, queue_id;
        struct amdgpu_ring *ring;
        /* For MES-scheduled queues, src_data[0] carries the queue id plus a flag bit. */
        uint32_t mes_queue_id = entry->src_data[0];

        DRM_DEBUG("IH: CP EOP\n");

        if (adev->enable_mes && (mes_queue_id & AMDGPU_FENCE_MES_QUEUE_FLAG)) {
                struct amdgpu_mes_queue *queue;

                /* Strip the flag bit, leaving the bare queue id. */
                mes_queue_id &= AMDGPU_FENCE_MES_QUEUE_ID_MASK;

                /* The lock guards the queue IDR against concurrent removal
                 * while we look up and use the queue's ring. */
                spin_lock(&adev->mes.queue_id_lock);
                queue = idr_find(&adev->mes.queue_id_idr, mes_queue_id);
                if (queue) {
                        DRM_DEBUG("process mes queue id = %d\n", mes_queue_id);
                        amdgpu_fence_process(queue->ring);
                }
                spin_unlock(&adev->mes.queue_id_lock);
        } else {
                /* ring_id layout (as decoded here): bits 3:2 = me,
                 * bits 1:0 = pipe, bits 6:4 = queue. */
                me_id = (entry->ring_id & 0x0c) >> 2;
                pipe_id = (entry->ring_id & 0x03) >> 0;
                queue_id = (entry->ring_id & 0x70) >> 4;

                switch (me_id) {
                case 0:
                        /* me 0 is the gfx engine; one gfx ring per pipe. */
                        if (pipe_id == 0)
                                amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
                        else
                                amdgpu_fence_process(&adev->gfx.gfx_ring[1]);
                        break;
                case 1:
                case 2:
                        /* me 1/2 are compute MECs; scan for the matching ring. */
                        for (i = 0; i < adev->gfx.num_compute_rings; i++) {
                                ring = &adev->gfx.compute_ring[i];
                                /* Per-queue interrupt is supported for MEC starting from VI.
                                 * The interrupt can only be enabled/disabled per pipe instead
                                 * of per queue.
                                 */
                                if ((ring->me == me_id) &&
                                    (ring->pipe == pipe_id) &&
                                    (ring->queue == queue_id))
                                        amdgpu_fence_process(ring);
                        }
                        break;
                }
        }

        return 0;
}
    6140             : 
    6141           0 : static int gfx_v11_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
    6142             :                                               struct amdgpu_irq_src *source,
    6143             :                                               unsigned type,
    6144             :                                               enum amdgpu_interrupt_state state)
    6145             : {
    6146           0 :         switch (state) {
    6147             :         case AMDGPU_IRQ_STATE_DISABLE:
    6148             :         case AMDGPU_IRQ_STATE_ENABLE:
    6149           0 :                 WREG32_FIELD15_PREREG(GC, 0, CP_INT_CNTL_RING0,
    6150             :                                PRIV_REG_INT_ENABLE,
    6151             :                                state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
    6152             :                 break;
    6153             :         default:
    6154             :                 break;
    6155             :         }
    6156             : 
    6157           0 :         return 0;
    6158             : }
    6159             : 
    6160           0 : static int gfx_v11_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
    6161             :                                                struct amdgpu_irq_src *source,
    6162             :                                                unsigned type,
    6163             :                                                enum amdgpu_interrupt_state state)
    6164             : {
    6165           0 :         switch (state) {
    6166             :         case AMDGPU_IRQ_STATE_DISABLE:
    6167             :         case AMDGPU_IRQ_STATE_ENABLE:
    6168           0 :                 WREG32_FIELD15_PREREG(GC, 0, CP_INT_CNTL_RING0,
    6169             :                                PRIV_INSTR_INT_ENABLE,
    6170             :                                state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
    6171             :                 break;
    6172             :         default:
    6173             :                 break;
    6174             :         }
    6175             : 
    6176           0 :         return 0;
    6177             : }
    6178             : 
    6179           0 : static void gfx_v11_0_handle_priv_fault(struct amdgpu_device *adev,
    6180             :                                         struct amdgpu_iv_entry *entry)
    6181             : {
    6182             :         u8 me_id, pipe_id, queue_id;
    6183             :         struct amdgpu_ring *ring;
    6184             :         int i;
    6185             : 
    6186           0 :         me_id = (entry->ring_id & 0x0c) >> 2;
    6187           0 :         pipe_id = (entry->ring_id & 0x03) >> 0;
    6188           0 :         queue_id = (entry->ring_id & 0x70) >> 4;
    6189             : 
    6190           0 :         switch (me_id) {
    6191             :         case 0:
    6192           0 :                 for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
    6193           0 :                         ring = &adev->gfx.gfx_ring[i];
    6194             :                         /* we only enabled 1 gfx queue per pipe for now */
    6195           0 :                         if (ring->me == me_id && ring->pipe == pipe_id)
    6196           0 :                                 drm_sched_fault(&ring->sched);
    6197             :                 }
    6198             :                 break;
    6199             :         case 1:
    6200             :         case 2:
    6201           0 :                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
    6202           0 :                         ring = &adev->gfx.compute_ring[i];
    6203           0 :                         if (ring->me == me_id && ring->pipe == pipe_id &&
    6204           0 :                             ring->queue == queue_id)
    6205           0 :                                 drm_sched_fault(&ring->sched);
    6206             :                 }
    6207             :                 break;
    6208             :         default:
    6209           0 :                 BUG();
    6210             :                 break;
    6211             :         }
    6212           0 : }
    6213             : 
/**
 * gfx_v11_0_priv_reg_irq - handle an illegal-register-access interrupt
 * @adev: amdgpu device pointer
 * @source: interrupt source (unused)
 * @entry: decoded interrupt vector entry
 *
 * Logs the violation and faults the matching ring's scheduler.
 * Always returns 0.
 */
static int gfx_v11_0_priv_reg_irq(struct amdgpu_device *adev,
                                  struct amdgpu_irq_src *source,
                                  struct amdgpu_iv_entry *entry)
{
        DRM_ERROR("Illegal register access in command stream\n");
        gfx_v11_0_handle_priv_fault(adev, entry);
        return 0;
}
    6222             : 
/**
 * gfx_v11_0_priv_inst_irq - handle an illegal-instruction interrupt
 * @adev: amdgpu device pointer
 * @source: interrupt source (unused)
 * @entry: decoded interrupt vector entry
 *
 * Logs the violation and faults the matching ring's scheduler.
 * Always returns 0.
 */
static int gfx_v11_0_priv_inst_irq(struct amdgpu_device *adev,
                                   struct amdgpu_irq_src *source,
                                   struct amdgpu_iv_entry *entry)
{
        DRM_ERROR("Illegal instruction in command stream\n");
        gfx_v11_0_handle_priv_fault(adev, entry);
        return 0;
}
    6231             : 
#if 0
/* NOTE(review): compiled-out reference implementation — the KIQ GENERIC2
 * interrupt state setter is not currently wired into any irq_src_funcs table.
 * Kept for reference; consider removing if it stays unused. */
static int gfx_v11_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
                                             struct amdgpu_irq_src *src,
                                             unsigned int type,
                                             enum amdgpu_interrupt_state state)
{
        uint32_t tmp, target;
        struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);

        target = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL);
        target += ring->pipe;

        switch (type) {
        case AMDGPU_CP_KIQ_IRQ_DRIVER0:
                if (state == AMDGPU_IRQ_STATE_DISABLE) {
                        tmp = RREG32_SOC15(GC, 0, regCPC_INT_CNTL);
                        tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL,
                                            GENERIC2_INT_ENABLE, 0);
                        WREG32_SOC15(GC, 0, regCPC_INT_CNTL, tmp);

                        tmp = RREG32_SOC15_IP(GC, target);
                        tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE0_INT_CNTL,
                                            GENERIC2_INT_ENABLE, 0);
                        WREG32_SOC15_IP(GC, target, tmp);
                } else {
                        tmp = RREG32_SOC15(GC, 0, regCPC_INT_CNTL);
                        tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL,
                                            GENERIC2_INT_ENABLE, 1);
                        WREG32_SOC15(GC, 0, regCPC_INT_CNTL, tmp);

                        tmp = RREG32_SOC15_IP(GC, target);
                        tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE0_INT_CNTL,
                                            GENERIC2_INT_ENABLE, 1);
                        WREG32_SOC15_IP(GC, target, tmp);
                }
                break;
        default:
                BUG(); /* kiq only support GENERIC2_INT now */
                break;
        }
        return 0;
}
#endif
    6275             : 
    6276           0 : static void gfx_v11_0_emit_mem_sync(struct amdgpu_ring *ring)
    6277             : {
    6278           0 :         const unsigned int gcr_cntl =
    6279             :                         PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_INV(1) |
    6280             :                         PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_WB(1) |
    6281             :                         PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_INV(1) |
    6282             :                         PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_WB(1) |
    6283             :                         PACKET3_ACQUIRE_MEM_GCR_CNTL_GL1_INV(1) |
    6284             :                         PACKET3_ACQUIRE_MEM_GCR_CNTL_GLV_INV(1) |
    6285             :                         PACKET3_ACQUIRE_MEM_GCR_CNTL_GLK_INV(1) |
    6286             :                         PACKET3_ACQUIRE_MEM_GCR_CNTL_GLI_INV(1);
    6287             : 
    6288             :         /* ACQUIRE_MEM - make one or more surfaces valid for use by the subsequent operations */
    6289           0 :         amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 6));
    6290           0 :         amdgpu_ring_write(ring, 0); /* CP_COHER_CNTL */
    6291           0 :         amdgpu_ring_write(ring, 0xffffffff);  /* CP_COHER_SIZE */
    6292           0 :         amdgpu_ring_write(ring, 0xffffff);  /* CP_COHER_SIZE_HI */
    6293           0 :         amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
    6294           0 :         amdgpu_ring_write(ring, 0);  /* CP_COHER_BASE_HI */
    6295           0 :         amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */
    6296           0 :         amdgpu_ring_write(ring, gcr_cntl); /* GCR_CNTL */
    6297           0 : }
    6298             : 
/* IP-block level callbacks (init/fini, suspend/resume, power/clock gating)
 * for the GFX v11.0 hardware block. */
static const struct amd_ip_funcs gfx_v11_0_ip_funcs = {
        .name = "gfx_v11_0",
        .early_init = gfx_v11_0_early_init,
        .late_init = gfx_v11_0_late_init,
        .sw_init = gfx_v11_0_sw_init,
        .sw_fini = gfx_v11_0_sw_fini,
        .hw_init = gfx_v11_0_hw_init,
        .hw_fini = gfx_v11_0_hw_fini,
        .suspend = gfx_v11_0_suspend,
        .resume = gfx_v11_0_resume,
        .is_idle = gfx_v11_0_is_idle,
        .wait_for_idle = gfx_v11_0_wait_for_idle,
        .soft_reset = gfx_v11_0_soft_reset,
        .check_soft_reset = gfx_v11_0_check_soft_reset,
        .set_clockgating_state = gfx_v11_0_set_clockgating_state,
        .set_powergating_state = gfx_v11_0_set_powergating_state,
        .get_clockgating_state = gfx_v11_0_get_clockgating_state,
};
    6317             : 
/* Ring callbacks for the GFX (graphics) rings. emit_frame_size is the
 * worst-case dword budget per frame; each term is annotated inline. */
static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_gfx = {
        .type = AMDGPU_RING_TYPE_GFX,
        .align_mask = 0xff,
        .nop = PACKET3(PACKET3_NOP, 0x3FFF),
        .support_64bit_ptrs = true,
        .vmhub = AMDGPU_GFXHUB_0,
        .get_rptr = gfx_v11_0_ring_get_rptr_gfx,
        .get_wptr = gfx_v11_0_ring_get_wptr_gfx,
        .set_wptr = gfx_v11_0_ring_set_wptr_gfx,
        .emit_frame_size = /* totally 242 maximum if 16 IBs */
                5 + /* COND_EXEC */
                7 + /* PIPELINE_SYNC */
                SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
                SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
                2 + /* VM_FLUSH */
                8 + /* FENCE for VM_FLUSH */
                20 + /* GDS switch */
                5 + /* COND_EXEC */
                7 + /* HDP_flush */
                4 + /* VGT_flush */
                31 + /* DE_META */
                3 + /* CNTX_CTRL */
                5 + /* HDP_INVL */
                8 + 8 + /* FENCE x2 */
                8, /* gfx_v11_0_emit_mem_sync */
        .emit_ib_size = 4, /* gfx_v11_0_ring_emit_ib_gfx */
        .emit_ib = gfx_v11_0_ring_emit_ib_gfx,
        .emit_fence = gfx_v11_0_ring_emit_fence,
        .emit_pipeline_sync = gfx_v11_0_ring_emit_pipeline_sync,
        .emit_vm_flush = gfx_v11_0_ring_emit_vm_flush,
        .emit_gds_switch = gfx_v11_0_ring_emit_gds_switch,
        .emit_hdp_flush = gfx_v11_0_ring_emit_hdp_flush,
        .test_ring = gfx_v11_0_ring_test_ring,
        .test_ib = gfx_v11_0_ring_test_ib,
        .insert_nop = amdgpu_ring_insert_nop,
        .pad_ib = amdgpu_ring_generic_pad_ib,
        .emit_cntxcntl = gfx_v11_0_ring_emit_cntxcntl,
        .init_cond_exec = gfx_v11_0_ring_emit_init_cond_exec,
        .patch_cond_exec = gfx_v11_0_ring_emit_patch_cond_exec,
        .preempt_ib = gfx_v11_0_ring_preempt_ib,
        .emit_frame_cntl = gfx_v11_0_ring_emit_frame_cntl,
        .emit_wreg = gfx_v11_0_ring_emit_wreg,
        .emit_reg_wait = gfx_v11_0_ring_emit_reg_wait,
        .emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait,
        .soft_recovery = gfx_v11_0_ring_soft_recovery,
        .emit_mem_sync = gfx_v11_0_emit_mem_sync,
};
    6365             : 
/* Ring callbacks for the compute (MEC) rings; shares most emit helpers with
 * the gfx rings but has no context-control/preemption hooks. */
static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_compute = {
        .type = AMDGPU_RING_TYPE_COMPUTE,
        .align_mask = 0xff,
        .nop = PACKET3(PACKET3_NOP, 0x3FFF),
        .support_64bit_ptrs = true,
        .vmhub = AMDGPU_GFXHUB_0,
        .get_rptr = gfx_v11_0_ring_get_rptr_compute,
        .get_wptr = gfx_v11_0_ring_get_wptr_compute,
        .set_wptr = gfx_v11_0_ring_set_wptr_compute,
        .emit_frame_size =
                20 + /* gfx_v11_0_ring_emit_gds_switch */
                7 + /* gfx_v11_0_ring_emit_hdp_flush */
                5 + /* hdp invalidate */
                7 + /* gfx_v11_0_ring_emit_pipeline_sync */
                SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
                SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
                2 + /* gfx_v11_0_ring_emit_vm_flush */
                8 + 8 + 8 + /* gfx_v11_0_ring_emit_fence x3 for user fence, vm fence */
                8, /* gfx_v11_0_emit_mem_sync */
        .emit_ib_size = 7, /* gfx_v11_0_ring_emit_ib_compute */
        .emit_ib = gfx_v11_0_ring_emit_ib_compute,
        .emit_fence = gfx_v11_0_ring_emit_fence,
        .emit_pipeline_sync = gfx_v11_0_ring_emit_pipeline_sync,
        .emit_vm_flush = gfx_v11_0_ring_emit_vm_flush,
        .emit_gds_switch = gfx_v11_0_ring_emit_gds_switch,
        .emit_hdp_flush = gfx_v11_0_ring_emit_hdp_flush,
        .test_ring = gfx_v11_0_ring_test_ring,
        .test_ib = gfx_v11_0_ring_test_ib,
        .insert_nop = amdgpu_ring_insert_nop,
        .pad_ib = amdgpu_ring_generic_pad_ib,
        .emit_wreg = gfx_v11_0_ring_emit_wreg,
        .emit_reg_wait = gfx_v11_0_ring_emit_reg_wait,
        .emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait,
        .emit_mem_sync = gfx_v11_0_emit_mem_sync,
};
    6401             : 
/* Ring callbacks for the KIQ (kernel interface queue); uses the KIQ-specific
 * fence emitter and adds a register-read hook for privileged reads. */
static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_kiq = {
        .type = AMDGPU_RING_TYPE_KIQ,
        .align_mask = 0xff,
        .nop = PACKET3(PACKET3_NOP, 0x3FFF),
        .support_64bit_ptrs = true,
        .vmhub = AMDGPU_GFXHUB_0,
        .get_rptr = gfx_v11_0_ring_get_rptr_compute,
        .get_wptr = gfx_v11_0_ring_get_wptr_compute,
        .set_wptr = gfx_v11_0_ring_set_wptr_compute,
        .emit_frame_size =
                20 + /* gfx_v11_0_ring_emit_gds_switch */
                7 + /* gfx_v11_0_ring_emit_hdp_flush */
                5 + /*hdp invalidate */
                7 + /* gfx_v11_0_ring_emit_pipeline_sync */
                SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
                SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
                2 + /* gfx_v11_0_ring_emit_vm_flush */
                8 + 8 + 8, /* gfx_v11_0_ring_emit_fence_kiq x3 for user fence, vm fence */
        .emit_ib_size = 7, /* gfx_v11_0_ring_emit_ib_compute */
        .emit_ib = gfx_v11_0_ring_emit_ib_compute,
        .emit_fence = gfx_v11_0_ring_emit_fence_kiq,
        .test_ring = gfx_v11_0_ring_test_ring,
        .test_ib = gfx_v11_0_ring_test_ib,
        .insert_nop = amdgpu_ring_insert_nop,
        .pad_ib = amdgpu_ring_generic_pad_ib,
        .emit_rreg = gfx_v11_0_ring_emit_rreg,
        .emit_wreg = gfx_v11_0_ring_emit_wreg,
        .emit_reg_wait = gfx_v11_0_ring_emit_reg_wait,
        .emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait,
};
    6432             : 
/* Attach the per-ring-type function tables to the KIQ, gfx and compute
 * rings that were allocated for this device. */
static void gfx_v11_0_set_ring_funcs(struct amdgpu_device *adev)
{
        int i;

        adev->gfx.kiq.ring.funcs = &gfx_v11_0_ring_funcs_kiq;

        for (i = 0; i < adev->gfx.num_gfx_rings; i++)
                adev->gfx.gfx_ring[i].funcs = &gfx_v11_0_ring_funcs_gfx;

        for (i = 0; i < adev->gfx.num_compute_rings; i++)
                adev->gfx.compute_ring[i].funcs = &gfx_v11_0_ring_funcs_compute;
}
    6445             : 
/* End-of-pipe interrupt: state setter + handler. */
static const struct amdgpu_irq_src_funcs gfx_v11_0_eop_irq_funcs = {
        .set = gfx_v11_0_set_eop_interrupt_state,
        .process = gfx_v11_0_eop_irq,
};
    6450             : 
/* Privileged-register fault interrupt: state setter + handler. */
static const struct amdgpu_irq_src_funcs gfx_v11_0_priv_reg_irq_funcs = {
        .set = gfx_v11_0_set_priv_reg_fault_state,
        .process = gfx_v11_0_priv_reg_irq,
};
    6455             : 
/* Privileged-instruction fault interrupt: state setter + handler. */
static const struct amdgpu_irq_src_funcs gfx_v11_0_priv_inst_irq_funcs = {
        .set = gfx_v11_0_set_priv_inst_fault_state,
        .process = gfx_v11_0_priv_inst_irq,
};
    6460             : 
    6461             : static void gfx_v11_0_set_irq_funcs(struct amdgpu_device *adev)
    6462             : {
    6463           0 :         adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
    6464           0 :         adev->gfx.eop_irq.funcs = &gfx_v11_0_eop_irq_funcs;
    6465             : 
    6466           0 :         adev->gfx.priv_reg_irq.num_types = 1;
    6467           0 :         adev->gfx.priv_reg_irq.funcs = &gfx_v11_0_priv_reg_irq_funcs;
    6468             : 
    6469           0 :         adev->gfx.priv_inst_irq.num_types = 1;
    6470           0 :         adev->gfx.priv_inst_irq.funcs = &gfx_v11_0_priv_inst_irq_funcs;
    6471             : }
    6472             : 
    6473             : static void gfx_v11_0_set_imu_funcs(struct amdgpu_device *adev)
    6474             : {
    6475           0 :         if (adev->flags & AMD_IS_APU)
    6476           0 :                 adev->gfx.imu.mode = MISSION_MODE;
    6477             :         else
    6478           0 :                 adev->gfx.imu.mode = DEBUG_MODE;
    6479             : 
    6480           0 :         adev->gfx.imu.funcs = &gfx_v11_0_imu_funcs;
    6481             : }
    6482             : 
/* Hook up the RLC (run-list controller) function table. */
static void gfx_v11_0_set_rlc_funcs(struct amdgpu_device *adev)
{
        adev->gfx.rlc.funcs = &gfx_v11_0_rlc_funcs;
}
    6487             : 
    6488             : static void gfx_v11_0_set_gds_init(struct amdgpu_device *adev)
    6489             : {
    6490           0 :         unsigned total_cu = adev->gfx.config.max_cu_per_sh *
    6491           0 :                             adev->gfx.config.max_sh_per_se *
    6492           0 :                             adev->gfx.config.max_shader_engines;
    6493             : 
    6494           0 :         adev->gds.gds_size = 0x1000;
    6495           0 :         adev->gds.gds_compute_max_wave_id = total_cu * 32 - 1;
    6496           0 :         adev->gds.gws_size = 64;
    6497           0 :         adev->gds.oa_size = 16;
    6498             : }
    6499             : 
    6500             : static void gfx_v11_0_set_mqd_funcs(struct amdgpu_device *adev)
    6501             : {
    6502             :         /* set gfx eng mqd */
    6503           0 :         adev->mqds[AMDGPU_HW_IP_GFX].mqd_size =
    6504             :                 sizeof(struct v11_gfx_mqd);
    6505           0 :         adev->mqds[AMDGPU_HW_IP_GFX].init_mqd =
    6506             :                 gfx_v11_0_gfx_mqd_init;
    6507             :         /* set compute eng mqd */
    6508           0 :         adev->mqds[AMDGPU_HW_IP_COMPUTE].mqd_size =
    6509             :                 sizeof(struct v11_compute_mqd);
    6510           0 :         adev->mqds[AMDGPU_HW_IP_COMPUTE].init_mqd =
    6511             :                 gfx_v11_0_compute_mqd_init;
    6512             : }
    6513             : 
    6514           0 : static void gfx_v11_0_set_user_wgp_inactive_bitmap_per_sh(struct amdgpu_device *adev,
    6515             :                                                           u32 bitmap)
    6516             : {
    6517             :         u32 data;
    6518             : 
    6519           0 :         if (!bitmap)
    6520             :                 return;
    6521             : 
    6522           0 :         data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT;
    6523           0 :         data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK;
    6524             : 
    6525           0 :         WREG32_SOC15(GC, 0, regGC_USER_SHADER_ARRAY_CONFIG, data);
    6526             : }
    6527             : 
    6528           0 : static u32 gfx_v11_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev)
    6529             : {
    6530             :         u32 data, wgp_bitmask;
    6531           0 :         data = RREG32_SOC15(GC, 0, regCC_GC_SHADER_ARRAY_CONFIG);
    6532           0 :         data |= RREG32_SOC15(GC, 0, regGC_USER_SHADER_ARRAY_CONFIG);
    6533             : 
    6534           0 :         data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK;
    6535           0 :         data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT;
    6536             : 
    6537           0 :         wgp_bitmask =
    6538           0 :                 amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh >> 1);
    6539             : 
    6540           0 :         return (~data) & wgp_bitmask;
    6541             : }
    6542             : 
    6543           0 : static u32 gfx_v11_0_get_cu_active_bitmap_per_sh(struct amdgpu_device *adev)
    6544             : {
    6545             :         u32 wgp_idx, wgp_active_bitmap;
    6546             :         u32 cu_bitmap_per_wgp, cu_active_bitmap;
    6547             : 
    6548           0 :         wgp_active_bitmap = gfx_v11_0_get_wgp_active_bitmap_per_sh(adev);
    6549           0 :         cu_active_bitmap = 0;
    6550             : 
    6551           0 :         for (wgp_idx = 0; wgp_idx < 16; wgp_idx++) {
    6552             :                 /* if there is one WGP enabled, it means 2 CUs will be enabled */
    6553           0 :                 cu_bitmap_per_wgp = 3 << (2 * wgp_idx);
    6554           0 :                 if (wgp_active_bitmap & (1 << wgp_idx))
    6555           0 :                         cu_active_bitmap |= cu_bitmap_per_wgp;
    6556             :         }
    6557             : 
    6558           0 :         return cu_active_bitmap;
    6559             : }
    6560             : 
/*
 * Populate @cu_info with the per-SE/SH active-CU bitmaps and the total
 * active CU count, walking every shader engine / shader array under
 * grbm_idx_mutex so the GRBM index selection is not disturbed.
 *
 * Returns 0 on success, -EINVAL if @adev or @cu_info is NULL.
 */
static int gfx_v11_0_get_cu_info(struct amdgpu_device *adev,
				 struct amdgpu_cu_info *cu_info)
{
	int i, j, k, counter, active_cu_number = 0;
	u32 mask, bitmap;
	/* user-requested disable masks, sized for up to 8 SEs x 2 SHs
	 * to match the i < 8 && j < 2 guard below */
	unsigned disable_masks[8 * 2];

	if (!adev || !cu_info)
		return -EINVAL;

	amdgpu_gfx_parse_disable_cu(disable_masks, 8, 2);

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			counter = 0;
			/* target this SE/SH pair, all instances */
			gfx_v11_0_select_se_sh(adev, i, j, 0xffffffff);
			if (i < 8 && j < 2)
				gfx_v11_0_set_user_wgp_inactive_bitmap_per_sh(
					adev, disable_masks[i * 2 + j]);
			bitmap = gfx_v11_0_get_cu_active_bitmap_per_sh(adev);

			/**
			 * GFX11 could support more than 4 SEs, while the bitmap
			 * in cu_info struct is 4x4 and ioctl interface struct
			 * drm_amdgpu_info_device should keep stable.
			 * So we use last two columns of bitmap to store cu mask for
			 * SEs 4 to 7, the layout of the bitmap is as below:
			 *    SE0: {SH0,SH1} --> {bitmap[0][0], bitmap[0][1]}
			 *    SE1: {SH0,SH1} --> {bitmap[1][0], bitmap[1][1]}
			 *    SE2: {SH0,SH1} --> {bitmap[2][0], bitmap[2][1]}
			 *    SE3: {SH0,SH1} --> {bitmap[3][0], bitmap[3][1]}
			 *    SE4: {SH0,SH1} --> {bitmap[0][2], bitmap[0][3]}
			 *    SE5: {SH0,SH1} --> {bitmap[1][2], bitmap[1][3]}
			 *    SE6: {SH0,SH1} --> {bitmap[2][2], bitmap[2][3]}
			 *    SE7: {SH0,SH1} --> {bitmap[3][2], bitmap[3][3]}
			 */
			cu_info->bitmap[i % 4][j + (i / 4) * 2] = bitmap;

			/* count set bits up to max_cu_per_sh */
			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
				if (bitmap & mask)
					counter++;

				mask <<= 1;
			}
			active_cu_number += counter;
		}
	}
	/* restore broadcast mode for SE/SH/instance selection */
	gfx_v11_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	cu_info->number = active_cu_number;
	cu_info->simd_per_cu = NUM_SIMD_PER_CU;

	return 0;
}
    6618             : 
    6619             : const struct amdgpu_ip_block_version gfx_v11_0_ip_block =
    6620             : {
    6621             :         .type = AMD_IP_BLOCK_TYPE_GFX,
    6622             :         .major = 11,
    6623             :         .minor = 0,
    6624             :         .rev = 0,
    6625             :         .funcs = &gfx_v11_0_ip_funcs,
    6626             : };

Generated by: LCOV version 1.14