LCOV - code coverage report
Current view: top level - drivers/gpu/drm/amd/amdgpu - umc_v8_10.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 0 115 0.0 %
Date: 2022-12-09 01:23:36 Functions: 0 9 0.0 %

          Line data    Source code
       1             : /*
       2             :  * Copyright 2022 Advanced Micro Devices, Inc.
       3             :  *
       4             :  * Permission is hereby granted, free of charge, to any person obtaining a
       5             :  * copy of this software and associated documentation files (the "Software"),
       6             :  * to deal in the Software without restriction, including without limitation
       7             :  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
       8             :  * and/or sell copies of the Software, and to permit persons to whom the
       9             :  * Software is furnished to do so, subject to the following conditions:
      10             :  *
      11             :  * The above copyright notice and this permission notice shall be included in
      12             :  * all copies or substantial portions of the Software.
      13             :  *
      14             :  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
      15             :  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
      16             :  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
      17             :  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
      18             :  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
      19             :  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
      20             :  * OTHER DEALINGS IN THE SOFTWARE.
      21             :  *
      22             :  */
      23             : #include "umc_v8_10.h"
      24             : #include "amdgpu_ras.h"
      25             : #include "amdgpu_umc.h"
      26             : #include "amdgpu.h"
      27             : #include "umc/umc_8_10_0_offset.h"
      28             : #include "umc/umc_8_10_0_sh_mask.h"
      29             : 
      30             : #define UMC_8_NODE_DIST   0x800000
      31             : #define UMC_8_INST_DIST   0x4000
      32             : 
      33             : struct channelnum_map_colbit {
      34             :         uint32_t channel_num;
      35             :         uint32_t col_bit;
      36             : };
      37             : 
      38             : const struct channelnum_map_colbit umc_v8_10_channelnum_map_colbit_table[] = {
      39             :         {24, 13},
      40             :         {20, 13},
      41             :         {16, 12},
      42             :         {14, 12},
      43             :         {12, 12},
      44             :         {10, 12},
      45             :         {6,  11},
      46             : };
      47             : 
      48             : const uint32_t
      49             :         umc_v8_10_channel_idx_tbl[]
      50             :                                 [UMC_V8_10_UMC_INSTANCE_NUM]
      51             :                                 [UMC_V8_10_CHANNEL_INSTANCE_NUM] = {
      52             :            {{16, 18}, {17, 19}},
      53             :            {{15, 11}, {3,   7}},
      54             :            {{1,   5}, {13,  9}},
      55             :            {{23, 21}, {22, 20}},
      56             :            {{0,   4}, {12,  8}},
      57             :            {{14, 10}, {2,   6}}
      58             :         };
      59             : 
      60             : static inline uint32_t get_umc_v8_10_reg_offset(struct amdgpu_device *adev,
      61             :                                             uint32_t node_inst,
      62             :                                             uint32_t umc_inst,
      63             :                                             uint32_t ch_inst)
      64             : {
      65           0 :         return adev->umc.channel_offs * ch_inst + UMC_8_INST_DIST * umc_inst +
      66           0 :                 UMC_8_NODE_DIST * node_inst;
      67             : }
      68             : 
      69             : static void umc_v8_10_clear_error_count_per_channel(struct amdgpu_device *adev,
      70             :                                         uint32_t umc_reg_offset)
      71             : {
      72             :         uint32_t ecc_err_cnt_addr;
      73             : 
      74           0 :         ecc_err_cnt_addr =
      75           0 :                 SOC15_REG_OFFSET(UMC, 0, regUMCCH0_0_GeccErrCnt);
      76             : 
      77             :         /* clear error count */
      78           0 :         WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4,
      79             :                         UMC_V8_10_CE_CNT_INIT);
      80             : }
      81             : 
      82           0 : static void umc_v8_10_clear_error_count(struct amdgpu_device *adev)
      83             : {
      84           0 :         uint32_t node_inst       = 0;
      85           0 :         uint32_t umc_inst        = 0;
      86           0 :         uint32_t ch_inst         = 0;
      87           0 :         uint32_t umc_reg_offset  = 0;
      88             : 
      89           0 :         LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst) {
      90           0 :                 umc_reg_offset = get_umc_v8_10_reg_offset(adev,
      91             :                                                 node_inst,
      92             :                                                 umc_inst,
      93             :                                                 ch_inst);
      94             : 
      95           0 :                 umc_v8_10_clear_error_count_per_channel(adev,
      96             :                                                 umc_reg_offset);
      97             :         }
      98           0 : }
      99             : 
     100           0 : static void umc_v8_10_query_correctable_error_count(struct amdgpu_device *adev,
     101             :                                                    uint32_t umc_reg_offset,
     102             :                                                    unsigned long *error_count)
     103             : {
     104             :         uint64_t mc_umc_status;
     105             :         uint32_t mc_umc_status_addr;
     106             : 
     107             :         /* UMC 8_10 registers */
     108           0 :         mc_umc_status_addr =
     109           0 :                 SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_STATUST0);
     110             : 
     111             :         /* Rely on MCUMC_STATUS for correctable error counter
     112             :          * MCUMC_STATUS is a 64 bit register
     113             :          */
     114           0 :         mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4);
     115           0 :         if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
     116           0 :             REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)
     117           0 :                 *error_count += 1;
     118           0 : }
     119             : 
     120           0 : static void umc_v8_10_query_uncorrectable_error_count(struct amdgpu_device *adev,
     121             :                                                       uint32_t umc_reg_offset,
     122             :                                                       unsigned long *error_count)
     123             : {
     124             :         uint64_t mc_umc_status;
     125             :         uint32_t mc_umc_status_addr;
     126             : 
     127           0 :         mc_umc_status_addr = SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_STATUST0);
     128             : 
     129             :         /* Check the MCUMC_STATUS. */
     130           0 :         mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4);
     131           0 :         if ((REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) &&
     132             :             (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1 ||
     133             :             REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
     134             :             REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, PCC) == 1 ||
     135           0 :             REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 1 ||
     136             :             REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1))
     137           0 :                 *error_count += 1;
     138           0 : }
     139             : 
     140           0 : static void umc_v8_10_query_ras_error_count(struct amdgpu_device *adev,
     141             :                                            void *ras_error_status)
     142             : {
     143           0 :         struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
     144             : 
     145           0 :         uint32_t node_inst       = 0;
     146           0 :         uint32_t umc_inst        = 0;
     147           0 :         uint32_t ch_inst         = 0;
     148           0 :         uint32_t umc_reg_offset  = 0;
     149             : 
     150           0 :         LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst) {
     151           0 :                 umc_reg_offset = get_umc_v8_10_reg_offset(adev,
     152             :                                                 node_inst,
     153             :                                                 umc_inst,
     154             :                                                 ch_inst);
     155             : 
     156           0 :                 umc_v8_10_query_correctable_error_count(adev,
     157             :                                                 umc_reg_offset,
     158             :                                                 &(err_data->ce_count));
     159           0 :                 umc_v8_10_query_uncorrectable_error_count(adev,
     160             :                                                 umc_reg_offset,
     161             :                                                 &(err_data->ue_count));
     162             :         }
     163             : 
     164           0 :         umc_v8_10_clear_error_count(adev);
     165           0 : }
     166             : 
     167             : static uint32_t umc_v8_10_get_col_bit(uint32_t channel_num)
     168             : {
     169           0 :         uint32_t t = 0;
     170             : 
     171           0 :         for (t = 0; t < ARRAY_SIZE(umc_v8_10_channelnum_map_colbit_table); t++)
     172           0 :                 if (channel_num == umc_v8_10_channelnum_map_colbit_table[t].channel_num)
     173           0 :                         return umc_v8_10_channelnum_map_colbit_table[t].col_bit;
     174             : 
     175             :         /* Failed to get col_bit. */
     176             :         return U32_MAX;
     177             : }
     178             : 
     179             : /*
     180             :  * Mapping normal address to soc physical address in swizzle mode.
     181             :  */
     182           0 : static int umc_v8_10_swizzle_mode_na_to_pa(struct amdgpu_device *adev,
     183             :                                         uint32_t channel_idx,
     184             :                                         uint64_t na, uint64_t *soc_pa)
     185             : {
     186           0 :         uint32_t channel_num = UMC_V8_10_TOTAL_CHANNEL_NUM(adev);
     187           0 :         uint32_t col_bit = umc_v8_10_get_col_bit(channel_num);
     188             :         uint64_t tmp_addr;
     189             : 
     190           0 :         if (col_bit == U32_MAX)
     191             :                 return -1;
     192             : 
     193           0 :         tmp_addr = SWIZZLE_MODE_TMP_ADDR(na, channel_num, channel_idx);
     194           0 :         *soc_pa = SWIZZLE_MODE_ADDR_HI(tmp_addr, col_bit) |
     195           0 :                 SWIZZLE_MODE_ADDR_MID(na, col_bit) |
     196           0 :                 SWIZZLE_MODE_ADDR_LOW(tmp_addr, col_bit) |
     197           0 :                 SWIZZLE_MODE_ADDR_LSB(na);
     198             : 
     199             :         return 0;
     200             : }
     201             : 
     202           0 : static void umc_v8_10_query_error_address(struct amdgpu_device *adev,
     203             :                                          struct ras_err_data *err_data,
     204             :                                          uint32_t umc_reg_offset,
     205             :                                          uint32_t node_inst,
     206             :                                          uint32_t ch_inst,
     207             :                                          uint32_t umc_inst)
     208             : {
     209             :         uint64_t mc_umc_status_addr;
     210             :         uint64_t mc_umc_status, err_addr;
     211             :         uint32_t channel_index;
     212             : 
     213           0 :         mc_umc_status_addr =
     214           0 :                 SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_STATUST0);
     215           0 :         mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4);
     216             : 
     217           0 :         if (mc_umc_status == 0)
     218             :                 return;
     219             : 
     220           0 :         if (!err_data->err_addr) {
     221             :                 /* clear umc status */
     222           0 :                 WREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4, 0x0ULL);
     223           0 :                 return;
     224             :         }
     225             : 
     226           0 :         channel_index =
     227           0 :                 adev->umc.channel_idx_tbl[node_inst * adev->umc.umc_inst_num *
     228           0 :                                         adev->umc.channel_inst_num +
     229           0 :                                         umc_inst * adev->umc.channel_inst_num +
     230             :                                         ch_inst];
     231             : 
     232             :         /* calculate error address if ue/ce error is detected */
     233           0 :         if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
     234           0 :             REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, AddrV) == 1 &&
     235           0 :             (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
     236             :              REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)) {
     237             :                 uint32_t addr_lsb;
     238             :                 uint64_t mc_umc_addrt0;
     239             : 
     240           0 :                 mc_umc_addrt0 = SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_ADDRT0);
     241           0 :                 err_addr = RREG64_PCIE((mc_umc_addrt0 + umc_reg_offset) * 4);
     242           0 :                 err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr);
     243             : 
     244             :                 /* the lowest lsb bits should be ignored */
     245           0 :                 addr_lsb = REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, AddrLsb);
     246             : 
     247           0 :                 err_addr &= ~((0x1ULL << addr_lsb) - 1);
     248             : 
     249             :                 /* we only save ue error information currently, ce is skipped */
     250           0 :                 if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1) {
     251           0 :                         uint64_t na_err_addr_base = err_addr & ~(0x3ULL << UMC_V8_10_NA_C5_BIT);
     252             :                         uint64_t na_err_addr, retired_page_addr;
     253           0 :                         uint32_t col = 0;
     254           0 :                         int ret = 0;
     255             : 
     256             :                         /* loop for all possibilities of [C6 C5] in normal address. */
     257           0 :                         for (col = 0; col < UMC_V8_10_NA_COL_2BITS_POWER_OF_2_NUM; col++) {
     258           0 :                                 na_err_addr = na_err_addr_base | (col << UMC_V8_10_NA_C5_BIT);
     259             : 
     260             :                                 /* Mapping normal error address to retired soc physical address. */
     261           0 :                                 ret = umc_v8_10_swizzle_mode_na_to_pa(adev, channel_index,
     262             :                                                                 na_err_addr, &retired_page_addr);
     263           0 :                                 if (ret) {
     264           0 :                                         dev_err(adev->dev, "Failed to map pa from umc na.\n");
     265           0 :                                         break;
     266             :                                 }
     267           0 :                                 dev_info(adev->dev, "Error Address(PA): 0x%llx\n",
     268             :                                         retired_page_addr);
     269           0 :                                 amdgpu_umc_fill_error_record(err_data, na_err_addr,
     270             :                                                 retired_page_addr, channel_index, umc_inst);
     271             :                         }
     272             :                 }
     273             :         }
     274             : 
     275             :         /* clear umc status */
     276           0 :         WREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4, 0x0ULL);
     277             : }
     278             : 
     279           0 : static void umc_v8_10_query_ras_error_address(struct amdgpu_device *adev,
     280             :                                              void *ras_error_status)
     281             : {
     282           0 :         struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
     283           0 :         uint32_t node_inst       = 0;
     284           0 :         uint32_t umc_inst        = 0;
     285           0 :         uint32_t ch_inst         = 0;
     286           0 :         uint32_t umc_reg_offset  = 0;
     287             : 
     288           0 :         LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst) {
     289           0 :                 umc_reg_offset = get_umc_v8_10_reg_offset(adev,
     290             :                                                 node_inst,
     291             :                                                 umc_inst,
     292             :                                                 ch_inst);
     293             : 
     294           0 :                 umc_v8_10_query_error_address(adev,
     295             :                                         err_data,
     296             :                                         umc_reg_offset,
     297             :                                         node_inst,
     298             :                                         ch_inst,
     299             :                                         umc_inst);
     300             :         }
     301           0 : }
     302             : 
     303           0 : static void umc_v8_10_err_cnt_init_per_channel(struct amdgpu_device *adev,
     304             :                                               uint32_t umc_reg_offset)
     305             : {
     306             :         uint32_t ecc_err_cnt_sel, ecc_err_cnt_sel_addr;
     307             :         uint32_t ecc_err_cnt_addr;
     308             : 
     309           0 :         ecc_err_cnt_sel_addr =
     310           0 :                 SOC15_REG_OFFSET(UMC, 0, regUMCCH0_0_GeccErrCntSel);
     311           0 :         ecc_err_cnt_addr =
     312             :                 SOC15_REG_OFFSET(UMC, 0, regUMCCH0_0_GeccErrCnt);
     313             : 
     314           0 :         ecc_err_cnt_sel = RREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4);
     315             : 
     316             :         /* set ce error interrupt type to APIC based interrupt */
     317           0 :         ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_GeccErrCntSel,
     318             :                                         GeccErrInt, 0x1);
     319           0 :         WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4, ecc_err_cnt_sel);
     320             :         /* set error count to initial value */
     321           0 :         WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4, UMC_V8_10_CE_CNT_INIT);
     322           0 : }
     323             : 
     324           0 : static void umc_v8_10_err_cnt_init(struct amdgpu_device *adev)
     325             : {
     326           0 :         uint32_t node_inst       = 0;
     327           0 :         uint32_t umc_inst        = 0;
     328           0 :         uint32_t ch_inst         = 0;
     329           0 :         uint32_t umc_reg_offset  = 0;
     330             : 
     331           0 :         LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst) {
     332           0 :                 umc_reg_offset = get_umc_v8_10_reg_offset(adev,
     333             :                                                 node_inst,
     334             :                                                 umc_inst,
     335             :                                                 ch_inst);
     336             : 
     337           0 :                 umc_v8_10_err_cnt_init_per_channel(adev, umc_reg_offset);
     338             :         }
     339           0 : }
     340             : 
     341             : const struct amdgpu_ras_block_hw_ops umc_v8_10_ras_hw_ops = {
     342             :         .query_ras_error_count = umc_v8_10_query_ras_error_count,
     343             :         .query_ras_error_address = umc_v8_10_query_ras_error_address,
     344             : };
     345             : 
     346             : struct amdgpu_umc_ras umc_v8_10_ras = {
     347             :         .ras_block = {
     348             :                 .hw_ops = &umc_v8_10_ras_hw_ops,
     349             :         },
     350             :         .err_cnt_init = umc_v8_10_err_cnt_init,
     351             : };

Generated by: LCOV version 1.14