LCOV - code coverage report
Current view: top level - drivers/gpu/drm/amd/amdgpu - sdma_v4_4.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 0 49 0.0 %
Date: 2022-12-09 01:23:36 Functions: 0 5 0.0 %

          Line data    Source code
       1             : /*
       2             :  * Copyright 2020 Advanced Micro Devices, Inc.
       3             :  *
       4             :  * Permission is hereby granted, free of charge, to any person obtaining a
       5             :  * copy of this software and associated documentation files (the "Software"),
       6             :  * to deal in the Software without restriction, including without limitation
       7             :  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
       8             :  * and/or sell copies of the Software, and to permit persons to whom the
       9             :  * Software is furnished to do so, subject to the following conditions:
      10             :  *
      11             :  * The above copyright notice and this permission notice shall be included in
      12             :  * all copies or substantial portions of the Software.
      13             :  *
      14             :  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
      15             :  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
      16             :  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
      17             :  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
      18             :  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
      19             :  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
      20             :  * OTHER DEALINGS IN THE SOFTWARE.
      21             :  *
      22             :  */
      23             : #include "amdgpu.h"
      24             : #include "sdma/sdma_4_4_0_offset.h"
      25             : #include "sdma/sdma_4_4_0_sh_mask.h"
      26             : #include "soc15.h"
      27             : #include "amdgpu_ras.h"
      28             : 
      29             : #define SDMA1_REG_OFFSET 0x600    /* added to the SDMA0 base for instance 1 */
      30             : #define SDMA2_REG_OFFSET 0x1cda0  /* added to the SDMA0 base for instance 2 */
      31             : #define SDMA3_REG_OFFSET 0x1d1a0  /* added to the SDMA0 base for instance 3 */
      32             : #define SDMA4_REG_OFFSET 0x1d5a0  /* added to the SDMA0 base for instance 4 */
      33             : 
      34             : /* Helper so callers only need the sdma0 register offsets: returns the offset of
      35             :  * the same register in SDMA instance 0-4, or 0 for any other instance */
      36           0 : static uint32_t sdma_v4_4_get_reg_offset(struct amdgpu_device *adev,
      37             :                                          uint32_t instance,
      38             :                                          uint32_t offset)
      39             : {
      40           0 :         uint32_t sdma_base = adev->reg_offset[SDMA0_HWIP][0][0]; /* SDMA0 base from the device's reg_offset table */
      41             : 
      42           0 :         switch (instance) {
      43             :         case 0: /* instance 0 needs no per-instance delta */
      44           0 :                 return (sdma_base + offset);
      45             :         case 1:
      46           0 :                 return (sdma_base + SDMA1_REG_OFFSET + offset);
      47             :         case 2:
      48           0 :                 return (sdma_base + SDMA2_REG_OFFSET + offset);
      49             :         case 3:
      50           0 :                 return (sdma_base + SDMA3_REG_OFFSET + offset);
      51             :         case 4:
      52           0 :                 return (sdma_base + SDMA4_REG_OFFSET + offset);
      53             :         default:
      54             :                 break;
      55             :         }
      56             :         return 0; /* instance outside 0-4 */
      57             : }
      58             : 
      59             : static const struct soc15_ras_field_entry sdma_v4_4_ras_fields[] = { /* SED fields, looked up by register offset */
      60             :         { "SDMA_MBANK_DATA_BUF0_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER),
      61             :         SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF0_SED),
      62             :         0, 0,
      63             :         },
      64             :         { "SDMA_MBANK_DATA_BUF1_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER),
      65             :         SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF1_SED),
      66             :         0, 0,
      67             :         },
      68             :         { "SDMA_MBANK_DATA_BUF2_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER),
      69             :         SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF2_SED),
      70             :         0, 0,
      71             :         },
      72             :         { "SDMA_MBANK_DATA_BUF3_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER),
      73             :         SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF3_SED),
      74             :         0, 0,
      75             :         },
      76             :         { "SDMA_MBANK_DATA_BUF4_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER),
      77             :         SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF4_SED),
      78             :         0, 0,
      79             :         },
      80             :         { "SDMA_MBANK_DATA_BUF5_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER),
      81             :         SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF5_SED),
      82             :         0, 0,
      83             :         },
      84             :         { "SDMA_MBANK_DATA_BUF6_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER),
      85             :         SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF6_SED),
      86             :         0, 0,
      87             :         },
      88             :         { "SDMA_MBANK_DATA_BUF7_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER),
      89             :         SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF7_SED),
      90             :         0, 0,
      91             :         },
      92             :         { "SDMA_MBANK_DATA_BUF8_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER),
      93             :         SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF8_SED),
      94             :         0, 0,
      95             :         },
      96             :         { "SDMA_MBANK_DATA_BUF9_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER),
      97             :         SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF9_SED),
      98             :         0, 0,
      99             :         },
     100             :         { "SDMA_MBANK_DATA_BUF10_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER),
     101             :         SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF10_SED),
     102             :         0, 0,
     103             :         },
     104             :         { "SDMA_MBANK_DATA_BUF11_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER),
     105             :         SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF11_SED),
     106             :         0, 0,
     107             :         },
     108             :         { "SDMA_MBANK_DATA_BUF12_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER),
     109             :         SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF12_SED),
     110             :         0, 0,
     111             :         },
     112             :         { "SDMA_MBANK_DATA_BUF13_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER),
     113             :         SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF13_SED),
     114             :         0, 0,
     115             :         },
     116             :         { "SDMA_MBANK_DATA_BUF14_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER),
     117             :         SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF14_SED),
     118             :         0, 0,
     119             :         },
     120             :         { "SDMA_MBANK_DATA_BUF15_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER),
     121             :         SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF15_SED),
     122             :         0, 0,
     123             :         },
     124             :         { "SDMA_UCODE_BUF_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER2), /* entries from here on use EDC_COUNTER2 */
     125             :         SOC15_REG_FIELD(SDMA0_EDC_COUNTER2, SDMA_UCODE_BUF_SED),
     126             :         0, 0,
     127             :         },
     128             :         { "SDMA_RB_CMD_BUF_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER2),
     129             :         SOC15_REG_FIELD(SDMA0_EDC_COUNTER2, SDMA_RB_CMD_BUF_SED),
     130             :         0, 0,
     131             :         },
     132             :         { "SDMA_IB_CMD_BUF_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER2),
     133             :         SOC15_REG_FIELD(SDMA0_EDC_COUNTER2, SDMA_IB_CMD_BUF_SED),
     134             :         0, 0,
     135             :         },
     136             :         { "SDMA_UTCL1_RD_FIFO_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER2),
     137             :         SOC15_REG_FIELD(SDMA0_EDC_COUNTER2, SDMA_UTCL1_RD_FIFO_SED),
     138             :         0, 0,
     139             :         },
     140             :         { "SDMA_UTCL1_RDBST_FIFO_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER2),
     141             :         SOC15_REG_FIELD(SDMA0_EDC_COUNTER2, SDMA_UTCL1_RDBST_FIFO_SED),
     142             :         0, 0,
     143             :         },
     144             :         { "SDMA_DATA_LUT_FIFO_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER2),
     145             :         SOC15_REG_FIELD(SDMA0_EDC_COUNTER2, SDMA_DATA_LUT_FIFO_SED),
     146             :         0, 0,
     147             :         },
     148             :         { "SDMA_SPLIT_DATA_BUF_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER2),
     149             :         SOC15_REG_FIELD(SDMA0_EDC_COUNTER2, SDMA_SPLIT_DATA_BUF_SED),
     150             :         0, 0,
     151             :         },
     152             :         { "SDMA_MC_WR_ADDR_FIFO_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER2),
     153             :         SOC15_REG_FIELD(SDMA0_EDC_COUNTER2, SDMA_MC_WR_ADDR_FIFO_SED),
     154             :         0, 0,
     155             :         },
     156             :         { "SDMA_MC_RDRET_BUF_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER2),
     157             :         SOC15_REG_FIELD(SDMA0_EDC_COUNTER2, SDMA_MC_RDRET_BUF_SED), /* own field, not the WR_ADDR_FIFO one */
     158             :         0, 0,
     159             :         },
     160             : };
     161             : 
     162           0 : static void sdma_v4_4_get_ras_error_count(struct amdgpu_device *adev,
     163             :                                           uint32_t reg_offset, /* selects which table entries apply */
     164             :                                           uint32_t value,      /* raw contents of that EDC register */
     165             :                                           uint32_t instance,   /* only used in the log message */
     166             :                                           uint32_t *sec_count) /* incremented by every non-zero SED field */
     167             : {
     168             :         uint32_t i;
     169             :         uint32_t sec_cnt;
     170             : 
     171             :         /* only SED fields are decoded; double (multiple) bit error detection is not supported */
     172           0 :         for (i = 0; i < ARRAY_SIZE(sdma_v4_4_ras_fields); i++) {
     173           0 :                 if (sdma_v4_4_ras_fields[i].reg_offset != reg_offset)
     174           0 :                         continue;
     175             : 
     176             :                 /* the SDMA_EDC_COUNTER register in each sdma instance
     177             :                  * shares the same sed shift/mask, so the SDMA0 table serves all
     178             :                  */
     179           0 :                 sec_cnt = (value &
     180           0 :                         sdma_v4_4_ras_fields[i].sec_count_mask) >>
     181           0 :                         sdma_v4_4_ras_fields[i].sec_count_shift;
     182           0 :                 if (sec_cnt) {
     183           0 :                         dev_info(adev->dev, "Detected %s in SDMA%u, SED %u\n", /* %u: both arguments are uint32_t */
     184             :                                  sdma_v4_4_ras_fields[i].name,
     185             :                                  instance, sec_cnt);
     186           0 :                         *sec_count += sec_cnt;
     187             :                 }
     188             :         }
     189           0 : }
     190             : 
     191           0 : static int sdma_v4_4_query_ras_error_count_by_instance(struct amdgpu_device *adev,
     192             :                                            uint32_t instance,      /* SDMA instance whose EDC counters are read */
     193             :                                            void *ras_error_status) /* struct ras_err_data: ue_count incremented, ce_count zeroed */
     194             : {
     195           0 :         struct ras_err_data *err_data = ras_error_status;
     196           0 :         uint32_t sec_count = 0;
     197           0 :         uint32_t reg_value = 0;
     198           0 :         uint32_t reg_offset = 0;
     199             : 
     200           0 :         reg_offset = sdma_v4_4_get_reg_offset(adev, instance, regSDMA0_EDC_COUNTER);
     201           0 :         reg_value = RREG32(reg_offset);
     202             :         /* double bit error is not supported; decode only when some field is set */
     203           0 :         if (reg_value)
     204           0 :                 sdma_v4_4_get_ras_error_count(adev, regSDMA0_EDC_COUNTER, reg_value,
     205             :                                               instance, &sec_count);
     206             : 
     207           0 :         reg_offset = sdma_v4_4_get_reg_offset(adev, instance, regSDMA0_EDC_COUNTER2);
     208           0 :         reg_value = RREG32(reg_offset);
     209             :         /* double bit error is not supported; decode only when some field is set */
     210           0 :         if (reg_value)
     211           0 :                 sdma_v4_4_get_ras_error_count(adev, regSDMA0_EDC_COUNTER2, reg_value,
     212             :                                               instance, &sec_count);
     213             : 
     214             :         /*
     215             :          * err_data->ue_count should be initialized to 0
     216             :          * before calling into this function
     217             :          *
     218             :          * SDMA RAS supports single bit uncorrectable error detection.
     219             :          * So, increment uncorrectable error count.
     220             :          */
     221           0 :         err_data->ue_count += sec_count;
     222             : 
     223             :         /*
     224             :          * SDMA RAS does not support correctable errors.
     225             :          * Set ce count to 0.
     226             :          */
     227           0 :         err_data->ce_count = 0;
     228             : 
     229           0 :         return 0; /* this function has no failure path */
     230             : }
     231             : 
     232           0 : static void sdma_v4_4_reset_ras_error_count(struct amdgpu_device *adev)
     233             : {
     234             :         int i;
     235             :         uint32_t reg_offset;
     236             : 
     237             :         /* write 0 to both EDC_COUNTER regs of every instance to clear sdma edc counters */
     238           0 :         if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA)) { /* nothing is written otherwise */
     239           0 :                 for (i = 0; i < adev->sdma.num_instances; i++) {
     240           0 :                         reg_offset = sdma_v4_4_get_reg_offset(adev, i, regSDMA0_EDC_COUNTER);
     241           0 :                         WREG32(reg_offset, 0);
     242           0 :                         reg_offset = sdma_v4_4_get_reg_offset(adev, i, regSDMA0_EDC_COUNTER2);
     243           0 :                         WREG32(reg_offset, 0);
     244             :                 }
     245             :         }
     246           0 : }
     247             : 
     248           0 : static void sdma_v4_4_query_ras_error_count(struct amdgpu_device *adev,  void *ras_error_status)
     249             : {
     250           0 :         int i = 0;
     251             : 
     252           0 :         for (i = 0; i < adev->sdma.num_instances; i++) { /* query each SDMA instance in turn */
     253           0 :                 if (sdma_v4_4_query_ras_error_count_by_instance(adev, i, ras_error_status)) {
     254           0 :                         dev_err(adev->dev, "Query ras error count failed in SDMA%d\n", i);
     255           0 :                         return; /* stop at the first failing instance; later ones are not queried */
     256             :                 }
     257             :         }
     258             : 
     259             : }
     260             : 
     261             : const struct amdgpu_ras_block_hw_ops sdma_v4_4_ras_hw_ops = { /* RAS callbacks implemented in this file */
     262             :         .query_ras_error_count = sdma_v4_4_query_ras_error_count,
     263             :         .reset_ras_error_count = sdma_v4_4_reset_ras_error_count,
     264             : };
     265             : 
     266             : struct amdgpu_sdma_ras sdma_v4_4_ras = { /* non-static object; only hw_ops is set here */
     267             :         .ras_block = {
     268             :                 .hw_ops = &sdma_v4_4_ras_hw_ops,
     269             :         },
     270             : };

Generated by: LCOV version 1.14