Line data Source code
1 : /*
2 : * Copyright 2021 Advanced Micro Devices, Inc.
3 : *
4 : * Permission is hereby granted, free of charge, to any person obtaining a
5 : * copy of this software and associated documentation files (the "Software"),
6 : * to deal in the Software without restriction, including without limitation
7 : * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 : * and/or sell copies of the Software, and to permit persons to whom the
9 : * Software is furnished to do so, subject to the following conditions:
10 : *
11 : * The above copyright notice and this permission notice shall be included in
12 : * all copies or substantial portions of the Software.
13 : *
14 : * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 : * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 : * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 : * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 : * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 : * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 : * OTHER DEALINGS IN THE SOFTWARE.
21 : *
22 : */
23 : #include "amdgpu_ras.h"
24 : #include "amdgpu.h"
25 : #include "amdgpu_mca.h"
26 :
27 : #include "umc/umc_6_7_0_offset.h"
28 : #include "umc/umc_6_7_0_sh_mask.h"
29 :
30 0 : void amdgpu_mca_query_correctable_error_count(struct amdgpu_device *adev,
31 : uint64_t mc_status_addr,
32 : unsigned long *error_count)
33 : {
34 0 : uint64_t mc_status = RREG64_PCIE(mc_status_addr);
35 :
36 0 : if (REG_GET_FIELD(mc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
37 0 : REG_GET_FIELD(mc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)
38 0 : *error_count += 1;
39 0 : }
40 :
41 0 : void amdgpu_mca_query_uncorrectable_error_count(struct amdgpu_device *adev,
42 : uint64_t mc_status_addr,
43 : unsigned long *error_count)
44 : {
45 0 : uint64_t mc_status = RREG64_PCIE(mc_status_addr);
46 :
47 0 : if ((REG_GET_FIELD(mc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) &&
48 : (REG_GET_FIELD(mc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1 ||
49 : REG_GET_FIELD(mc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
50 : REG_GET_FIELD(mc_status, MCA_UMC_UMC0_MCUMC_STATUST0, PCC) == 1 ||
51 0 : REG_GET_FIELD(mc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 1 ||
52 : REG_GET_FIELD(mc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1))
53 0 : *error_count += 1;
54 0 : }
55 :
/*
 * amdgpu_mca_reset_error_count - clear an MCA status register
 * @adev: amdgpu device (not named directly in this body; presumably consumed
 *        by the WREG64_PCIE accessor macro - confirm against its definition)
 * @mc_status_addr: address of the status register to clear
 *
 * Writes a 64-bit zero to @mc_status_addr.
 */
void amdgpu_mca_reset_error_count(struct amdgpu_device *adev,
				  uint64_t mc_status_addr)
{
	const uint64_t cleared_status = 0x0ULL;

	WREG64_PCIE(mc_status_addr, cleared_status);
}
61 :
62 0 : void amdgpu_mca_query_ras_error_count(struct amdgpu_device *adev,
63 : uint64_t mc_status_addr,
64 : void *ras_error_status)
65 : {
66 0 : struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
67 :
68 0 : amdgpu_mca_query_correctable_error_count(adev, mc_status_addr, &(err_data->ce_count));
69 0 : amdgpu_mca_query_uncorrectable_error_count(adev, mc_status_addr, &(err_data->ue_count));
70 :
71 0 : amdgpu_mca_reset_error_count(adev, mc_status_addr);
72 0 : }
|