Line data Source code
1 : /*
2 : * Copyright 2016 Advanced Micro Devices, Inc.
3 : *
4 : * Permission is hereby granted, free of charge, to any person obtaining a
5 : * copy of this software and associated documentation files (the "Software"),
6 : * to deal in the Software without restriction, including without limitation
7 : * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 : * and/or sell copies of the Software, and to permit persons to whom the
9 : * Software is furnished to do so, subject to the following conditions:
10 : *
11 : * The above copyright notice and this permission notice shall be included in
12 : * all copies or substantial portions of the Software.
13 : *
14 : * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 : * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 : * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 : * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 : * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 : * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 : * OTHER DEALINGS IN THE SOFTWARE.
21 : *
22 : */
23 :
24 : #include <linux/firmware.h>
25 : #include <linux/pci.h>
26 :
27 : #include <drm/drm_cache.h>
28 :
29 : #include "amdgpu.h"
30 : #include "gmc_v9_0.h"
31 : #include "amdgpu_atomfirmware.h"
32 : #include "amdgpu_gem.h"
33 :
34 : #include "gc/gc_9_0_sh_mask.h"
35 : #include "dce/dce_12_0_offset.h"
36 : #include "dce/dce_12_0_sh_mask.h"
37 : #include "vega10_enum.h"
38 : #include "mmhub/mmhub_1_0_offset.h"
39 : #include "athub/athub_1_0_sh_mask.h"
40 : #include "athub/athub_1_0_offset.h"
41 : #include "oss/osssys_4_0_offset.h"
42 :
43 : #include "soc15.h"
44 : #include "soc15d.h"
45 : #include "soc15_common.h"
46 : #include "umc/umc_6_0_sh_mask.h"
47 :
48 : #include "gfxhub_v1_0.h"
49 : #include "mmhub_v1_0.h"
50 : #include "athub_v1_0.h"
51 : #include "gfxhub_v1_1.h"
52 : #include "mmhub_v9_4.h"
53 : #include "mmhub_v1_7.h"
54 : #include "umc_v6_1.h"
55 : #include "umc_v6_0.h"
56 : #include "umc_v6_7.h"
57 : #include "hdp_v4_0.h"
58 : #include "mca_v3_0.h"
59 :
60 : #include "ivsrcid/vmc/irqsrcs_vmc_1_0.h"
61 :
62 : #include "amdgpu_ras.h"
63 : #include "amdgpu_xgmi.h"
64 :
65 : #include "amdgpu_reset.h"
66 :
67 : /* Add these here since we already include the dce12 headers and these registers are for DCN */
68 : #define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION 0x055d
69 : #define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION_BASE_IDX 2
70 : #define HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION__PRI_VIEWPORT_WIDTH__SHIFT 0x0
71 : #define HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION__PRI_VIEWPORT_HEIGHT__SHIFT 0x10
72 : #define HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION__PRI_VIEWPORT_WIDTH_MASK 0x00003FFFL
73 : #define HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION__PRI_VIEWPORT_HEIGHT_MASK 0x3FFF0000L
74 : #define mmDCHUBBUB_SDPIF_MMIO_CNTRL_0 0x049d
75 : #define mmDCHUBBUB_SDPIF_MMIO_CNTRL_0_BASE_IDX 2
76 :
77 : #define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION_DCN2 0x05ea
78 : #define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION_DCN2_BASE_IDX 2
79 :
80 :
81 : static const char *gfxhub_client_ids[] = {
82 : "CB",
83 : "DB",
84 : "IA",
85 : "WD",
86 : "CPF",
87 : "CPC",
88 : "CPG",
89 : "RLC",
90 : "TCP",
91 : "SQC (inst)",
92 : "SQC (data)",
93 : "SQG",
94 : "PA",
95 : };
96 :
97 : static const char *mmhub_client_ids_raven[][2] = {
98 : [0][0] = "MP1",
99 : [1][0] = "MP0",
100 : [2][0] = "VCN",
101 : [3][0] = "VCNU",
102 : [4][0] = "HDP",
103 : [5][0] = "DCE",
104 : [13][0] = "UTCL2",
105 : [19][0] = "TLS",
106 : [26][0] = "OSS",
107 : [27][0] = "SDMA0",
108 : [0][1] = "MP1",
109 : [1][1] = "MP0",
110 : [2][1] = "VCN",
111 : [3][1] = "VCNU",
112 : [4][1] = "HDP",
113 : [5][1] = "XDP",
114 : [6][1] = "DBGU0",
115 : [7][1] = "DCE",
116 : [8][1] = "DCEDWB0",
117 : [9][1] = "DCEDWB1",
118 : [26][1] = "OSS",
119 : [27][1] = "SDMA0",
120 : };
121 :
122 : static const char *mmhub_client_ids_renoir[][2] = {
123 : [0][0] = "MP1",
124 : [1][0] = "MP0",
125 : [2][0] = "HDP",
126 : [4][0] = "DCEDMC",
127 : [5][0] = "DCEVGA",
128 : [13][0] = "UTCL2",
129 : [19][0] = "TLS",
130 : [26][0] = "OSS",
131 : [27][0] = "SDMA0",
132 : [28][0] = "VCN",
133 : [29][0] = "VCNU",
134 : [30][0] = "JPEG",
135 : [0][1] = "MP1",
136 : [1][1] = "MP0",
137 : [2][1] = "HDP",
138 : [3][1] = "XDP",
139 : [6][1] = "DBGU0",
140 : [7][1] = "DCEDMC",
141 : [8][1] = "DCEVGA",
142 : [9][1] = "DCEDWB",
143 : [26][1] = "OSS",
144 : [27][1] = "SDMA0",
145 : [28][1] = "VCN",
146 : [29][1] = "VCNU",
147 : [30][1] = "JPEG",
148 : };
149 :
150 : static const char *mmhub_client_ids_vega10[][2] = {
151 : [0][0] = "MP0",
152 : [1][0] = "UVD",
153 : [2][0] = "UVDU",
154 : [3][0] = "HDP",
155 : [13][0] = "UTCL2",
156 : [14][0] = "OSS",
157 : [15][0] = "SDMA1",
158 : [32+0][0] = "VCE0",
159 : [32+1][0] = "VCE0U",
160 : [32+2][0] = "XDMA",
161 : [32+3][0] = "DCE",
162 : [32+4][0] = "MP1",
163 : [32+14][0] = "SDMA0",
164 : [0][1] = "MP0",
165 : [1][1] = "UVD",
166 : [2][1] = "UVDU",
167 : [3][1] = "DBGU0",
168 : [4][1] = "HDP",
169 : [5][1] = "XDP",
170 : [14][1] = "OSS",
171 : [15][1] = "SDMA0",
172 : [32+0][1] = "VCE0",
173 : [32+1][1] = "VCE0U",
174 : [32+2][1] = "XDMA",
175 : [32+3][1] = "DCE",
176 : [32+4][1] = "DCEDWB",
177 : [32+5][1] = "MP1",
178 : [32+6][1] = "DBGU1",
179 : [32+14][1] = "SDMA1",
180 : };
181 :
182 : static const char *mmhub_client_ids_vega12[][2] = {
183 : [0][0] = "MP0",
184 : [1][0] = "VCE0",
185 : [2][0] = "VCE0U",
186 : [3][0] = "HDP",
187 : [13][0] = "UTCL2",
188 : [14][0] = "OSS",
189 : [15][0] = "SDMA1",
190 : [32+0][0] = "DCE",
191 : [32+1][0] = "XDMA",
192 : [32+2][0] = "UVD",
193 : [32+3][0] = "UVDU",
194 : [32+4][0] = "MP1",
195 : [32+15][0] = "SDMA0",
196 : [0][1] = "MP0",
197 : [1][1] = "VCE0",
198 : [2][1] = "VCE0U",
199 : [3][1] = "DBGU0",
200 : [4][1] = "HDP",
201 : [5][1] = "XDP",
202 : [14][1] = "OSS",
203 : [15][1] = "SDMA0",
204 : [32+0][1] = "DCE",
205 : [32+1][1] = "DCEDWB",
206 : [32+2][1] = "XDMA",
207 : [32+3][1] = "UVD",
208 : [32+4][1] = "UVDU",
209 : [32+5][1] = "MP1",
210 : [32+6][1] = "DBGU1",
211 : [32+15][1] = "SDMA1",
212 : };
213 :
214 : static const char *mmhub_client_ids_vega20[][2] = {
215 : [0][0] = "XDMA",
216 : [1][0] = "DCE",
217 : [2][0] = "VCE0",
218 : [3][0] = "VCE0U",
219 : [4][0] = "UVD",
220 : [5][0] = "UVD1U",
221 : [13][0] = "OSS",
222 : [14][0] = "HDP",
223 : [15][0] = "SDMA0",
224 : [32+0][0] = "UVD",
225 : [32+1][0] = "UVDU",
226 : [32+2][0] = "MP1",
227 : [32+3][0] = "MP0",
228 : [32+12][0] = "UTCL2",
229 : [32+14][0] = "SDMA1",
230 : [0][1] = "XDMA",
231 : [1][1] = "DCE",
232 : [2][1] = "DCEDWB",
233 : [3][1] = "VCE0",
234 : [4][1] = "VCE0U",
235 : [5][1] = "UVD1",
236 : [6][1] = "UVD1U",
237 : [7][1] = "DBGU0",
238 : [8][1] = "XDP",
239 : [13][1] = "OSS",
240 : [14][1] = "HDP",
241 : [15][1] = "SDMA0",
242 : [32+0][1] = "UVD",
243 : [32+1][1] = "UVDU",
244 : [32+2][1] = "DBGU1",
245 : [32+3][1] = "MP1",
246 : [32+4][1] = "MP0",
247 : [32+14][1] = "SDMA1",
248 : };
249 :
250 : static const char *mmhub_client_ids_arcturus[][2] = {
251 : [0][0] = "DBGU1",
252 : [1][0] = "XDP",
253 : [2][0] = "MP1",
254 : [14][0] = "HDP",
255 : [171][0] = "JPEG",
256 : [172][0] = "VCN",
257 : [173][0] = "VCNU",
258 : [203][0] = "JPEG1",
259 : [204][0] = "VCN1",
260 : [205][0] = "VCN1U",
261 : [256][0] = "SDMA0",
262 : [257][0] = "SDMA1",
263 : [258][0] = "SDMA2",
264 : [259][0] = "SDMA3",
265 : [260][0] = "SDMA4",
266 : [261][0] = "SDMA5",
267 : [262][0] = "SDMA6",
268 : [263][0] = "SDMA7",
269 : [384][0] = "OSS",
270 : [0][1] = "DBGU1",
271 : [1][1] = "XDP",
272 : [2][1] = "MP1",
273 : [14][1] = "HDP",
274 : [171][1] = "JPEG",
275 : [172][1] = "VCN",
276 : [173][1] = "VCNU",
277 : [203][1] = "JPEG1",
278 : [204][1] = "VCN1",
279 : [205][1] = "VCN1U",
280 : [256][1] = "SDMA0",
281 : [257][1] = "SDMA1",
282 : [258][1] = "SDMA2",
283 : [259][1] = "SDMA3",
284 : [260][1] = "SDMA4",
285 : [261][1] = "SDMA5",
286 : [262][1] = "SDMA6",
287 : [263][1] = "SDMA7",
288 : [384][1] = "OSS",
289 : };
290 :
291 : static const char *mmhub_client_ids_aldebaran[][2] = {
292 : [2][0] = "MP1",
293 : [3][0] = "MP0",
294 : [32+1][0] = "DBGU_IO0",
295 : [32+2][0] = "DBGU_IO2",
296 : [32+4][0] = "MPIO",
297 : [96+11][0] = "JPEG0",
298 : [96+12][0] = "VCN0",
299 : [96+13][0] = "VCNU0",
300 : [128+11][0] = "JPEG1",
301 : [128+12][0] = "VCN1",
302 : [128+13][0] = "VCNU1",
303 : [160+1][0] = "XDP",
304 : [160+14][0] = "HDP",
305 : [256+0][0] = "SDMA0",
306 : [256+1][0] = "SDMA1",
307 : [256+2][0] = "SDMA2",
308 : [256+3][0] = "SDMA3",
309 : [256+4][0] = "SDMA4",
310 : [384+0][0] = "OSS",
311 : [2][1] = "MP1",
312 : [3][1] = "MP0",
313 : [32+1][1] = "DBGU_IO0",
314 : [32+2][1] = "DBGU_IO2",
315 : [32+4][1] = "MPIO",
316 : [96+11][1] = "JPEG0",
317 : [96+12][1] = "VCN0",
318 : [96+13][1] = "VCNU0",
319 : [128+11][1] = "JPEG1",
320 : [128+12][1] = "VCN1",
321 : [128+13][1] = "VCNU1",
322 : [160+1][1] = "XDP",
323 : [160+14][1] = "HDP",
324 : [256+0][1] = "SDMA0",
325 : [256+1][1] = "SDMA1",
326 : [256+2][1] = "SDMA2",
327 : [256+3][1] = "SDMA3",
328 : [256+4][1] = "SDMA4",
329 : [384+0][1] = "OSS",
330 : };
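/*
 * Note on indexing (inferred from the fault handler below, not from HW
 * documentation): these tables are indexed as [client_id][rw], where
 * client_id is the CID field of VM_L2_PROTECTION_FAULT_STATUS and the
 * second index selects the client name for read (rw == 0) or write
 * (rw == 1) accesses, as used in gmc_v9_0_process_interrupt().
 */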
331 :
332 : static const struct soc15_reg_golden golden_settings_mmhub_1_0_0[] =
333 : {
334 : SOC15_REG_GOLDEN_VALUE(MMHUB, 0, mmDAGB1_WRCLI2, 0x00000007, 0xfe5fe0fa),
335 : SOC15_REG_GOLDEN_VALUE(MMHUB, 0, mmMMEA1_DRAM_WR_CLI2GRP_MAP0, 0x00000030, 0x55555565)
336 : };
337 :
338 : static const struct soc15_reg_golden golden_settings_athub_1_0_0[] =
339 : {
340 : SOC15_REG_GOLDEN_VALUE(ATHUB, 0, mmRPB_ARB_CNTL, 0x0000ff00, 0x00000800),
341 : SOC15_REG_GOLDEN_VALUE(ATHUB, 0, mmRPB_ARB_CNTL2, 0x00ff00ff, 0x00080008)
342 : };
343 :
344 : static const uint32_t ecc_umc_mcumc_ctrl_addrs[] = {
345 : (0x000143c0 + 0x00000000),
346 : (0x000143c0 + 0x00000800),
347 : (0x000143c0 + 0x00001000),
348 : (0x000143c0 + 0x00001800),
349 : (0x000543c0 + 0x00000000),
350 : (0x000543c0 + 0x00000800),
351 : (0x000543c0 + 0x00001000),
352 : (0x000543c0 + 0x00001800),
353 : (0x000943c0 + 0x00000000),
354 : (0x000943c0 + 0x00000800),
355 : (0x000943c0 + 0x00001000),
356 : (0x000943c0 + 0x00001800),
357 : (0x000d43c0 + 0x00000000),
358 : (0x000d43c0 + 0x00000800),
359 : (0x000d43c0 + 0x00001000),
360 : (0x000d43c0 + 0x00001800),
361 : (0x001143c0 + 0x00000000),
362 : (0x001143c0 + 0x00000800),
363 : (0x001143c0 + 0x00001000),
364 : (0x001143c0 + 0x00001800),
365 : (0x001543c0 + 0x00000000),
366 : (0x001543c0 + 0x00000800),
367 : (0x001543c0 + 0x00001000),
368 : (0x001543c0 + 0x00001800),
369 : (0x001943c0 + 0x00000000),
370 : (0x001943c0 + 0x00000800),
371 : (0x001943c0 + 0x00001000),
372 : (0x001943c0 + 0x00001800),
373 : (0x001d43c0 + 0x00000000),
374 : (0x001d43c0 + 0x00000800),
375 : (0x001d43c0 + 0x00001000),
376 : (0x001d43c0 + 0x00001800),
377 : };
378 :
379 : static const uint32_t ecc_umc_mcumc_ctrl_mask_addrs[] = {
380 : (0x000143e0 + 0x00000000),
381 : (0x000143e0 + 0x00000800),
382 : (0x000143e0 + 0x00001000),
383 : (0x000143e0 + 0x00001800),
384 : (0x000543e0 + 0x00000000),
385 : (0x000543e0 + 0x00000800),
386 : (0x000543e0 + 0x00001000),
387 : (0x000543e0 + 0x00001800),
388 : (0x000943e0 + 0x00000000),
389 : (0x000943e0 + 0x00000800),
390 : (0x000943e0 + 0x00001000),
391 : (0x000943e0 + 0x00001800),
392 : (0x000d43e0 + 0x00000000),
393 : (0x000d43e0 + 0x00000800),
394 : (0x000d43e0 + 0x00001000),
395 : (0x000d43e0 + 0x00001800),
396 : (0x001143e0 + 0x00000000),
397 : (0x001143e0 + 0x00000800),
398 : (0x001143e0 + 0x00001000),
399 : (0x001143e0 + 0x00001800),
400 : (0x001543e0 + 0x00000000),
401 : (0x001543e0 + 0x00000800),
402 : (0x001543e0 + 0x00001000),
403 : (0x001543e0 + 0x00001800),
404 : (0x001943e0 + 0x00000000),
405 : (0x001943e0 + 0x00000800),
406 : (0x001943e0 + 0x00001000),
407 : (0x001943e0 + 0x00001800),
408 : (0x001d43e0 + 0x00000000),
409 : (0x001d43e0 + 0x00000800),
410 : (0x001d43e0 + 0x00001000),
411 : (0x001d43e0 + 0x00001800),
412 : };
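/*
 * Layout note (inferred from the values, not stated in this file): each
 * group of four consecutive entries shares a base address that advances
 * by 0x40000 per UMC instance, with 0x800 between channel instances, so
 * the two tables appear to cover the MCUMC_CTRL and MCUMC_CTRL_MASK
 * registers of all 32 UMC channels on VEGA10/VEGA12.
 */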
413 :
414 0 : static int gmc_v9_0_ecc_interrupt_state(struct amdgpu_device *adev,
415 : struct amdgpu_irq_src *src,
416 : unsigned type,
417 : enum amdgpu_interrupt_state state)
418 : {
419 : u32 bits, i, tmp, reg;
420 :
421 : /* Devices newer than VEGA10/12 shall have these programming
422 : sequences performed by the PSP BL */
423 0 : if (adev->asic_type >= CHIP_VEGA20)
424 : return 0;
425 :
426 0 : bits = 0x7f;
427 :
428 0 : switch (state) {
429 : case AMDGPU_IRQ_STATE_DISABLE:
430 0 : for (i = 0; i < ARRAY_SIZE(ecc_umc_mcumc_ctrl_addrs); i++) {
431 0 : reg = ecc_umc_mcumc_ctrl_addrs[i];
432 0 : tmp = RREG32(reg);
433 0 : tmp &= ~bits;
434 0 : WREG32(reg, tmp);
435 : }
436 0 : for (i = 0; i < ARRAY_SIZE(ecc_umc_mcumc_ctrl_mask_addrs); i++) {
437 0 : reg = ecc_umc_mcumc_ctrl_mask_addrs[i];
438 0 : tmp = RREG32(reg);
439 0 : tmp &= ~bits;
440 0 : WREG32(reg, tmp);
441 : }
442 : break;
443 : case AMDGPU_IRQ_STATE_ENABLE:
444 0 : for (i = 0; i < ARRAY_SIZE(ecc_umc_mcumc_ctrl_addrs); i++) {
445 0 : reg = ecc_umc_mcumc_ctrl_addrs[i];
446 0 : tmp = RREG32(reg);
447 0 : tmp |= bits;
448 0 : WREG32(reg, tmp);
449 : }
450 0 : for (i = 0; i < ARRAY_SIZE(ecc_umc_mcumc_ctrl_mask_addrs); i++) {
451 0 : reg = ecc_umc_mcumc_ctrl_mask_addrs[i];
452 0 : tmp = RREG32(reg);
453 0 : tmp |= bits;
454 0 : WREG32(reg, tmp);
455 : }
456 : break;
457 : default:
458 : break;
459 : }
460 :
461 : return 0;
462 : }
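/*
 * Sketch of how this callback is typically reached (an assumption based
 * on the common amdgpu IRQ framework, not shown in this file): arming
 * the ECC source, e.g. amdgpu_irq_get(adev, &adev->gmc.ecc_irq, 0),
 * eventually invokes this .set handler with AMDGPU_IRQ_STATE_ENABLE,
 * which sets the low seven bits (0x7f) in every control/mask register
 * listed above; AMDGPU_IRQ_STATE_DISABLE clears them again.
 */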
463 :
464 0 : static int gmc_v9_0_vm_fault_interrupt_state(struct amdgpu_device *adev,
465 : struct amdgpu_irq_src *src,
466 : unsigned type,
467 : enum amdgpu_interrupt_state state)
468 : {
469 : struct amdgpu_vmhub *hub;
470 : u32 tmp, reg, bits, i, j;
471 :
472 0 : bits = VM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
473 : VM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
474 : VM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
475 : VM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
476 : VM_CONTEXT1_CNTL__READ_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
477 : VM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
478 : VM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK;
479 :
480 0 : switch (state) {
481 : case AMDGPU_IRQ_STATE_DISABLE:
482 0 : for (j = 0; j < adev->num_vmhubs; j++) {
483 : hub = &adev->vmhub[j];
484 0 : for (i = 0; i < 16; i++) {
485 0 : reg = hub->vm_context0_cntl + i;
486 :
487 0 : if (j == AMDGPU_GFXHUB_0)
488 0 : tmp = RREG32_SOC15_IP(GC, reg);
489 : else
490 0 : tmp = RREG32_SOC15_IP(MMHUB, reg);
491 :
492 0 : tmp &= ~bits;
493 :
494 0 : if (j == AMDGPU_GFXHUB_0)
495 0 : WREG32_SOC15_IP(GC, reg, tmp);
496 : else
497 0 : WREG32_SOC15_IP(MMHUB, reg, tmp);
498 : }
499 : }
500 : break;
501 : case AMDGPU_IRQ_STATE_ENABLE:
502 0 : for (j = 0; j < adev->num_vmhubs; j++) {
503 : hub = &adev->vmhub[j];
504 0 : for (i = 0; i < 16; i++) {
505 0 : reg = hub->vm_context0_cntl + i;
506 :
507 0 : if (j == AMDGPU_GFXHUB_0)
508 0 : tmp = RREG32_SOC15_IP(GC, reg);
509 : else
510 0 : tmp = RREG32_SOC15_IP(MMHUB, reg);
511 :
512 0 : tmp |= bits;
513 :
514 0 : if (j == AMDGPU_GFXHUB_0)
515 0 : WREG32_SOC15_IP(GC, reg, tmp);
516 : else
517 0 : WREG32_SOC15_IP(MMHUB, reg, tmp);
518 : }
519 : }
520 : break;
521 : default:
522 : break;
523 : }
524 :
525 0 : return 0;
526 : }
527 :
528 0 : static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
529 : struct amdgpu_irq_src *source,
530 : struct amdgpu_iv_entry *entry)
531 : {
532 0 : bool retry_fault = !!(entry->src_data[1] & 0x80);
533 0 : bool write_fault = !!(entry->src_data[1] & 0x20);
534 0 : uint32_t status = 0, cid = 0, rw = 0;
535 : struct amdgpu_task_info task_info;
536 : struct amdgpu_vmhub *hub;
537 : const char *mmhub_cid;
538 : const char *hub_name;
539 : u64 addr;
540 :
541 0 : addr = (u64)entry->src_data[0] << 12;
542 0 : addr |= ((u64)entry->src_data[1] & 0xf) << 44;
543 :
544 0 : if (retry_fault) {
545 : /* Returning 1 here also prevents sending the IV to the KFD */
546 :
547 : /* Process it only if it's the first fault for this address */
548 0 : if (entry->ih != &adev->irq.ih_soft &&
549 0 : amdgpu_gmc_filter_faults(adev, entry->ih, addr, entry->pasid,
550 : entry->timestamp))
551 : return 1;
552 :
553 : /* Delegate it to a different ring if the hardware hasn't
554 : * already done it.
555 : */
556 0 : if (entry->ih == &adev->irq.ih) {
557 0 : amdgpu_irq_delegate(adev, entry, 8);
558 0 : return 1;
559 : }
560 :
561 : /* Try to handle the recoverable page faults by filling page
562 : * tables
563 : */
564 0 : if (amdgpu_vm_handle_fault(adev, entry->pasid, addr, write_fault))
565 : return 1;
566 : }
567 :
568 0 : if (!printk_ratelimit())
569 : return 0;
570 :
571 0 : if (entry->client_id == SOC15_IH_CLIENTID_VMC) {
572 0 : hub_name = "mmhub0";
573 0 : hub = &adev->vmhub[AMDGPU_MMHUB_0];
574 0 : } else if (entry->client_id == SOC15_IH_CLIENTID_VMC1) {
575 0 : hub_name = "mmhub1";
576 0 : hub = &adev->vmhub[AMDGPU_MMHUB_1];
577 : } else {
578 0 : hub_name = "gfxhub0";
579 0 : hub = &adev->vmhub[AMDGPU_GFXHUB_0];
580 : }
581 :
582 0 : memset(&task_info, 0, sizeof(struct amdgpu_task_info));
583 0 : amdgpu_vm_get_task_info(adev, entry->pasid, &task_info);
584 :
585 0 : dev_err(adev->dev,
586 : "[%s] %s page fault (src_id:%u ring:%u vmid:%u "
587 : "pasid:%u, for process %s pid %d thread %s pid %d)\n",
588 : hub_name, retry_fault ? "retry" : "no-retry",
589 : entry->src_id, entry->ring_id, entry->vmid,
590 : entry->pasid, task_info.process_name, task_info.tgid,
591 : task_info.task_name, task_info.pid);
592 0 : dev_err(adev->dev, " in page starting at address 0x%016llx from IH client 0x%x (%s)\n",
593 : addr, entry->client_id,
594 : soc15_ih_clientid_name[entry->client_id]);
595 :
596 0 : if (amdgpu_sriov_vf(adev))
597 : return 0;
598 :
599 : /*
600 : * Issue a dummy read to wait for the status register to
601 : * be updated to avoid reading an incorrect value due to
602 : * the new fast GRBM interface.
603 : */
604 0 : if ((entry->vmid_src == AMDGPU_GFXHUB_0) &&
605 0 : (adev->ip_versions[GC_HWIP][0] < IP_VERSION(9, 4, 2)))
606 0 : RREG32(hub->vm_l2_pro_fault_status);
607 :
608 0 : status = RREG32(hub->vm_l2_pro_fault_status);
609 0 : cid = REG_GET_FIELD(status, VM_L2_PROTECTION_FAULT_STATUS, CID);
610 0 : rw = REG_GET_FIELD(status, VM_L2_PROTECTION_FAULT_STATUS, RW);
611 0 : WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);
612 :
613 :
614 0 : dev_err(adev->dev,
615 : "VM_L2_PROTECTION_FAULT_STATUS:0x%08X\n",
616 : status);
617 0 : if (hub == &adev->vmhub[AMDGPU_GFXHUB_0]) {
618 0 : dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n",
619 : cid >= ARRAY_SIZE(gfxhub_client_ids) ? "unknown" :
620 : gfxhub_client_ids[cid],
621 : cid);
622 : } else {
623 0 : switch (adev->ip_versions[MMHUB_HWIP][0]) {
624 : case IP_VERSION(9, 0, 0):
625 0 : mmhub_cid = mmhub_client_ids_vega10[cid][rw];
626 0 : break;
627 : case IP_VERSION(9, 3, 0):
628 0 : mmhub_cid = mmhub_client_ids_vega12[cid][rw];
629 0 : break;
630 : case IP_VERSION(9, 4, 0):
631 0 : mmhub_cid = mmhub_client_ids_vega20[cid][rw];
632 0 : break;
633 : case IP_VERSION(9, 4, 1):
634 0 : mmhub_cid = mmhub_client_ids_arcturus[cid][rw];
635 0 : break;
636 : case IP_VERSION(9, 1, 0):
637 : case IP_VERSION(9, 2, 0):
638 0 : mmhub_cid = mmhub_client_ids_raven[cid][rw];
639 0 : break;
640 : case IP_VERSION(1, 5, 0):
641 : case IP_VERSION(2, 4, 0):
642 0 : mmhub_cid = mmhub_client_ids_renoir[cid][rw];
643 0 : break;
644 : case IP_VERSION(9, 4, 2):
645 0 : mmhub_cid = mmhub_client_ids_aldebaran[cid][rw];
646 0 : break;
647 : default:
648 : mmhub_cid = NULL;
649 : break;
650 : }
651 0 : dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n",
652 : mmhub_cid ? mmhub_cid : "unknown", cid);
653 : }
654 0 : dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n",
655 : REG_GET_FIELD(status,
656 : VM_L2_PROTECTION_FAULT_STATUS, MORE_FAULTS));
657 0 : dev_err(adev->dev, "\t WALKER_ERROR: 0x%lx\n",
658 : REG_GET_FIELD(status,
659 : VM_L2_PROTECTION_FAULT_STATUS, WALKER_ERROR));
660 0 : dev_err(adev->dev, "\t PERMISSION_FAULTS: 0x%lx\n",
661 : REG_GET_FIELD(status,
662 : VM_L2_PROTECTION_FAULT_STATUS, PERMISSION_FAULTS));
663 0 : dev_err(adev->dev, "\t MAPPING_ERROR: 0x%lx\n",
664 : REG_GET_FIELD(status,
665 : VM_L2_PROTECTION_FAULT_STATUS, MAPPING_ERROR));
666 0 : dev_err(adev->dev, "\t RW: 0x%x\n", rw);
667 0 : return 0;
668 : }
669 :
670 : static const struct amdgpu_irq_src_funcs gmc_v9_0_irq_funcs = {
671 : .set = gmc_v9_0_vm_fault_interrupt_state,
672 : .process = gmc_v9_0_process_interrupt,
673 : };
674 :
675 :
676 : static const struct amdgpu_irq_src_funcs gmc_v9_0_ecc_funcs = {
677 : .set = gmc_v9_0_ecc_interrupt_state,
678 : .process = amdgpu_umc_process_ecc_irq,
679 : };
680 :
681 : static void gmc_v9_0_set_irq_funcs(struct amdgpu_device *adev)
682 : {
683 0 : adev->gmc.vm_fault.num_types = 1;
684 0 : adev->gmc.vm_fault.funcs = &gmc_v9_0_irq_funcs;
685 :
686 0 : if (!amdgpu_sriov_vf(adev) &&
687 0 : !adev->gmc.xgmi.connected_to_cpu) {
688 0 : adev->gmc.ecc_irq.num_types = 1;
689 0 : adev->gmc.ecc_irq.funcs = &gmc_v9_0_ecc_funcs;
690 : }
691 : }
692 :
693 : static uint32_t gmc_v9_0_get_invalidate_req(unsigned int vmid,
694 : uint32_t flush_type)
695 : {
696 0 : u32 req = 0;
697 :
698 0 : req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ,
699 : PER_VMID_INVALIDATE_REQ, 1 << vmid);
700 0 : req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, flush_type);
701 0 : req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PTES, 1);
702 0 : req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE0, 1);
703 0 : req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE1, 1);
704 0 : req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE2, 1);
705 0 : req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L1_PTES, 1);
706 0 : req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ,
707 : CLEAR_PROTECTION_FAULT_STATUS_ADDR, 0);
708 :
709 : return req;
710 : }
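/*
 * Callers program the returned request word into the engine's
 * VM_INVALIDATE_ENG*_REQ register and then poll the matching ACK
 * register for the (1 << vmid) bit, as gmc_v9_0_flush_gpu_tlb() and
 * gmc_v9_0_emit_flush_gpu_tlb() below do.
 */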
711 :
712 : /**
713 : * gmc_v9_0_use_invalidate_semaphore - decide whether to use the invalidate semaphore
714 : *
715 : * @adev: amdgpu_device pointer
716 : * @vmhub: vmhub type
717 : *
718 : */
719 : static bool gmc_v9_0_use_invalidate_semaphore(struct amdgpu_device *adev,
720 : uint32_t vmhub)
721 : {
722 0 : if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
723 : return false;
724 :
725 0 : return ((vmhub == AMDGPU_MMHUB_0 ||
726 0 : vmhub == AMDGPU_MMHUB_1) &&
727 0 : (!amdgpu_sriov_vf(adev)) &&
728 0 : (!(!(adev->apu_flags & AMD_APU_IS_RAVEN2) &&
729 : (adev->apu_flags & AMD_APU_IS_PICASSO))));
730 : }
731 :
732 : static bool gmc_v9_0_get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev,
733 : uint8_t vmid, uint16_t *p_pasid)
734 : {
735 : uint32_t value;
736 :
737 0 : value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
738 : + vmid);
739 0 : *p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;
740 :
741 0 : return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
742 : }
743 :
744 : /*
745 : * GART
746 : * VMID 0 is the physical GPU addresses as used by the kernel.
747 : * VMIDs 1-15 are used for userspace clients and are handled
748 : * by the amdgpu vm/hsa code.
749 : */
750 :
751 : /**
752 : * gmc_v9_0_flush_gpu_tlb - tlb flush with a certain flush type
753 : *
754 : * @adev: amdgpu_device pointer
755 : * @vmid: vm instance to flush
756 : * @vmhub: which hub to flush
757 : * @flush_type: the flush type
758 : *
759 : * Flush the TLB for the requested page table using the given flush type.
760 : */
761 0 : static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
762 : uint32_t vmhub, uint32_t flush_type)
763 : {
764 0 : bool use_semaphore = gmc_v9_0_use_invalidate_semaphore(adev, vmhub);
765 0 : const unsigned eng = 17;
766 : u32 j, inv_req, inv_req2, tmp;
767 : struct amdgpu_vmhub *hub;
768 :
769 0 : BUG_ON(vmhub >= adev->num_vmhubs);
770 :
771 0 : hub = &adev->vmhub[vmhub];
772 0 : if (adev->gmc.xgmi.num_physical_nodes &&
773 : adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 0)) {
774 : /* Vega20+XGMI caches PTEs in TC and TLB. Add a
775 : * heavy-weight TLB flush (type 2), which flushes
776 : * both. Due to a race condition with concurrent
777 : * memory accesses using the same TLB cache line, we
778 : * still need a second TLB flush after this.
779 : */
780 0 : inv_req = gmc_v9_0_get_invalidate_req(vmid, 2);
781 0 : inv_req2 = gmc_v9_0_get_invalidate_req(vmid, flush_type);
782 : } else {
783 0 : inv_req = gmc_v9_0_get_invalidate_req(vmid, flush_type);
784 0 : inv_req2 = 0;
785 : }
786 :
787 : /* This is necessary for a HW workaround under SRIOV as well
788 : * as GFXOFF under bare metal
789 : */
790 0 : if (adev->gfx.kiq.ring.sched.ready &&
791 0 : (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) &&
792 0 : down_read_trylock(&adev->reset_domain->sem)) {
793 0 : uint32_t req = hub->vm_inv_eng0_req + hub->eng_distance * eng;
794 0 : uint32_t ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng;
795 :
796 0 : amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req,
797 0 : 1 << vmid);
798 0 : up_read(&adev->reset_domain->sem);
799 0 : return;
800 : }
801 :
802 0 : spin_lock(&adev->gmc.invalidate_lock);
803 :
804 : /*
805 : * The GPU may lose the gpuvm invalidate acknowledge state across a
806 : * power-gating off cycle, so add a semaphore acquire before the
807 : * invalidation and a semaphore release after it to avoid entering the
808 : * power-gated state and work around the issue.
809 : */
810 :
811 : /* TODO: the semaphore path for GFXHUB still needs further debugging. */
812 0 : if (use_semaphore) {
813 0 : for (j = 0; j < adev->usec_timeout; j++) {
814 : /* a read return value of 1 means semaphore acquire */
815 0 : if (vmhub == AMDGPU_GFXHUB_0)
816 0 : tmp = RREG32_SOC15_IP_NO_KIQ(GC, hub->vm_inv_eng0_sem + hub->eng_distance * eng);
817 : else
818 0 : tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, hub->vm_inv_eng0_sem + hub->eng_distance * eng);
819 :
820 0 : if (tmp & 0x1)
821 : break;
822 0 : udelay(1);
823 : }
824 :
825 0 : if (j >= adev->usec_timeout)
826 0 : DRM_ERROR("Timeout waiting for sem acquire in VM flush!\n");
827 : }
828 :
829 : do {
830 0 : if (vmhub == AMDGPU_GFXHUB_0)
831 0 : WREG32_SOC15_IP_NO_KIQ(GC, hub->vm_inv_eng0_req + hub->eng_distance * eng, inv_req);
832 : else
833 0 : WREG32_SOC15_IP_NO_KIQ(MMHUB, hub->vm_inv_eng0_req + hub->eng_distance * eng, inv_req);
834 :
835 : /*
836 : * Issue a dummy read to wait for the ACK register to
837 : * be cleared to avoid a false ACK due to the new fast
838 : * GRBM interface.
839 : */
840 0 : if ((vmhub == AMDGPU_GFXHUB_0) &&
841 0 : (adev->ip_versions[GC_HWIP][0] < IP_VERSION(9, 4, 2)))
842 0 : RREG32_NO_KIQ(hub->vm_inv_eng0_req +
843 : hub->eng_distance * eng);
844 :
845 0 : for (j = 0; j < adev->usec_timeout; j++) {
846 0 : if (vmhub == AMDGPU_GFXHUB_0)
847 0 : tmp = RREG32_SOC15_IP_NO_KIQ(GC, hub->vm_inv_eng0_ack + hub->eng_distance * eng);
848 : else
849 0 : tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, hub->vm_inv_eng0_ack + hub->eng_distance * eng);
850 :
851 0 : if (tmp & (1 << vmid))
852 : break;
853 0 : udelay(1);
854 : }
855 :
856 0 : inv_req = inv_req2;
857 0 : inv_req2 = 0;
858 0 : } while (inv_req);
859 :
860 : /* TODO: the semaphore path for GFXHUB still needs further debugging. */
861 0 : if (use_semaphore) {
862 : /*
863 : * add semaphore release after invalidation,
864 : * write with 0 means semaphore release
865 : */
866 0 : if (vmhub == AMDGPU_GFXHUB_0)
867 0 : WREG32_SOC15_IP_NO_KIQ(GC, hub->vm_inv_eng0_sem + hub->eng_distance * eng, 0);
868 : else
869 0 : WREG32_SOC15_IP_NO_KIQ(MMHUB, hub->vm_inv_eng0_sem + hub->eng_distance * eng, 0);
870 : }
871 :
872 0 : spin_unlock(&adev->gmc.invalidate_lock);
873 :
874 0 : if (j < adev->usec_timeout)
875 : return;
876 :
877 0 : DRM_ERROR("Timeout waiting for VM flush ACK!\n");
878 : }
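/*
 * Minimal usage sketch (hypothetical call, assuming the usual
 * amdgpu_gmc_flush_gpu_tlb() wrapper that dispatches through
 * adev->gmc.gmc_funcs):
 *
 *   amdgpu_gmc_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB_0, 0);
 *
 * performs a light-weight (type 0) flush of the given VMID on the GFX
 * hub via this function.
 */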
879 :
880 : /**
881 : * gmc_v9_0_flush_gpu_tlb_pasid - tlb flush via pasid
882 : *
883 : * @adev: amdgpu_device pointer
884 : * @pasid: pasid to be flushed
885 : * @flush_type: the flush type
886 : * @all_hub: flush all hubs
887 : *
888 : * Flush the TLB for the requested pasid.
889 : */
890 0 : static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
891 : uint16_t pasid, uint32_t flush_type,
892 : bool all_hub)
893 : {
894 : int vmid, i;
895 : signed long r;
896 : uint32_t seq;
897 : uint16_t queried_pasid;
898 : bool ret;
899 0 : u32 usec_timeout = amdgpu_sriov_vf(adev) ? SRIOV_USEC_TIMEOUT : adev->usec_timeout;
900 0 : struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
901 0 : struct amdgpu_kiq *kiq = &adev->gfx.kiq;
902 :
903 0 : if (amdgpu_in_reset(adev))
904 : return -EIO;
905 :
906 0 : if (ring->sched.ready && down_read_trylock(&adev->reset_domain->sem)) {
907 : /* Vega20+XGMI caches PTEs in TC and TLB. Add a
908 : * heavy-weight TLB flush (type 2), which flushes
909 : * both. Due to a race condition with concurrent
910 : * memory accesses using the same TLB cache line, we
911 : * still need a second TLB flush after this.
912 : */
913 0 : bool vega20_xgmi_wa = (adev->gmc.xgmi.num_physical_nodes &&
914 0 : adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 0));
915 : /* 2 dwords flush + 8 dwords fence */
916 0 : unsigned int ndw = kiq->pmf->invalidate_tlbs_size + 8;
917 :
918 0 : if (vega20_xgmi_wa)
919 0 : ndw += kiq->pmf->invalidate_tlbs_size;
920 :
921 0 : spin_lock(&adev->gfx.kiq.ring_lock);
922 : /* 2 dwords flush + 8 dwords fence */
923 0 : amdgpu_ring_alloc(ring, ndw);
924 0 : if (vega20_xgmi_wa)
925 0 : kiq->pmf->kiq_invalidate_tlbs(ring,
926 : pasid, 2, all_hub);
927 0 : kiq->pmf->kiq_invalidate_tlbs(ring,
928 : pasid, flush_type, all_hub);
929 0 : r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
930 0 : if (r) {
931 0 : amdgpu_ring_undo(ring);
932 0 : spin_unlock(&adev->gfx.kiq.ring_lock);
933 0 : up_read(&adev->reset_domain->sem);
934 0 : return -ETIME;
935 : }
936 :
937 0 : amdgpu_ring_commit(ring);
938 0 : spin_unlock(&adev->gfx.kiq.ring_lock);
939 0 : r = amdgpu_fence_wait_polling(ring, seq, usec_timeout);
940 0 : if (r < 1) {
941 0 : dev_err(adev->dev, "wait for kiq fence error: %ld.\n", r);
942 0 : up_read(&adev->reset_domain->sem);
943 0 : return -ETIME;
944 : }
945 0 : up_read(&adev->reset_domain->sem);
946 0 : return 0;
947 : }
948 :
949 0 : for (vmid = 1; vmid < 16; vmid++) {
950 :
951 0 : ret = gmc_v9_0_get_atc_vmid_pasid_mapping_info(adev, vmid,
952 : &queried_pasid);
953 0 : if (ret && queried_pasid == pasid) {
954 0 : if (all_hub) {
955 0 : for (i = 0; i < adev->num_vmhubs; i++)
956 0 : gmc_v9_0_flush_gpu_tlb(adev, vmid,
957 : i, flush_type);
958 : } else {
959 0 : gmc_v9_0_flush_gpu_tlb(adev, vmid,
960 : AMDGPU_GFXHUB_0, flush_type);
961 : }
962 : break;
963 : }
964 : }
965 :
966 : return 0;
967 :
968 : }
969 :
970 0 : static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
971 : unsigned vmid, uint64_t pd_addr)
972 : {
973 0 : bool use_semaphore = gmc_v9_0_use_invalidate_semaphore(ring->adev, ring->funcs->vmhub);
974 0 : struct amdgpu_device *adev = ring->adev;
975 0 : struct amdgpu_vmhub *hub = &adev->vmhub[ring->funcs->vmhub];
976 0 : uint32_t req = gmc_v9_0_get_invalidate_req(vmid, 0);
977 0 : unsigned eng = ring->vm_inv_eng;
978 :
979 : /*
980 : * It may lose gpuvm invalidate acknowldege state across power-gating
981 : * off cycle, add semaphore acquire before invalidation and semaphore
982 : * release after invalidation to avoid entering power gated state
983 : * to WA the Issue
984 : */
985 :
986 : /* TODO: the semaphore path for GFXHUB still needs further debugging. */
987 0 : if (use_semaphore)
988 : /* a read return value of 1 means semaphore acquire */
989 0 : amdgpu_ring_emit_reg_wait(ring,
990 : hub->vm_inv_eng0_sem +
991 : hub->eng_distance * eng, 0x1, 0x1);
992 :
993 0 : amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 +
994 : (hub->ctx_addr_distance * vmid),
995 : lower_32_bits(pd_addr));
996 :
997 0 : amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 +
998 : (hub->ctx_addr_distance * vmid),
999 : upper_32_bits(pd_addr));
1000 :
1001 0 : amdgpu_ring_emit_reg_write_reg_wait(ring, hub->vm_inv_eng0_req +
1002 : hub->eng_distance * eng,
1003 : hub->vm_inv_eng0_ack +
1004 : hub->eng_distance * eng,
1005 : req, 1 << vmid);
1006 :
1007 : /* TODO: the semaphore path for GFXHUB still needs further debugging. */
1008 0 : if (use_semaphore)
1009 : /*
1010 : * add semaphore release after invalidation,
1011 : * write with 0 means semaphore release
1012 : */
1013 0 : amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_sem +
1014 : hub->eng_distance * eng, 0);
1015 :
1016 0 : return pd_addr;
1017 : }
1018 :
1019 0 : static void gmc_v9_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid,
1020 : unsigned pasid)
1021 : {
1022 0 : struct amdgpu_device *adev = ring->adev;
1023 : uint32_t reg;
1024 :
1025 : /* Do nothing because there's no lut register for mmhub1. */
1026 0 : if (ring->funcs->vmhub == AMDGPU_MMHUB_1)
1027 : return;
1028 :
1029 0 : if (ring->funcs->vmhub == AMDGPU_GFXHUB_0)
1030 0 : reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid;
1031 : else
1032 0 : reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT_MM) + vmid;
1033 :
1034 0 : amdgpu_ring_emit_wreg(ring, reg, pasid);
1035 : }
1036 :
1037 : /*
1038 : * PTE format on VEGA 10:
1039 : * 63:59 reserved
1040 : * 58:57 mtype
1041 : * 56 F
1042 : * 55 L
1043 : * 54 P
1044 : * 53 SW
1045 : * 52 T
1046 : * 50:48 reserved
1047 : * 47:12 4k physical page base address
1048 : * 11:7 fragment
1049 : * 6 write
1050 : * 5 read
1051 : * 4 exe
1052 : * 3 Z
1053 : * 2 snooped
1054 : * 1 system
1055 : * 0 valid
1056 : *
1057 : * PDE format on VEGA 10:
1058 : * 63:59 block fragment size
1059 : * 58:55 reserved
1060 : * 54 P
1061 : * 53:48 reserved
1062 : * 47:6 physical base address of PD or PTE
1063 : * 5:3 reserved
1064 : * 2 C
1065 : * 1 system
1066 : * 0 valid
1067 : */
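/*
 * For illustration only (a sketch built from flag macros defined
 * elsewhere in amdgpu, assumed rather than taken from this file): a
 * valid, readable and writable system page with the default NC memory
 * type would combine roughly
 *
 *   AMDGPU_PTE_VALID | AMDGPU_PTE_SYSTEM | AMDGPU_PTE_READABLE |
 *   AMDGPU_PTE_WRITEABLE | AMDGPU_PTE_MTYPE_VG10(MTYPE_NC)
 *
 * matching the valid/system/read/write/mtype bits in the layout above.
 */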
1068 :
1069 0 : static uint64_t gmc_v9_0_map_mtype(struct amdgpu_device *adev, uint32_t flags)
1070 :
1071 : {
1072 0 : switch (flags) {
1073 : case AMDGPU_VM_MTYPE_DEFAULT:
1074 : return AMDGPU_PTE_MTYPE_VG10(MTYPE_NC);
1075 : case AMDGPU_VM_MTYPE_NC:
1076 : return AMDGPU_PTE_MTYPE_VG10(MTYPE_NC);
1077 : case AMDGPU_VM_MTYPE_WC:
1078 0 : return AMDGPU_PTE_MTYPE_VG10(MTYPE_WC);
1079 : case AMDGPU_VM_MTYPE_RW:
1080 0 : return AMDGPU_PTE_MTYPE_VG10(MTYPE_RW);
1081 : case AMDGPU_VM_MTYPE_CC:
1082 0 : return AMDGPU_PTE_MTYPE_VG10(MTYPE_CC);
1083 : case AMDGPU_VM_MTYPE_UC:
1084 0 : return AMDGPU_PTE_MTYPE_VG10(MTYPE_UC);
1085 : default:
1086 : return AMDGPU_PTE_MTYPE_VG10(MTYPE_NC);
1087 : }
1088 : }
1089 :
1090 0 : static void gmc_v9_0_get_vm_pde(struct amdgpu_device *adev, int level,
1091 : uint64_t *addr, uint64_t *flags)
1092 : {
1093 0 : if (!(*flags & AMDGPU_PDE_PTE) && !(*flags & AMDGPU_PTE_SYSTEM))
1094 0 : *addr = amdgpu_gmc_vram_mc2pa(adev, *addr);
1095 0 : BUG_ON(*addr & 0xFFFF00000000003FULL);
1096 :
1097 0 : if (!adev->gmc.translate_further)
1098 : return;
1099 :
1100 0 : if (level == AMDGPU_VM_PDB1) {
1101 : /* Set the block fragment size */
1102 0 : if (!(*flags & AMDGPU_PDE_PTE))
1103 0 : *flags |= AMDGPU_PDE_BFS(0x9);
1104 :
1105 0 : } else if (level == AMDGPU_VM_PDB0) {
1106 0 : if (*flags & AMDGPU_PDE_PTE)
1107 0 : *flags &= ~AMDGPU_PDE_PTE;
1108 : else
1109 0 : *flags |= AMDGPU_PTE_TF;
1110 : }
1111 : }
1112 :
1113 0 : static void gmc_v9_0_get_vm_pte(struct amdgpu_device *adev,
1114 : struct amdgpu_bo_va_mapping *mapping,
1115 : uint64_t *flags)
1116 : {
1117 0 : *flags &= ~AMDGPU_PTE_EXECUTABLE;
1118 0 : *flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE;
1119 :
1120 0 : *flags &= ~AMDGPU_PTE_MTYPE_VG10_MASK;
1121 0 : *flags |= mapping->flags & AMDGPU_PTE_MTYPE_VG10_MASK;
1122 :
1123 0 : if (mapping->flags & AMDGPU_PTE_PRT) {
1124 0 : *flags |= AMDGPU_PTE_PRT;
1125 0 : *flags &= ~AMDGPU_PTE_VALID;
1126 : }
1127 :
1128 0 : if ((adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1) ||
1129 0 : adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2)) &&
1130 0 : !(*flags & AMDGPU_PTE_SYSTEM) &&
1131 0 : mapping->bo_va->is_xgmi)
1132 0 : *flags |= AMDGPU_PTE_SNOOPED;
1133 :
1134 0 : if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
1135 0 : *flags |= mapping->flags & AMDGPU_PTE_SNOOPED;
1136 0 : }
1137 :
1138 0 : static unsigned gmc_v9_0_get_vbios_fb_size(struct amdgpu_device *adev)
1139 : {
1140 0 : u32 d1vga_control = RREG32_SOC15(DCE, 0, mmD1VGA_CONTROL);
1141 : unsigned size;
1142 :
1143 : /* TODO move to DC so GMC doesn't need to hard-code DCN registers */
1144 :
1145 0 : if (REG_GET_FIELD(d1vga_control, D1VGA_CONTROL, D1VGA_MODE_ENABLE)) {
1146 : size = AMDGPU_VBIOS_VGA_ALLOCATION;
1147 : } else {
1148 : u32 viewport;
1149 :
1150 0 : switch (adev->ip_versions[DCE_HWIP][0]) {
1151 : case IP_VERSION(1, 0, 0):
1152 : case IP_VERSION(1, 0, 1):
1153 0 : viewport = RREG32_SOC15(DCE, 0, mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION);
1154 0 : size = (REG_GET_FIELD(viewport,
1155 : HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION, PRI_VIEWPORT_HEIGHT) *
1156 : REG_GET_FIELD(viewport,
1157 : HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION, PRI_VIEWPORT_WIDTH) *
1158 : 4);
1159 0 : break;
1160 : case IP_VERSION(2, 1, 0):
1161 0 : viewport = RREG32_SOC15(DCE, 0, mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION_DCN2);
1162 0 : size = (REG_GET_FIELD(viewport,
1163 : HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION, PRI_VIEWPORT_HEIGHT) *
1164 : REG_GET_FIELD(viewport,
1165 : HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION, PRI_VIEWPORT_WIDTH) *
1166 : 4);
1167 0 : break;
1168 : default:
1169 0 : viewport = RREG32_SOC15(DCE, 0, mmSCL0_VIEWPORT_SIZE);
1170 0 : size = (REG_GET_FIELD(viewport, SCL0_VIEWPORT_SIZE, VIEWPORT_HEIGHT) *
1171 0 : REG_GET_FIELD(viewport, SCL0_VIEWPORT_SIZE, VIEWPORT_WIDTH) *
1172 : 4);
1173 0 : break;
1174 : }
1175 : }
1176 :
1177 0 : return size;
1178 : }
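/*
 * Worked example: with a 3840x2160 primary viewport the reservation is
 * 3840 * 2160 * 4 bytes, roughly 32 MB of VRAM kept aside for the
 * pre-OS (VBIOS) framebuffer until the driver takes over scanout.
 */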
1179 :
1180 : static const struct amdgpu_gmc_funcs gmc_v9_0_gmc_funcs = {
1181 : .flush_gpu_tlb = gmc_v9_0_flush_gpu_tlb,
1182 : .flush_gpu_tlb_pasid = gmc_v9_0_flush_gpu_tlb_pasid,
1183 : .emit_flush_gpu_tlb = gmc_v9_0_emit_flush_gpu_tlb,
1184 : .emit_pasid_mapping = gmc_v9_0_emit_pasid_mapping,
1185 : .map_mtype = gmc_v9_0_map_mtype,
1186 : .get_vm_pde = gmc_v9_0_get_vm_pde,
1187 : .get_vm_pte = gmc_v9_0_get_vm_pte,
1188 : .get_vbios_fb_size = gmc_v9_0_get_vbios_fb_size,
1189 : };
1190 :
1191 : static void gmc_v9_0_set_gmc_funcs(struct amdgpu_device *adev)
1192 : {
1193 0 : adev->gmc.gmc_funcs = &gmc_v9_0_gmc_funcs;
1194 : }
1195 :
1196 0 : static void gmc_v9_0_set_umc_funcs(struct amdgpu_device *adev)
1197 : {
1198 0 : switch (adev->ip_versions[UMC_HWIP][0]) {
1199 : case IP_VERSION(6, 0, 0):
1200 0 : adev->umc.funcs = &umc_v6_0_funcs;
1201 0 : break;
1202 : case IP_VERSION(6, 1, 1):
1203 0 : adev->umc.max_ras_err_cnt_per_query = UMC_V6_1_TOTAL_CHANNEL_NUM;
1204 0 : adev->umc.channel_inst_num = UMC_V6_1_CHANNEL_INSTANCE_NUM;
1205 0 : adev->umc.umc_inst_num = UMC_V6_1_UMC_INSTANCE_NUM;
1206 0 : adev->umc.channel_offs = UMC_V6_1_PER_CHANNEL_OFFSET_VG20;
1207 0 : adev->umc.channel_idx_tbl = &umc_v6_1_channel_idx_tbl[0][0];
1208 0 : adev->umc.ras = &umc_v6_1_ras;
1209 0 : break;
1210 : case IP_VERSION(6, 1, 2):
1211 0 : adev->umc.max_ras_err_cnt_per_query = UMC_V6_1_TOTAL_CHANNEL_NUM;
1212 0 : adev->umc.channel_inst_num = UMC_V6_1_CHANNEL_INSTANCE_NUM;
1213 0 : adev->umc.umc_inst_num = UMC_V6_1_UMC_INSTANCE_NUM;
1214 0 : adev->umc.channel_offs = UMC_V6_1_PER_CHANNEL_OFFSET_ARCT;
1215 0 : adev->umc.channel_idx_tbl = &umc_v6_1_channel_idx_tbl[0][0];
1216 0 : adev->umc.ras = &umc_v6_1_ras;
1217 0 : break;
1218 : case IP_VERSION(6, 7, 0):
1219 0 : adev->umc.max_ras_err_cnt_per_query =
1220 : UMC_V6_7_TOTAL_CHANNEL_NUM * UMC_V6_7_BAD_PAGE_NUM_PER_CHANNEL;
1221 0 : adev->umc.channel_inst_num = UMC_V6_7_CHANNEL_INSTANCE_NUM;
1222 0 : adev->umc.umc_inst_num = UMC_V6_7_UMC_INSTANCE_NUM;
1223 0 : adev->umc.channel_offs = UMC_V6_7_PER_CHANNEL_OFFSET;
1224 0 : if (!adev->gmc.xgmi.connected_to_cpu)
1225 0 : adev->umc.ras = &umc_v6_7_ras;
1226 0 : if (1 & adev->smuio.funcs->get_die_id(adev))
1227 0 : adev->umc.channel_idx_tbl = &umc_v6_7_channel_idx_tbl_first[0][0];
1228 : else
1229 0 : adev->umc.channel_idx_tbl = &umc_v6_7_channel_idx_tbl_second[0][0];
1230 : break;
1231 : default:
1232 : break;
1233 : }
1234 :
1235 0 : if (adev->umc.ras) {
1236 0 : amdgpu_ras_register_ras_block(adev, &adev->umc.ras->ras_block);
1237 :
1238 0 : strcpy(adev->umc.ras->ras_block.ras_comm.name, "umc");
1239 0 : adev->umc.ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__UMC;
1240 0 : adev->umc.ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
1241 0 : adev->umc.ras_if = &adev->umc.ras->ras_block.ras_comm;
1242 :
1243 : /* If no special ras_late_init function is defined, use the default ras_late_init */
1244 0 : if (!adev->umc.ras->ras_block.ras_late_init)
1245 0 : adev->umc.ras->ras_block.ras_late_init = amdgpu_umc_ras_late_init;
1246 :
1247 : /* If no special ras_cb function is defined, use the default ras_cb */
1248 0 : if (!adev->umc.ras->ras_block.ras_cb)
1249 0 : adev->umc.ras->ras_block.ras_cb = amdgpu_umc_process_ras_data_cb;
1250 : }
1251 0 : }
1252 :
1253 : static void gmc_v9_0_set_mmhub_funcs(struct amdgpu_device *adev)
1254 : {
1255 0 : switch (adev->ip_versions[MMHUB_HWIP][0]) {
1256 : case IP_VERSION(9, 4, 1):
1257 0 : adev->mmhub.funcs = &mmhub_v9_4_funcs;
1258 : break;
1259 : case IP_VERSION(9, 4, 2):
1260 0 : adev->mmhub.funcs = &mmhub_v1_7_funcs;
1261 : break;
1262 : default:
1263 0 : adev->mmhub.funcs = &mmhub_v1_0_funcs;
1264 : break;
1265 : }
1266 : }
1267 :
1268 0 : static void gmc_v9_0_set_mmhub_ras_funcs(struct amdgpu_device *adev)
1269 : {
1270 0 : switch (adev->ip_versions[MMHUB_HWIP][0]) {
1271 : case IP_VERSION(9, 4, 0):
1272 0 : adev->mmhub.ras = &mmhub_v1_0_ras;
1273 0 : break;
1274 : case IP_VERSION(9, 4, 1):
1275 0 : adev->mmhub.ras = &mmhub_v9_4_ras;
1276 0 : break;
1277 : case IP_VERSION(9, 4, 2):
1278 0 : adev->mmhub.ras = &mmhub_v1_7_ras;
1279 0 : break;
1280 : default:
1281 : /* mmhub ras is not available */
1282 : break;
1283 : }
1284 :
1285 0 : if (adev->mmhub.ras) {
1286 0 : amdgpu_ras_register_ras_block(adev, &adev->mmhub.ras->ras_block);
1287 :
1288 0 : strcpy(adev->mmhub.ras->ras_block.ras_comm.name, "mmhub");
1289 0 : adev->mmhub.ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__MMHUB;
1290 0 : adev->mmhub.ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
1291 0 : adev->mmhub.ras_if = &adev->mmhub.ras->ras_block.ras_comm;
1292 : }
1293 0 : }
1294 :
1295 : static void gmc_v9_0_set_gfxhub_funcs(struct amdgpu_device *adev)
1296 : {
1297 0 : adev->gfxhub.funcs = &gfxhub_v1_0_funcs;
1298 : }
1299 :
1300 : static void gmc_v9_0_set_hdp_ras_funcs(struct amdgpu_device *adev)
1301 : {
1302 0 : adev->hdp.ras = &hdp_v4_0_ras;
1303 0 : amdgpu_ras_register_ras_block(adev, &adev->hdp.ras->ras_block);
1304 0 : adev->hdp.ras_if = &adev->hdp.ras->ras_block.ras_comm;
1305 : }
1306 :
1307 : static void gmc_v9_0_set_mca_funcs(struct amdgpu_device *adev)
1308 : {
1309 : /* is UMC the right IP to check for MCA? Maybe DF? */
1310 0 : switch (adev->ip_versions[UMC_HWIP][0]) {
1311 : case IP_VERSION(6, 7, 0):
1312 0 : if (!adev->gmc.xgmi.connected_to_cpu)
1313 0 : adev->mca.funcs = &mca_v3_0_funcs;
1314 : break;
1315 : default:
1316 : break;
1317 : }
1318 : }
1319 :
1320 0 : static int gmc_v9_0_early_init(void *handle)
1321 : {
1322 : int r;
1323 0 : struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1324 :
1325 : /* ARCT and VEGA20 don't have XGMI defined in their IP discovery tables */
1326 0 : if (adev->asic_type == CHIP_VEGA20 ||
1327 : adev->asic_type == CHIP_ARCTURUS)
1328 0 : adev->gmc.xgmi.supported = true;
1329 :
1330 0 : if (adev->ip_versions[XGMI_HWIP][0] == IP_VERSION(6, 1, 0)) {
1331 0 : adev->gmc.xgmi.supported = true;
1332 0 : adev->gmc.xgmi.connected_to_cpu =
1333 0 : adev->smuio.funcs->is_host_gpu_xgmi_supported(adev);
1334 : }
1335 :
1336 0 : gmc_v9_0_set_gmc_funcs(adev);
1337 0 : gmc_v9_0_set_irq_funcs(adev);
1338 0 : gmc_v9_0_set_umc_funcs(adev);
1339 0 : gmc_v9_0_set_mmhub_funcs(adev);
1340 0 : gmc_v9_0_set_mmhub_ras_funcs(adev);
1341 0 : gmc_v9_0_set_gfxhub_funcs(adev);
1342 0 : gmc_v9_0_set_hdp_ras_funcs(adev);
1343 0 : gmc_v9_0_set_mca_funcs(adev);
1344 :
1345 0 : adev->gmc.shared_aperture_start = 0x2000000000000000ULL;
1346 0 : adev->gmc.shared_aperture_end =
1347 : adev->gmc.shared_aperture_start + (4ULL << 30) - 1;
1348 0 : adev->gmc.private_aperture_start = 0x1000000000000000ULL;
1349 0 : adev->gmc.private_aperture_end =
1350 : adev->gmc.private_aperture_start + (4ULL << 30) - 1;
1351 :
1352 0 : r = amdgpu_gmc_ras_early_init(adev);
1353 0 : if (r)
1354 : return r;
1355 :
1356 0 : return 0;
1357 : }
1358 :
1359 0 : static int gmc_v9_0_late_init(void *handle)
1360 : {
1361 0 : struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1362 : int r;
1363 :
1364 0 : r = amdgpu_gmc_allocate_vm_inv_eng(adev);
1365 0 : if (r)
1366 : return r;
1367 :
1368 : /*
1369 : * Work around a performance drop issue on vega10 when the VBIOS
1370 : * enables partial writes while disabling HBM ECC.
1371 : */
1372 0 : if (!amdgpu_sriov_vf(adev) &&
1373 0 : (adev->ip_versions[UMC_HWIP][0] == IP_VERSION(6, 0, 0))) {
1374 0 : if (!(adev->ras_enabled & (1 << AMDGPU_RAS_BLOCK__UMC))) {
1375 0 : if (adev->df.funcs &&
1376 0 : adev->df.funcs->enable_ecc_force_par_wr_rmw)
1377 0 : adev->df.funcs->enable_ecc_force_par_wr_rmw(adev, false);
1378 : }
1379 : }
1380 :
1381 0 : if (!amdgpu_persistent_edc_harvesting_supported(adev)) {
1382 0 : if (adev->mmhub.ras && adev->mmhub.ras->ras_block.hw_ops &&
1383 0 : adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count)
1384 0 : adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count(adev);
1385 :
1386 0 : if (adev->hdp.ras && adev->hdp.ras->ras_block.hw_ops &&
1387 0 : adev->hdp.ras->ras_block.hw_ops->reset_ras_error_count)
1388 0 : adev->hdp.ras->ras_block.hw_ops->reset_ras_error_count(adev);
1389 : }
1390 :
1391 0 : r = amdgpu_gmc_ras_late_init(adev);
1392 0 : if (r)
1393 : return r;
1394 :
1395 0 : return amdgpu_irq_get(adev, &adev->gmc.vm_fault, 0);
1396 : }
1397 :
1398 0 : static void gmc_v9_0_vram_gtt_location(struct amdgpu_device *adev,
1399 : struct amdgpu_gmc *mc)
1400 : {
1401 0 : u64 base = adev->mmhub.funcs->get_fb_location(adev);
1402 :
1403 : /* add the xgmi offset of the physical node */
1404 0 : base += adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size;
1405 0 : if (adev->gmc.xgmi.connected_to_cpu) {
1406 0 : amdgpu_gmc_sysvm_location(adev, mc);
1407 : } else {
1408 0 : amdgpu_gmc_vram_location(adev, mc, base);
1409 0 : amdgpu_gmc_gart_location(adev, mc);
1410 0 : amdgpu_gmc_agp_location(adev, mc);
1411 : }
1412 : /* base offset of vram pages */
1413 0 : adev->vm_manager.vram_base_offset = adev->gfxhub.funcs->get_mc_fb_offset(adev);
1414 :
1415 : /* XXX: add the xgmi offset of the physical node? */
1416 0 : adev->vm_manager.vram_base_offset +=
1417 0 : adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size;
1418 0 : }
1419 :
1420 : /**
1421 : * gmc_v9_0_mc_init - initialize the memory controller driver params
1422 : *
1423 : * @adev: amdgpu_device pointer
1424 : *
1425 : * Look up the amount of vram, vram width, and decide how to place
1426 : * vram and gart within the GPU's physical address space.
1427 : * Returns 0 for success.
1428 : */
1429 0 : static int gmc_v9_0_mc_init(struct amdgpu_device *adev)
1430 : {
1431 : int r;
1432 :
1433 : /* get_memsize() reports the size in MB; convert to bytes */
1434 0 : adev->gmc.mc_vram_size =
1435 0 : adev->nbio.funcs->get_memsize(adev) * 1024ULL * 1024ULL;
1436 0 : adev->gmc.real_vram_size = adev->gmc.mc_vram_size;
1437 :
1438 0 : if (!(adev->flags & AMD_IS_APU) &&
1439 0 : !adev->gmc.xgmi.connected_to_cpu) {
1440 0 : r = amdgpu_device_resize_fb_bar(adev);
1441 0 : if (r)
1442 : return r;
1443 : }
1444 0 : adev->gmc.aper_base = pci_resource_start(adev->pdev, 0);
1445 0 : adev->gmc.aper_size = pci_resource_len(adev->pdev, 0);
1446 :
1447 : #ifdef CONFIG_X86_64
1448 : /*
1449 : * An AMD Accelerated Processing Platform (APP) supporting the GPU-HOST
1450 : * xgmi interface can use VRAM through here, as it appears as system
1451 : * reserved memory in the host address space.
1452 : *
1453 : * For APUs, VRAM is just the stolen system memory and can be accessed
1454 : * directly.
1455 : *
1456 : * Otherwise, use the legacy Host Data Path (HDP) through PCIe BAR.
1457 : */
1458 :
1459 : /* check whether both host-gpu and gpu-gpu xgmi links exist */
1460 0 : if (((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev)) ||
1461 0 : (adev->gmc.xgmi.supported &&
1462 0 : adev->gmc.xgmi.connected_to_cpu)) {
1463 0 : adev->gmc.aper_base =
1464 0 : adev->gfxhub.funcs->get_mc_fb_offset(adev) +
1465 0 : adev->gmc.xgmi.physical_node_id *
1466 0 : adev->gmc.xgmi.node_segment_size;
1467 0 : adev->gmc.aper_size = adev->gmc.real_vram_size;
1468 : }
1469 :
1470 : #endif
1471 : /* In case the PCI BAR is larger than the actual amount of vram */
1472 0 : adev->gmc.visible_vram_size = adev->gmc.aper_size;
1473 0 : if (adev->gmc.visible_vram_size > adev->gmc.real_vram_size)
1474 0 : adev->gmc.visible_vram_size = adev->gmc.real_vram_size;
1475 :
1476 : /* set the gart size */
1477 0 : if (amdgpu_gart_size == -1) {
1478 0 : switch (adev->ip_versions[GC_HWIP][0]) {
1479 : case IP_VERSION(9, 0, 1): /* all engines support GPUVM */
1480 : case IP_VERSION(9, 2, 1): /* all engines support GPUVM */
1481 : case IP_VERSION(9, 4, 0):
1482 : case IP_VERSION(9, 4, 1):
1483 : case IP_VERSION(9, 4, 2):
1484 : default:
1485 0 : adev->gmc.gart_size = 512ULL << 20;
1486 0 : break;
1487 : case IP_VERSION(9, 1, 0): /* DCE SG support */
1488 : case IP_VERSION(9, 2, 2): /* DCE SG support */
1489 : case IP_VERSION(9, 3, 0):
1490 0 : adev->gmc.gart_size = 1024ULL << 20;
1491 0 : break;
1492 : }
1493 : } else {
1494 0 : adev->gmc.gart_size = (u64)amdgpu_gart_size << 20;
1495 : }
1496 :
1497 0 : adev->gmc.gart_size += adev->pm.smu_prv_buffer_size;
1498 :
1499 0 : gmc_v9_0_vram_gtt_location(adev, &adev->gmc);
1500 :
1501 0 : return 0;
1502 : }
1503 :
1504 0 : static int gmc_v9_0_gart_init(struct amdgpu_device *adev)
1505 : {
1506 : int r;
1507 :
1508 0 : if (adev->gart.bo) {
1509 0 : WARN(1, "VEGA10 PCIE GART already initialized\n");
1510 0 : return 0;
1511 : }
1512 :
1513 0 : if (adev->gmc.xgmi.connected_to_cpu) {
1514 0 : adev->gmc.vmid0_page_table_depth = 1;
1515 0 : adev->gmc.vmid0_page_table_block_size = 12;
1516 : } else {
1517 0 : adev->gmc.vmid0_page_table_depth = 0;
1518 0 : adev->gmc.vmid0_page_table_block_size = 0;
1519 : }
1520 :
1521 : /* Initialize common gart structure */
1522 0 : r = amdgpu_gart_init(adev);
1523 0 : if (r)
1524 : return r;
1525 0 : adev->gart.table_size = adev->gart.num_gpu_pages * 8;
1526 0 : adev->gart.gart_pte_flags = AMDGPU_PTE_MTYPE_VG10(MTYPE_UC) |
1527 : AMDGPU_PTE_EXECUTABLE;
1528 :
1529 0 : r = amdgpu_gart_table_vram_alloc(adev);
1530 0 : if (r)
1531 : return r;
1532 :
1533 0 : if (adev->gmc.xgmi.connected_to_cpu) {
1534 0 : r = amdgpu_gmc_pdb0_alloc(adev);
1535 : }
1536 :
1537 : return r;
1538 : }
1539 :
1540 : /**
1541 : * gmc_v9_0_save_registers - saves regs
1542 : *
1543 : * @adev: amdgpu_device pointer
1544 : *
1545 : * This saves potential register values that should be
1546 : * restored upon resume
1547 : */
1548 0 : static void gmc_v9_0_save_registers(struct amdgpu_device *adev)
1549 : {
1550 0 : if ((adev->ip_versions[DCE_HWIP][0] == IP_VERSION(1, 0, 0)) ||
1551 : (adev->ip_versions[DCE_HWIP][0] == IP_VERSION(1, 0, 1)))
1552 0 : adev->gmc.sdpif_register = RREG32_SOC15(DCE, 0, mmDCHUBBUB_SDPIF_MMIO_CNTRL_0);
1553 0 : }
1554 :
1555 0 : static int gmc_v9_0_sw_init(void *handle)
1556 : {
1557 0 : int r, vram_width = 0, vram_type = 0, vram_vendor = 0, dma_addr_bits;
1558 0 : struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1559 :
1560 0 : adev->gfxhub.funcs->init(adev);
1561 :
1562 0 : adev->mmhub.funcs->init(adev);
1563 0 : if (adev->mca.funcs)
1564 0 : adev->mca.funcs->init(adev);
1565 :
1566 0 : spin_lock_init(&adev->gmc.invalidate_lock);
1567 :
1568 0 : r = amdgpu_atomfirmware_get_vram_info(adev,
1569 : &vram_width, &vram_type, &vram_vendor);
1570 0 : if (amdgpu_sriov_vf(adev))
1571 : /* For Vega10 SR-IOV, vram_width can't be read from ATOM as on RAVEN,
1572 : * and DF related registers are not readable; hardcoding seems to be
1573 : * the only way to set the correct vram_width
1574 : */
1575 0 : adev->gmc.vram_width = 2048;
1576 0 : else if (amdgpu_emu_mode != 1)
1577 0 : adev->gmc.vram_width = vram_width;
1578 :
1579 0 : if (!adev->gmc.vram_width) {
1580 : int chansize, numchan;
1581 :
1582 : /* hbm memory channel size */
1583 0 : if (adev->flags & AMD_IS_APU)
1584 : chansize = 64;
1585 : else
1586 0 : chansize = 128;
1587 0 : if (adev->df.funcs &&
1588 0 : adev->df.funcs->get_hbm_channel_number) {
1589 0 : numchan = adev->df.funcs->get_hbm_channel_number(adev);
1590 0 : adev->gmc.vram_width = numchan * chansize;
1591 : }
1592 : }
1593 :
1594 0 : adev->gmc.vram_type = vram_type;
1595 0 : adev->gmc.vram_vendor = vram_vendor;
1596 0 : switch (adev->ip_versions[GC_HWIP][0]) {
1597 : case IP_VERSION(9, 1, 0):
1598 : case IP_VERSION(9, 2, 2):
1599 0 : adev->num_vmhubs = 2;
1600 :
1601 0 : if (adev->rev_id == 0x0 || adev->rev_id == 0x1) {
1602 0 : amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48);
1603 : } else {
1604 : /* vm_size is 128TB + 512GB for legacy 3-level page support */
1605 0 : amdgpu_vm_adjust_size(adev, 128 * 1024 + 512, 9, 2, 48);
1606 0 : adev->gmc.translate_further =
1607 0 : adev->vm_manager.num_level > 1;
1608 : }
1609 : break;
1610 : case IP_VERSION(9, 0, 1):
1611 : case IP_VERSION(9, 2, 1):
1612 : case IP_VERSION(9, 4, 0):
1613 : case IP_VERSION(9, 3, 0):
1614 : case IP_VERSION(9, 4, 2):
1615 0 : adev->num_vmhubs = 2;
1616 :
1617 :
1618 : /*
1619 : * To support 4-level page tables, the vm size is 256TB (48 bit),
1620 : * the maximum size for Vega10, with a
1621 : * block size of 512 (9 bit).
1622 : */
1623 : /* SR-IOV restricts max_pfn below AMDGPU_GMC_HOLE */
1624 0 : if (amdgpu_sriov_vf(adev))
1625 0 : amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 47);
1626 : else
1627 0 : amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48);
1628 0 : if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
1629 0 : adev->gmc.translate_further = adev->vm_manager.num_level > 1;
1630 : break;
1631 : case IP_VERSION(9, 4, 1):
1632 0 : adev->num_vmhubs = 3;
1633 :
1634 : /* Keep the vm size same with Vega20 */
1635 0 : amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48);
1636 0 : adev->gmc.translate_further = adev->vm_manager.num_level > 1;
1637 0 : break;
1638 : default:
1639 : break;
1640 : }
1641 :
1642 : /* This interrupt is the VMC page fault. */
1643 0 : r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VMC, VMC_1_0__SRCID__VM_FAULT,
1644 : &adev->gmc.vm_fault);
1645 0 : if (r)
1646 : return r;
1647 :
1648 0 : if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1)) {
1649 0 : r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VMC1, VMC_1_0__SRCID__VM_FAULT,
1650 : &adev->gmc.vm_fault);
1651 0 : if (r)
1652 : return r;
1653 : }
1654 :
1655 0 : r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_UTCL2, UTCL2_1_0__SRCID__FAULT,
1656 : &adev->gmc.vm_fault);
1657 :
1658 0 : if (r)
1659 : return r;
1660 :
1661 0 : if (!amdgpu_sriov_vf(adev) &&
1662 0 : !adev->gmc.xgmi.connected_to_cpu) {
1663 : /* interrupt sent to DF. */
1664 0 : r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_DF, 0,
1665 : &adev->gmc.ecc_irq);
1666 0 : if (r)
1667 : return r;
1668 : }
1669 :
1670 : /* Set the internal MC address mask
1671 : * This is the max address of the GPU's
1672 : * internal address space.
1673 : */
1674 0 : adev->gmc.mc_mask = 0xffffffffffffULL; /* 48 bit MC */
1675 :
1676 0 : dma_addr_bits = adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2) ? 48:44;
1677 0 : r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(dma_addr_bits));
1678 0 : if (r) {
1679 0 : printk(KERN_WARNING "amdgpu: No suitable DMA available.\n");
1680 0 : return r;
1681 : }
1682 0 : adev->need_swiotlb = drm_need_swiotlb(dma_addr_bits);
1683 :
1684 0 : r = gmc_v9_0_mc_init(adev);
1685 0 : if (r)
1686 : return r;
1687 :
1688 0 : amdgpu_gmc_get_vbios_allocations(adev);
1689 :
1690 : /* Memory manager */
1691 0 : r = amdgpu_bo_init(adev);
1692 0 : if (r)
1693 : return r;
1694 :
1695 0 : r = gmc_v9_0_gart_init(adev);
1696 0 : if (r)
1697 : return r;
1698 :
1699 : /*
1700 : * number of VMs
1701 : * VMID 0 is reserved for System
1702 : * amdgpu graphics/compute will use VMIDs 1..n-1
1703 : * amdkfd will use VMIDs n..15
1704 : *
1705 : * The first KFD VMID is 8 for GPUs with graphics, 3 for
1706 : * compute-only GPUs. On compute-only GPUs that leaves 2 VMIDs
1707 : * for video processing.
1708 : */
1709 0 : adev->vm_manager.first_kfd_vmid =
1710 0 : (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1) ||
1711 0 : adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2)) ? 3 : 8;
1712 :
1713 0 : amdgpu_vm_manager_init(adev);
1714 :
1715 0 : gmc_v9_0_save_registers(adev);
1716 :
1717 0 : return 0;
1718 : }
1719 :
1720 0 : static int gmc_v9_0_sw_fini(void *handle)
1721 : {
1722 0 : struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1723 :
1724 0 : amdgpu_gmc_ras_fini(adev);
1725 0 : amdgpu_gem_force_release(adev);
1726 0 : amdgpu_vm_manager_fini(adev);
1727 0 : amdgpu_gart_table_vram_free(adev);
1728 0 : amdgpu_bo_free_kernel(&adev->gmc.pdb0_bo, NULL, &adev->gmc.ptr_pdb0);
1729 0 : amdgpu_bo_fini(adev);
1730 :
1731 0 : return 0;
1732 : }
1733 :
1734 0 : static void gmc_v9_0_init_golden_registers(struct amdgpu_device *adev)
1735 : {
1736 :
1737 0 : switch (adev->ip_versions[MMHUB_HWIP][0]) {
1738 : case IP_VERSION(9, 0, 0):
1739 0 : if (amdgpu_sriov_vf(adev))
1740 : break;
1741 : fallthrough;
1742 : case IP_VERSION(9, 4, 0):
1743 0 : soc15_program_register_sequence(adev,
1744 : golden_settings_mmhub_1_0_0,
1745 : ARRAY_SIZE(golden_settings_mmhub_1_0_0));
1746 0 : soc15_program_register_sequence(adev,
1747 : golden_settings_athub_1_0_0,
1748 : ARRAY_SIZE(golden_settings_athub_1_0_0));
1749 0 : break;
1750 : case IP_VERSION(9, 1, 0):
1751 : case IP_VERSION(9, 2, 0):
1752 : /* TODO for renoir */
1753 0 : soc15_program_register_sequence(adev,
1754 : golden_settings_athub_1_0_0,
1755 : ARRAY_SIZE(golden_settings_athub_1_0_0));
1756 0 : break;
1757 : default:
1758 : break;
1759 : }
1760 0 : }
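
soc15_program_register_sequence() above applies a table of golden-register entries, each of which is, to a first approximation, a read-modify-write: bits selected by an AND mask are cleared and replaced by the corresponding bits of an OR value. The sketch below shows that pattern against a fake register array; the struct layout, field names, and exact masking policy are assumptions for illustration, not a copy of the soc15 helper.

	#include <stdint.h>
	#include <stdio.h>

	/* Hypothetical golden-register entry; field names are illustrative. */
	struct golden_reg {
		unsigned int reg;	/* index into a fake MMIO array        */
		uint32_t and_mask;	/* bits owned by this entry            */
		uint32_t or_mask;	/* value to set within and_mask        */
	};

	static uint32_t fake_mmio[4] = { 0xdeadbeef, 0x00000000, 0xffffffff, 0x12345678 };

	static void program_sequence(const struct golden_reg *regs, unsigned int count)
	{
		unsigned int i;

		for (i = 0; i < count; i++) {
			uint32_t v = fake_mmio[regs[i].reg];

			v &= ~regs[i].and_mask;			/* clear owned bits  */
			v |= (regs[i].or_mask & regs[i].and_mask);	/* set new value     */
			fake_mmio[regs[i].reg] = v;
		}
	}

	int main(void)
	{
		const struct golden_reg golden[] = {
			{ 0, 0x0000ffff, 0x00001234 },	/* touch only the low 16 bits */
			{ 2, 0xffffffff, 0x00000000 },	/* full overwrite             */
		};

		program_sequence(golden, 2);
		printf("reg0 = 0x%08x, reg2 = 0x%08x\n", fake_mmio[0], fake_mmio[2]);
		return 0;
	}
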
1761 :
1762 : /**
1763 : * gmc_v9_0_restore_registers - restores regs
1764 : *
1765 : * @adev: amdgpu_device pointer
1766 : *
1767 :  * This restores register values that were saved at suspend.
1768 : */
1769 0 : void gmc_v9_0_restore_registers(struct amdgpu_device *adev)
1770 : {
1771 0 : if ((adev->ip_versions[DCE_HWIP][0] == IP_VERSION(1, 0, 0)) ||
1772 : (adev->ip_versions[DCE_HWIP][0] == IP_VERSION(1, 0, 1))) {
1773 0 : WREG32_SOC15(DCE, 0, mmDCHUBBUB_SDPIF_MMIO_CNTRL_0, adev->gmc.sdpif_register);
1774 0 : WARN_ON(adev->gmc.sdpif_register !=
1775 : RREG32_SOC15(DCE, 0, mmDCHUBBUB_SDPIF_MMIO_CNTRL_0));
1776 : }
1777 0 : }
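
gmc_v9_0_restore_registers() is the write-back half of a save/restore pair; the matching gmc_v9_0_save_registers() call appears in gmc_v9_0_sw_init() above. A minimal sketch of the pairing, assuming the save side simply captures the same DCHUBBUB_SDPIF_MMIO_CNTRL_0 value into a saved copy (fake read/write backing store here, not the driver's MMIO accessors):

	#include <stdint.h>
	#include <stdio.h>

	/* Fake MMIO cell standing in for the SDPIF register. */
	static uint32_t fake_sdpif_reg = 0x00000042;
	static uint32_t saved_sdpif;

	static void save_registers(void)		/* capture the live value        */
	{
		saved_sdpif = fake_sdpif_reg;
	}

	static void restore_registers(void)		/* write it back, then verify    */
	{
		fake_sdpif_reg = saved_sdpif;
		if (fake_sdpif_reg != saved_sdpif)	/* mirrors the WARN_ON() above   */
			fprintf(stderr, "SDPIF restore mismatch\n");
	}

	int main(void)
	{
		save_registers();
		fake_sdpif_reg = 0;			/* pretend the value was lost    */
		restore_registers();
		printf("SDPIF after restore: 0x%08x\n", fake_sdpif_reg);
		return 0;
	}
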
1778 :
1779 : /**
1780 : * gmc_v9_0_gart_enable - gart enable
1781 : *
1782 : * @adev: amdgpu_device pointer
1783 : */
1784 0 : static int gmc_v9_0_gart_enable(struct amdgpu_device *adev)
1785 : {
1786 : int r;
1787 :
1788 0 : if (adev->gmc.xgmi.connected_to_cpu)
1789 0 : amdgpu_gmc_init_pdb0(adev);
1790 :
1791 0 : if (adev->gart.bo == NULL) {
1792 0 : dev_err(adev->dev, "No VRAM object for PCIE GART.\n");
1793 0 : return -EINVAL;
1794 : }
1795 :
1796 0 : amdgpu_gtt_mgr_recover(&adev->mman.gtt_mgr);
1797 0 : r = adev->gfxhub.funcs->gart_enable(adev);
1798 0 : if (r)
1799 : return r;
1800 :
1801 0 : r = adev->mmhub.funcs->gart_enable(adev);
1802 0 : if (r)
1803 : return r;
1804 :
1805 0 : DRM_INFO("PCIE GART of %uM enabled.\n",
1806 : (unsigned)(adev->gmc.gart_size >> 20));
1807 0 : if (adev->gmc.pdb0_bo)
1808 0 : DRM_INFO("PDB0 located at 0x%016llX\n",
1809 : (unsigned long long)amdgpu_bo_gpu_offset(adev->gmc.pdb0_bo));
1810 0 : DRM_INFO("PTB located at 0x%016llX\n",
1811 : (unsigned long long)amdgpu_bo_gpu_offset(adev->gart.bo));
1812 :
1813 0 : return 0;
1814 : }
1815 :
1816 0 : static int gmc_v9_0_hw_init(void *handle)
1817 : {
1818 0 : struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1819 : bool value;
1820 : int i, r;
1821 :
1822 : /* The sequence of these two function calls matters.*/
1823 0 : gmc_v9_0_init_golden_registers(adev);
1824 :
1825 0 : if (adev->mode_info.num_crtc) {
1826 : /* Lockout access through VGA aperture*/
1827 0 : WREG32_FIELD15(DCE, 0, VGA_HDP_CONTROL, VGA_MEMORY_DISABLE, 1);
1828 : /* disable VGA render */
1829 0 : WREG32_FIELD15(DCE, 0, VGA_RENDER_CONTROL, VGA_VSTATUS_CNTL, 0);
1830 : }
1831 :
1832 0 : if (adev->mmhub.funcs->update_power_gating)
1833 0 : adev->mmhub.funcs->update_power_gating(adev, true);
1834 :
1835 0 : adev->hdp.funcs->init_registers(adev);
1836 :
1837 : /* After HDP is initialized, flush HDP.*/
1838 0 : adev->hdp.funcs->flush_hdp(adev, NULL);
1839 :
1840 0 : if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS)
1841 : value = false;
1842 : else
1843 0 : value = true;
1844 :
1845 0 : if (!amdgpu_sriov_vf(adev)) {
1846 0 : adev->gfxhub.funcs->set_fault_enable_default(adev, value);
1847 0 : adev->mmhub.funcs->set_fault_enable_default(adev, value);
1848 : }
1849 0 : for (i = 0; i < adev->num_vmhubs; ++i)
1850 0 : gmc_v9_0_flush_gpu_tlb(adev, 0, i, 0);
1851 :
1852 0 : if (adev->umc.funcs && adev->umc.funcs->init_registers)
1853 0 : adev->umc.funcs->init_registers(adev);
1854 :
1855 0 : r = gmc_v9_0_gart_enable(adev);
1856 0 : if (r)
1857 : return r;
1858 :
1859 0 : if (amdgpu_emu_mode == 1)
1860 0 : return amdgpu_gmc_vram_checking(adev);
1861 : else
1862 : return r;
1863 : }
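
The WREG32_FIELD15() calls in gmc_v9_0_hw_init() (locking out the VGA aperture and disabling VGA rendering) each update a single named field of a register, which conceptually is a read-modify-write using that field's mask and shift. The sketch below shows the pattern; the mask and shift values are placeholders, not the real DCE register layout.

	#include <stdint.h>
	#include <stdio.h>

	/* Generic field update: clear the field, then insert the new value. */
	static uint32_t reg_set_field(uint32_t reg, uint32_t mask, unsigned int shift,
				      uint32_t val)
	{
		return (reg & ~mask) | ((val << shift) & mask);
	}

	int main(void)
	{
		uint32_t vga_hdp_control = 0x00000000;
		const uint32_t VGA_MEMORY_DISABLE_MASK = 0x00000010;	/* example only */
		const unsigned int VGA_MEMORY_DISABLE_SHIFT = 4;	/* example only */

		vga_hdp_control = reg_set_field(vga_hdp_control,
						VGA_MEMORY_DISABLE_MASK,
						VGA_MEMORY_DISABLE_SHIFT, 1);
		printf("VGA_HDP_CONTROL = 0x%08x\n", vga_hdp_control);
		return 0;
	}
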
1864 :
1865 : /**
1866 : * gmc_v9_0_gart_disable - gart disable
1867 : *
1868 : * @adev: amdgpu_device pointer
1869 : *
1870 :  * This disables all VM page tables.
1871 : */
1872 : static void gmc_v9_0_gart_disable(struct amdgpu_device *adev)
1873 : {
1874 0 : adev->gfxhub.funcs->gart_disable(adev);
1875 0 : adev->mmhub.funcs->gart_disable(adev);
1876 : }
1877 :
1878 0 : static int gmc_v9_0_hw_fini(void *handle)
1879 : {
1880 0 : struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1881 :
1882 0 : gmc_v9_0_gart_disable(adev);
1883 :
1884 0 : if (amdgpu_sriov_vf(adev)) {
1885 : /* full access mode, so don't touch any GMC register */
1886 0 : DRM_DEBUG("For SRIOV client, shouldn't do anything.\n");
1887 0 : return 0;
1888 : }
1889 :
1890 :         /*
1891 :          * Pair the operations done in gmc_v9_0_hw_init so the cached GMC
1892 :          * state stays correct. Otherwise, gating again on S3 resume will
1893 :          * fail because of the stale cached state.
1894 :          */
1895 0 : if (adev->mmhub.funcs->update_power_gating)
1896 0 : adev->mmhub.funcs->update_power_gating(adev, false);
1897 :
1898 0 : amdgpu_irq_put(adev, &adev->gmc.ecc_irq, 0);
1899 0 : amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0);
1900 :
1901 0 : return 0;
1902 : }
1903 :
1904 0 : static int gmc_v9_0_suspend(void *handle)
1905 : {
1906 0 : struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1907 :
1908 0 : return gmc_v9_0_hw_fini(adev);
1909 : }
1910 :
1911 0 : static int gmc_v9_0_resume(void *handle)
1912 : {
1913 : int r;
1914 0 : struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1915 :
1916 0 : r = gmc_v9_0_hw_init(adev);
1917 0 : if (r)
1918 : return r;
1919 :
1920 0 : amdgpu_vmid_reset_all(adev);
1921 :
1922 0 : return 0;
1923 : }
1924 :
1925 0 : static bool gmc_v9_0_is_idle(void *handle)
1926 : {
1927 : /* MC is always ready in GMC v9.*/
1928 0 : return true;
1929 : }
1930 :
1931 0 : static int gmc_v9_0_wait_for_idle(void *handle)
1932 : {
1933 : /* There is no need to wait for MC idle in GMC v9.*/
1934 0 : return 0;
1935 : }
1936 :
1937 0 : static int gmc_v9_0_soft_reset(void *handle)
1938 : {
1939 : /* XXX for emulation.*/
1940 0 : return 0;
1941 : }
1942 :
1943 0 : static int gmc_v9_0_set_clockgating_state(void *handle,
1944 : enum amd_clockgating_state state)
1945 : {
1946 0 : struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1947 :
1948 0 : adev->mmhub.funcs->set_clockgating(adev, state);
1949 :
1950 0 : athub_v1_0_set_clockgating(adev, state);
1951 :
1952 0 : return 0;
1953 : }
1954 :
1955 0 : static void gmc_v9_0_get_clockgating_state(void *handle, u64 *flags)
1956 : {
1957 0 : struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1958 :
1959 0 : adev->mmhub.funcs->get_clockgating(adev, flags);
1960 :
1961 0 : athub_v1_0_get_clockgating(adev, flags);
1962 0 : }
1963 :
1964 0 : static int gmc_v9_0_set_powergating_state(void *handle,
1965 : enum amd_powergating_state state)
1966 : {
1967 0 : return 0;
1968 : }
1969 :
1970 : const struct amd_ip_funcs gmc_v9_0_ip_funcs = {
1971 : .name = "gmc_v9_0",
1972 : .early_init = gmc_v9_0_early_init,
1973 : .late_init = gmc_v9_0_late_init,
1974 : .sw_init = gmc_v9_0_sw_init,
1975 : .sw_fini = gmc_v9_0_sw_fini,
1976 : .hw_init = gmc_v9_0_hw_init,
1977 : .hw_fini = gmc_v9_0_hw_fini,
1978 : .suspend = gmc_v9_0_suspend,
1979 : .resume = gmc_v9_0_resume,
1980 : .is_idle = gmc_v9_0_is_idle,
1981 : .wait_for_idle = gmc_v9_0_wait_for_idle,
1982 : .soft_reset = gmc_v9_0_soft_reset,
1983 : .set_clockgating_state = gmc_v9_0_set_clockgating_state,
1984 : .set_powergating_state = gmc_v9_0_set_powergating_state,
1985 : .get_clockgating_state = gmc_v9_0_get_clockgating_state,
1986 : };
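
The amd_ip_funcs table above is how the amdgpu core drives this IP block: the device layer walks its list of IP blocks and invokes each stage callback through these function pointers. The sketch below shows the general ops-table dispatch pattern with a trimmed-down struct; it is illustrative only, not the amdgpu_device code itself.

	#include <stdio.h>

	/* Trimmed-down stand-in for amd_ip_funcs (names illustrative). */
	struct ip_ops {
		const char *name;
		int (*hw_init)(void *handle);
		int (*hw_fini)(void *handle);
	};

	static int demo_hw_init(void *handle) { (void)handle; printf("hw_init\n"); return 0; }
	static int demo_hw_fini(void *handle) { (void)handle; printf("hw_fini\n"); return 0; }

	static const struct ip_ops demo_ops = {
		.name    = "demo_ip",
		.hw_init = demo_hw_init,
		.hw_fini = demo_hw_fini,
	};

	int main(void)
	{
		/* The core calls each stage callback through the table if it is set. */
		if (demo_ops.hw_init && demo_ops.hw_init(NULL))
			return 1;
		if (demo_ops.hw_fini && demo_ops.hw_fini(NULL))
			return 1;
		return 0;
	}
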
1987 :
1988 : const struct amdgpu_ip_block_version gmc_v9_0_ip_block =
1989 : {
1990 : .type = AMD_IP_BLOCK_TYPE_GMC,
1991 : .major = 9,
1992 : .minor = 0,
1993 : .rev = 0,
1994 : .funcs = &gmc_v9_0_ip_funcs,
1995 : };
|