// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
 * Copyright 2022 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include <drm/drm_drv.h>

#include "amdgpu.h"
#include "amdgpu_trace.h"
#include "amdgpu_vm.h"

/*
 * amdgpu_vm_pt_cursor - state for for_each_amdgpu_vm_pt
 */
struct amdgpu_vm_pt_cursor {
	uint64_t pfn;
	struct amdgpu_vm_bo_base *parent;
	struct amdgpu_vm_bo_base *entry;
	unsigned int level;
};

/**
 * amdgpu_vm_pt_level_shift - return the addr shift for each level
 *
 * @adev: amdgpu_device pointer
 * @level: VMPT level
 *
 * Returns:
 * The number of bits the pfn needs to be right shifted for a level.
 */
static unsigned int amdgpu_vm_pt_level_shift(struct amdgpu_device *adev,
					     unsigned int level)
{
	switch (level) {
	case AMDGPU_VM_PDB2:
	case AMDGPU_VM_PDB1:
	case AMDGPU_VM_PDB0:
		return 9 * (AMDGPU_VM_PDB0 - level) +
			adev->vm_manager.block_size;
	case AMDGPU_VM_PTB:
		return 0;
	default:
		return ~0;
	}
}
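
/*
 * For example, with the common 9-bit block size this comes out to a shift
 * of 0 for the PTB, 9 for PDB0, 18 for PDB1 and 27 for PDB2, i.e. each
 * directory level selects one of 512 entries of the level below it.
 */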

/**
 * amdgpu_vm_pt_num_entries - return the number of entries in a PD/PT
 *
 * @adev: amdgpu_device pointer
 * @level: VMPT level
 *
 * Returns:
 * The number of entries in a page directory or page table.
 */
static unsigned int amdgpu_vm_pt_num_entries(struct amdgpu_device *adev,
					     unsigned int level)
{
	unsigned int shift;

	shift = amdgpu_vm_pt_level_shift(adev, adev->vm_manager.root_level);
	if (level == adev->vm_manager.root_level)
		/* For the root directory */
		return round_up(adev->vm_manager.max_pfn, 1ULL << shift)
			>> shift;
	else if (level != AMDGPU_VM_PTB)
		/* Everything in between */
		return 512;

	/* For the page tables on the leaves */
	return AMDGPU_VM_PTE_COUNT(adev);
}

/**
 * amdgpu_vm_pt_num_ats_entries - return the number of ATS entries in the root PD
 *
 * @adev: amdgpu_device pointer
 *
 * Returns:
 * The number of entries in the root page directory which need the ATS setting.
 */
static unsigned int amdgpu_vm_pt_num_ats_entries(struct amdgpu_device *adev)
{
	unsigned int shift;

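	/*
	 * AMDGPU_GMC_HOLE_START is a byte address; shifting it by the root
	 * level shift plus AMDGPU_GPU_PAGE_SHIFT turns it into a root PD
	 * index, so every root entry mapping addresses below the GMC hole
	 * counts as an ATS entry.
	 */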
	shift = amdgpu_vm_pt_level_shift(adev, adev->vm_manager.root_level);
	return AMDGPU_GMC_HOLE_START >> (shift + AMDGPU_GPU_PAGE_SHIFT);
}

/**
 * amdgpu_vm_pt_entries_mask - the mask to get the entry number of a PD/PT
 *
 * @adev: amdgpu_device pointer
 * @level: VMPT level
 *
 * Returns:
 * The mask to extract the entry number of a PD/PT from an address.
 */
static uint32_t amdgpu_vm_pt_entries_mask(struct amdgpu_device *adev,
					  unsigned int level)
{
	if (level <= adev->vm_manager.root_level)
		return 0xffffffff;
	else if (level != AMDGPU_VM_PTB)
		return 0x1ff;
	else
		return AMDGPU_VM_PTE_COUNT(adev) - 1;
}
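
/*
 * For the intermediate directory levels this matches the fixed 512 entries
 * returned by amdgpu_vm_pt_num_entries(); the root level gets a full mask
 * because its entry count is derived from max_pfn rather than being fixed.
 */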

/**
 * amdgpu_vm_pt_size - returns the size of the page table in bytes
 *
 * @adev: amdgpu_device pointer
 * @level: VMPT level
 *
 * Returns:
 * The size of the BO for a page directory or page table in bytes.
 */
static unsigned int amdgpu_vm_pt_size(struct amdgpu_device *adev,
				      unsigned int level)
{
	return AMDGPU_GPU_PAGE_ALIGN(amdgpu_vm_pt_num_entries(adev, level) * 8);
}

/**
 * amdgpu_vm_pt_parent - get the parent page directory
 *
 * @pt: child page table
 *
 * Helper to get the parent entry for the child page table. NULL if we are at
 * the root page directory.
 */
static struct amdgpu_vm_bo_base *
amdgpu_vm_pt_parent(struct amdgpu_vm_bo_base *pt)
{
	struct amdgpu_bo *parent = pt->bo->parent;

	if (!parent)
		return NULL;

	return parent->vm_bo;
}

/**
 * amdgpu_vm_pt_start - start PD/PT walk
 *
 * @adev: amdgpu_device pointer
 * @vm: amdgpu_vm structure
 * @start: start address of the walk
 * @cursor: state to initialize
 *
 * Initialize an amdgpu_vm_pt_cursor to start a walk.
 */
static void amdgpu_vm_pt_start(struct amdgpu_device *adev,
			       struct amdgpu_vm *vm, uint64_t start,
			       struct amdgpu_vm_pt_cursor *cursor)
{
	cursor->pfn = start;
	cursor->parent = NULL;
	cursor->entry = &vm->root;
	cursor->level = adev->vm_manager.root_level;
}

/**
 * amdgpu_vm_pt_descendant - go to child node
 *
 * @adev: amdgpu_device pointer
 * @cursor: current state
 *
 * Walk to the child node of the current node.
 * Returns:
 * True if the walk was possible, false otherwise.
 */
static bool amdgpu_vm_pt_descendant(struct amdgpu_device *adev,
				    struct amdgpu_vm_pt_cursor *cursor)
{
	unsigned int mask, shift, idx;

	if ((cursor->level == AMDGPU_VM_PTB) || !cursor->entry ||
	    !cursor->entry->bo)
		return false;

	mask = amdgpu_vm_pt_entries_mask(adev, cursor->level);
	shift = amdgpu_vm_pt_level_shift(adev, cursor->level);

	++cursor->level;
	idx = (cursor->pfn >> shift) & mask;
	cursor->parent = cursor->entry;
	cursor->entry = &to_amdgpu_bo_vm(cursor->entry->bo)->entries[idx];
	return true;
}

/**
 * amdgpu_vm_pt_sibling - go to sibling node
 *
 * @adev: amdgpu_device pointer
 * @cursor: current state
 *
 * Walk to the sibling node of the current node.
 * Returns:
 * True if the walk was possible, false otherwise.
 */
static bool amdgpu_vm_pt_sibling(struct amdgpu_device *adev,
				 struct amdgpu_vm_pt_cursor *cursor)
{
	unsigned int shift, num_entries;
	struct amdgpu_bo_vm *parent;

	/* Root doesn't have a sibling */
	if (!cursor->parent)
		return false;

	/* Go to our parent and see if we have a sibling */
	shift = amdgpu_vm_pt_level_shift(adev, cursor->level - 1);
	num_entries = amdgpu_vm_pt_num_entries(adev, cursor->level - 1);
	parent = to_amdgpu_bo_vm(cursor->parent->bo);

	if (cursor->entry == &parent->entries[num_entries - 1])
		return false;

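	/* Advance the pfn to the start of the sibling's address range */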
	cursor->pfn += 1ULL << shift;
	cursor->pfn &= ~((1ULL << shift) - 1);
	++cursor->entry;
	return true;
}

/**
 * amdgpu_vm_pt_ancestor - go to parent node
 *
 * @cursor: current state
 *
 * Walk to the parent node of the current node.
 * Returns:
 * True if the walk was possible, false otherwise.
 */
static bool amdgpu_vm_pt_ancestor(struct amdgpu_vm_pt_cursor *cursor)
{
	if (!cursor->parent)
		return false;

	--cursor->level;
	cursor->entry = cursor->parent;
	cursor->parent = amdgpu_vm_pt_parent(cursor->parent);
	return true;
}

/**
 * amdgpu_vm_pt_next - get next PD/PT in hierarchy
 *
 * @adev: amdgpu_device pointer
 * @cursor: current state
 *
 * Walk the PD/PT tree to the next node.
 */
static void amdgpu_vm_pt_next(struct amdgpu_device *adev,
			      struct amdgpu_vm_pt_cursor *cursor)
{
	/* First try a newborn child */
	if (amdgpu_vm_pt_descendant(adev, cursor))
		return;

	/* If that didn't work, try to find a sibling */
	while (!amdgpu_vm_pt_sibling(adev, cursor)) {
		/* No sibling, go to our parents and grandparents */
		if (!amdgpu_vm_pt_ancestor(cursor)) {
			cursor->pfn = ~0ll;
			return;
		}
	}
}

/**
 * amdgpu_vm_pt_first_dfs - start a depth-first search
 *
 * @adev: amdgpu_device structure
 * @vm: amdgpu_vm structure
 * @start: optional cursor to start with
 * @cursor: state to initialize
 *
 * Starts a depth-first traversal of the PD/PT tree.
 */
static void amdgpu_vm_pt_first_dfs(struct amdgpu_device *adev,
				   struct amdgpu_vm *vm,
				   struct amdgpu_vm_pt_cursor *start,
				   struct amdgpu_vm_pt_cursor *cursor)
{
	if (start)
		*cursor = *start;
	else
		amdgpu_vm_pt_start(adev, vm, 0, cursor);

	while (amdgpu_vm_pt_descendant(adev, cursor))
		;
}

/**
 * amdgpu_vm_pt_continue_dfs - check if the depth-first search should continue
 *
 * @start: starting point for the search
 * @entry: current entry
 *
 * Returns:
 * True when the search should continue, false otherwise.
 */
static bool amdgpu_vm_pt_continue_dfs(struct amdgpu_vm_pt_cursor *start,
				      struct amdgpu_vm_bo_base *entry)
{
	return entry && (!start || entry != start->entry);
}

/**
 * amdgpu_vm_pt_next_dfs - get the next node for a depth-first search
 *
 * @adev: amdgpu_device structure
 * @cursor: current state
 *
 * Move the cursor to the next node in a depth-first search.
 */
static void amdgpu_vm_pt_next_dfs(struct amdgpu_device *adev,
				  struct amdgpu_vm_pt_cursor *cursor)
{
	if (!cursor->entry)
		return;

	if (!cursor->parent)
		cursor->entry = NULL;
	else if (amdgpu_vm_pt_sibling(adev, cursor))
		while (amdgpu_vm_pt_descendant(adev, cursor))
			;
	else
		amdgpu_vm_pt_ancestor(cursor);
}

/*
 * for_each_amdgpu_vm_pt_dfs_safe - safe depth-first search of all PDs/PTs
 */
#define for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry)		\
	for (amdgpu_vm_pt_first_dfs((adev), (vm), (start), &(cursor)),		\
	     (entry) = (cursor).entry, amdgpu_vm_pt_next_dfs((adev), &(cursor));\
	     amdgpu_vm_pt_continue_dfs((start), (entry));			\
	     (entry) = (cursor).entry, amdgpu_vm_pt_next_dfs((adev), &(cursor)))

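/*
 * Illustrative use: this is how amdgpu_vm_pt_free_dfs() below walks and
 * frees every PD/PT under a VM:
 *
 *	struct amdgpu_vm_pt_cursor cursor;
 *	struct amdgpu_vm_bo_base *entry;
 *
 *	for_each_amdgpu_vm_pt_dfs_safe(adev, vm, NULL, cursor, entry)
 *		amdgpu_vm_pt_free(entry);
 *
 * The cursor is advanced before @entry is handed to the loop body, which is
 * what makes it safe to free @entry inside the loop.
 */
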
/**
 * amdgpu_vm_pt_clear - initially clear the PDs/PTs
 *
 * @adev: amdgpu_device pointer
 * @vm: VM to clear BO from
 * @vmbo: BO to clear
 * @immediate: use an immediate update
 *
 * Root PD needs to be reserved when calling this.
 *
 * Returns:
 * 0 on success, errno otherwise.
 */
int amdgpu_vm_pt_clear(struct amdgpu_device *adev, struct amdgpu_vm *vm,
		       struct amdgpu_bo_vm *vmbo, bool immediate)
{
	unsigned int level = adev->vm_manager.root_level;
	struct ttm_operation_ctx ctx = { true, false };
	struct amdgpu_vm_update_params params;
	struct amdgpu_bo *ancestor = &vmbo->bo;
	unsigned int entries, ats_entries;
	struct amdgpu_bo *bo = &vmbo->bo;
	uint64_t addr;
	int r, idx;

	/* Figure out our place in the hierarchy */
	if (ancestor->parent) {
		++level;
		while (ancestor->parent->parent) {
			++level;
			ancestor = ancestor->parent;
		}
	}

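	/*
	 * Decide how many entries need the default ATC setting: when ATS is
	 * enabled, the slots that map addresses below the GMC hole get
	 * AMDGPU_PTE_DEFAULT_ATC, everything else is cleared to invalid
	 * entries.
	 */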
	entries = amdgpu_bo_size(bo) / 8;
	if (!vm->pte_support_ats) {
		ats_entries = 0;

	} else if (!bo->parent) {
		ats_entries = amdgpu_vm_pt_num_ats_entries(adev);
		ats_entries = min(ats_entries, entries);
		entries -= ats_entries;

	} else {
		struct amdgpu_vm_bo_base *pt;

		pt = ancestor->vm_bo;
		ats_entries = amdgpu_vm_pt_num_ats_entries(adev);
		if ((pt - to_amdgpu_bo_vm(vm->root.bo)->entries) >=
		    ats_entries) {
			ats_entries = 0;
		} else {
			ats_entries = entries;
			entries = 0;
		}
	}

	r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
	if (r)
		return r;

	if (vmbo->shadow) {
		struct amdgpu_bo *shadow = vmbo->shadow;

		r = ttm_bo_validate(&shadow->tbo, &shadow->placement, &ctx);
		if (r)
			return r;
	}

	if (!drm_dev_enter(adev_to_drm(adev), &idx))
		return -ENODEV;

	r = vm->update_funcs->map_table(vmbo);
	if (r)
		goto exit;

	memset(&params, 0, sizeof(params));
	params.adev = adev;
	params.vm = vm;
	params.immediate = immediate;

	r = vm->update_funcs->prepare(&params, NULL, AMDGPU_SYNC_EXPLICIT);
	if (r)
		goto exit;

	addr = 0;
	if (ats_entries) {
		uint64_t value = 0, flags;

		flags = AMDGPU_PTE_DEFAULT_ATC;
		if (level != AMDGPU_VM_PTB) {
			/* Handle leaf PDEs as PTEs */
			flags |= AMDGPU_PDE_PTE;
			amdgpu_gmc_get_vm_pde(adev, level, &value, &flags);
		}

		r = vm->update_funcs->update(&params, vmbo, addr, 0,
					     ats_entries, value, flags);
		if (r)
			goto exit;

		addr += ats_entries * 8;
	}

	if (entries) {
		uint64_t value = 0, flags = 0;

		if (adev->asic_type >= CHIP_VEGA10) {
			if (level != AMDGPU_VM_PTB) {
				/* Handle leaf PDEs as PTEs */
				flags |= AMDGPU_PDE_PTE;
				amdgpu_gmc_get_vm_pde(adev, level,
						      &value, &flags);
			} else {
				/* Workaround for fault priority problem on GMC9 */
				flags = AMDGPU_PTE_EXECUTABLE;
			}
		}

		r = vm->update_funcs->update(&params, vmbo, addr, 0, entries,
					     value, flags);
		if (r)
			goto exit;
	}

	r = vm->update_funcs->commit(&params, NULL);
exit:
	drm_dev_exit(idx);
	return r;
}

/**
 * amdgpu_vm_pt_create - create bo for PD/PT
 *
 * @adev: amdgpu_device pointer
 * @vm: requesting vm
 * @level: the page table level
 * @immediate: use an immediate update
 * @vmbo: pointer to the buffer object pointer
 */
int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct amdgpu_vm *vm,
			int level, bool immediate, struct amdgpu_bo_vm **vmbo)
{
	struct amdgpu_bo_param bp;
	struct amdgpu_bo *bo;
	struct dma_resv *resv;
	unsigned int num_entries;
	int r;

	memset(&bp, 0, sizeof(bp));

	bp.size = amdgpu_vm_pt_size(adev, level);
	bp.byte_align = AMDGPU_GPU_PAGE_SIZE;
	bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
	bp.domain = amdgpu_bo_get_preferred_domain(adev, bp.domain);
	bp.flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
		AMDGPU_GEM_CREATE_CPU_GTT_USWC;

	if (level < AMDGPU_VM_PTB)
		num_entries = amdgpu_vm_pt_num_entries(adev, level);
	else
		num_entries = 0;

	bp.bo_ptr_size = struct_size((*vmbo), entries, num_entries);

	if (vm->use_cpu_for_update)
		bp.flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;

	bp.type = ttm_bo_type_kernel;
	bp.no_wait_gpu = immediate;
	if (vm->root.bo)
		bp.resv = vm->root.bo->tbo.base.resv;

	r = amdgpu_bo_create_vm(adev, &bp, vmbo);
	if (r)
		return r;

	bo = &(*vmbo)->bo;
	if (vm->is_compute_context || (adev->flags & AMD_IS_APU)) {
		(*vmbo)->shadow = NULL;
		return 0;
	}

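	/*
	 * Also create a shadow copy of the PD/PT in GTT: the shadow is what
	 * allows the page tables to be restored if their VRAM contents are
	 * lost, e.g. after a GPU reset.
	 */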
	if (!bp.resv)
		WARN_ON(dma_resv_lock(bo->tbo.base.resv,
				      NULL));
	resv = bp.resv;
	memset(&bp, 0, sizeof(bp));
	bp.size = amdgpu_vm_pt_size(adev, level);
	bp.domain = AMDGPU_GEM_DOMAIN_GTT;
	bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC;
	bp.type = ttm_bo_type_kernel;
	bp.resv = bo->tbo.base.resv;
	bp.bo_ptr_size = sizeof(struct amdgpu_bo);

	r = amdgpu_bo_create(adev, &bp, &(*vmbo)->shadow);

	if (!resv)
		dma_resv_unlock(bo->tbo.base.resv);

	if (r) {
		amdgpu_bo_unref(&bo);
		return r;
	}

	(*vmbo)->shadow->parent = amdgpu_bo_ref(bo);
	amdgpu_bo_add_to_shadow_list(*vmbo);

	return 0;
}

/**
 * amdgpu_vm_pt_alloc - Allocate a specific page table
 *
 * @adev: amdgpu_device pointer
 * @vm: VM to allocate page tables for
 * @cursor: Which page table to allocate
 * @immediate: use an immediate update
 *
 * Make sure a specific page table or directory is allocated.
 *
 * Returns:
 * 0 if the page table was allocated or already existed, negative errno if
 * an error occurred.
 */
static int amdgpu_vm_pt_alloc(struct amdgpu_device *adev,
			      struct amdgpu_vm *vm,
			      struct amdgpu_vm_pt_cursor *cursor,
			      bool immediate)
{
	struct amdgpu_vm_bo_base *entry = cursor->entry;
	struct amdgpu_bo *pt_bo;
	struct amdgpu_bo_vm *pt;
	int r;

	if (entry->bo)
		return 0;

	r = amdgpu_vm_pt_create(adev, vm, cursor->level, immediate, &pt);
	if (r)
		return r;

	/* Keep a reference to the parent directory to avoid freeing
	 * the hierarchy up in the wrong order.
	 */
	pt_bo = &pt->bo;
	pt_bo->parent = amdgpu_bo_ref(cursor->parent->bo);
	amdgpu_vm_bo_base_init(entry, vm, pt_bo);
	r = amdgpu_vm_pt_clear(adev, vm, pt, immediate);
	if (r)
		goto error_free_pt;

	return 0;

error_free_pt:
	amdgpu_bo_unref(&pt->shadow);
	amdgpu_bo_unref(&pt_bo);
	return r;
}

/**
 * amdgpu_vm_pt_free - free one PD/PT
 *
 * @entry: PDE to free
 */
static void amdgpu_vm_pt_free(struct amdgpu_vm_bo_base *entry)
{
	struct amdgpu_bo *shadow;

	if (!entry->bo)
		return;
	shadow = amdgpu_bo_shadowed(entry->bo);
	if (shadow) {
		ttm_bo_set_bulk_move(&shadow->tbo, NULL);
		amdgpu_bo_unref(&shadow);
	}
	ttm_bo_set_bulk_move(&entry->bo->tbo, NULL);
	entry->bo->vm_bo = NULL;
	list_del(&entry->vm_status);
	amdgpu_bo_unref(&entry->bo);
}

/**
 * amdgpu_vm_pt_free_dfs - free PD/PT levels
 *
 * @adev: amdgpu device structure
 * @vm: amdgpu vm structure
 * @start: optional cursor where to start freeing PDs/PTs
 *
 * Free the page directory or page table level and all sub levels.
 */
static void amdgpu_vm_pt_free_dfs(struct amdgpu_device *adev,
				  struct amdgpu_vm *vm,
				  struct amdgpu_vm_pt_cursor *start)
{
	struct amdgpu_vm_pt_cursor cursor;
	struct amdgpu_vm_bo_base *entry;

	for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry)
		amdgpu_vm_pt_free(entry);

	if (start)
		amdgpu_vm_pt_free(start->entry);
}

/**
 * amdgpu_vm_pt_free_root - free root PD
 * @adev: amdgpu device structure
 * @vm: amdgpu vm structure
 *
 * Free the root page directory and everything below it.
 */
void amdgpu_vm_pt_free_root(struct amdgpu_device *adev, struct amdgpu_vm *vm)
{
	amdgpu_vm_pt_free_dfs(adev, vm, NULL);
}

/**
 * amdgpu_vm_pt_is_root_clean - check if a root PD is clean
 *
 * @adev: amdgpu_device pointer
 * @vm: the VM to check
 *
 * Check all entries of the root PD. If any lower level PDs are allocated,
 * page tables have already been created and filled, so the VM is not clean.
 *
 * Returns:
 * True if this VM is clean, false otherwise.
 */
bool amdgpu_vm_pt_is_root_clean(struct amdgpu_device *adev,
				struct amdgpu_vm *vm)
{
	enum amdgpu_vm_level root = adev->vm_manager.root_level;
	unsigned int entries = amdgpu_vm_pt_num_entries(adev, root);
	unsigned int i = 0;

	for (i = 0; i < entries; i++) {
		if (to_amdgpu_bo_vm(vm->root.bo)->entries[i].bo)
			return false;
	}
	return true;
}

/**
 * amdgpu_vm_pde_update - update a single level in the hierarchy
 *
 * @params: parameters for the update
 * @entry: entry to update
 *
 * Makes sure the requested entry in parent is up to date.
 */
int amdgpu_vm_pde_update(struct amdgpu_vm_update_params *params,
			 struct amdgpu_vm_bo_base *entry)
{
	struct amdgpu_vm_bo_base *parent = amdgpu_vm_pt_parent(entry);
	struct amdgpu_bo *bo = parent->bo, *pbo;
	struct amdgpu_vm *vm = params->vm;
	uint64_t pde, pt, flags;
	unsigned int level;

	for (level = 0, pbo = bo->parent; pbo; ++level)
		pbo = pbo->parent;

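	/*
	 * The PDE to write lives at the entry's index within the parent
	 * directory, with 8 bytes per entry; @pt and @flags describe the
	 * child table the PDE should point at.
	 */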
	level += params->adev->vm_manager.root_level;
	amdgpu_gmc_get_pde_for_bo(entry->bo, level, &pt, &flags);
	pde = (entry - to_amdgpu_bo_vm(parent->bo)->entries) * 8;
	return vm->update_funcs->update(params, to_amdgpu_bo_vm(bo), pde, pt,
					1, 0, flags);
}

/*
 * amdgpu_vm_pte_update_flags - figure out flags for PTE updates
 *
 * Make sure to set the right flags for the PTEs at the desired level.
 */
static void amdgpu_vm_pte_update_flags(struct amdgpu_vm_update_params *params,
				       struct amdgpu_bo_vm *pt,
				       unsigned int level,
				       uint64_t pe, uint64_t addr,
				       unsigned int count, uint32_t incr,
				       uint64_t flags)
{
	if (level != AMDGPU_VM_PTB) {
		flags |= AMDGPU_PDE_PTE;
		amdgpu_gmc_get_vm_pde(params->adev, level, &addr, &flags);

	} else if (params->adev->asic_type >= CHIP_VEGA10 &&
		   !(flags & AMDGPU_PTE_VALID) &&
		   !(flags & AMDGPU_PTE_PRT)) {

		/* Workaround for fault priority problem on GMC9 */
		flags |= AMDGPU_PTE_EXECUTABLE;
	}

	params->vm->update_funcs->update(params, pt, pe, addr, count, incr,
					 flags);
}

/**
 * amdgpu_vm_pte_fragment - get fragment for PTEs
 *
 * @params: see amdgpu_vm_update_params definition
 * @start: first PTE to handle
 * @end: last PTE to handle
 * @flags: hw mapping flags
 * @frag: resulting fragment size
 * @frag_end: end of this fragment
 *
 * Returns the first possible fragment for the start and end address.
 */
static void amdgpu_vm_pte_fragment(struct amdgpu_vm_update_params *params,
				   uint64_t start, uint64_t end, uint64_t flags,
				   unsigned int *frag, uint64_t *frag_end)
{
	/**
	 * The MC L1 TLB supports variable sized pages, based on a fragment
	 * field in the PTE. When this field is set to a non-zero value, page
	 * granularity is increased from 4KB to (1 << (12 + frag)). The PTE
	 * flags are considered valid for all PTEs within the fragment range
	 * and corresponding mappings are assumed to be physically contiguous.
	 *
	 * The L1 TLB can store a single PTE for the whole fragment,
	 * significantly increasing the space available for translation
	 * caching. This leads to large improvements in throughput when the
	 * TLB is under pressure.
	 *
	 * The L2 TLB distributes small and large fragments into two
	 * asymmetric partitions. The large fragment cache is significantly
	 * larger. Thus, we try to use large fragments wherever possible.
	 * Userspace can support this by aligning virtual base address and
	 * allocation size to the fragment size.
	 *
	 * Starting with Vega10 the fragment size only controls the L1. The L2
	 * is now directly fed with small/huge/giant pages from the walker.
	 */
	unsigned int max_frag;

	if (params->adev->asic_type < CHIP_VEGA10)
		max_frag = params->adev->vm_manager.fragment_size;
	else
		max_frag = 31;

	/* system pages are not contiguous */
	if (params->pages_addr) {
		*frag = 0;
		*frag_end = end;
		return;
	}

	/* This intentionally wraps around if no bit is set */
	*frag = min_t(unsigned int, ffs(start) - 1, fls64(end - start) - 1);
	if (*frag >= max_frag) {
		*frag = max_frag;
		*frag_end = end & ~((1ULL << max_frag) - 1);
	} else {
		*frag_end = start + (1 << *frag);
	}
}
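
/*
 * Worked example (assuming a post-Vega10 max_frag of 31): for start = 0x200
 * and end = 0x600, ffs(start) - 1 = 9 and fls64(end - start) - 1 = 10, so
 * the first fragment is frag = 9 covering pages 0x200..0x3ff as one 2MB
 * fragment, and frag_end = 0x400.
 */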

/**
 * amdgpu_vm_ptes_update - make sure that page tables are valid
 *
 * @params: see amdgpu_vm_update_params definition
 * @start: start of GPU address range
 * @end: end of GPU address range
 * @dst: destination address to map to, the next dst inside the function
 * @flags: mapping flags
 *
 * Update the page tables in the range @start - @end.
 *
 * Returns:
 * 0 for success, -EINVAL for failure.
 */
int amdgpu_vm_ptes_update(struct amdgpu_vm_update_params *params,
			  uint64_t start, uint64_t end,
			  uint64_t dst, uint64_t flags)
{
	struct amdgpu_device *adev = params->adev;
	struct amdgpu_vm_pt_cursor cursor;
	uint64_t frag_start = start, frag_end;
	unsigned int frag;
	int r;

	/* figure out the initial fragment */
	amdgpu_vm_pte_fragment(params, frag_start, end, flags, &frag,
			       &frag_end);

	/* walk over the address space and update the PTs */
	amdgpu_vm_pt_start(adev, params->vm, start, &cursor);
	while (cursor.pfn < end) {
		unsigned int shift, parent_shift, mask;
		uint64_t incr, entry_end, pe_start;
		struct amdgpu_bo *pt;

		if (!params->unlocked) {
			/* make sure that the page tables covering the
			 * address range are actually allocated
			 */
			r = amdgpu_vm_pt_alloc(params->adev, params->vm,
					       &cursor, params->immediate);
			if (r)
				return r;
		}

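		/*
		 * Pick the level to update: unlocked updates may only touch
		 * leaf PTBs, pre-Vega10 hardware has no huge page support,
		 * and otherwise the level is chosen so that its address
		 * shift brackets the current fragment size.
		 */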
		shift = amdgpu_vm_pt_level_shift(adev, cursor.level);
		parent_shift = amdgpu_vm_pt_level_shift(adev, cursor.level - 1);
		if (params->unlocked) {
			/* Unlocked updates are only allowed on the leaves */
			if (amdgpu_vm_pt_descendant(adev, &cursor))
				continue;
		} else if (adev->asic_type < CHIP_VEGA10 &&
			   (flags & AMDGPU_PTE_VALID)) {
			/* No huge page support before GMC v9 */
			if (cursor.level != AMDGPU_VM_PTB) {
				if (!amdgpu_vm_pt_descendant(adev, &cursor))
					return -ENOENT;
				continue;
			}
		} else if (frag < shift) {
			/* We can't use this level when the fragment size is
			 * smaller than the address shift. Go to the next
			 * child entry and try again.
			 */
			if (amdgpu_vm_pt_descendant(adev, &cursor))
				continue;
		} else if (frag >= parent_shift) {
			/* If the fragment size is even larger than the parent
			 * shift we should go up one level and check it again.
			 */
			if (!amdgpu_vm_pt_ancestor(&cursor))
				return -EINVAL;
			continue;
		}

		pt = cursor.entry->bo;
		if (!pt) {
			/* We need all PDs and PTs for mapping something, */
			if (flags & AMDGPU_PTE_VALID)
				return -ENOENT;

			/* but unmapping something can happen at a higher
			 * level.
			 */
			if (!amdgpu_vm_pt_ancestor(&cursor))
				return -EINVAL;

			pt = cursor.entry->bo;
			shift = parent_shift;
			frag_end = max(frag_end, ALIGN(frag_start + 1,
						       1ULL << shift));
		}

		/* Looks good so far, calculate parameters for the update */
		incr = (uint64_t)AMDGPU_GPU_PAGE_SIZE << shift;
		mask = amdgpu_vm_pt_entries_mask(adev, cursor.level);
		pe_start = ((cursor.pfn >> shift) & mask) * 8;
		entry_end = ((uint64_t)mask + 1) << shift;
		entry_end += cursor.pfn & ~(entry_end - 1);
		entry_end = min(entry_end, end);

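		/*
		 * Write the range in fragment sized chunks: each iteration
		 * covers everything up to min(entry_end, frag_end) and then
		 * moves on to the next fragment.
		 */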
		do {
			struct amdgpu_vm *vm = params->vm;
			uint64_t upd_end = min(entry_end, frag_end);
			unsigned int nptes = (upd_end - frag_start) >> shift;
			uint64_t upd_flags = flags | AMDGPU_PTE_FRAG(frag);

			/* This can happen when we set higher level PDs to
			 * silent to stop fault floods.
			 */
			nptes = max(nptes, 1u);

			trace_amdgpu_vm_update_ptes(params, frag_start, upd_end,
						    min(nptes, 32u), dst, incr,
						    upd_flags,
						    vm->task_info.pid,
						    vm->immediate.fence_context);
			amdgpu_vm_pte_update_flags(params, to_amdgpu_bo_vm(pt),
						   cursor.level, pe_start, dst,
						   nptes, incr, upd_flags);

			pe_start += nptes * 8;
			dst += nptes * incr;

			frag_start = upd_end;
			if (frag_start >= frag_end) {
				/* figure out the next fragment */
				amdgpu_vm_pte_fragment(params, frag_start, end,
						       flags, &frag, &frag_end);
				if (frag < shift)
					break;
			}
		} while (frag_start < entry_end);

		if (amdgpu_vm_pt_descendant(adev, &cursor)) {
			/* Free all child entries.
			 * Update the tables with the flags and addresses and free up subsequent
			 * tables in the case of huge pages or freed up areas.
			 * This is the maximum you can free, because all other page tables are not
			 * completely covered by the range and so potentially still in use.
			 */
			while (cursor.pfn < frag_start) {
				/* Make sure previous mapping is freed */
				if (cursor.entry->bo) {
					params->table_freed = true;
					amdgpu_vm_pt_free_dfs(adev, params->vm,
							      &cursor);
				}
				amdgpu_vm_pt_next(adev, &cursor);
			}

		} else if (frag >= shift) {
			/* or just move on to the next on the same level. */
			amdgpu_vm_pt_next(adev, &cursor);
		}
	}

	return 0;
}