Line data Source code
1 : /*
2 : * mm/rmap.c - physical to virtual reverse mappings
3 : *
4 : * Copyright 2001, Rik van Riel <riel@conectiva.com.br>
5 : * Released under the General Public License (GPL).
6 : *
7 : * Simple, low overhead reverse mapping scheme.
8 : * Please try to keep this thing as modular as possible.
9 : *
10 : * Provides methods for unmapping each kind of mapped page:
11 : * the anon methods track anonymous pages, and
12 : * the file methods track pages belonging to an inode.
13 : *
14 : * Original design by Rik van Riel <riel@conectiva.com.br> 2001
15 : * File methods by Dave McCracken <dmccr@us.ibm.com> 2003, 2004
16 : * Anonymous methods by Andrea Arcangeli <andrea@suse.de> 2004
17 : * Contributions by Hugh Dickins 2003, 2004
18 : */
19 :
20 : /*
21 : * Lock ordering in mm:
22 : *
23 : * inode->i_rwsem (while writing or truncating, not reading or faulting)
24 : * mm->mmap_lock
25 : * mapping->invalidate_lock (in filemap_fault)
26 : * page->flags PG_locked (lock_page) * (see hugetlbfs below)
27 : * hugetlbfs_i_mmap_rwsem_key (in huge_pmd_share)
28 : * mapping->i_mmap_rwsem
29 : * hugetlb_fault_mutex (hugetlbfs specific page fault mutex)
30 : * anon_vma->rwsem
31 : * mm->page_table_lock or pte_lock
32 : * swap_lock (in swap_duplicate, swap_info_get)
33 : * mmlist_lock (in mmput, drain_mmlist and others)
34 : * mapping->private_lock (in block_dirty_folio)
35 : * folio_lock_memcg move_lock (in block_dirty_folio)
36 : * i_pages lock (widely used)
37 : * lruvec->lru_lock (in folio_lruvec_lock_irq)
38 : * inode->i_lock (in set_page_dirty's __mark_inode_dirty)
39 : * bdi.wb->list_lock (in set_page_dirty's __mark_inode_dirty)
40 : * sb_lock (within inode_lock in fs/fs-writeback.c)
41 : * i_pages lock (widely used, in set_page_dirty,
42 : * in arch-dependent flush_dcache_mmap_lock,
43 : * within bdi.wb->list_lock in __sync_single_inode)
44 : *
45 : * anon_vma->rwsem,mapping->i_mmap_rwsem (memory_failure, collect_procs_anon)
46 : * ->tasklist_lock
47 : * pte map lock
48 : *
49 : * * hugetlbfs PageHuge() pages take locks in this order:
50 : * mapping->i_mmap_rwsem
51 : * hugetlb_fault_mutex (hugetlbfs specific page fault mutex)
52 : * page->flags PG_locked (lock_page)
53 : */
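: /*
: * Illustrative sketch of the common anonymous-fault path respecting the
: * ordering above (an assumption for illustration, not a literal kernel
: * call chain):
: *
: *    mmap_read_lock(mm);                              // mm->mmap_lock
: *    anon_vma_prepare(vma);                           // may take anon_vma->rwsem
: *                                                     // and mm->page_table_lock
: *    pte = pte_offset_map_lock(mm, pmd, addr, &ptl);  // pte_lock
: *    ...
: *    pte_unmap_unlock(pte, ptl);
: *    mmap_read_unlock(mm);
: */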
54 :
55 : #include <linux/mm.h>
56 : #include <linux/sched/mm.h>
57 : #include <linux/sched/task.h>
58 : #include <linux/pagemap.h>
59 : #include <linux/swap.h>
60 : #include <linux/swapops.h>
61 : #include <linux/slab.h>
62 : #include <linux/init.h>
63 : #include <linux/ksm.h>
64 : #include <linux/rmap.h>
65 : #include <linux/rcupdate.h>
66 : #include <linux/export.h>
67 : #include <linux/memcontrol.h>
68 : #include <linux/mmu_notifier.h>
69 : #include <linux/migrate.h>
70 : #include <linux/hugetlb.h>
71 : #include <linux/huge_mm.h>
72 : #include <linux/backing-dev.h>
73 : #include <linux/page_idle.h>
74 : #include <linux/memremap.h>
75 : #include <linux/userfaultfd_k.h>
76 :
77 : #include <asm/tlbflush.h>
78 :
79 : #define CREATE_TRACE_POINTS
80 : #include <trace/events/tlb.h>
81 : #include <trace/events/migrate.h>
82 :
83 : #include "internal.h"
84 :
85 : static struct kmem_cache *anon_vma_cachep;
86 : static struct kmem_cache *anon_vma_chain_cachep;
87 :
88 0 : static inline struct anon_vma *anon_vma_alloc(void)
89 : {
90 : struct anon_vma *anon_vma;
91 :
92 0 : anon_vma = kmem_cache_alloc(anon_vma_cachep, GFP_KERNEL);
93 0 : if (anon_vma) {
94 0 : atomic_set(&anon_vma->refcount, 1);
95 0 : anon_vma->degree = 1; /* Reference for first vma */
96 0 : anon_vma->parent = anon_vma;
97 : /*
98 : * Initialise the anon_vma root to point to itself. If called
99 : * from fork, the root will be reset to the parent's anon_vma.
100 : */
101 0 : anon_vma->root = anon_vma;
102 : }
103 :
104 0 : return anon_vma;
105 : }
106 :
107 0 : static inline void anon_vma_free(struct anon_vma *anon_vma)
108 : {
109 : VM_BUG_ON(atomic_read(&anon_vma->refcount));
110 :
111 : /*
112 : * Synchronize against folio_lock_anon_vma_read() such that
113 : * we can safely hold the lock without the anon_vma getting
114 : * freed.
115 : *
116 : * Relies on the full mb implied by the atomic_dec_and_test() from
117 : * put_anon_vma() against the acquire barrier implied by
118 : * down_read_trylock() from folio_lock_anon_vma_read(). This orders:
119 : *
120 : * folio_lock_anon_vma_read() VS put_anon_vma()
121 : * down_read_trylock() atomic_dec_and_test()
122 : * LOCK MB
123 : * atomic_read() rwsem_is_locked()
124 : *
125 : * LOCK should suffice since the actual taking of the lock must
126 : * happen _before_ what follows.
127 : */
128 : might_sleep();
129 0 : if (rwsem_is_locked(&anon_vma->root->rwsem)) {
130 0 : anon_vma_lock_write(anon_vma);
131 0 : anon_vma_unlock_write(anon_vma);
132 : }
133 :
134 0 : kmem_cache_free(anon_vma_cachep, anon_vma);
135 0 : }
136 :
137 : static inline struct anon_vma_chain *anon_vma_chain_alloc(gfp_t gfp)
138 : {
139 0 : return kmem_cache_alloc(anon_vma_chain_cachep, gfp);
140 : }
141 :
142 : static void anon_vma_chain_free(struct anon_vma_chain *anon_vma_chain)
143 : {
144 0 : kmem_cache_free(anon_vma_chain_cachep, anon_vma_chain);
145 : }
146 :
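: /*
: * Link one anon_vma_chain both ways: onto the vma's anon_vma_chain list
: * (walked by anon_vma_clone() and unlink_anon_vmas()) and into the
: * anon_vma's interval tree (walked by rmap to find every vma that may map
: * the anon_vma's pages). Callers hold the anon_vma root rwsem for write.
: */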
147 : static void anon_vma_chain_link(struct vm_area_struct *vma,
148 : struct anon_vma_chain *avc,
149 : struct anon_vma *anon_vma)
150 : {
151 0 : avc->vma = vma;
152 0 : avc->anon_vma = anon_vma;
153 0 : list_add(&avc->same_vma, &vma->anon_vma_chain);
154 0 : anon_vma_interval_tree_insert(avc, &anon_vma->rb_root);
155 : }
156 :
157 : /**
158 : * __anon_vma_prepare - attach an anon_vma to a memory region
159 : * @vma: the memory region in question
160 : *
161 : * This makes sure the memory mapping described by 'vma' has
162 : * an 'anon_vma' attached to it, so that we can associate the
163 : * anonymous pages mapped into it with that anon_vma.
164 : *
165 : * The common case will be that we already have one, which
166 : * is handled inline by anon_vma_prepare(). But if
167 : * not we either need to find an adjacent mapping that we
168 : * can re-use the anon_vma from (very common when the only
169 : * reason for splitting a vma has been mprotect()), or we
170 : * allocate a new one.
171 : *
172 : * Anon-vma allocations are very subtle, because we may have
173 : * optimistically looked up an anon_vma in folio_lock_anon_vma_read()
174 : * and that may actually touch the rwsem even in the newly
175 : * allocated vma (it depends on RCU to make sure that the
176 : * anon_vma isn't actually destroyed).
177 : *
178 : * As a result, we need to do proper anon_vma locking even
179 : * for the new allocation. At the same time, we do not want
180 : * to do any locking for the common case of already having
181 : * an anon_vma.
182 : *
183 : * This must be called with the mmap_lock held for reading.
184 : */
185 0 : int __anon_vma_prepare(struct vm_area_struct *vma)
186 : {
187 0 : struct mm_struct *mm = vma->vm_mm;
188 : struct anon_vma *anon_vma, *allocated;
189 : struct anon_vma_chain *avc;
190 :
191 : might_sleep();
192 :
193 0 : avc = anon_vma_chain_alloc(GFP_KERNEL);
194 0 : if (!avc)
195 : goto out_enomem;
196 :
197 0 : anon_vma = find_mergeable_anon_vma(vma);
198 0 : allocated = NULL;
199 0 : if (!anon_vma) {
200 0 : anon_vma = anon_vma_alloc();
201 0 : if (unlikely(!anon_vma))
202 : goto out_enomem_free_avc;
203 : allocated = anon_vma;
204 : }
205 :
206 0 : anon_vma_lock_write(anon_vma);
207 : /* page_table_lock to protect against threads */
208 0 : spin_lock(&mm->page_table_lock);
209 0 : if (likely(!vma->anon_vma)) {
210 0 : vma->anon_vma = anon_vma;
211 0 : anon_vma_chain_link(vma, avc, anon_vma);
212 : /* vma reference or self-parent link for new root */
213 0 : anon_vma->degree++;
214 0 : allocated = NULL;
215 0 : avc = NULL;
216 : }
217 0 : spin_unlock(&mm->page_table_lock);
218 0 : anon_vma_unlock_write(anon_vma);
219 :
220 0 : if (unlikely(allocated))
221 : put_anon_vma(allocated);
222 0 : if (unlikely(avc))
223 : anon_vma_chain_free(avc);
224 :
225 : return 0;
226 :
227 : out_enomem_free_avc:
228 : anon_vma_chain_free(avc);
229 : out_enomem:
230 : return -ENOMEM;
231 : }
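:
: /*
: * Usage sketch: the anonymous fault path calls the inline wrapper
: * anon_vma_prepare(vma) from include/linux/rmap.h, which only drops into
: * __anon_vma_prepare() when vma->anon_vma is still NULL, e.g.
: *
: *    if (unlikely(anon_vma_prepare(vma)))
: *        return VM_FAULT_OOM;
: */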
232 :
233 : /*
234 : * This is a useful helper function for locking the anon_vma root as
235 : * we traverse the vma->anon_vma_chain, looping over anon_vma's that
236 : * have the same vma.
237 : *
238 : * Such anon_vma's should have the same root, so you'd expect to see
239 : * just a single mutex_lock for the whole traversal.
240 : */
241 0 : static inline struct anon_vma *lock_anon_vma_root(struct anon_vma *root, struct anon_vma *anon_vma)
242 : {
243 0 : struct anon_vma *new_root = anon_vma->root;
244 0 : if (new_root != root) {
245 0 : if (WARN_ON_ONCE(root))
246 0 : up_write(&root->rwsem);
247 0 : root = new_root;
248 0 : down_write(&root->rwsem);
249 : }
250 0 : return root;
251 : }
252 :
253 : static inline void unlock_anon_vma_root(struct anon_vma *root)
254 : {
255 0 : if (root)
256 0 : up_write(&root->rwsem);
257 : }
258 :
259 : /*
260 : * Attach the anon_vmas from src to dst.
261 : * Returns 0 on success, -ENOMEM on failure.
262 : *
263 : * anon_vma_clone() is called by __vma_adjust(), __split_vma(), copy_vma() and
264 : * anon_vma_fork(). The first three want an exact copy of src, while the last
265 : * one, anon_vma_fork(), may try to reuse an existing anon_vma to prevent
266 : * endless growth of anon_vma. Since dst->anon_vma is set to NULL before the call,
267 : * we can identify this case by checking (!dst->anon_vma && src->anon_vma).
268 : *
269 : * If (!dst->anon_vma && src->anon_vma) is true, this function tries to find
270 : * and reuse existing anon_vma which has no vmas and only one child anon_vma.
271 : * This prevents degradation of the anon_vma hierarchy into an endless
272 : * linear chain in the case of a constantly forking task. On the other
273 : * hand, an anon_vma with more than one child isn't reused even if there
274 : * is no live vma, so the rmap walker has a good chance of avoiding a scan
275 : * of the whole hierarchy when it searches for where a page is mapped.
276 : */
277 0 : int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src)
278 : {
279 : struct anon_vma_chain *avc, *pavc;
280 0 : struct anon_vma *root = NULL;
281 :
282 0 : list_for_each_entry_reverse(pavc, &src->anon_vma_chain, same_vma) {
283 : struct anon_vma *anon_vma;
284 :
285 0 : avc = anon_vma_chain_alloc(GFP_NOWAIT | __GFP_NOWARN);
286 0 : if (unlikely(!avc)) {
287 0 : unlock_anon_vma_root(root);
288 0 : root = NULL;
289 0 : avc = anon_vma_chain_alloc(GFP_KERNEL);
290 0 : if (!avc)
291 : goto enomem_failure;
292 : }
293 0 : anon_vma = pavc->anon_vma;
294 0 : root = lock_anon_vma_root(root, anon_vma);
295 0 : anon_vma_chain_link(dst, avc, anon_vma);
296 :
297 : /*
298 : * Reuse an existing anon_vma if its degree is lower than two,
299 : * which means it has no vma and only one anon_vma child.
300 : *
301 : * Do not choose the parent anon_vma, otherwise the first child
302 : * will always reuse it. The root anon_vma is never reused:
303 : * it has self-parent reference and at least one child.
304 : */
305 0 : if (!dst->anon_vma && src->anon_vma &&
306 0 : anon_vma != src->anon_vma && anon_vma->degree < 2)
307 0 : dst->anon_vma = anon_vma;
308 : }
309 0 : if (dst->anon_vma)
310 0 : dst->anon_vma->degree++;
311 : unlock_anon_vma_root(root);
312 : return 0;
313 :
314 : enomem_failure:
315 : /*
316 : * dst->anon_vma is dropped here otherwise its degree can be incorrectly
317 : * decremented in unlink_anon_vmas().
318 : * We can safely do this because callers of anon_vma_clone() don't care
319 : * about dst->anon_vma if anon_vma_clone() failed.
320 : */
321 0 : dst->anon_vma = NULL;
322 0 : unlink_anon_vmas(dst);
323 0 : return -ENOMEM;
324 : }
325 :
326 : /*
327 : * Attach vma to its own anon_vma, as well as to the anon_vmas that
328 : * the corresponding VMA in the parent process is attached to.
329 : * Returns 0 on success, non-zero on failure.
330 : */
331 0 : int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma)
332 : {
333 : struct anon_vma_chain *avc;
334 : struct anon_vma *anon_vma;
335 : int error;
336 :
337 : /* Don't bother if the parent process has no anon_vma here. */
338 0 : if (!pvma->anon_vma)
339 : return 0;
340 :
341 : /* Drop inherited anon_vma, we'll reuse existing or allocate new. */
342 0 : vma->anon_vma = NULL;
343 :
344 : /*
345 : * First, attach the new VMA to the parent VMA's anon_vmas,
346 : * so rmap can find non-COWed pages in child processes.
347 : */
348 0 : error = anon_vma_clone(vma, pvma);
349 0 : if (error)
350 : return error;
351 :
352 : /* An existing anon_vma has been reused, all done then. */
353 0 : if (vma->anon_vma)
354 : return 0;
355 :
356 : /* Then add our own anon_vma. */
357 0 : anon_vma = anon_vma_alloc();
358 0 : if (!anon_vma)
359 : goto out_error;
360 0 : avc = anon_vma_chain_alloc(GFP_KERNEL);
361 0 : if (!avc)
362 : goto out_error_free_anon_vma;
363 :
364 : /*
365 : * The root anon_vma's rwsem is the lock actually used when we
366 : * lock any of the anon_vmas in this anon_vma tree.
367 : */
368 0 : anon_vma->root = pvma->anon_vma->root;
369 0 : anon_vma->parent = pvma->anon_vma;
370 : /*
371 : * With refcounts, an anon_vma can stay around longer than the
372 : * process it belongs to. The root anon_vma needs to be pinned until
373 : * this anon_vma is freed, because the lock lives in the root.
374 : */
375 0 : get_anon_vma(anon_vma->root);
376 : /* Mark this anon_vma as the one where our new (COWed) pages go. */
377 0 : vma->anon_vma = anon_vma;
378 0 : anon_vma_lock_write(anon_vma);
379 0 : anon_vma_chain_link(vma, avc, anon_vma);
380 0 : anon_vma->parent->degree++;
381 0 : anon_vma_unlock_write(anon_vma);
382 :
383 0 : return 0;
384 :
385 : out_error_free_anon_vma:
386 : put_anon_vma(anon_vma);
387 : out_error:
388 0 : unlink_anon_vmas(vma);
389 0 : return -ENOMEM;
390 : }
391 :
392 0 : void unlink_anon_vmas(struct vm_area_struct *vma)
393 : {
394 : struct anon_vma_chain *avc, *next;
395 0 : struct anon_vma *root = NULL;
396 :
397 : /*
398 : * Unlink each anon_vma chained to the VMA. This list is ordered
399 : * from newest to oldest, ensuring the root anon_vma gets freed last.
400 : */
401 0 : list_for_each_entry_safe(avc, next, &vma->anon_vma_chain, same_vma) {
402 0 : struct anon_vma *anon_vma = avc->anon_vma;
403 :
404 0 : root = lock_anon_vma_root(root, anon_vma);
405 0 : anon_vma_interval_tree_remove(avc, &anon_vma->rb_root);
406 :
407 : /*
408 : * Leave empty anon_vmas on the list - we'll need
409 : * to free them outside the lock.
410 : */
411 0 : if (RB_EMPTY_ROOT(&anon_vma->rb_root.rb_root)) {
412 0 : anon_vma->parent->degree--;
413 0 : continue;
414 : }
415 :
416 0 : list_del(&avc->same_vma);
417 : anon_vma_chain_free(avc);
418 : }
419 0 : if (vma->anon_vma) {
420 0 : vma->anon_vma->degree--;
421 :
422 : /*
423 : * vma may still be needed after the unlink, and a new anon_vma will be
424 : * prepared when a fault is handled.
425 : */
426 0 : vma->anon_vma = NULL;
427 : }
428 0 : unlock_anon_vma_root(root);
429 :
430 : /*
431 : * Iterate the list once more, it now only contains empty and unlinked
432 : * anon_vmas, destroy them. Could not do before due to __put_anon_vma()
433 : * needing to write-acquire the anon_vma->root->rwsem.
434 : */
435 0 : list_for_each_entry_safe(avc, next, &vma->anon_vma_chain, same_vma) {
436 0 : struct anon_vma *anon_vma = avc->anon_vma;
437 :
438 : VM_WARN_ON(anon_vma->degree);
439 0 : put_anon_vma(anon_vma);
440 :
441 0 : list_del(&avc->same_vma);
442 0 : anon_vma_chain_free(avc);
443 : }
444 0 : }
445 :
446 0 : static void anon_vma_ctor(void *data)
447 : {
448 0 : struct anon_vma *anon_vma = data;
449 :
450 0 : init_rwsem(&anon_vma->rwsem);
451 0 : atomic_set(&anon_vma->refcount, 0);
452 0 : anon_vma->rb_root = RB_ROOT_CACHED;
453 0 : }
454 :
455 1 : void __init anon_vma_init(void)
456 : {
457 1 : anon_vma_cachep = kmem_cache_create("anon_vma", sizeof(struct anon_vma),
458 : 0, SLAB_TYPESAFE_BY_RCU|SLAB_PANIC|SLAB_ACCOUNT,
459 : anon_vma_ctor);
460 1 : anon_vma_chain_cachep = KMEM_CACHE(anon_vma_chain,
461 : SLAB_PANIC|SLAB_ACCOUNT);
462 1 : }
463 :
464 : /*
465 : * Getting a lock on a stable anon_vma from a page off the LRU is tricky!
466 : *
467 : * Since there is no serialization whatsoever against page_remove_rmap(),
468 : * the best this function can do is return a refcount-increased anon_vma
469 : * that might have been relevant to this page.
470 : *
471 : * The page might have been remapped to a different anon_vma or the anon_vma
472 : * returned may already be freed (and even reused).
473 : *
474 : * In case it was remapped to a different anon_vma, the new anon_vma will be a
475 : * child of the old anon_vma, and the anon_vma lifetime rules will therefore
476 : * ensure that any anon_vma obtained from the page will still be valid for as
477 : * long as we observe page_mapped() [ hence all those page_mapped() tests ].
478 : *
479 : * All users of this function must be very careful when walking the anon_vma
480 : * chain and verify that the page in question is indeed mapped in it
481 : * [ something equivalent to page_mapped_in_vma() ].
482 : *
483 : * Since anon_vma's slab is SLAB_TYPESAFE_BY_RCU and we know from
484 : * page_remove_rmap() that the anon_vma pointer from page->mapping is valid
485 : * if there is a mapcount, we can dereference the anon_vma after observing
486 : * those.
487 : */
488 0 : struct anon_vma *page_get_anon_vma(struct page *page)
489 : {
490 0 : struct anon_vma *anon_vma = NULL;
491 : unsigned long anon_mapping;
492 :
493 : rcu_read_lock();
494 0 : anon_mapping = (unsigned long)READ_ONCE(page->mapping);
495 0 : if ((anon_mapping & PAGE_MAPPING_FLAGS) != PAGE_MAPPING_ANON)
496 : goto out;
497 0 : if (!page_mapped(page))
498 : goto out;
499 :
500 0 : anon_vma = (struct anon_vma *) (anon_mapping - PAGE_MAPPING_ANON);
501 0 : if (!atomic_inc_not_zero(&anon_vma->refcount)) {
502 : anon_vma = NULL;
503 : goto out;
504 : }
505 :
506 : /*
507 : * If this page is still mapped, then its anon_vma cannot have been
508 : * freed. But if it has been unmapped, we have no security against the
509 : * anon_vma structure being freed and reused (for another anon_vma:
510 : * SLAB_TYPESAFE_BY_RCU guarantees that - so the atomic_inc_not_zero()
511 : * above cannot corrupt).
512 : */
513 0 : if (!page_mapped(page)) {
514 0 : rcu_read_unlock();
515 : put_anon_vma(anon_vma);
516 : return NULL;
517 : }
518 : out:
519 : rcu_read_unlock();
520 :
521 0 : return anon_vma;
522 : }
523 :
524 : /*
525 : * Similar to page_get_anon_vma() except it locks the anon_vma.
526 : *
527 : * It's a little more complex as it tries to keep the fast path to a single
528 : * atomic op -- the trylock. If we fail the trylock, we fall back to getting a
529 : * reference like with page_get_anon_vma() and then block on the mutex.
530 : */
531 0 : struct anon_vma *folio_lock_anon_vma_read(struct folio *folio)
532 : {
533 0 : struct anon_vma *anon_vma = NULL;
534 : struct anon_vma *root_anon_vma;
535 : unsigned long anon_mapping;
536 :
537 : rcu_read_lock();
538 0 : anon_mapping = (unsigned long)READ_ONCE(folio->mapping);
539 0 : if ((anon_mapping & PAGE_MAPPING_FLAGS) != PAGE_MAPPING_ANON)
540 : goto out;
541 0 : if (!folio_mapped(folio))
542 : goto out;
543 :
544 0 : anon_vma = (struct anon_vma *) (anon_mapping - PAGE_MAPPING_ANON);
545 0 : root_anon_vma = READ_ONCE(anon_vma->root);
546 0 : if (down_read_trylock(&root_anon_vma->rwsem)) {
547 : /*
548 : * If the folio is still mapped, then this anon_vma is still
549 : * its anon_vma, and holding the mutex ensures that it will
550 : * not go away, see anon_vma_free().
551 : */
552 0 : if (!folio_mapped(folio)) {
553 0 : up_read(&root_anon_vma->rwsem);
554 0 : anon_vma = NULL;
555 : }
556 : goto out;
557 : }
558 :
559 : /* trylock failed, we have to sleep */
560 0 : if (!atomic_inc_not_zero(&anon_vma->refcount)) {
561 : anon_vma = NULL;
562 : goto out;
563 : }
564 :
565 0 : if (!folio_mapped(folio)) {
566 0 : rcu_read_unlock();
567 : put_anon_vma(anon_vma);
568 : return NULL;
569 : }
570 :
571 : /* we pinned the anon_vma, it's safe to sleep */
572 : rcu_read_unlock();
573 0 : anon_vma_lock_read(anon_vma);
574 :
575 0 : if (atomic_dec_and_test(&anon_vma->refcount)) {
576 : /*
577 : * Oops, we held the last refcount, release the lock
578 : * and bail -- can't simply use put_anon_vma() because
579 : * we'll deadlock on the anon_vma_lock_write() recursion.
580 : */
581 0 : anon_vma_unlock_read(anon_vma);
582 0 : __put_anon_vma(anon_vma);
583 0 : anon_vma = NULL;
584 : }
585 :
586 : return anon_vma;
587 :
588 : out:
589 : rcu_read_unlock();
590 0 : return anon_vma;
591 : }
592 :
593 0 : void page_unlock_anon_vma_read(struct anon_vma *anon_vma)
594 : {
595 0 : anon_vma_unlock_read(anon_vma);
596 0 : }
597 :
598 : #ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
599 : /*
600 : * Flush TLB entries for recently unmapped pages from remote CPUs. If a
601 : * PTE was dirty when it was unmapped, it must be flushed before any IO
602 : * is initiated on the page to prevent lost writes. Similarly, it must be
603 : * flushed before the page is freed to prevent data leakage.
604 : */
605 : void try_to_unmap_flush(void)
606 : {
607 : struct tlbflush_unmap_batch *tlb_ubc = &current->tlb_ubc;
608 :
609 : if (!tlb_ubc->flush_required)
610 : return;
611 :
612 : arch_tlbbatch_flush(&tlb_ubc->arch);
613 : tlb_ubc->flush_required = false;
614 : tlb_ubc->writable = false;
615 : }
616 :
617 : /* Flush iff there are potentially writable TLB entries that can race with IO */
618 : void try_to_unmap_flush_dirty(void)
619 : {
620 : struct tlbflush_unmap_batch *tlb_ubc = &current->tlb_ubc;
621 :
622 : if (tlb_ubc->writable)
623 : try_to_unmap_flush();
624 : }
625 :
626 : /*
627 : * Bits 0-14 of mm->tlb_flush_batched record pending generations.
628 : * Bits 16-30 of mm->tlb_flush_batched record flushed generations.
629 : */
630 : #define TLB_FLUSH_BATCH_FLUSHED_SHIFT 16
631 : #define TLB_FLUSH_BATCH_PENDING_MASK \
632 : ((1 << (TLB_FLUSH_BATCH_FLUSHED_SHIFT - 1)) - 1)
633 : #define TLB_FLUSH_BATCH_PENDING_LARGE \
634 : (TLB_FLUSH_BATCH_PENDING_MASK / 2)
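:
: /*
: * Worked example of the encoding: with pending == 3 and flushed == 2 the
: * atomic value is (2 << TLB_FLUSH_BATCH_FLUSHED_SHIFT) | 3, so
: * flush_tlb_batched_pending() sees pending != flushed, calls
: * flush_tlb_mm() and then stores (3 << TLB_FLUSH_BATCH_FLUSHED_SHIFT) | 3,
: * after which later callers see nothing outstanding.
: */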
635 :
636 : static void set_tlb_ubc_flush_pending(struct mm_struct *mm, bool writable)
637 : {
638 : struct tlbflush_unmap_batch *tlb_ubc = &current->tlb_ubc;
639 : int batch, nbatch;
640 :
641 : arch_tlbbatch_add_mm(&tlb_ubc->arch, mm);
642 : tlb_ubc->flush_required = true;
643 :
644 : /*
645 : * Ensure compiler does not re-order the setting of tlb_flush_batched
646 : * before the PTE is cleared.
647 : */
648 : barrier();
649 : batch = atomic_read(&mm->tlb_flush_batched);
650 : retry:
651 : if ((batch & TLB_FLUSH_BATCH_PENDING_MASK) > TLB_FLUSH_BATCH_PENDING_LARGE) {
652 : /*
653 : * Prevent `pending' from catching up with `flushed' because of
654 : * overflow. Reset `pending' and `flushed' to be 1 and 0 if
655 : * `pending' becomes large.
656 : */
657 : nbatch = atomic_cmpxchg(&mm->tlb_flush_batched, batch, 1);
658 : if (nbatch != batch) {
659 : batch = nbatch;
660 : goto retry;
661 : }
662 : } else {
663 : atomic_inc(&mm->tlb_flush_batched);
664 : }
665 :
666 : /*
667 : * If the PTE was dirty then it's best to assume it's writable. The
668 : * caller must use try_to_unmap_flush_dirty() or try_to_unmap_flush()
669 : * before the page is queued for IO.
670 : */
671 : if (writable)
672 : tlb_ubc->writable = true;
673 : }
674 :
675 : /*
676 : * Returns true if the TLB flush should be deferred to the end of a batch of
677 : * unmap operations to reduce IPIs.
678 : */
679 : static bool should_defer_flush(struct mm_struct *mm, enum ttu_flags flags)
680 : {
681 : bool should_defer = false;
682 :
683 : if (!(flags & TTU_BATCH_FLUSH))
684 : return false;
685 :
686 : /* If remote CPUs need to be flushed then defer the flush to a batch */
687 : if (cpumask_any_but(mm_cpumask(mm), get_cpu()) < nr_cpu_ids)
688 : should_defer = true;
689 : put_cpu();
690 :
691 : return should_defer;
692 : }
693 :
694 : /*
695 : * Reclaim unmaps pages under the PTL but do not flush the TLB prior to
696 : * releasing the PTL if TLB flushes are batched. It's possible for a parallel
697 : * operation such as mprotect or munmap to race between reclaim unmapping
698 : * the page and flushing the page. If this race occurs, it potentially allows
699 : * access to data via a stale TLB entry. Tracking all mm's that have TLB
700 : * batching in flight would be expensive during reclaim so instead track
701 : * whether TLB batching occurred in the past and if so then do a flush here
702 : * if required. This will cost one additional flush per reclaim cycle paid
703 : * by the first operation at risk, such as mprotect or munmap.
704 : *
705 : * This must be called under the PTL so that an access to tlb_flush_batched
706 : * that is potentially a "reclaim vs mprotect/munmap/etc" race will synchronise
707 : * via the PTL.
708 : */
709 : void flush_tlb_batched_pending(struct mm_struct *mm)
710 : {
711 : int batch = atomic_read(&mm->tlb_flush_batched);
712 : int pending = batch & TLB_FLUSH_BATCH_PENDING_MASK;
713 : int flushed = batch >> TLB_FLUSH_BATCH_FLUSHED_SHIFT;
714 :
715 : if (pending != flushed) {
716 : flush_tlb_mm(mm);
717 : /*
718 : * If new TLB flushes became pending while we were flushing, leave
719 : * mm->tlb_flush_batched as is, so that those flushes are not lost.
720 : */
721 : atomic_cmpxchg(&mm->tlb_flush_batched, batch,
722 : pending | (pending << TLB_FLUSH_BATCH_FLUSHED_SHIFT));
723 : }
724 : }
725 : #else
726 : static void set_tlb_ubc_flush_pending(struct mm_struct *mm, bool writable)
727 : {
728 : }
729 :
730 : static bool should_defer_flush(struct mm_struct *mm, enum ttu_flags flags)
731 : {
732 : return false;
733 : }
734 : #endif /* CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH */
735 :
736 : /*
737 : * At what user virtual address is page expected in vma?
738 : * Caller should check the page is actually part of the vma.
739 : */
740 0 : unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma)
741 : {
742 0 : struct folio *folio = page_folio(page);
743 0 : if (folio_test_anon(folio)) {
744 0 : struct anon_vma *page__anon_vma = folio_anon_vma(folio);
745 : /*
746 : * Note: swapoff's unuse_vma() is more efficient with this
747 : * check, and needs it to match anon_vma when KSM is active.
748 : */
749 0 : if (!vma->anon_vma || !page__anon_vma ||
750 0 : vma->anon_vma->root != page__anon_vma->root)
751 : return -EFAULT;
752 0 : } else if (!vma->vm_file) {
753 : return -EFAULT;
754 0 : } else if (vma->vm_file->f_mapping != folio->mapping) {
755 : return -EFAULT;
756 : }
757 :
758 0 : return vma_address(page, vma);
759 : }
760 :
761 0 : pmd_t *mm_find_pmd(struct mm_struct *mm, unsigned long address)
762 : {
763 : pgd_t *pgd;
764 : p4d_t *p4d;
765 : pud_t *pud;
766 0 : pmd_t *pmd = NULL;
767 : pmd_t pmde;
768 :
769 0 : pgd = pgd_offset(mm, address);
770 : if (!pgd_present(*pgd))
771 : goto out;
772 :
773 0 : p4d = p4d_offset(pgd, address);
774 : if (!p4d_present(*p4d))
775 : goto out;
776 :
777 0 : pud = pud_offset(p4d, address);
778 0 : if (!pud_present(*pud))
779 : goto out;
780 :
781 0 : pmd = pmd_offset(pud, address);
782 : /*
783 : * Some THP functions use the sequence pmdp_huge_clear_flush(), set_pmd_at()
784 : * without holding anon_vma lock for write. So when looking for a
785 : * genuine pmde (in which to find pte), test present and !THP together.
786 : */
787 0 : pmde = *pmd;
788 0 : barrier();
789 0 : if (!pmd_present(pmde) || pmd_trans_huge(pmde))
790 : pmd = NULL;
791 : out:
792 0 : return pmd;
793 : }
794 :
795 : struct folio_referenced_arg {
796 : int mapcount;
797 : int referenced;
798 : unsigned long vm_flags;
799 : struct mem_cgroup *memcg;
800 : };
801 : /*
802 : * arg: folio_referenced_arg will be passed
803 : */
804 0 : static bool folio_referenced_one(struct folio *folio,
805 : struct vm_area_struct *vma, unsigned long address, void *arg)
806 : {
807 0 : struct folio_referenced_arg *pra = arg;
808 0 : DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, address, 0);
809 0 : int referenced = 0;
810 :
811 0 : while (page_vma_mapped_walk(&pvmw)) {
812 0 : address = pvmw.address;
813 :
814 0 : if ((vma->vm_flags & VM_LOCKED) &&
815 0 : (!folio_test_large(folio) || !pvmw.pte)) {
816 : /* Restore the mlock which got missed */
817 0 : mlock_vma_folio(folio, vma, !pvmw.pte);
818 0 : page_vma_mapped_walk_done(&pvmw);
819 0 : pra->vm_flags |= VM_LOCKED;
820 0 : return false; /* To break the loop */
821 : }
822 :
823 0 : if (pvmw.pte) {
824 0 : if (ptep_clear_flush_young_notify(vma, address,
825 : pvmw.pte)) {
826 : /*
827 : * Don't treat a reference through
828 : * a sequentially read mapping as such.
829 : * If the folio has been used in another mapping,
830 : * we will catch it; if this other mapping is
831 : * already gone, the unmap path will have set
832 : * the referenced flag or activated the folio.
833 : */
834 0 : if (likely(!(vma->vm_flags & VM_SEQ_READ)))
835 0 : referenced++;
836 : }
837 : } else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) {
838 : if (pmdp_clear_flush_young_notify(vma, address,
839 : pvmw.pmd))
840 : referenced++;
841 : } else {
842 : /* unexpected pmd-mapped folio? */
843 0 : WARN_ON_ONCE(1);
844 : }
845 :
846 0 : pra->mapcount--;
847 : }
848 :
849 : if (referenced)
850 : folio_clear_idle(folio);
851 0 : if (folio_test_clear_young(folio))
852 : referenced++;
853 :
854 0 : if (referenced) {
855 0 : pra->referenced++;
856 0 : pra->vm_flags |= vma->vm_flags & ~VM_LOCKED;
857 : }
858 :
859 0 : if (!pra->mapcount)
860 : return false; /* To break the loop */
861 :
862 0 : return true;
863 : }
864 :
865 0 : static bool invalid_folio_referenced_vma(struct vm_area_struct *vma, void *arg)
866 : {
867 0 : struct folio_referenced_arg *pra = arg;
868 0 : struct mem_cgroup *memcg = pra->memcg;
869 :
870 0 : if (!mm_match_cgroup(vma->vm_mm, memcg))
871 : return true;
872 :
873 : return false;
874 : }
875 :
876 : /**
877 : * folio_referenced() - Test if the folio was referenced.
878 : * @folio: The folio to test.
879 : * @is_locked: Caller holds lock on the folio.
880 : * @memcg: target memory cgroup
881 : * @vm_flags: A combination of all the vma->vm_flags which referenced the folio.
882 : *
883 : * Quick test_and_clear_referenced for all mappings of a folio,
884 : *
885 : * Return: The number of mappings which referenced the folio.
886 : */
887 0 : int folio_referenced(struct folio *folio, int is_locked,
888 : struct mem_cgroup *memcg, unsigned long *vm_flags)
889 : {
890 0 : int we_locked = 0;
891 0 : struct folio_referenced_arg pra = {
892 0 : .mapcount = folio_mapcount(folio),
893 : .memcg = memcg,
894 : };
895 0 : struct rmap_walk_control rwc = {
896 : .rmap_one = folio_referenced_one,
897 : .arg = (void *)&pra,
898 : .anon_lock = folio_lock_anon_vma_read,
899 : };
900 :
901 0 : *vm_flags = 0;
902 0 : if (!pra.mapcount)
903 : return 0;
904 :
905 0 : if (!folio_raw_mapping(folio))
906 : return 0;
907 :
908 0 : if (!is_locked && (!folio_test_anon(folio) || folio_test_ksm(folio))) {
909 0 : we_locked = folio_trylock(folio);
910 0 : if (!we_locked)
911 : return 1;
912 : }
913 :
914 : /*
915 : * If we are reclaiming on behalf of a cgroup, skip
916 : * counting on behalf of references from different
917 : * cgroups
918 : */
919 0 : if (memcg) {
920 0 : rwc.invalid_vma = invalid_folio_referenced_vma;
921 : }
922 :
923 0 : rmap_walk(folio, &rwc);
924 0 : *vm_flags = pra.vm_flags;
925 :
926 0 : if (we_locked)
927 0 : folio_unlock(folio);
928 :
929 0 : return pra.referenced;
930 : }
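:
: /*
: * Note on the results: the return value is the number of vmas in which a
: * young reference was found and cleared, and *vm_flags accumulates the
: * vm_flags of those vmas. If the folio sits in an mlock()ed vma the walk
: * stops early, restores the missed mlock and reports VM_LOCKED in
: * *vm_flags.
: */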
931 :
932 0 : static bool page_mkclean_one(struct folio *folio, struct vm_area_struct *vma,
933 : unsigned long address, void *arg)
934 : {
935 0 : DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, address, PVMW_SYNC);
936 : struct mmu_notifier_range range;
937 0 : int *cleaned = arg;
938 :
939 : /*
940 : * We have to assume the worst case, i.e. pmd, for invalidation. Note that
941 : * the folio cannot be freed from this function.
942 : */
943 : mmu_notifier_range_init(&range, MMU_NOTIFY_PROTECTION_PAGE,
944 : 0, vma, vma->vm_mm, address,
945 : vma_address_end(&pvmw));
946 : mmu_notifier_invalidate_range_start(&range);
947 :
948 0 : while (page_vma_mapped_walk(&pvmw)) {
949 0 : int ret = 0;
950 :
951 0 : address = pvmw.address;
952 0 : if (pvmw.pte) {
953 : pte_t entry;
954 0 : pte_t *pte = pvmw.pte;
955 :
956 0 : if (!pte_dirty(*pte) && !pte_write(*pte))
957 0 : continue;
958 :
959 0 : flush_cache_page(vma, address, pte_pfn(*pte));
960 0 : entry = ptep_clear_flush(vma, address, pte);
961 0 : entry = pte_wrprotect(entry);
962 0 : entry = pte_mkclean(entry);
963 0 : set_pte_at(vma->vm_mm, address, pte, entry);
964 : ret = 1;
965 : } else {
966 : #ifdef CONFIG_TRANSPARENT_HUGEPAGE
967 : pmd_t *pmd = pvmw.pmd;
968 : pmd_t entry;
969 :
970 : if (!pmd_dirty(*pmd) && !pmd_write(*pmd))
971 : continue;
972 :
973 : flush_cache_page(vma, address, folio_pfn(folio));
974 : entry = pmdp_invalidate(vma, address, pmd);
975 : entry = pmd_wrprotect(entry);
976 : entry = pmd_mkclean(entry);
977 : set_pmd_at(vma->vm_mm, address, pmd, entry);
978 : ret = 1;
979 : #else
980 : /* unexpected pmd-mapped folio? */
981 0 : WARN_ON_ONCE(1);
982 : #endif
983 : }
984 :
985 : /*
986 : * No need to call mmu_notifier_invalidate_range() as we are
987 : * downgrading page table protection not changing it to point
988 : * to a new page.
989 : *
990 : * See Documentation/vm/mmu_notifier.rst
991 : */
992 0 : if (ret)
993 0 : (*cleaned)++;
994 : }
995 :
996 0 : mmu_notifier_invalidate_range_end(&range);
997 :
998 0 : return true;
999 : }
1000 :
1001 0 : static bool invalid_mkclean_vma(struct vm_area_struct *vma, void *arg)
1002 : {
1003 0 : if (vma->vm_flags & VM_SHARED)
1004 : return false;
1005 :
1006 0 : return true;
1007 : }
1008 :
1009 0 : int folio_mkclean(struct folio *folio)
1010 : {
1011 0 : int cleaned = 0;
1012 : struct address_space *mapping;
1013 0 : struct rmap_walk_control rwc = {
1014 : .arg = (void *)&cleaned,
1015 : .rmap_one = page_mkclean_one,
1016 : .invalid_vma = invalid_mkclean_vma,
1017 : };
1018 :
1019 0 : BUG_ON(!folio_test_locked(folio));
1020 :
1021 0 : if (!folio_mapped(folio))
1022 : return 0;
1023 :
1024 0 : mapping = folio_mapping(folio);
1025 0 : if (!mapping)
1026 : return 0;
1027 :
1028 0 : rmap_walk(folio, &rwc);
1029 :
1030 0 : return cleaned;
1031 : }
1032 : EXPORT_SYMBOL_GPL(folio_mkclean);
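:
: /*
: * Callers such as folio_clear_dirty_for_io() in the writeback path rely on
: * folio_mkclean() to write-protect and clean every pte mapping the folio,
: * so that a later write faults again and re-dirties the folio.
: */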
1033 :
1034 : /**
1035 : * page_move_anon_rmap - move a page to our anon_vma
1036 : * @page: the page to move to our anon_vma
1037 : * @vma: the vma the page belongs to
1038 : *
1039 : * When a page belongs exclusively to one process after a COW event,
1040 : * that page can be moved into the anon_vma that belongs to just that
1041 : * process, so the rmap code will not search the parent or sibling
1042 : * processes.
1043 : */
1044 0 : void page_move_anon_rmap(struct page *page, struct vm_area_struct *vma)
1045 : {
1046 0 : struct anon_vma *anon_vma = vma->anon_vma;
1047 :
1048 0 : page = compound_head(page);
1049 :
1050 : VM_BUG_ON_PAGE(!PageLocked(page), page);
1051 : VM_BUG_ON_VMA(!anon_vma, vma);
1052 :
1053 0 : anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
1054 : /*
1055 : * Ensure that anon_vma and the PAGE_MAPPING_ANON bit are written
1056 : * simultaneously, so a concurrent reader (eg folio_referenced()'s
1057 : * folio_test_anon()) will not see one without the other.
1058 : */
1059 0 : WRITE_ONCE(page->mapping, (struct address_space *) anon_vma);
1060 0 : }
1061 :
1062 : /**
1063 : * __page_set_anon_rmap - set up new anonymous rmap
1064 : * @page: Page or Hugepage to add to rmap
1065 : * @vma: VM area to add page to.
1066 : * @address: User virtual address of the mapping
1067 : * @exclusive: the page is exclusively owned by the current process
1068 : */
1069 0 : static void __page_set_anon_rmap(struct page *page,
1070 : struct vm_area_struct *vma, unsigned long address, int exclusive)
1071 : {
1072 0 : struct anon_vma *anon_vma = vma->anon_vma;
1073 :
1074 0 : BUG_ON(!anon_vma);
1075 :
1076 0 : if (PageAnon(page))
1077 : return;
1078 :
1079 : /*
1080 : * If the page isn't exclusively mapped into this vma,
1081 : * we must use the _oldest_ possible anon_vma for the
1082 : * page mapping!
1083 : */
1084 0 : if (!exclusive)
1085 0 : anon_vma = anon_vma->root;
1086 :
1087 : /*
1088 : * page_idle does a lockless/optimistic rmap scan on page->mapping.
1089 : * Make sure the compiler doesn't split the stores of anon_vma and
1090 : * the PAGE_MAPPING_ANON type identifier, otherwise the rmap code
1091 : * could mistake the mapping for a struct address_space and crash.
1092 : */
1093 0 : anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
1094 0 : WRITE_ONCE(page->mapping, (struct address_space *) anon_vma);
1095 0 : page->index = linear_page_index(vma, address);
1096 : }
1097 :
1098 : /**
1099 : * __page_check_anon_rmap - sanity check anonymous rmap addition
1100 : * @page: the page to add the mapping to
1101 : * @vma: the vm area in which the mapping is added
1102 : * @address: the user virtual address mapped
1103 : */
1104 : static void __page_check_anon_rmap(struct page *page,
1105 : struct vm_area_struct *vma, unsigned long address)
1106 : {
1107 0 : struct folio *folio = page_folio(page);
1108 : /*
1109 : * The page's anon-rmap details (mapping and index) are guaranteed to
1110 : * be set up correctly at this point.
1111 : *
1112 : * We have exclusion against page_add_anon_rmap because the caller
1113 : * always holds the page locked.
1114 : *
1115 : * We have exclusion against page_add_new_anon_rmap because those pages
1116 : * are initially only visible via the pagetables, and the pte is locked
1117 : * over the call to page_add_new_anon_rmap.
1118 : */
1119 : VM_BUG_ON_FOLIO(folio_anon_vma(folio)->root != vma->anon_vma->root,
1120 : folio);
1121 : VM_BUG_ON_PAGE(page_to_pgoff(page) != linear_page_index(vma, address),
1122 : page);
1123 : }
1124 :
1125 : /**
1126 : * page_add_anon_rmap - add pte mapping to an anonymous page
1127 : * @page: the page to add the mapping to
1128 : * @vma: the vm area in which the mapping is added
1129 : * @address: the user virtual address mapped
1130 : * @compound: charge the page as compound or small page
1131 : *
1132 : * The caller needs to hold the pte lock, and the page must be locked in
1133 : * the anon_vma case: to serialize mapping,index checking after setting,
1134 : * and to ensure that PageAnon is not being upgraded racily to PageKsm
1135 : * (but PageKsm is never downgraded to PageAnon).
1136 : */
1137 0 : void page_add_anon_rmap(struct page *page,
1138 : struct vm_area_struct *vma, unsigned long address, bool compound)
1139 : {
1140 0 : do_page_add_anon_rmap(page, vma, address, compound ? RMAP_COMPOUND : 0);
1141 0 : }
1142 :
1143 : /*
1144 : * Special version of the above for do_swap_page, which often runs
1145 : * into pages that are exclusively owned by the current process.
1146 : * Everybody else should continue to use page_add_anon_rmap above.
1147 : */
1148 0 : void do_page_add_anon_rmap(struct page *page,
1149 : struct vm_area_struct *vma, unsigned long address, int flags)
1150 : {
1151 0 : bool compound = flags & RMAP_COMPOUND;
1152 : bool first;
1153 :
1154 0 : if (unlikely(PageKsm(page)))
1155 : lock_page_memcg(page);
1156 : else
1157 : VM_BUG_ON_PAGE(!PageLocked(page), page);
1158 :
1159 0 : if (compound) {
1160 : atomic_t *mapcount;
1161 : VM_BUG_ON_PAGE(!PageLocked(page), page);
1162 : VM_BUG_ON_PAGE(!PageTransHuge(page), page);
1163 0 : mapcount = compound_mapcount_ptr(page);
1164 0 : first = atomic_inc_and_test(mapcount);
1165 : } else {
1166 0 : first = atomic_inc_and_test(&page->_mapcount);
1167 : }
1168 :
1169 0 : if (first) {
1170 0 : int nr = compound ? thp_nr_pages(page) : 1;
1171 : /*
1172 : * We use the irq-unsafe __{inc|mod}_zone_page_stat because
1173 : * these counters are not modified in interrupt context, and
1174 : * pte lock(a spinlock) is held, which implies preemption
1175 : * disabled.
1176 : */
1177 0 : if (compound)
1178 0 : __mod_lruvec_page_state(page, NR_ANON_THPS, nr);
1179 0 : __mod_lruvec_page_state(page, NR_ANON_MAPPED, nr);
1180 : }
1181 :
1182 0 : if (unlikely(PageKsm(page)))
1183 : unlock_page_memcg(page);
1184 :
1185 : /* address might be in next vma when migration races vma_adjust */
1186 0 : else if (first)
1187 0 : __page_set_anon_rmap(page, vma, address,
1188 : flags & RMAP_EXCLUSIVE);
1189 : else
1190 0 : __page_check_anon_rmap(page, vma, address);
1191 :
1192 0 : mlock_vma_page(page, vma, compound);
1193 0 : }
1194 :
1195 : /**
1196 : * page_add_new_anon_rmap - add pte mapping to a new anonymous page
1197 : * @page: the page to add the mapping to
1198 : * @vma: the vm area in which the mapping is added
1199 : * @address: the user virtual address mapped
1200 : * @compound: charge the page as compound or small page
1201 : *
1202 : * Same as page_add_anon_rmap but must only be called on *new* pages.
1203 : * This means the inc-and-test can be bypassed.
1204 : * Page does not have to be locked.
1205 : */
1206 0 : void page_add_new_anon_rmap(struct page *page,
1207 : struct vm_area_struct *vma, unsigned long address, bool compound)
1208 : {
1209 0 : int nr = compound ? thp_nr_pages(page) : 1;
1210 :
1211 : VM_BUG_ON_VMA(address < vma->vm_start || address >= vma->vm_end, vma);
1212 0 : __SetPageSwapBacked(page);
1213 0 : if (compound) {
1214 : VM_BUG_ON_PAGE(!PageTransHuge(page), page);
1215 : /* increment count (starts at -1) */
1216 0 : atomic_set(compound_mapcount_ptr(page), 0);
1217 0 : atomic_set(compound_pincount_ptr(page), 0);
1218 :
1219 0 : __mod_lruvec_page_state(page, NR_ANON_THPS, nr);
1220 : } else {
1221 : /* Anon THP always mapped first with PMD */
1222 : VM_BUG_ON_PAGE(PageTransCompound(page), page);
1223 : /* increment count (starts at -1) */
1224 0 : atomic_set(&page->_mapcount, 0);
1225 : }
1226 0 : __mod_lruvec_page_state(page, NR_ANON_MAPPED, nr);
1227 0 : __page_set_anon_rmap(page, vma, address, 1);
1228 0 : }
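:
: /*
: * Typical call site, sketched from the anonymous fault path (simplified;
: * the surrounding details are assumptions, not a literal copy):
: *
: *    page = alloc_zeroed_user_highpage_movable(vma, addr);
: *    ...
: *    page_add_new_anon_rmap(page, vma, addr, false);
: *    lru_cache_add_inactive_or_unevictable(page, vma);
: *    set_pte_at(mm, addr, pte, entry);
: */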
1229 :
1230 : /**
1231 : * page_add_file_rmap - add pte mapping to a file page
1232 : * @page: the page to add the mapping to
1233 : * @vma: the vm area in which the mapping is added
1234 : * @compound: charge the page as compound or small page
1235 : *
1236 : * The caller needs to hold the pte lock.
1237 : */
1238 0 : void page_add_file_rmap(struct page *page,
1239 : struct vm_area_struct *vma, bool compound)
1240 : {
1241 0 : int i, nr = 0;
1242 :
1243 : VM_BUG_ON_PAGE(compound && !PageTransHuge(page), page);
1244 0 : lock_page_memcg(page);
1245 : if (compound && PageTransHuge(page)) {
1246 : int nr_pages = thp_nr_pages(page);
1247 :
1248 : for (i = 0; i < nr_pages; i++) {
1249 : if (atomic_inc_and_test(&page[i]._mapcount))
1250 : nr++;
1251 : }
1252 : if (!atomic_inc_and_test(compound_mapcount_ptr(page)))
1253 : goto out;
1254 :
1255 : /*
1256 : * It is racy to ClearPageDoubleMap in page_remove_file_rmap();
1257 : * but page lock is held by all page_add_file_rmap() compound
1258 : * callers, and SetPageDoubleMap below warns if !PageLocked:
1259 : * so here is a place that DoubleMap can be safely cleared.
1260 : */
1261 : VM_WARN_ON_ONCE(!PageLocked(page));
1262 : if (nr == nr_pages && PageDoubleMap(page))
1263 : ClearPageDoubleMap(page);
1264 :
1265 : if (PageSwapBacked(page))
1266 : __mod_lruvec_page_state(page, NR_SHMEM_PMDMAPPED,
1267 : nr_pages);
1268 : else
1269 : __mod_lruvec_page_state(page, NR_FILE_PMDMAPPED,
1270 : nr_pages);
1271 : } else {
1272 0 : if (PageTransCompound(page) && page_mapping(page)) {
1273 : VM_WARN_ON_ONCE(!PageLocked(page));
1274 : SetPageDoubleMap(compound_head(page));
1275 : }
1276 0 : if (atomic_inc_and_test(&page->_mapcount))
1277 0 : nr++;
1278 : }
1279 : out:
1280 0 : if (nr)
1281 0 : __mod_lruvec_page_state(page, NR_FILE_MAPPED, nr);
1282 0 : unlock_page_memcg(page);
1283 :
1284 0 : mlock_vma_page(page, vma, compound);
1285 0 : }
1286 :
1287 : static void page_remove_file_rmap(struct page *page, bool compound)
1288 : {
1289 0 : int i, nr = 0;
1290 :
1291 : VM_BUG_ON_PAGE(compound && !PageHead(page), page);
1292 :
1293 : /* Hugepages are not counted in NR_FILE_MAPPED for now. */
1294 0 : if (unlikely(PageHuge(page))) {
1295 : /* hugetlb pages are always mapped with pmds */
1296 : atomic_dec(compound_mapcount_ptr(page));
1297 : return;
1298 : }
1299 :
1300 : /* page still mapped by someone else? */
1301 : if (compound && PageTransHuge(page)) {
1302 : int nr_pages = thp_nr_pages(page);
1303 :
1304 : for (i = 0; i < nr_pages; i++) {
1305 : if (atomic_add_negative(-1, &page[i]._mapcount))
1306 : nr++;
1307 : }
1308 : if (!atomic_add_negative(-1, compound_mapcount_ptr(page)))
1309 : goto out;
1310 : if (PageSwapBacked(page))
1311 : __mod_lruvec_page_state(page, NR_SHMEM_PMDMAPPED,
1312 : -nr_pages);
1313 : else
1314 : __mod_lruvec_page_state(page, NR_FILE_PMDMAPPED,
1315 : -nr_pages);
1316 : } else {
1317 0 : if (atomic_add_negative(-1, &page->_mapcount))
1318 0 : nr++;
1319 : }
1320 : out:
1321 0 : if (nr)
1322 0 : __mod_lruvec_page_state(page, NR_FILE_MAPPED, -nr);
1323 : }
1324 :
1325 : static void page_remove_anon_compound_rmap(struct page *page)
1326 : {
1327 : int i, nr;
1328 :
1329 0 : if (!atomic_add_negative(-1, compound_mapcount_ptr(page)))
1330 : return;
1331 :
1332 : /* Hugepages are not counted in NR_ANON_PAGES for now. */
1333 : if (unlikely(PageHuge(page)))
1334 : return;
1335 :
1336 : if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
1337 : return;
1338 :
1339 : __mod_lruvec_page_state(page, NR_ANON_THPS, -thp_nr_pages(page));
1340 :
1341 : if (TestClearPageDoubleMap(page)) {
1342 : /*
1343 : * Subpages can be mapped with PTEs too. Check how many of
1344 : * them are still mapped.
1345 : */
1346 : for (i = 0, nr = 0; i < thp_nr_pages(page); i++) {
1347 : if (atomic_add_negative(-1, &page[i]._mapcount))
1348 : nr++;
1349 : }
1350 :
1351 : /*
1352 : * Queue the page for deferred split if at least one small
1353 : * page of the compound page is unmapped, but at least one
1354 : * small page is still mapped.
1355 : */
1356 : if (nr && nr < thp_nr_pages(page))
1357 : deferred_split_huge_page(page);
1358 : } else {
1359 : nr = thp_nr_pages(page);
1360 : }
1361 :
1362 : if (nr)
1363 : __mod_lruvec_page_state(page, NR_ANON_MAPPED, -nr);
1364 : }
1365 :
1366 : /**
1367 : * page_remove_rmap - take down pte mapping from a page
1368 : * @page: page to remove mapping from
1369 : * @vma: the vm area from which the mapping is removed
1370 : * @compound: uncharge the page as compound or small page
1371 : *
1372 : * The caller needs to hold the pte lock.
1373 : */
1374 0 : void page_remove_rmap(struct page *page,
1375 : struct vm_area_struct *vma, bool compound)
1376 : {
1377 0 : lock_page_memcg(page);
1378 :
1379 0 : if (!PageAnon(page)) {
1380 0 : page_remove_file_rmap(page, compound);
1381 : goto out;
1382 : }
1383 :
1384 0 : if (compound) {
1385 : page_remove_anon_compound_rmap(page);
1386 : goto out;
1387 : }
1388 :
1389 : /* page still mapped by someone else? */
1390 0 : if (!atomic_add_negative(-1, &page->_mapcount))
1391 : goto out;
1392 :
1393 : /*
1394 : * We use the irq-unsafe __{inc|mod}_zone_page_stat because
1395 : * these counters are not modified in interrupt context, and
1396 : * pte lock(a spinlock) is held, which implies preemption disabled.
1397 : */
1398 0 : __dec_lruvec_page_state(page, NR_ANON_MAPPED);
1399 :
1400 0 : if (PageTransCompound(page))
1401 : deferred_split_huge_page(compound_head(page));
1402 :
1403 : /*
1404 : * It would be tidy to reset the PageAnon mapping here,
1405 : * but that might overwrite a racing page_add_anon_rmap
1406 : * which increments mapcount after us but sets mapping
1407 : * before us: so leave the reset to free_unref_page,
1408 : * and remember that it's only reliable while mapped.
1409 : * Leaving it set also helps swapoff to reinstate ptes
1410 : * faster for those pages still in swapcache.
1411 : */
1412 : out:
1413 0 : unlock_page_memcg(page);
1414 :
1415 0 : munlock_vma_page(page, vma, compound);
1416 0 : }
1417 :
1418 : /*
1419 : * @arg: enum ttu_flags will be passed to this argument
1420 : */
1421 0 : static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
1422 : unsigned long address, void *arg)
1423 : {
1424 0 : struct mm_struct *mm = vma->vm_mm;
1425 0 : DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, address, 0);
1426 : pte_t pteval;
1427 : struct page *subpage;
1428 0 : bool ret = true;
1429 : struct mmu_notifier_range range;
1430 0 : enum ttu_flags flags = (enum ttu_flags)(long)arg;
1431 :
1432 : /*
1433 : * When racing against e.g. zap_pte_range() on another cpu,
1434 : * in between its ptep_get_and_clear_full() and page_remove_rmap(),
1435 : * try_to_unmap() may return before page_mapped() has become false,
1436 : * if page table locking is skipped: use TTU_SYNC to wait for that.
1437 : */
1438 0 : if (flags & TTU_SYNC)
1439 0 : pvmw.flags = PVMW_SYNC;
1440 :
1441 : if (flags & TTU_SPLIT_HUGE_PMD)
1442 : split_huge_pmd_address(vma, address, false, folio);
1443 :
1444 : /*
1445 : * For THP, we have to assume the worst case, i.e. pmd, for invalidation.
1446 : * For hugetlb, it could be much worse if we need to do pud
1447 : * invalidation in the case of pmd sharing.
1448 : *
1449 : * Note that the folio cannot be freed in this function, as the caller of
1450 : * try_to_unmap() must hold a reference on the folio.
1451 : */
1452 : range.end = vma_address_end(&pvmw);
1453 : mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm,
1454 : address, range.end);
1455 : if (folio_test_hugetlb(folio)) {
1456 : /*
1457 : * If sharing is possible, start and end will be adjusted
1458 : * accordingly.
1459 : */
1460 : adjust_range_if_pmd_sharing_possible(vma, &range.start,
1461 : &range.end);
1462 : }
1463 : mmu_notifier_invalidate_range_start(&range);
1464 :
1465 0 : while (page_vma_mapped_walk(&pvmw)) {
1466 : /* Unexpected PMD-mapped THP? */
1467 : VM_BUG_ON_FOLIO(!pvmw.pte, folio);
1468 :
1469 : /*
1470 : * If the folio is in an mlock()d vma, we must not swap it out.
1471 : */
1472 0 : if (!(flags & TTU_IGNORE_MLOCK) &&
1473 0 : (vma->vm_flags & VM_LOCKED)) {
1474 : /* Restore the mlock which got missed */
1475 0 : mlock_vma_folio(folio, vma, false);
1476 0 : page_vma_mapped_walk_done(&pvmw);
1477 : ret = false;
1478 : break;
1479 : }
1480 :
1481 0 : subpage = folio_page(folio,
1482 : pte_pfn(*pvmw.pte) - folio_pfn(folio));
1483 0 : address = pvmw.address;
1484 :
1485 0 : if (folio_test_hugetlb(folio) && !folio_test_anon(folio)) {
1486 : /*
1487 : * To call huge_pmd_unshare, i_mmap_rwsem must be
1488 : * held in write mode. Caller needs to explicitly
1489 : * do this outside rmap routines.
1490 : */
1491 : VM_BUG_ON(!(flags & TTU_RMAP_LOCKED));
1492 : if (huge_pmd_unshare(mm, vma, &address, pvmw.pte)) {
1493 : /*
1494 : * huge_pmd_unshare unmapped an entire PMD
1495 : * page. There is no way of knowing exactly
1496 : * which PMDs may be cached for this mm, so
1497 : * we must flush them all. start/end were
1498 : * already adjusted above to cover this range.
1499 : */
1500 : flush_cache_range(vma, range.start, range.end);
1501 : flush_tlb_range(vma, range.start, range.end);
1502 : mmu_notifier_invalidate_range(mm, range.start,
1503 : range.end);
1504 :
1505 : /*
1506 : * The ref count of the PMD page was dropped
1507 : * which is part of the way map counting
1508 : * is done for shared PMDs. Return 'true'
1509 : * here. When there is no other sharing,
1510 : * huge_pmd_unshare returns false and we will
1511 : * unmap the actual page and drop map count
1512 : * to zero.
1513 : */
1514 : page_vma_mapped_walk_done(&pvmw);
1515 : break;
1516 : }
1517 : }
1518 :
1519 : /* Nuke the page table entry. */
1520 0 : flush_cache_page(vma, address, pte_pfn(*pvmw.pte));
1521 0 : if (should_defer_flush(mm, flags)) {
1522 : /*
1523 : * We clear the PTE but do not flush so potentially
1524 : * a remote CPU could still be writing to the folio.
1525 : * If the entry was previously clean then the
1526 : * architecture must guarantee that a clear->dirty
1527 : * transition on a cached TLB entry is written through
1528 : * and traps if the PTE is unmapped.
1529 : */
1530 : pteval = ptep_get_and_clear(mm, address, pvmw.pte);
1531 :
1532 : set_tlb_ubc_flush_pending(mm, pte_dirty(pteval));
1533 : } else {
1534 0 : pteval = ptep_clear_flush(vma, address, pvmw.pte);
1535 : }
1536 :
1537 : /* Set the dirty flag on the folio now the pte is gone. */
1538 0 : if (pte_dirty(pteval))
1539 0 : folio_mark_dirty(folio);
1540 :
1541 : /* Update high watermark before we lower rss */
1542 0 : update_hiwater_rss(mm);
1543 :
1544 0 : if (PageHWPoison(subpage) && !(flags & TTU_IGNORE_HWPOISON)) {
1545 : pteval = swp_entry_to_pte(make_hwpoison_entry(subpage));
1546 : if (folio_test_hugetlb(folio)) {
1547 : hugetlb_count_sub(folio_nr_pages(folio), mm);
1548 : set_huge_swap_pte_at(mm, address,
1549 : pvmw.pte, pteval,
1550 : vma_mmu_pagesize(vma));
1551 : } else {
1552 : dec_mm_counter(mm, mm_counter(&folio->page));
1553 : set_pte_at(mm, address, pvmw.pte, pteval);
1554 : }
1555 :
1556 0 : } else if (pte_unused(pteval) && !userfaultfd_armed(vma)) {
1557 : /*
1558 : * The guest indicated that the page content is of no
1559 : * interest anymore. Simply discard the pte, vmscan
1560 : * will take care of the rest.
1561 : * A future reference will then fault in a new zero
1562 : * page. When userfaultfd is active, we must not drop
1563 : * this page though, as its main user (postcopy
1564 : * migration) will not expect userfaults on already
1565 : * copied pages.
1566 : */
1567 : dec_mm_counter(mm, mm_counter(&folio->page));
1568 : /* We have to invalidate as we cleared the pte */
1569 : mmu_notifier_invalidate_range(mm, address,
1570 : address + PAGE_SIZE);
1571 0 : } else if (folio_test_anon(folio)) {
1572 0 : swp_entry_t entry = { .val = page_private(subpage) };
1573 : pte_t swp_pte;
1574 : /*
1575 : * Store the swap location in the pte.
1576 : * See handle_pte_fault() ...
1577 : */
1578 0 : if (unlikely(folio_test_swapbacked(folio) !=
1579 : folio_test_swapcache(folio))) {
1580 0 : WARN_ON_ONCE(1);
1581 0 : ret = false;
1582 : /* We have to invalidate as we cleared the pte */
1583 0 : mmu_notifier_invalidate_range(mm, address,
1584 : address + PAGE_SIZE);
1585 0 : page_vma_mapped_walk_done(&pvmw);
1586 : break;
1587 : }
1588 :
1589 : /* MADV_FREE page check */
1590 0 : if (!folio_test_swapbacked(folio)) {
1591 : int ref_count, map_count;
1592 :
1593 : /*
1594 : * Synchronize with gup_pte_range():
1595 : * - clear PTE; barrier; read refcount
1596 : * - inc refcount; barrier; read PTE
1597 : */
1598 0 : smp_mb();
1599 :
1600 0 : ref_count = folio_ref_count(folio);
1601 0 : map_count = folio_mapcount(folio);
1602 :
1603 : /*
1604 : * Order reads for page refcount and dirty flag
1605 : * (see comments in __remove_mapping()).
1606 : */
1607 0 : smp_rmb();
1608 :
1609 : /*
1610 : * The only page refs must be one from isolation
1611 : * plus the rmap(s) (dropped by discard:).
1612 : */
1613 0 : if (ref_count == 1 + map_count &&
1614 0 : !folio_test_dirty(folio)) {
1615 : /* Invalidate as we cleared the pte */
1616 0 : mmu_notifier_invalidate_range(mm,
1617 : address, address + PAGE_SIZE);
1618 0 : dec_mm_counter(mm, MM_ANONPAGES);
1619 0 : goto discard;
1620 : }
1621 :
1622 : /*
1623 : * If the folio was redirtied, it cannot be
1624 : * discarded. Remap the page to page table.
1625 : */
1626 0 : set_pte_at(mm, address, pvmw.pte, pteval);
1627 0 : folio_set_swapbacked(folio);
1628 0 : ret = false;
1629 0 : page_vma_mapped_walk_done(&pvmw);
1630 : break;
1631 : }
1632 :
1633 0 : if (swap_duplicate(entry) < 0) {
1634 0 : set_pte_at(mm, address, pvmw.pte, pteval);
1635 0 : ret = false;
1636 0 : page_vma_mapped_walk_done(&pvmw);
1637 : break;
1638 : }
1639 0 : if (arch_unmap_one(mm, vma, address, pteval) < 0) {
1640 : set_pte_at(mm, address, pvmw.pte, pteval);
1641 : ret = false;
1642 : page_vma_mapped_walk_done(&pvmw);
1643 : break;
1644 : }
1645 0 : if (list_empty(&mm->mmlist)) {
1646 0 : spin_lock(&mmlist_lock);
1647 0 : if (list_empty(&mm->mmlist))
1648 0 : list_add(&mm->mmlist, &init_mm.mmlist);
1649 : spin_unlock(&mmlist_lock);
1650 : }
1651 0 : dec_mm_counter(mm, MM_ANONPAGES);
1652 0 : inc_mm_counter(mm, MM_SWAPENTS);
1653 0 : swp_pte = swp_entry_to_pte(entry);
1654 0 : if (pte_soft_dirty(pteval))
1655 : swp_pte = pte_swp_mksoft_dirty(swp_pte);
1656 : if (pte_uffd_wp(pteval))
1657 : swp_pte = pte_swp_mkuffd_wp(swp_pte);
1658 0 : set_pte_at(mm, address, pvmw.pte, swp_pte);
1659 : /* Invalidate as we cleared the pte */
1660 0 : mmu_notifier_invalidate_range(mm, address,
1661 : address + PAGE_SIZE);
1662 : } else {
1663 : /*
1664 : * This is a locked file-backed folio,
1665 : * so it cannot be removed from the page
1666 : * cache and replaced by a new folio before
1667 : * mmu_notifier_invalidate_range_end, so no
1668 : * concurrent thread can update its page table
1669 : * to point at a new folio while a device is
1670 : * still using this folio.
1671 : *
1672 : * See Documentation/vm/mmu_notifier.rst
1673 : */
1674 0 : dec_mm_counter(mm, mm_counter_file(&folio->page));
1675 : }
1676 : discard:
1677 : /*
1678 :                  * No need to call mmu_notifier_invalidate_range(): it has already
1679 :                  * been done above for all cases requiring it to happen under the
1680 :                  * page table lock, before mmu_notifier_invalidate_range_end()
1681 : *
1682 : * See Documentation/vm/mmu_notifier.rst
1683 : */
1684 0 : page_remove_rmap(subpage, vma, folio_test_hugetlb(folio));
1685 0 : if (vma->vm_flags & VM_LOCKED)
1686 0 : mlock_page_drain_local();
1687 : folio_put(folio);
1688 : }
1689 :
1690 0 : mmu_notifier_invalidate_range_end(&range);
1691 :
1692 0 : return ret;
1693 : }
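
/*
 * Editorial sketch, not part of rmap.c: the MADV_FREE discard test used in
 * try_to_unmap_one() above, pulled out as a hypothetical helper so the
 * barrier pairing with gup_pte_range() is easier to see. A clean lazyfree
 * folio may only be discarded when its sole extra reference is the
 * isolation ref, i.e. refcount == 1 + mapcount, and it was not redirtied.
 */
static bool folio_can_discard_lazyfree(struct folio *folio)
{
	int ref_count, map_count;

	/* Pairs with gup_pte_range(): clear PTE; barrier; read refcount. */
	smp_mb();
	ref_count = folio_ref_count(folio);
	map_count = folio_mapcount(folio);

	/* Order the refcount read against the dirty-flag read. */
	smp_rmb();

	return ref_count == 1 + map_count && !folio_test_dirty(folio);
}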
1694 :
1695 0 : static bool invalid_migration_vma(struct vm_area_struct *vma, void *arg)
1696 : {
1697 0 : return vma_is_temporary_stack(vma);
1698 : }
1699 :
1700 0 : static int page_not_mapped(struct folio *folio)
1701 : {
1702 0 : return !folio_mapped(folio);
1703 : }
1704 :
1705 : /**
1706 : * try_to_unmap - Try to remove all page table mappings to a folio.
1707 : * @folio: The folio to unmap.
1708 : * @flags: action and flags
1709 : *
1710 : * Tries to remove all the page table entries which are mapping this
1711 : * folio. It is the caller's responsibility to check if the folio is
1712 : * still mapped if needed (use TTU_SYNC to prevent accounting races).
1713 : *
1714 : * Context: Caller must hold the folio lock.
1715 : */
1716 0 : void try_to_unmap(struct folio *folio, enum ttu_flags flags)
1717 : {
1718 0 : struct rmap_walk_control rwc = {
1719 : .rmap_one = try_to_unmap_one,
1720 0 : .arg = (void *)flags,
1721 : .done = page_not_mapped,
1722 : .anon_lock = folio_lock_anon_vma_read,
1723 : };
1724 :
1725 0 : if (flags & TTU_RMAP_LOCKED)
1726 0 : rmap_walk_locked(folio, &rwc);
1727 : else
1728 0 : rmap_walk(folio, &rwc);
1729 0 : }
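
/*
 * Editorial sketch, not part of rmap.c: how a reclaim-style caller might
 * drive try_to_unmap(). The helper name reclaim_unmap_folio() is
 * hypothetical. The folio must already be locked; TTU_SYNC makes the walk
 * wait on racing PTE clears so that the final mapped check is reliable.
 */
static bool reclaim_unmap_folio(struct folio *folio)
{
	VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);

	try_to_unmap(folio, TTU_SYNC);

	/* try_to_unmap() returns void; success means no mapping remains. */
	return !folio_mapped(folio);
}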
1730 :
1731 : /*
1732 : * @arg: enum ttu_flags will be passed to this argument.
1733 : *
1734 : * If TTU_SPLIT_HUGE_PMD is specified any PMD mappings will be split into PTEs
1735 : * containing migration entries.
1736 : */
1737 0 : static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma,
1738 : unsigned long address, void *arg)
1739 : {
1740 0 : struct mm_struct *mm = vma->vm_mm;
1741 0 : DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, address, 0);
1742 : pte_t pteval;
1743 : struct page *subpage;
1744 0 : bool ret = true;
1745 : struct mmu_notifier_range range;
1746 0 : enum ttu_flags flags = (enum ttu_flags)(long)arg;
1747 :
1748 : /*
1749 : * When racing against e.g. zap_pte_range() on another cpu,
1750 : * in between its ptep_get_and_clear_full() and page_remove_rmap(),
1751 : * try_to_migrate() may return before page_mapped() has become false,
1752 : * if page table locking is skipped: use TTU_SYNC to wait for that.
1753 : */
1754 0 : if (flags & TTU_SYNC)
1755 0 : pvmw.flags = PVMW_SYNC;
1756 :
1757 : /*
1758 : * unmap_page() in mm/huge_memory.c is the only user of migration with
1759 : * TTU_SPLIT_HUGE_PMD and it wants to freeze.
1760 : */
1761 : if (flags & TTU_SPLIT_HUGE_PMD)
1762 : split_huge_pmd_address(vma, address, true, folio);
1763 :
1764 : /*
1765 :  * For THP, we have to assume the worst case, i.e. pmd-level
1766 :  * invalidation. For hugetlb, it could be much worse if we need
1767 :  * to do pud invalidation in the case of pmd sharing.
1768 :  *
1769 :  * Note that the page cannot be freed in this function, as the
1770 :  * caller of try_to_migrate() must hold a reference on the page.
1771 : */
1772 : range.end = vma_address_end(&pvmw);
1773 : mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm,
1774 : address, range.end);
1775 : if (folio_test_hugetlb(folio)) {
1776 : /*
1777 : * If sharing is possible, start and end will be adjusted
1778 : * accordingly.
1779 : */
1780 : adjust_range_if_pmd_sharing_possible(vma, &range.start,
1781 : &range.end);
1782 : }
1783 : mmu_notifier_invalidate_range_start(&range);
1784 :
1785 0 : while (page_vma_mapped_walk(&pvmw)) {
1786 : #ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
1787 : /* PMD-mapped THP migration entry */
1788 : if (!pvmw.pte) {
1789 : subpage = folio_page(folio,
1790 : pmd_pfn(*pvmw.pmd) - folio_pfn(folio));
1791 : VM_BUG_ON_FOLIO(folio_test_hugetlb(folio) ||
1792 : !folio_test_pmd_mappable(folio), folio);
1793 :
1794 : set_pmd_migration_entry(&pvmw, subpage);
1795 : continue;
1796 : }
1797 : #endif
1798 :
1799 : /* Unexpected PMD-mapped THP? */
1800 : VM_BUG_ON_FOLIO(!pvmw.pte, folio);
1801 :
1802 0 : subpage = folio_page(folio,
1803 : pte_pfn(*pvmw.pte) - folio_pfn(folio));
1804 0 : address = pvmw.address;
1805 :
1806 0 : if (folio_test_hugetlb(folio) && !folio_test_anon(folio)) {
1807 : /*
1808 : * To call huge_pmd_unshare, i_mmap_rwsem must be
1809 : * held in write mode. Caller needs to explicitly
1810 : * do this outside rmap routines.
1811 : */
1812 : VM_BUG_ON(!(flags & TTU_RMAP_LOCKED));
1813 : if (huge_pmd_unshare(mm, vma, &address, pvmw.pte)) {
1814 : /*
1815 : * huge_pmd_unshare unmapped an entire PMD
1816 : * page. There is no way of knowing exactly
1817 : * which PMDs may be cached for this mm, so
1818 : * we must flush them all. start/end were
1819 : * already adjusted above to cover this range.
1820 : */
1821 : flush_cache_range(vma, range.start, range.end);
1822 : flush_tlb_range(vma, range.start, range.end);
1823 : mmu_notifier_invalidate_range(mm, range.start,
1824 : range.end);
1825 :
1826 : /*
1827 : * The ref count of the PMD page was dropped
1828 : * which is part of the way map counting
1829 : * is done for shared PMDs. Return 'true'
1830 : * here. When there is no other sharing,
1831 : * huge_pmd_unshare returns false and we will
1832 : * unmap the actual page and drop map count
1833 : * to zero.
1834 : */
1835 : page_vma_mapped_walk_done(&pvmw);
1836 : break;
1837 : }
1838 : }
1839 :
1840 : /* Nuke the page table entry. */
1841 0 : flush_cache_page(vma, address, pte_pfn(*pvmw.pte));
1842 0 : pteval = ptep_clear_flush(vma, address, pvmw.pte);
1843 :
1844 : /* Set the dirty flag on the folio now the pte is gone. */
1845 0 : if (pte_dirty(pteval))
1846 0 : folio_mark_dirty(folio);
1847 :
1848 : /* Update high watermark before we lower rss */
1849 0 : update_hiwater_rss(mm);
1850 :
1851 0 : if (folio_is_zone_device(folio)) {
1852 : unsigned long pfn = folio_pfn(folio);
1853 : swp_entry_t entry;
1854 : pte_t swp_pte;
1855 :
1856 : /*
1857 : * Store the pfn of the page in a special migration
1858 : * pte. do_swap_page() will wait until the migration
1859 : * pte is removed and then restart fault handling.
1860 : */
1861 : entry = pte_to_swp_entry(pteval);
1862 : if (is_writable_device_private_entry(entry))
1863 : entry = make_writable_migration_entry(pfn);
1864 : else
1865 : entry = make_readable_migration_entry(pfn);
1866 : swp_pte = swp_entry_to_pte(entry);
1867 :
1868 : /*
1869 : * pteval maps a zone device page and is therefore
1870 : * a swap pte.
1871 : */
1872 : if (pte_swp_soft_dirty(pteval))
1873 : swp_pte = pte_swp_mksoft_dirty(swp_pte);
1874 : if (pte_swp_uffd_wp(pteval))
1875 : swp_pte = pte_swp_mkuffd_wp(swp_pte);
1876 : set_pte_at(mm, pvmw.address, pvmw.pte, swp_pte);
1877 : trace_set_migration_pte(pvmw.address, pte_val(swp_pte),
1878 : compound_order(&folio->page));
1879 : /*
1880 :                          * No need to invalidate here: the fault path will
1881 :                          * synchronize against the special swap migration pte.
1882 : *
1883 : * The assignment to subpage above was computed from a
1884 : * swap PTE which results in an invalid pointer.
1885 : * Since only PAGE_SIZE pages can currently be
1886 : * migrated, just set it to page. This will need to be
1887 : * changed when hugepage migrations to device private
1888 : * memory are supported.
1889 : */
1890 : subpage = &folio->page;
1891 0 : } else if (PageHWPoison(subpage)) {
1892 : pteval = swp_entry_to_pte(make_hwpoison_entry(subpage));
1893 : if (folio_test_hugetlb(folio)) {
1894 : hugetlb_count_sub(folio_nr_pages(folio), mm);
1895 : set_huge_swap_pte_at(mm, address,
1896 : pvmw.pte, pteval,
1897 : vma_mmu_pagesize(vma));
1898 : } else {
1899 : dec_mm_counter(mm, mm_counter(&folio->page));
1900 : set_pte_at(mm, address, pvmw.pte, pteval);
1901 : }
1902 :
1903 0 : } else if (pte_unused(pteval) && !userfaultfd_armed(vma)) {
1904 : /*
1905 : * The guest indicated that the page content is of no
1906 : * interest anymore. Simply discard the pte, vmscan
1907 : * will take care of the rest.
1908 : * A future reference will then fault in a new zero
1909 : * page. When userfaultfd is active, we must not drop
1910 : * this page though, as its main user (postcopy
1911 : * migration) will not expect userfaults on already
1912 : * copied pages.
1913 : */
1914 : dec_mm_counter(mm, mm_counter(&folio->page));
1915 : /* We have to invalidate as we cleared the pte */
1916 : mmu_notifier_invalidate_range(mm, address,
1917 : address + PAGE_SIZE);
1918 : } else {
1919 : swp_entry_t entry;
1920 : pte_t swp_pte;
1921 :
1922 0 : if (arch_unmap_one(mm, vma, address, pteval) < 0) {
1923 : set_pte_at(mm, address, pvmw.pte, pteval);
1924 : ret = false;
1925 : page_vma_mapped_walk_done(&pvmw);
1926 : break;
1927 : }
1928 :
1929 : /*
1930 : * Store the pfn of the page in a special migration
1931 : * pte. do_swap_page() will wait until the migration
1932 : * pte is removed and then restart fault handling.
1933 : */
1934 0 : if (pte_write(pteval))
1935 0 : entry = make_writable_migration_entry(
1936 0 : page_to_pfn(subpage));
1937 : else
1938 0 : entry = make_readable_migration_entry(
1939 0 : page_to_pfn(subpage));
1940 :
1941 0 : swp_pte = swp_entry_to_pte(entry);
1942 0 : if (pte_soft_dirty(pteval))
1943 : swp_pte = pte_swp_mksoft_dirty(swp_pte);
1944 : if (pte_uffd_wp(pteval))
1945 : swp_pte = pte_swp_mkuffd_wp(swp_pte);
1946 0 : set_pte_at(mm, address, pvmw.pte, swp_pte);
1947 0 : trace_set_migration_pte(address, pte_val(swp_pte),
1948 0 : compound_order(&folio->page));
1949 : /*
1950 :                          * No need to invalidate here: the fault path will
1951 :                          * synchronize against the special swap migration pte.
1952 : */
1953 : }
1954 :
1955 : /*
1956 :                  * No need to call mmu_notifier_invalidate_range(): it has already
1957 :                  * been done above for all cases requiring it to happen under the
1958 :                  * page table lock, before mmu_notifier_invalidate_range_end()
1959 : *
1960 : * See Documentation/vm/mmu_notifier.rst
1961 : */
1962 0 : page_remove_rmap(subpage, vma, folio_test_hugetlb(folio));
1963 0 : if (vma->vm_flags & VM_LOCKED)
1964 0 : mlock_page_drain_local();
1965 : folio_put(folio);
1966 : }
1967 :
1968 0 : mmu_notifier_invalidate_range_end(&range);
1969 :
1970 0 : return ret;
1971 : }
1972 :
1973 : /**
1974 : * try_to_migrate - try to replace all page table mappings with swap entries
1975 : * @folio: the folio to replace page table entries for
1976 : * @flags: action and flags
1977 : *
1978 : * Tries to remove all the page table entries which are mapping this folio and
1979 : * replace them with special swap entries. Caller must hold the folio lock.
1980 : */
1981 0 : void try_to_migrate(struct folio *folio, enum ttu_flags flags)
1982 : {
1983 0 : struct rmap_walk_control rwc = {
1984 : .rmap_one = try_to_migrate_one,
1985 0 : .arg = (void *)flags,
1986 : .done = page_not_mapped,
1987 : .anon_lock = folio_lock_anon_vma_read,
1988 : };
1989 :
1990 : /*
1991 :          * Migration always ignores mlock and only supports the TTU_RMAP_LOCKED,
1992 :          * TTU_SPLIT_HUGE_PMD and TTU_SYNC flags.
1993 : */
1994 0 : if (WARN_ON_ONCE(flags & ~(TTU_RMAP_LOCKED | TTU_SPLIT_HUGE_PMD |
1995 : TTU_SYNC)))
1996 0 : return;
1997 :
1998 0 : if (folio_is_zone_device(folio) && !folio_is_device_private(folio))
1999 : return;
2000 :
2001 : /*
2002 :          * During exec, a temporary VMA is set up and later moved.
2003 : * The VMA is moved under the anon_vma lock but not the
2004 : * page tables leading to a race where migration cannot
2005 : * find the migration ptes. Rather than increasing the
2006 : * locking requirements of exec(), migration skips
2007 : * temporary VMAs until after exec() completes.
2008 : */
2009 0 : if (!folio_test_ksm(folio) && folio_test_anon(folio))
2010 0 : rwc.invalid_vma = invalid_migration_vma;
2011 :
2012 0 : if (flags & TTU_RMAP_LOCKED)
2013 0 : rmap_walk_locked(folio, &rwc);
2014 : else
2015 0 : rmap_walk(folio, &rwc);
2016 : }
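
/*
 * Editorial sketch, not part of rmap.c: the shape of the unmap step in a
 * migration path. The helper name migrate_unmap_src() is hypothetical.
 * try_to_migrate() replaces present ptes with migration entries; the
 * caller keeps the folio locked and later restores the mappings with
 * remove_migration_ptes() once the copy has (or has not) succeeded.
 */
static bool migrate_unmap_src(struct folio *src)
{
	VM_BUG_ON_FOLIO(!folio_test_locked(src), src);

	try_to_migrate(src, 0);

	/* On success, every mapping was converted to a migration entry. */
	return !folio_mapped(src);
}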
2017 :
2018 : #ifdef CONFIG_DEVICE_PRIVATE
2019 : struct make_exclusive_args {
2020 : struct mm_struct *mm;
2021 : unsigned long address;
2022 : void *owner;
2023 : bool valid;
2024 : };
2025 :
2026 : static bool page_make_device_exclusive_one(struct folio *folio,
2027 : struct vm_area_struct *vma, unsigned long address, void *priv)
2028 : {
2029 : struct mm_struct *mm = vma->vm_mm;
2030 : DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, address, 0);
2031 : struct make_exclusive_args *args = priv;
2032 : pte_t pteval;
2033 : struct page *subpage;
2034 : bool ret = true;
2035 : struct mmu_notifier_range range;
2036 : swp_entry_t entry;
2037 : pte_t swp_pte;
2038 :
2039 : mmu_notifier_range_init_owner(&range, MMU_NOTIFY_EXCLUSIVE, 0, vma,
2040 : vma->vm_mm, address, min(vma->vm_end,
2041 : address + folio_size(folio)),
2042 : args->owner);
2043 : mmu_notifier_invalidate_range_start(&range);
2044 :
2045 : while (page_vma_mapped_walk(&pvmw)) {
2046 : /* Unexpected PMD-mapped THP? */
2047 : VM_BUG_ON_FOLIO(!pvmw.pte, folio);
2048 :
2049 : if (!pte_present(*pvmw.pte)) {
2050 : ret = false;
2051 : page_vma_mapped_walk_done(&pvmw);
2052 : break;
2053 : }
2054 :
2055 : subpage = folio_page(folio,
2056 : pte_pfn(*pvmw.pte) - folio_pfn(folio));
2057 : address = pvmw.address;
2058 :
2059 : /* Nuke the page table entry. */
2060 : flush_cache_page(vma, address, pte_pfn(*pvmw.pte));
2061 : pteval = ptep_clear_flush(vma, address, pvmw.pte);
2062 :
2063 : /* Set the dirty flag on the folio now the pte is gone. */
2064 : if (pte_dirty(pteval))
2065 : folio_mark_dirty(folio);
2066 :
2067 : /*
2068 : * Check that our target page is still mapped at the expected
2069 : * address.
2070 : */
2071 : if (args->mm == mm && args->address == address &&
2072 : pte_write(pteval))
2073 : args->valid = true;
2074 :
2075 : /*
2076 :           * Store the pfn of the page in a special device-exclusive
2077 :           * swap pte. On the next CPU access, do_swap_page() will
2078 :           * notify the device and restore the original mapping.
2079 : */
2080 : if (pte_write(pteval))
2081 : entry = make_writable_device_exclusive_entry(
2082 : page_to_pfn(subpage));
2083 : else
2084 : entry = make_readable_device_exclusive_entry(
2085 : page_to_pfn(subpage));
2086 : swp_pte = swp_entry_to_pte(entry);
2087 : if (pte_soft_dirty(pteval))
2088 : swp_pte = pte_swp_mksoft_dirty(swp_pte);
2089 : if (pte_uffd_wp(pteval))
2090 : swp_pte = pte_swp_mkuffd_wp(swp_pte);
2091 :
2092 : set_pte_at(mm, address, pvmw.pte, swp_pte);
2093 :
2094 : /*
2095 :           * There is already a reference on the page for the swap entry
2096 :           * that replaced the pte, so we shouldn't take another one here.
2097 : */
2098 : page_remove_rmap(subpage, vma, false);
2099 : }
2100 :
2101 : mmu_notifier_invalidate_range_end(&range);
2102 :
2103 : return ret;
2104 : }
2105 :
2106 : /**
2107 : * folio_make_device_exclusive - Mark the folio exclusively owned by a device.
2108 : * @folio: The folio to replace page table entries for.
2109 : * @mm: The mm_struct where the folio is expected to be mapped.
2110 : * @address: Address where the folio is expected to be mapped.
2111 : * @owner: passed to MMU_NOTIFY_EXCLUSIVE range notifier callbacks
2112 : *
2113 : * Tries to remove all the page table entries which are mapping this
2114 : * folio and replace them with special device exclusive swap entries to
2115 : * grant a device exclusive access to the folio.
2116 : *
2117 : * Context: Caller must hold the folio lock.
2118 : * Return: false if the page is still mapped, or if it could not be unmapped
2119 : * from the expected address. Otherwise returns true (success).
2120 : */
2121 : static bool folio_make_device_exclusive(struct folio *folio,
2122 : struct mm_struct *mm, unsigned long address, void *owner)
2123 : {
2124 : struct make_exclusive_args args = {
2125 : .mm = mm,
2126 : .address = address,
2127 : .owner = owner,
2128 : .valid = false,
2129 : };
2130 : struct rmap_walk_control rwc = {
2131 : .rmap_one = page_make_device_exclusive_one,
2132 : .done = page_not_mapped,
2133 : .anon_lock = folio_lock_anon_vma_read,
2134 : .arg = &args,
2135 : };
2136 :
2137 : /*
2138 : * Restrict to anonymous folios for now to avoid potential writeback
2139 : * issues.
2140 : */
2141 : if (!folio_test_anon(folio))
2142 : return false;
2143 :
2144 : rmap_walk(folio, &rwc);
2145 :
2146 : return args.valid && !folio_mapcount(folio);
2147 : }
2148 :
2149 : /**
2150 : * make_device_exclusive_range() - Mark a range for exclusive use by a device
2151 :  * @mm: mm_struct of the associated target process
2152 : * @start: start of the region to mark for exclusive device access
2153 : * @end: end address of region
2154 : * @pages: returns the pages which were successfully marked for exclusive access
2155 : * @owner: passed to MMU_NOTIFY_EXCLUSIVE range notifier to allow filtering
2156 : *
2157 : * Returns: number of pages found in the range by GUP. A page is marked for
2158 : * exclusive access only if the page pointer is non-NULL.
2159 : *
2160 : * This function finds ptes mapping page(s) to the given address range, locks
2161 : * them and replaces mappings with special swap entries preventing userspace CPU
2162 : * access. On fault these entries are replaced with the original mapping after
2163 : * calling MMU notifiers.
2164 : *
2165 :  * A driver using this to program access from a device must use an mmu notifier
2166 :  * critical section to hold a device-specific lock during programming. Once
2167 :  * programming is complete it should drop the page lock and reference, after
2168 :  * which point CPU access to the page will revoke the exclusive access.
2169 : */
2170 : int make_device_exclusive_range(struct mm_struct *mm, unsigned long start,
2171 : unsigned long end, struct page **pages,
2172 : void *owner)
2173 : {
2174 : long npages = (end - start) >> PAGE_SHIFT;
2175 : long i;
2176 :
2177 : npages = get_user_pages_remote(mm, start, npages,
2178 : FOLL_GET | FOLL_WRITE | FOLL_SPLIT_PMD,
2179 : pages, NULL, NULL);
2180 : if (npages < 0)
2181 : return npages;
2182 :
2183 : for (i = 0; i < npages; i++, start += PAGE_SIZE) {
2184 : struct folio *folio = page_folio(pages[i]);
2185 : if (PageTail(pages[i]) || !folio_trylock(folio)) {
2186 : folio_put(folio);
2187 : pages[i] = NULL;
2188 : continue;
2189 : }
2190 :
2191 : if (!folio_make_device_exclusive(folio, mm, start, owner)) {
2192 : folio_unlock(folio);
2193 : folio_put(folio);
2194 : pages[i] = NULL;
2195 : }
2196 : }
2197 :
2198 : return npages;
2199 : }
2200 : EXPORT_SYMBOL_GPL(make_device_exclusive_range);
2201 : #endif
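
#ifdef CONFIG_DEVICE_PRIVATE
/*
 * Editorial sketch, not part of rmap.c: how a driver might mark one page
 * for exclusive device access. The helper name is hypothetical and the
 * device-side programming (normally done under the driver's own lock,
 * inside its MMU_NOTIFY_EXCLUSIVE-aware notifier scheme) is elided.
 */
static int demo_grab_exclusive_page(struct mm_struct *mm, unsigned long addr,
				    void *owner)
{
	struct page *page = NULL;
	long ret;

	mmap_read_lock(mm);
	ret = make_device_exclusive_range(mm, addr, addr + PAGE_SIZE,
					  &page, owner);
	mmap_read_unlock(mm);
	if (ret < 0)
		return ret;
	if (!page)
		return -EBUSY;	/* found, but could not be made exclusive */

	/* ... program device access to the page here ... */

	/* Drop the page lock and reference once programming is complete. */
	unlock_page(page);
	put_page(page);
	return 0;
}
#endif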
2202 :
2203 0 : void __put_anon_vma(struct anon_vma *anon_vma)
2204 : {
2205 0 : struct anon_vma *root = anon_vma->root;
2206 :
2207 0 : anon_vma_free(anon_vma);
2208 0 : if (root != anon_vma && atomic_dec_and_test(&root->refcount))
2209 0 : anon_vma_free(root);
2210 0 : }
2211 :
2212 0 : static struct anon_vma *rmap_walk_anon_lock(struct folio *folio,
2213 : const struct rmap_walk_control *rwc)
2214 : {
2215 : struct anon_vma *anon_vma;
2216 :
2217 0 : if (rwc->anon_lock)
2218 0 : return rwc->anon_lock(folio);
2219 :
2220 : /*
2221 : * Note: remove_migration_ptes() cannot use folio_lock_anon_vma_read()
2222 :          * because that depends on page_mapped(); but not all of its callers
2223 :          * hold mmap_lock. Users without mmap_lock are required to take a
2224 :          * reference count to prevent the anon_vma from disappearing.
2225 : */
2226 0 : anon_vma = folio_anon_vma(folio);
2227 0 : if (!anon_vma)
2228 : return NULL;
2229 :
2230 0 : anon_vma_lock_read(anon_vma);
2231 : return anon_vma;
2232 : }
2233 :
2234 : /*
2235 :  * rmap_walk_anon - do something to an anonymous folio using the
2236 :  * object-based rmap method
2237 :  * @folio: the folio to be handled
2238 : * @rwc: control variable according to each walk type
2239 : *
2240 : * Find all the mappings of a page using the mapping pointer and the vma chains
2241 : * contained in the anon_vma struct it points to.
2242 : */
2243 0 : static void rmap_walk_anon(struct folio *folio,
2244 : const struct rmap_walk_control *rwc, bool locked)
2245 : {
2246 : struct anon_vma *anon_vma;
2247 : pgoff_t pgoff_start, pgoff_end;
2248 : struct anon_vma_chain *avc;
2249 :
2250 0 : if (locked) {
2251 0 : anon_vma = folio_anon_vma(folio);
2252 : /* anon_vma disappear under us? */
2253 : VM_BUG_ON_FOLIO(!anon_vma, folio);
2254 : } else {
2255 0 : anon_vma = rmap_walk_anon_lock(folio, rwc);
2256 : }
2257 0 : if (!anon_vma)
2258 : return;
2259 :
2260 0 : pgoff_start = folio_pgoff(folio);
2261 0 : pgoff_end = pgoff_start + folio_nr_pages(folio) - 1;
2262 0 : anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root,
2263 : pgoff_start, pgoff_end) {
2264 0 : struct vm_area_struct *vma = avc->vma;
2265 0 : unsigned long address = vma_address(&folio->page, vma);
2266 :
2267 : VM_BUG_ON_VMA(address == -EFAULT, vma);
2268 0 : cond_resched();
2269 :
2270 0 : if (rwc->invalid_vma && rwc->invalid_vma(vma, rwc->arg))
2271 0 : continue;
2272 :
2273 0 : if (!rwc->rmap_one(folio, vma, address, rwc->arg))
2274 : break;
2275 0 : if (rwc->done && rwc->done(folio))
2276 : break;
2277 : }
2278 :
2279 0 : if (!locked)
2280 0 : anon_vma_unlock_read(anon_vma);
2281 : }
2282 :
2283 : /*
2284 :  * rmap_walk_file - do something to a file-backed folio using the object-based rmap method
2285 :  * @folio: the folio to be handled
2286 : * @rwc: control variable according to each walk type
2287 : *
2288 : * Find all the mappings of a page using the mapping pointer and the vma chains
2289 : * contained in the address_space struct it points to.
2290 : */
2291 0 : static void rmap_walk_file(struct folio *folio,
2292 : const struct rmap_walk_control *rwc, bool locked)
2293 : {
2294 0 : struct address_space *mapping = folio_mapping(folio);
2295 : pgoff_t pgoff_start, pgoff_end;
2296 : struct vm_area_struct *vma;
2297 :
2298 : /*
2299 : * The page lock not only makes sure that page->mapping cannot
2300 : * suddenly be NULLified by truncation, it makes sure that the
2301 : * structure at mapping cannot be freed and reused yet,
2302 : * so we can safely take mapping->i_mmap_rwsem.
2303 : */
2304 : VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
2305 :
2306 0 : if (!mapping)
2307 : return;
2308 :
2309 0 : pgoff_start = folio_pgoff(folio);
2310 0 : pgoff_end = pgoff_start + folio_nr_pages(folio) - 1;
2311 0 : if (!locked)
2312 : i_mmap_lock_read(mapping);
2313 0 : vma_interval_tree_foreach(vma, &mapping->i_mmap,
2314 : pgoff_start, pgoff_end) {
2315 0 : unsigned long address = vma_address(&folio->page, vma);
2316 :
2317 : VM_BUG_ON_VMA(address == -EFAULT, vma);
2318 0 : cond_resched();
2319 :
2320 0 : if (rwc->invalid_vma && rwc->invalid_vma(vma, rwc->arg))
2321 0 : continue;
2322 :
2323 0 : if (!rwc->rmap_one(folio, vma, address, rwc->arg))
2324 : goto done;
2325 0 : if (rwc->done && rwc->done(folio))
2326 : goto done;
2327 : }
2328 :
2329 : done:
2330 0 : if (!locked)
2331 : i_mmap_unlock_read(mapping);
2332 : }
2333 :
2334 0 : void rmap_walk(struct folio *folio, const struct rmap_walk_control *rwc)
2335 : {
2336 0 : if (unlikely(folio_test_ksm(folio)))
2337 : rmap_walk_ksm(folio, rwc);
2338 0 : else if (folio_test_anon(folio))
2339 0 : rmap_walk_anon(folio, rwc, false);
2340 : else
2341 0 : rmap_walk_file(folio, rwc, false);
2342 0 : }
2343 :
2344 : /* Like rmap_walk, but caller holds relevant rmap lock */
2345 0 : void rmap_walk_locked(struct folio *folio, const struct rmap_walk_control *rwc)
2346 : {
2347 : /* no ksm support for now */
2348 : VM_BUG_ON_FOLIO(folio_test_ksm(folio), folio);
2349 0 : if (folio_test_anon(folio))
2350 0 : rmap_walk_anon(folio, rwc, true);
2351 : else
2352 0 : rmap_walk_file(folio, rwc, true);
2353 0 : }
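
/*
 * Editorial sketch, not part of rmap.c: a minimal rmap walk client. The
 * helper names are hypothetical; they illustrate the rmap_walk_control
 * contract used throughout this file: rmap_one() returns true to keep
 * walking and false to stop early, and the folio lock pins folio->mapping
 * for the duration of the walk.
 */
static bool demo_count_one_vma(struct folio *folio, struct vm_area_struct *vma,
			       unsigned long address, void *arg)
{
	(*(int *)arg)++;
	return true;		/* keep visiting every mapping */
}

static int demo_count_mapping_vmas(struct folio *folio)
{
	int count = 0;
	struct rmap_walk_control rwc = {
		.rmap_one = demo_count_one_vma,
		.arg = &count,
		.anon_lock = folio_lock_anon_vma_read,
	};

	folio_lock(folio);
	rmap_walk(folio, &rwc);
	folio_unlock(folio);

	return count;
}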
2354 :
2355 : #ifdef CONFIG_HUGETLB_PAGE
2356 : /*
2357 : * The following two functions are for anonymous (private mapped) hugepages.
2358 : * Unlike common anonymous pages, anonymous hugepages have no accounting code
2359 : * and no lru code, because we handle hugepages differently from common pages.
2360 : */
2361 : void hugepage_add_anon_rmap(struct page *page,
2362 : struct vm_area_struct *vma, unsigned long address)
2363 : {
2364 : struct anon_vma *anon_vma = vma->anon_vma;
2365 : int first;
2366 :
2367 : BUG_ON(!PageLocked(page));
2368 : BUG_ON(!anon_vma);
2369 : /* address might be in next vma when migration races vma_adjust */
2370 : first = atomic_inc_and_test(compound_mapcount_ptr(page));
2371 : if (first)
2372 : __page_set_anon_rmap(page, vma, address, 0);
2373 : }
2374 :
2375 : void hugepage_add_new_anon_rmap(struct page *page,
2376 : struct vm_area_struct *vma, unsigned long address)
2377 : {
2378 : BUG_ON(address < vma->vm_start || address >= vma->vm_end);
2379 : atomic_set(compound_mapcount_ptr(page), 0);
2380 : atomic_set(compound_pincount_ptr(page), 0);
2381 :
2382 : __page_set_anon_rmap(page, vma, address, 1);
2383 : }
2384 : #endif /* CONFIG_HUGETLB_PAGE */
|