Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0
2 : #include <linux/pagewalk.h>
3 : #include <linux/vmacache.h>
4 : #include <linux/mm_inline.h>
5 : #include <linux/hugetlb.h>
6 : #include <linux/huge_mm.h>
7 : #include <linux/mount.h>
8 : #include <linux/seq_file.h>
9 : #include <linux/highmem.h>
10 : #include <linux/ptrace.h>
11 : #include <linux/slab.h>
12 : #include <linux/pagemap.h>
13 : #include <linux/mempolicy.h>
14 : #include <linux/rmap.h>
15 : #include <linux/swap.h>
16 : #include <linux/sched/mm.h>
17 : #include <linux/swapops.h>
18 : #include <linux/mmu_notifier.h>
19 : #include <linux/page_idle.h>
20 : #include <linux/shmem_fs.h>
21 : #include <linux/uaccess.h>
22 : #include <linux/pkeys.h>
23 :
24 : #include <asm/elf.h>
25 : #include <asm/tlb.h>
26 : #include <asm/tlbflush.h>
27 : #include "internal.h"
28 :
29 : #define SEQ_PUT_DEC(str, val) \
30 : seq_put_decimal_ull_width(m, str, (val) << (PAGE_SHIFT-10), 8)
31 0 : void task_mem(struct seq_file *m, struct mm_struct *mm)
32 : {
33 : unsigned long text, lib, swap, anon, file, shmem;
34 : unsigned long hiwater_vm, total_vm, hiwater_rss, total_rss;
35 :
36 0 : anon = get_mm_counter(mm, MM_ANONPAGES);
37 0 : file = get_mm_counter(mm, MM_FILEPAGES);
38 0 : shmem = get_mm_counter(mm, MM_SHMEMPAGES);
39 :
40 : /*
41 : * Note: to minimize their overhead, mm maintains hiwater_vm and
42 : * hiwater_rss only when about to *lower* total_vm or rss. Any
43 : * collector of these hiwater stats must therefore get total_vm
44 : * and rss too, which will usually be the higher. Barriers? not
45 : * worth the effort, such snapshots can always be inconsistent.
46 : */
47 0 : hiwater_vm = total_vm = mm->total_vm;
48 0 : if (hiwater_vm < mm->hiwater_vm)
49 0 : hiwater_vm = mm->hiwater_vm;
50 0 : hiwater_rss = total_rss = anon + file + shmem;
51 0 : if (hiwater_rss < mm->hiwater_rss)
52 0 : hiwater_rss = mm->hiwater_rss;
53 :
54 : /* split executable areas between text and lib */
55 0 : text = PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK);
56 0 : text = min(text, mm->exec_vm << PAGE_SHIFT);
57 0 : lib = (mm->exec_vm << PAGE_SHIFT) - text;
58 :
59 0 : swap = get_mm_counter(mm, MM_SWAPENTS);
60 0 : SEQ_PUT_DEC("VmPeak:\t", hiwater_vm);
61 0 : SEQ_PUT_DEC(" kB\nVmSize:\t", total_vm);
62 0 : SEQ_PUT_DEC(" kB\nVmLck:\t", mm->locked_vm);
63 0 : SEQ_PUT_DEC(" kB\nVmPin:\t", atomic64_read(&mm->pinned_vm));
64 0 : SEQ_PUT_DEC(" kB\nVmHWM:\t", hiwater_rss);
65 0 : SEQ_PUT_DEC(" kB\nVmRSS:\t", total_rss);
66 0 : SEQ_PUT_DEC(" kB\nRssAnon:\t", anon);
67 0 : SEQ_PUT_DEC(" kB\nRssFile:\t", file);
68 0 : SEQ_PUT_DEC(" kB\nRssShmem:\t", shmem);
69 0 : SEQ_PUT_DEC(" kB\nVmData:\t", mm->data_vm);
70 0 : SEQ_PUT_DEC(" kB\nVmStk:\t", mm->stack_vm);
71 0 : seq_put_decimal_ull_width(m,
72 0 : " kB\nVmExe:\t", text >> 10, 8);
73 0 : seq_put_decimal_ull_width(m,
74 0 : " kB\nVmLib:\t", lib >> 10, 8);
75 0 : seq_put_decimal_ull_width(m,
76 0 : " kB\nVmPTE:\t", mm_pgtables_bytes(mm) >> 10, 8);
77 0 : SEQ_PUT_DEC(" kB\nVmSwap:\t", swap);
78 0 : seq_puts(m, " kB\n");
79 0 : hugetlb_report_usage(m, mm);
80 0 : }
81 : #undef SEQ_PUT_DEC
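For reference, SEQ_PUT_DEC above converts a page count to kilobytes with (val) << (PAGE_SHIFT - 10), i.e. a multiply by 4 on 4 KiB-page configurations. A minimal standalone sketch of that arithmetic (the 4 KiB page size is an assumption, not something this file fixes):

/* Standalone illustration of the pages-to-kB conversion used by SEQ_PUT_DEC. */
#include <stdio.h>

#define PAGE_SHIFT 12	/* assumed: 4 KiB pages */

int main(void)
{
	unsigned long long pages = 300;				/* e.g. an RSS counter value */
	unsigned long long kb = pages << (PAGE_SHIFT - 10);	/* pages * 4 */

	printf("%llu pages -> %llu kB\n", pages, kb);		/* prints: 300 pages -> 1200 kB */
	return 0;
}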
82 :
83 0 : unsigned long task_vsize(struct mm_struct *mm)
84 : {
85 0 : return PAGE_SIZE * mm->total_vm;
86 : }
87 :
88 0 : unsigned long task_statm(struct mm_struct *mm,
89 : unsigned long *shared, unsigned long *text,
90 : unsigned long *data, unsigned long *resident)
91 : {
92 0 : *shared = get_mm_counter(mm, MM_FILEPAGES) +
93 0 : get_mm_counter(mm, MM_SHMEMPAGES);
94 0 : *text = (PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK))
95 0 : >> PAGE_SHIFT;
96 0 : *data = mm->data_vm + mm->stack_vm;
97 0 : *resident = *shared + get_mm_counter(mm, MM_ANONPAGES);
98 0 : return mm->total_vm;
99 : }
100 :
101 : #ifdef CONFIG_NUMA
102 : /*
103 : * Save get_task_policy() for show_numa_map().
104 : */
105 : static void hold_task_mempolicy(struct proc_maps_private *priv)
106 : {
107 : struct task_struct *task = priv->task;
108 :
109 : task_lock(task);
110 : priv->task_mempolicy = get_task_policy(task);
111 : mpol_get(priv->task_mempolicy);
112 : task_unlock(task);
113 : }
114 : static void release_task_mempolicy(struct proc_maps_private *priv)
115 : {
116 : mpol_put(priv->task_mempolicy);
117 : }
118 : #else
119 : static void hold_task_mempolicy(struct proc_maps_private *priv)
120 : {
121 : }
122 : static void release_task_mempolicy(struct proc_maps_private *priv)
123 : {
124 : }
125 : #endif
126 :
127 0 : static void *m_start(struct seq_file *m, loff_t *ppos)
128 : {
129 0 : struct proc_maps_private *priv = m->private;
130 0 : unsigned long last_addr = *ppos;
131 : struct mm_struct *mm;
132 : struct vm_area_struct *vma;
133 :
134 : /* See m_next(). Zero at the start or after lseek. */
135 0 : if (last_addr == -1UL)
136 : return NULL;
137 :
138 0 : priv->task = get_proc_task(priv->inode);
139 0 : if (!priv->task)
140 : return ERR_PTR(-ESRCH);
141 :
142 0 : mm = priv->mm;
143 0 : if (!mm || !mmget_not_zero(mm)) {
144 0 : put_task_struct(priv->task);
145 0 : priv->task = NULL;
146 0 : return NULL;
147 : }
148 :
149 0 : if (mmap_read_lock_killable(mm)) {
150 0 : mmput(mm);
151 0 : put_task_struct(priv->task);
152 0 : priv->task = NULL;
153 0 : return ERR_PTR(-EINTR);
154 : }
155 :
156 0 : hold_task_mempolicy(priv);
157 0 : priv->tail_vma = get_gate_vma(mm);
158 :
159 0 : vma = find_vma(mm, last_addr);
160 0 : if (vma)
161 : return vma;
162 :
163 0 : return priv->tail_vma;
164 : }
165 :
166 0 : static void *m_next(struct seq_file *m, void *v, loff_t *ppos)
167 : {
168 0 : struct proc_maps_private *priv = m->private;
169 0 : struct vm_area_struct *next, *vma = v;
170 :
171 0 : if (vma == priv->tail_vma)
172 : next = NULL;
173 0 : else if (vma->vm_next)
174 : next = vma->vm_next;
175 : else
176 0 : next = priv->tail_vma;
177 :
178 0 : *ppos = next ? next->vm_start : -1UL;
179 :
180 0 : return next;
181 : }
182 :
183 0 : static void m_stop(struct seq_file *m, void *v)
184 : {
185 0 : struct proc_maps_private *priv = m->private;
186 0 : struct mm_struct *mm = priv->mm;
187 :
188 0 : if (!priv->task)
189 : return;
190 :
191 0 : release_task_mempolicy(priv);
192 0 : mmap_read_unlock(mm);
193 0 : mmput(mm);
194 0 : put_task_struct(priv->task);
195 0 : priv->task = NULL;
196 : }
197 :
198 0 : static int proc_maps_open(struct inode *inode, struct file *file,
199 : const struct seq_operations *ops, int psize)
200 : {
201 0 : struct proc_maps_private *priv = __seq_open_private(file, ops, psize);
202 :
203 0 : if (!priv)
204 : return -ENOMEM;
205 :
206 0 : priv->inode = inode;
207 0 : priv->mm = proc_mem_open(inode, PTRACE_MODE_READ);
208 0 : if (IS_ERR(priv->mm)) {
209 0 : int err = PTR_ERR(priv->mm);
210 :
211 0 : seq_release_private(inode, file);
212 0 : return err;
213 : }
214 :
215 : return 0;
216 : }
217 :
218 0 : static int proc_map_release(struct inode *inode, struct file *file)
219 : {
220 0 : struct seq_file *seq = file->private_data;
221 0 : struct proc_maps_private *priv = seq->private;
222 :
223 0 : if (priv->mm)
224 0 : mmdrop(priv->mm);
225 :
226 0 : return seq_release_private(inode, file);
227 : }
228 :
229 : static int do_maps_open(struct inode *inode, struct file *file,
230 : const struct seq_operations *ops)
231 : {
232 0 : return proc_maps_open(inode, file, ops,
233 : sizeof(struct proc_maps_private));
234 : }
235 :
236 : /*
237 : * Indicate if the VMA is a stack for the given task; for
238 : * /proc/PID/maps that is the stack of the main task.
239 : */
240 : static int is_stack(struct vm_area_struct *vma)
241 : {
242 : /*
243 : * We make no effort to guess what a given thread considers to be
244 : * its "stack". It's not even well-defined for programs written
245 : * languages like Go.
246 : * in languages like Go.
247 0 : return vma->vm_start <= vma->vm_mm->start_stack &&
248 0 : vma->vm_end >= vma->vm_mm->start_stack;
249 : }
250 :
251 0 : static void show_vma_header_prefix(struct seq_file *m,
252 : unsigned long start, unsigned long end,
253 : vm_flags_t flags, unsigned long long pgoff,
254 : dev_t dev, unsigned long ino)
255 : {
256 0 : seq_setwidth(m, 25 + sizeof(void *) * 6 - 1);
257 0 : seq_put_hex_ll(m, NULL, start, 8);
258 0 : seq_put_hex_ll(m, "-", end, 8);
259 0 : seq_putc(m, ' ');
260 0 : seq_putc(m, flags & VM_READ ? 'r' : '-');
261 0 : seq_putc(m, flags & VM_WRITE ? 'w' : '-');
262 0 : seq_putc(m, flags & VM_EXEC ? 'x' : '-');
263 0 : seq_putc(m, flags & VM_MAYSHARE ? 's' : 'p');
264 0 : seq_put_hex_ll(m, " ", pgoff, 8);
265 0 : seq_put_hex_ll(m, " ", MAJOR(dev), 2);
266 0 : seq_put_hex_ll(m, ":", MINOR(dev), 2);
267 0 : seq_put_decimal_ull(m, " ", ino);
268 0 : seq_putc(m, ' ');
269 0 : }
270 :
271 : static void
272 0 : show_map_vma(struct seq_file *m, struct vm_area_struct *vma)
273 : {
274 0 : struct mm_struct *mm = vma->vm_mm;
275 0 : struct file *file = vma->vm_file;
276 0 : vm_flags_t flags = vma->vm_flags;
277 0 : unsigned long ino = 0;
278 0 : unsigned long long pgoff = 0;
279 : unsigned long start, end;
280 0 : dev_t dev = 0;
281 0 : const char *name = NULL;
282 :
283 0 : if (file) {
284 0 : struct inode *inode = file_inode(vma->vm_file);
285 0 : dev = inode->i_sb->s_dev;
286 0 : ino = inode->i_ino;
287 0 : pgoff = ((loff_t)vma->vm_pgoff) << PAGE_SHIFT;
288 : }
289 :
290 0 : start = vma->vm_start;
291 0 : end = vma->vm_end;
292 0 : show_vma_header_prefix(m, start, end, flags, pgoff, dev, ino);
293 :
294 : /*
295 : * Print the dentry name for named mappings, and a
296 : * special [heap] marker for the heap:
297 : */
298 0 : if (file) {
299 0 : seq_pad(m, ' ');
300 0 : seq_file_path(m, file, "\n");
301 0 : goto done;
302 : }
303 :
304 0 : if (vma->vm_ops && vma->vm_ops->name) {
305 0 : name = vma->vm_ops->name(vma);
306 0 : if (name)
307 : goto done;
308 : }
309 :
310 0 : name = arch_vma_name(vma);
311 0 : if (!name) {
312 : struct anon_vma_name *anon_name;
313 :
314 0 : if (!mm) {
315 : name = "[vdso]";
316 : goto done;
317 : }
318 :
319 0 : if (vma->vm_start <= mm->brk &&
320 0 : vma->vm_end >= mm->start_brk) {
321 : name = "[heap]";
322 : goto done;
323 : }
324 :
325 0 : if (is_stack(vma)) {
326 : name = "[stack]";
327 : goto done;
328 : }
329 :
330 0 : anon_name = anon_vma_name(vma);
331 : if (anon_name) {
332 : seq_pad(m, ' ');
333 : seq_printf(m, "[anon:%s]", anon_name->name);
334 : }
335 : }
336 :
337 : done:
338 0 : if (name) {
339 0 : seq_pad(m, ' ');
340 0 : seq_puts(m, name);
341 : }
342 0 : seq_putc(m, '\n');
343 0 : }
344 :
345 0 : static int show_map(struct seq_file *m, void *v)
346 : {
347 0 : show_map_vma(m, v);
348 0 : return 0;
349 : }
350 :
351 : static const struct seq_operations proc_pid_maps_op = {
352 : .start = m_start,
353 : .next = m_next,
354 : .stop = m_stop,
355 : .show = show_map
356 : };
357 :
358 0 : static int pid_maps_open(struct inode *inode, struct file *file)
359 : {
360 0 : return do_maps_open(inode, file, &proc_pid_maps_op);
361 : }
362 :
363 : const struct file_operations proc_pid_maps_operations = {
364 : .open = pid_maps_open,
365 : .read = seq_read,
366 : .llseek = seq_lseek,
367 : .release = proc_map_release,
368 : };
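For illustration, the fixed header prefix emitted by show_vma_header_prefix() above can be consumed from userspace roughly as follows. This is only a sketch: it assumes the "start-end perms pgoff dev ino [path]" layout printed above and skips error handling.

/* Sketch: parse the header fields of each /proc/self/maps line. */
#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/proc/self/maps", "r");
	unsigned long start, end, pgoff, ino;
	unsigned int major, minor;
	char perms[5], path[4096];
	char line[4352];

	while (f && fgets(line, sizeof(line), f)) {
		path[0] = '\0';
		sscanf(line, "%lx-%lx %4s %lx %x:%x %lu %4095[^\n]",
		       &start, &end, perms, &pgoff, &major, &minor, &ino, path);
		printf("%#lx..%#lx %s %s\n", start, end, perms, path);
	}
	if (f)
		fclose(f);
	return 0;
}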
369 :
370 : /*
371 : * Proportional Set Size (PSS): my share of RSS.
372 : *
373 : * PSS of a process is the count of pages it has in memory, where each
374 : * page is divided by the number of processes sharing it. So if a
375 : * process has 1000 pages all to itself, and 1000 shared with one other
376 : * process, its PSS will be 1500.
377 : *
378 : * To keep (accumulated) division errors low, we adopt a 64-bit
379 : * fixed-point pss counter, so (pss >> PSS_SHIFT) is the real
380 : * byte count.
381 : *
382 : * A shift of 12 before division means (assuming 4K page size):
383 : * - 1M 3-user-pages add up to 8KB errors;
384 : * - supports mapcount up to 2^24, or 16M;
385 : * - supports PSS up to 2^52 bytes, or 4PB.
386 : */
387 : #define PSS_SHIFT 12
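To make the fixed-point scheme above concrete, here is a small standalone sketch (not kernel code) of the pss += (PAGE_SIZE << PSS_SHIFT) / mapcount accumulation that smaps_account() performs below; the 4 KiB page size is an assumption.

/* Standalone illustration of the PSS fixed-point accounting. */
#include <stdio.h>

#define PAGE_SIZE 4096ULL	/* assumed: 4 KiB pages */
#define PSS_SHIFT 12

int main(void)
{
	unsigned long long pss = 0;
	int mapcounts[] = { 1, 1, 3 };	/* two private pages, one shared by 3 processes */

	for (int i = 0; i < 3; i++)
		pss += (PAGE_SIZE << PSS_SHIFT) / mapcounts[i];

	/* 4096 + 4096 + 1365 = 9557 bytes; rounding error is at most 1 byte per page */
	printf("Pss: %llu bytes (%llu kB)\n",
	       pss >> PSS_SHIFT, pss >> (PSS_SHIFT + 10));
	return 0;
}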
388 :
389 : #ifdef CONFIG_PROC_PAGE_MONITOR
390 : struct mem_size_stats {
391 : unsigned long resident;
392 : unsigned long shared_clean;
393 : unsigned long shared_dirty;
394 : unsigned long private_clean;
395 : unsigned long private_dirty;
396 : unsigned long referenced;
397 : unsigned long anonymous;
398 : unsigned long lazyfree;
399 : unsigned long anonymous_thp;
400 : unsigned long shmem_thp;
401 : unsigned long file_thp;
402 : unsigned long swap;
403 : unsigned long shared_hugetlb;
404 : unsigned long private_hugetlb;
405 : u64 pss;
406 : u64 pss_anon;
407 : u64 pss_file;
408 : u64 pss_shmem;
409 : u64 pss_locked;
410 : u64 swap_pss;
411 : };
412 :
413 0 : static void smaps_page_accumulate(struct mem_size_stats *mss,
414 : struct page *page, unsigned long size, unsigned long pss,
415 : bool dirty, bool locked, bool private)
416 : {
417 0 : mss->pss += pss;
418 :
419 0 : if (PageAnon(page))
420 0 : mss->pss_anon += pss;
421 0 : else if (PageSwapBacked(page))
422 0 : mss->pss_shmem += pss;
423 : else
424 0 : mss->pss_file += pss;
425 :
426 0 : if (locked)
427 0 : mss->pss_locked += pss;
428 :
429 0 : if (dirty || PageDirty(page)) {
430 0 : if (private)
431 0 : mss->private_dirty += size;
432 : else
433 0 : mss->shared_dirty += size;
434 : } else {
435 0 : if (private)
436 0 : mss->private_clean += size;
437 : else
438 0 : mss->shared_clean += size;
439 : }
440 0 : }
441 :
442 0 : static void smaps_account(struct mem_size_stats *mss, struct page *page,
443 : bool compound, bool young, bool dirty, bool locked,
444 : bool migration)
445 : {
446 0 : int i, nr = compound ? compound_nr(page) : 1;
447 0 : unsigned long size = nr * PAGE_SIZE;
448 :
449 : /*
450 : * First accumulate quantities that depend only on |size| and the type
451 : * of the compound page.
452 : */
453 0 : if (PageAnon(page)) {
454 0 : mss->anonymous += size;
455 0 : if (!PageSwapBacked(page) && !dirty && !PageDirty(page))
456 0 : mss->lazyfree += size;
457 : }
458 :
459 0 : mss->resident += size;
460 : /* Accumulate the size in pages that have been accessed. */
461 0 : if (young || page_is_young(page) || PageReferenced(page))
462 0 : mss->referenced += size;
463 :
464 : /*
465 : * Then accumulate quantities that may depend on sharing, or that may
466 : * differ page-by-page.
467 : *
468 : * page_count(page) == 1 guarantees the page is mapped exactly once.
469 : * If any subpage of the compound page is mapped with a PTE, it
470 : * would elevate page_count().
471 : *
472 : * The page_mapcount() is called to get a snapshot of the mapcount.
473 : * Without holding the page lock this snapshot can be slightly wrong as
474 : * we cannot always read the mapcount atomically. It is not safe to
475 : * call page_mapcount() even with PTL held if the page is not mapped,
476 : * especially for migration entries. Treat regular migration entries
477 : * as mapcount == 1.
478 : */
479 0 : if ((page_count(page) == 1) || migration) {
480 0 : smaps_page_accumulate(mss, page, size, size << PSS_SHIFT, dirty,
481 : locked, true);
482 0 : return;
483 : }
484 0 : for (i = 0; i < nr; i++, page++) {
485 0 : int mapcount = page_mapcount(page);
486 0 : unsigned long pss = PAGE_SIZE << PSS_SHIFT;
487 0 : if (mapcount >= 2)
488 0 : pss /= mapcount;
489 0 : smaps_page_accumulate(mss, page, PAGE_SIZE, pss, dirty, locked,
490 : mapcount < 2);
491 : }
492 : }
493 :
494 : #ifdef CONFIG_SHMEM
495 0 : static int smaps_pte_hole(unsigned long addr, unsigned long end,
496 : __always_unused int depth, struct mm_walk *walk)
497 : {
498 0 : struct mem_size_stats *mss = walk->private;
499 0 : struct vm_area_struct *vma = walk->vma;
500 :
501 0 : mss->swap += shmem_partial_swap_usage(walk->vma->vm_file->f_mapping,
502 : linear_page_index(vma, addr),
503 : linear_page_index(vma, end));
504 :
505 0 : return 0;
506 : }
507 : #else
508 : #define smaps_pte_hole NULL
509 : #endif /* CONFIG_SHMEM */
510 :
511 : static void smaps_pte_hole_lookup(unsigned long addr, struct mm_walk *walk)
512 : {
513 : #ifdef CONFIG_SHMEM
514 0 : if (walk->ops->pte_hole) {
515 : /* depth is not used */
516 0 : smaps_pte_hole(addr, addr + PAGE_SIZE, 0, walk);
517 : }
518 : #endif
519 : }
520 :
521 0 : static void smaps_pte_entry(pte_t *pte, unsigned long addr,
522 : struct mm_walk *walk)
523 : {
524 0 : struct mem_size_stats *mss = walk->private;
525 0 : struct vm_area_struct *vma = walk->vma;
526 0 : bool locked = !!(vma->vm_flags & VM_LOCKED);
527 0 : struct page *page = NULL;
528 0 : bool migration = false;
529 :
530 0 : if (pte_present(*pte)) {
531 0 : page = vm_normal_page(vma, addr, *pte);
532 0 : } else if (is_swap_pte(*pte)) {
533 0 : swp_entry_t swpent = pte_to_swp_entry(*pte);
534 :
535 0 : if (!non_swap_entry(swpent)) {
536 : int mapcount;
537 :
538 0 : mss->swap += PAGE_SIZE;
539 0 : mapcount = swp_swapcount(swpent);
540 0 : if (mapcount >= 2) {
541 0 : u64 pss_delta = (u64)PAGE_SIZE << PSS_SHIFT;
542 :
543 0 : do_div(pss_delta, mapcount);
544 0 : mss->swap_pss += pss_delta;
545 : } else {
546 0 : mss->swap_pss += (u64)PAGE_SIZE << PSS_SHIFT;
547 : }
548 0 : } else if (is_pfn_swap_entry(swpent)) {
549 0 : if (is_migration_entry(swpent))
550 0 : migration = true;
551 0 : page = pfn_swap_entry_to_page(swpent);
552 : }
553 : } else {
554 : smaps_pte_hole_lookup(addr, walk);
555 : return;
556 : }
557 :
558 0 : if (!page)
559 : return;
560 :
561 0 : smaps_account(mss, page, false, pte_young(*pte), pte_dirty(*pte),
562 : locked, migration);
563 : }
564 :
565 : #ifdef CONFIG_TRANSPARENT_HUGEPAGE
566 : static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr,
567 : struct mm_walk *walk)
568 : {
569 : struct mem_size_stats *mss = walk->private;
570 : struct vm_area_struct *vma = walk->vma;
571 : bool locked = !!(vma->vm_flags & VM_LOCKED);
572 : struct page *page = NULL;
573 : bool migration = false;
574 :
575 : if (pmd_present(*pmd)) {
576 : /* FOLL_DUMP will return -EFAULT on huge zero page */
577 : page = follow_trans_huge_pmd(vma, addr, pmd, FOLL_DUMP);
578 : } else if (unlikely(thp_migration_supported() && is_swap_pmd(*pmd))) {
579 : swp_entry_t entry = pmd_to_swp_entry(*pmd);
580 :
581 : if (is_migration_entry(entry)) {
582 : migration = true;
583 : page = pfn_swap_entry_to_page(entry);
584 : }
585 : }
586 : if (IS_ERR_OR_NULL(page))
587 : return;
588 : if (PageAnon(page))
589 : mss->anonymous_thp += HPAGE_PMD_SIZE;
590 : else if (PageSwapBacked(page))
591 : mss->shmem_thp += HPAGE_PMD_SIZE;
592 : else if (is_zone_device_page(page))
593 : /* pass */;
594 : else
595 : mss->file_thp += HPAGE_PMD_SIZE;
596 :
597 : smaps_account(mss, page, true, pmd_young(*pmd), pmd_dirty(*pmd),
598 : locked, migration);
599 : }
600 : #else
601 : static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr,
602 : struct mm_walk *walk)
603 : {
604 : }
605 : #endif
606 :
607 0 : static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
608 : struct mm_walk *walk)
609 : {
610 0 : struct vm_area_struct *vma = walk->vma;
611 : pte_t *pte;
612 : spinlock_t *ptl;
613 :
614 0 : ptl = pmd_trans_huge_lock(pmd, vma);
615 : if (ptl) {
616 : smaps_pmd_entry(pmd, addr, walk);
617 : spin_unlock(ptl);
618 : goto out;
619 : }
620 :
621 0 : if (pmd_trans_unstable(pmd))
622 : goto out;
623 : /*
624 : * The mmap_lock held all the way back in m_start() is what
625 : * keeps khugepaged out of here and from collapsing things
626 : * in here.
627 : */
628 0 : pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
629 0 : for (; addr != end; pte++, addr += PAGE_SIZE)
630 0 : smaps_pte_entry(pte, addr, walk);
631 0 : pte_unmap_unlock(pte - 1, ptl);
632 : out:
633 0 : cond_resched();
634 0 : return 0;
635 : }
636 :
637 0 : static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma)
638 : {
639 : /*
640 : * Don't forget to update Documentation/ on changes.
641 : */
642 : static const char mnemonics[BITS_PER_LONG][2] = {
643 : /*
644 : * In case we meet a flag we don't know about.
645 : */
646 : [0 ... (BITS_PER_LONG-1)] = "??",
647 :
648 : [ilog2(VM_READ)] = "rd",
649 : [ilog2(VM_WRITE)] = "wr",
650 : [ilog2(VM_EXEC)] = "ex",
651 : [ilog2(VM_SHARED)] = "sh",
652 : [ilog2(VM_MAYREAD)] = "mr",
653 : [ilog2(VM_MAYWRITE)] = "mw",
654 : [ilog2(VM_MAYEXEC)] = "me",
655 : [ilog2(VM_MAYSHARE)] = "ms",
656 : [ilog2(VM_GROWSDOWN)] = "gd",
657 : [ilog2(VM_PFNMAP)] = "pf",
658 : [ilog2(VM_LOCKED)] = "lo",
659 : [ilog2(VM_IO)] = "io",
660 : [ilog2(VM_SEQ_READ)] = "sr",
661 : [ilog2(VM_RAND_READ)] = "rr",
662 : [ilog2(VM_DONTCOPY)] = "dc",
663 : [ilog2(VM_DONTEXPAND)] = "de",
664 : [ilog2(VM_ACCOUNT)] = "ac",
665 : [ilog2(VM_NORESERVE)] = "nr",
666 : [ilog2(VM_HUGETLB)] = "ht",
667 : [ilog2(VM_SYNC)] = "sf",
668 : [ilog2(VM_ARCH_1)] = "ar",
669 : [ilog2(VM_WIPEONFORK)] = "wf",
670 : [ilog2(VM_DONTDUMP)] = "dd",
671 : #ifdef CONFIG_ARM64_BTI
672 : [ilog2(VM_ARM64_BTI)] = "bt",
673 : #endif
674 : #ifdef CONFIG_MEM_SOFT_DIRTY
675 : [ilog2(VM_SOFTDIRTY)] = "sd",
676 : #endif
677 : [ilog2(VM_MIXEDMAP)] = "mm",
678 : [ilog2(VM_HUGEPAGE)] = "hg",
679 : [ilog2(VM_NOHUGEPAGE)] = "nh",
680 : [ilog2(VM_MERGEABLE)] = "mg",
681 : [ilog2(VM_UFFD_MISSING)]= "um",
682 : [ilog2(VM_UFFD_WP)] = "uw",
683 : #ifdef CONFIG_ARM64_MTE
684 : [ilog2(VM_MTE)] = "mt",
685 : [ilog2(VM_MTE_ALLOWED)] = "",
686 : #endif
687 : #ifdef CONFIG_ARCH_HAS_PKEYS
688 : /* These come out via ProtectionKey: */
689 : [ilog2(VM_PKEY_BIT0)] = "",
690 : [ilog2(VM_PKEY_BIT1)] = "",
691 : [ilog2(VM_PKEY_BIT2)] = "",
692 : [ilog2(VM_PKEY_BIT3)] = "",
693 : #if VM_PKEY_BIT4
694 : [ilog2(VM_PKEY_BIT4)] = "",
695 : #endif
696 : #endif /* CONFIG_ARCH_HAS_PKEYS */
697 : #ifdef CONFIG_HAVE_ARCH_USERFAULTFD_MINOR
698 : [ilog2(VM_UFFD_MINOR)] = "ui",
699 : #endif /* CONFIG_HAVE_ARCH_USERFAULTFD_MINOR */
700 : };
701 : size_t i;
702 :
703 0 : seq_puts(m, "VmFlags: ");
704 0 : for (i = 0; i < BITS_PER_LONG; i++) {
705 0 : if (!mnemonics[i][0])
706 0 : continue;
707 0 : if (vma->vm_flags & (1UL << i)) {
708 0 : seq_putc(m, mnemonics[i][0]);
709 0 : seq_putc(m, mnemonics[i][1]);
710 0 : seq_putc(m, ' ');
711 : }
712 : }
713 0 : seq_putc(m, '\n');
714 0 : }
715 :
716 : #ifdef CONFIG_HUGETLB_PAGE
717 : static int smaps_hugetlb_range(pte_t *pte, unsigned long hmask,
718 : unsigned long addr, unsigned long end,
719 : struct mm_walk *walk)
720 : {
721 : struct mem_size_stats *mss = walk->private;
722 : struct vm_area_struct *vma = walk->vma;
723 : struct page *page = NULL;
724 :
725 : if (pte_present(*pte)) {
726 : page = vm_normal_page(vma, addr, *pte);
727 : } else if (is_swap_pte(*pte)) {
728 : swp_entry_t swpent = pte_to_swp_entry(*pte);
729 :
730 : if (is_pfn_swap_entry(swpent))
731 : page = pfn_swap_entry_to_page(swpent);
732 : }
733 : if (page) {
734 : int mapcount = page_mapcount(page);
735 :
736 : if (mapcount >= 2)
737 : mss->shared_hugetlb += huge_page_size(hstate_vma(vma));
738 : else
739 : mss->private_hugetlb += huge_page_size(hstate_vma(vma));
740 : }
741 : return 0;
742 : }
743 : #else
744 : #define smaps_hugetlb_range NULL
745 : #endif /* HUGETLB_PAGE */
746 :
747 : static const struct mm_walk_ops smaps_walk_ops = {
748 : .pmd_entry = smaps_pte_range,
749 : .hugetlb_entry = smaps_hugetlb_range,
750 : };
751 :
752 : static const struct mm_walk_ops smaps_shmem_walk_ops = {
753 : .pmd_entry = smaps_pte_range,
754 : .hugetlb_entry = smaps_hugetlb_range,
755 : .pte_hole = smaps_pte_hole,
756 : };
757 :
758 : /*
759 : * Gather mem stats from @vma with the indicated beginning
760 : * address @start, and keep them in @mss.
761 : *
762 : * Use vm_start of @vma as the beginning address if @start is 0.
763 : */
764 0 : static void smap_gather_stats(struct vm_area_struct *vma,
765 : struct mem_size_stats *mss, unsigned long start)
766 : {
767 0 : const struct mm_walk_ops *ops = &smaps_walk_ops;
768 :
769 : /* Invalid start */
770 0 : if (start >= vma->vm_end)
771 : return;
772 :
773 : #ifdef CONFIG_SHMEM
774 0 : if (vma->vm_file && shmem_mapping(vma->vm_file->f_mapping)) {
775 : /*
776 : * For shared or readonly shmem mappings we know that all
777 : * swapped out pages belong to the shmem object, and we can
778 : * obtain the swap value much more efficiently. For private
779 : * writable mappings, we might have COW pages that are
780 : * not affected by the parent swapped out pages of the shmem
781 : * object, so we have to distinguish them during the page walk,
782 : * unless we know that the shmem object (or the part mapped by
783 : * our VMA) has no swapped out pages at all.
784 : */
785 0 : unsigned long shmem_swapped = shmem_swap_usage(vma);
786 :
787 0 : if (!start && (!shmem_swapped || (vma->vm_flags & VM_SHARED) ||
788 : !(vma->vm_flags & VM_WRITE))) {
789 0 : mss->swap += shmem_swapped;
790 : } else {
791 : ops = &smaps_shmem_walk_ops;
792 : }
793 : }
794 : #endif
795 : /* mmap_lock is held in m_start */
796 0 : if (!start)
797 0 : walk_page_vma(vma, ops, mss);
798 : else
799 0 : walk_page_range(vma->vm_mm, start, vma->vm_end, ops, mss);
800 : }
801 :
802 : #define SEQ_PUT_DEC(str, val) \
803 : seq_put_decimal_ull_width(m, str, (val) >> 10, 8)
804 :
805 : /* Show the contents common to smaps and smaps_rollup */
806 0 : static void __show_smap(struct seq_file *m, const struct mem_size_stats *mss,
807 : bool rollup_mode)
808 : {
809 0 : SEQ_PUT_DEC("Rss: ", mss->resident);
810 0 : SEQ_PUT_DEC(" kB\nPss: ", mss->pss >> PSS_SHIFT);
811 0 : if (rollup_mode) {
812 : /*
813 : * These are meaningful only for smaps_rollup, otherwise two of
814 : * them are zero, and the other one is the same as Pss.
815 : */
816 0 : SEQ_PUT_DEC(" kB\nPss_Anon: ",
817 : mss->pss_anon >> PSS_SHIFT);
818 0 : SEQ_PUT_DEC(" kB\nPss_File: ",
819 : mss->pss_file >> PSS_SHIFT);
820 0 : SEQ_PUT_DEC(" kB\nPss_Shmem: ",
821 : mss->pss_shmem >> PSS_SHIFT);
822 : }
823 0 : SEQ_PUT_DEC(" kB\nShared_Clean: ", mss->shared_clean);
824 0 : SEQ_PUT_DEC(" kB\nShared_Dirty: ", mss->shared_dirty);
825 0 : SEQ_PUT_DEC(" kB\nPrivate_Clean: ", mss->private_clean);
826 0 : SEQ_PUT_DEC(" kB\nPrivate_Dirty: ", mss->private_dirty);
827 0 : SEQ_PUT_DEC(" kB\nReferenced: ", mss->referenced);
828 0 : SEQ_PUT_DEC(" kB\nAnonymous: ", mss->anonymous);
829 0 : SEQ_PUT_DEC(" kB\nLazyFree: ", mss->lazyfree);
830 0 : SEQ_PUT_DEC(" kB\nAnonHugePages: ", mss->anonymous_thp);
831 0 : SEQ_PUT_DEC(" kB\nShmemPmdMapped: ", mss->shmem_thp);
832 0 : SEQ_PUT_DEC(" kB\nFilePmdMapped: ", mss->file_thp);
833 0 : SEQ_PUT_DEC(" kB\nShared_Hugetlb: ", mss->shared_hugetlb);
834 0 : seq_put_decimal_ull_width(m, " kB\nPrivate_Hugetlb: ",
835 0 : mss->private_hugetlb >> 10, 7);
836 0 : SEQ_PUT_DEC(" kB\nSwap: ", mss->swap);
837 0 : SEQ_PUT_DEC(" kB\nSwapPss: ",
838 : mss->swap_pss >> PSS_SHIFT);
839 0 : SEQ_PUT_DEC(" kB\nLocked: ",
840 : mss->pss_locked >> PSS_SHIFT);
841 0 : seq_puts(m, " kB\n");
842 0 : }
843 :
844 0 : static int show_smap(struct seq_file *m, void *v)
845 : {
846 0 : struct vm_area_struct *vma = v;
847 : struct mem_size_stats mss;
848 :
849 0 : memset(&mss, 0, sizeof(mss));
850 :
851 0 : smap_gather_stats(vma, &mss, 0);
852 :
853 0 : show_map_vma(m, vma);
854 :
855 0 : SEQ_PUT_DEC("Size: ", vma->vm_end - vma->vm_start);
856 0 : SEQ_PUT_DEC(" kB\nKernelPageSize: ", vma_kernel_pagesize(vma));
857 0 : SEQ_PUT_DEC(" kB\nMMUPageSize: ", vma_mmu_pagesize(vma));
858 0 : seq_puts(m, " kB\n");
859 :
860 0 : __show_smap(m, &mss, false);
861 :
862 0 : seq_printf(m, "THPeligible: %d\n",
863 0 : transparent_hugepage_active(vma));
864 :
865 : if (arch_pkeys_enabled())
866 : seq_printf(m, "ProtectionKey: %8u\n", vma_pkey(vma));
867 0 : show_smap_vma_flags(m, vma);
868 :
869 0 : return 0;
870 : }
871 :
872 0 : static int show_smaps_rollup(struct seq_file *m, void *v)
873 : {
874 0 : struct proc_maps_private *priv = m->private;
875 : struct mem_size_stats mss;
876 : struct mm_struct *mm;
877 : struct vm_area_struct *vma;
878 0 : unsigned long last_vma_end = 0;
879 0 : int ret = 0;
880 :
881 0 : priv->task = get_proc_task(priv->inode);
882 0 : if (!priv->task)
883 : return -ESRCH;
884 :
885 0 : mm = priv->mm;
886 0 : if (!mm || !mmget_not_zero(mm)) {
887 : ret = -ESRCH;
888 : goto out_put_task;
889 : }
890 :
891 0 : memset(&mss, 0, sizeof(mss));
892 :
893 0 : ret = mmap_read_lock_killable(mm);
894 0 : if (ret)
895 : goto out_put_mm;
896 :
897 0 : hold_task_mempolicy(priv);
898 :
899 0 : for (vma = priv->mm->mmap; vma;) {
900 0 : smap_gather_stats(vma, &mss, 0);
901 0 : last_vma_end = vma->vm_end;
902 :
903 : /*
904 : * Release mmap_lock temporarily if someone wants to
905 : * access it for a write request.
906 : */
907 0 : if (mmap_lock_is_contended(mm)) {
908 0 : mmap_read_unlock(mm);
909 0 : ret = mmap_read_lock_killable(mm);
910 0 : if (ret) {
911 : release_task_mempolicy(priv);
912 : goto out_put_mm;
913 : }
914 :
915 : /*
916 : * After dropping the lock, there are four cases to
917 : * consider. See the following example for explanation.
918 : *
919 : * +------+------+-----------+
920 : * | VMA1 | VMA2 | VMA3 |
921 : * +------+------+-----------+
922 : * | | | |
923 : * 4k 8k 16k 400k
924 : *
925 : * Suppose we drop the lock after reading VMA2 due to
926 : * contention, then we get:
927 : *
928 : * last_vma_end = 16k
929 : *
930 : * 1) VMA2 is freed, but VMA3 exists:
931 : *
932 : * find_vma(mm, 16k - 1) will return VMA3.
933 : * In this case, just continue from VMA3.
934 : *
935 : * 2) VMA2 still exists:
936 : *
937 : * find_vma(mm, 16k - 1) will return VMA2.
938 : * Iterate the loop like the original one.
939 : *
940 : * 3) No more VMAs can be found:
941 : *
942 : * find_vma(mm, 16k - 1) will return NULL.
943 : * No more things to do, just break.
944 : *
945 : * 4) (last_vma_end - 1) is the middle of a vma (VMA'):
946 : *
947 : * find_vma(mm, 16k - 1) will return VMA' whose range
948 : * contains last_vma_end.
949 : * Iterate VMA' from last_vma_end.
950 : */
951 0 : vma = find_vma(mm, last_vma_end - 1);
952 : /* Case 3 above */
953 0 : if (!vma)
954 : break;
955 :
956 : /* Case 1 above */
957 0 : if (vma->vm_start >= last_vma_end)
958 0 : continue;
959 :
960 : /* Case 4 above */
961 0 : if (vma->vm_end > last_vma_end)
962 0 : smap_gather_stats(vma, &mss, last_vma_end);
963 : }
964 : /* Case 2 above */
965 0 : vma = vma->vm_next;
966 : }
967 :
968 0 : show_vma_header_prefix(m, priv->mm->mmap->vm_start,
969 : last_vma_end, 0, 0, 0, 0);
970 0 : seq_pad(m, ' ');
971 0 : seq_puts(m, "[rollup]\n");
972 :
973 0 : __show_smap(m, &mss, true);
974 :
975 0 : release_task_mempolicy(priv);
976 : mmap_read_unlock(mm);
977 :
978 : out_put_mm:
979 0 : mmput(mm);
980 : out_put_task:
981 0 : put_task_struct(priv->task);
982 0 : priv->task = NULL;
983 :
984 0 : return ret;
985 : }
986 : #undef SEQ_PUT_DEC
987 :
988 : static const struct seq_operations proc_pid_smaps_op = {
989 : .start = m_start,
990 : .next = m_next,
991 : .stop = m_stop,
992 : .show = show_smap
993 : };
994 :
995 0 : static int pid_smaps_open(struct inode *inode, struct file *file)
996 : {
997 0 : return do_maps_open(inode, file, &proc_pid_smaps_op);
998 : }
999 :
1000 0 : static int smaps_rollup_open(struct inode *inode, struct file *file)
1001 : {
1002 : int ret;
1003 : struct proc_maps_private *priv;
1004 :
1005 0 : priv = kzalloc(sizeof(*priv), GFP_KERNEL_ACCOUNT);
1006 0 : if (!priv)
1007 : return -ENOMEM;
1008 :
1009 0 : ret = single_open(file, show_smaps_rollup, priv);
1010 0 : if (ret)
1011 : goto out_free;
1012 :
1013 0 : priv->inode = inode;
1014 0 : priv->mm = proc_mem_open(inode, PTRACE_MODE_READ);
1015 0 : if (IS_ERR(priv->mm)) {
1016 0 : ret = PTR_ERR(priv->mm);
1017 :
1018 0 : single_release(inode, file);
1019 0 : goto out_free;
1020 : }
1021 :
1022 : return 0;
1023 :
1024 : out_free:
1025 0 : kfree(priv);
1026 0 : return ret;
1027 : }
1028 :
1029 0 : static int smaps_rollup_release(struct inode *inode, struct file *file)
1030 : {
1031 0 : struct seq_file *seq = file->private_data;
1032 0 : struct proc_maps_private *priv = seq->private;
1033 :
1034 0 : if (priv->mm)
1035 0 : mmdrop(priv->mm);
1036 :
1037 0 : kfree(priv);
1038 0 : return single_release(inode, file);
1039 : }
1040 :
1041 : const struct file_operations proc_pid_smaps_operations = {
1042 : .open = pid_smaps_open,
1043 : .read = seq_read,
1044 : .llseek = seq_lseek,
1045 : .release = proc_map_release,
1046 : };
1047 :
1048 : const struct file_operations proc_pid_smaps_rollup_operations = {
1049 : .open = smaps_rollup_open,
1050 : .read = seq_read,
1051 : .llseek = seq_lseek,
1052 : .release = smaps_rollup_release,
1053 : };
1054 :
1055 : enum clear_refs_types {
1056 : CLEAR_REFS_ALL = 1,
1057 : CLEAR_REFS_ANON,
1058 : CLEAR_REFS_MAPPED,
1059 : CLEAR_REFS_SOFT_DIRTY,
1060 : CLEAR_REFS_MM_HIWATER_RSS,
1061 : CLEAR_REFS_LAST,
1062 : };
1063 :
1064 : struct clear_refs_private {
1065 : enum clear_refs_types type;
1066 : };
1067 :
1068 : #ifdef CONFIG_MEM_SOFT_DIRTY
1069 :
1070 : static inline bool pte_is_pinned(struct vm_area_struct *vma, unsigned long addr, pte_t pte)
1071 : {
1072 : struct page *page;
1073 :
1074 : if (!pte_write(pte))
1075 : return false;
1076 : if (!is_cow_mapping(vma->vm_flags))
1077 : return false;
1078 : if (likely(!test_bit(MMF_HAS_PINNED, &vma->vm_mm->flags)))
1079 : return false;
1080 : page = vm_normal_page(vma, addr, pte);
1081 : if (!page)
1082 : return false;
1083 : return page_maybe_dma_pinned(page);
1084 : }
1085 :
1086 : static inline void clear_soft_dirty(struct vm_area_struct *vma,
1087 : unsigned long addr, pte_t *pte)
1088 : {
1089 : /*
1090 : * The soft-dirty tracker uses #PF-s to catch writes
1091 : * to pages, so write-protect the pte as well. See
1092 : * Documentation/admin-guide/mm/soft-dirty.rst for a full
1093 : * description of how soft-dirty works.
1094 : */
1095 : pte_t ptent = *pte;
1096 :
1097 : if (pte_present(ptent)) {
1098 : pte_t old_pte;
1099 :
1100 : if (pte_is_pinned(vma, addr, ptent))
1101 : return;
1102 : old_pte = ptep_modify_prot_start(vma, addr, pte);
1103 : ptent = pte_wrprotect(old_pte);
1104 : ptent = pte_clear_soft_dirty(ptent);
1105 : ptep_modify_prot_commit(vma, addr, pte, old_pte, ptent);
1106 : } else if (is_swap_pte(ptent)) {
1107 : ptent = pte_swp_clear_soft_dirty(ptent);
1108 : set_pte_at(vma->vm_mm, addr, pte, ptent);
1109 : }
1110 : }
1111 : #else
1112 : static inline void clear_soft_dirty(struct vm_area_struct *vma,
1113 : unsigned long addr, pte_t *pte)
1114 : {
1115 : }
1116 : #endif
1117 :
1118 : #if defined(CONFIG_MEM_SOFT_DIRTY) && defined(CONFIG_TRANSPARENT_HUGEPAGE)
1119 : static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma,
1120 : unsigned long addr, pmd_t *pmdp)
1121 : {
1122 : pmd_t old, pmd = *pmdp;
1123 :
1124 : if (pmd_present(pmd)) {
1125 : /* See comment in change_huge_pmd() */
1126 : old = pmdp_invalidate(vma, addr, pmdp);
1127 : if (pmd_dirty(old))
1128 : pmd = pmd_mkdirty(pmd);
1129 : if (pmd_young(old))
1130 : pmd = pmd_mkyoung(pmd);
1131 :
1132 : pmd = pmd_wrprotect(pmd);
1133 : pmd = pmd_clear_soft_dirty(pmd);
1134 :
1135 : set_pmd_at(vma->vm_mm, addr, pmdp, pmd);
1136 : } else if (is_migration_entry(pmd_to_swp_entry(pmd))) {
1137 : pmd = pmd_swp_clear_soft_dirty(pmd);
1138 : set_pmd_at(vma->vm_mm, addr, pmdp, pmd);
1139 : }
1140 : }
1141 : #else
1142 : static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma,
1143 : unsigned long addr, pmd_t *pmdp)
1144 : {
1145 : }
1146 : #endif
1147 :
1148 0 : static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
1149 : unsigned long end, struct mm_walk *walk)
1150 : {
1151 0 : struct clear_refs_private *cp = walk->private;
1152 0 : struct vm_area_struct *vma = walk->vma;
1153 : pte_t *pte, ptent;
1154 : spinlock_t *ptl;
1155 : struct page *page;
1156 :
1157 0 : ptl = pmd_trans_huge_lock(pmd, vma);
1158 : if (ptl) {
1159 : if (cp->type == CLEAR_REFS_SOFT_DIRTY) {
1160 : clear_soft_dirty_pmd(vma, addr, pmd);
1161 : goto out;
1162 : }
1163 :
1164 : if (!pmd_present(*pmd))
1165 : goto out;
1166 :
1167 : page = pmd_page(*pmd);
1168 :
1169 : /* Clear accessed and referenced bits. */
1170 : pmdp_test_and_clear_young(vma, addr, pmd);
1171 : test_and_clear_page_young(page);
1172 : ClearPageReferenced(page);
1173 : out:
1174 : spin_unlock(ptl);
1175 : return 0;
1176 : }
1177 :
1178 0 : if (pmd_trans_unstable(pmd))
1179 : return 0;
1180 :
1181 0 : pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
1182 0 : for (; addr != end; pte++, addr += PAGE_SIZE) {
1183 0 : ptent = *pte;
1184 :
1185 0 : if (cp->type == CLEAR_REFS_SOFT_DIRTY) {
1186 0 : clear_soft_dirty(vma, addr, pte);
1187 0 : continue;
1188 : }
1189 :
1190 0 : if (!pte_present(ptent))
1191 0 : continue;
1192 :
1193 0 : page = vm_normal_page(vma, addr, ptent);
1194 0 : if (!page)
1195 0 : continue;
1196 :
1197 : /* Clear accessed and referenced bits. */
1198 0 : ptep_test_and_clear_young(vma, addr, pte);
1199 0 : test_and_clear_page_young(page);
1200 : ClearPageReferenced(page);
1201 : }
1202 0 : pte_unmap_unlock(pte - 1, ptl);
1203 0 : cond_resched();
1204 : return 0;
1205 : }
1206 :
1207 0 : static int clear_refs_test_walk(unsigned long start, unsigned long end,
1208 : struct mm_walk *walk)
1209 : {
1210 0 : struct clear_refs_private *cp = walk->private;
1211 0 : struct vm_area_struct *vma = walk->vma;
1212 :
1213 0 : if (vma->vm_flags & VM_PFNMAP)
1214 : return 1;
1215 :
1216 : /*
1217 : * Writing 1 to /proc/pid/clear_refs affects all pages.
1218 : * Writing 2 to /proc/pid/clear_refs only affects anonymous pages.
1219 : * Writing 3 to /proc/pid/clear_refs only affects file mapped pages.
1220 : * Writing 4 to /proc/pid/clear_refs affects all pages.
1221 : */
1222 0 : if (cp->type == CLEAR_REFS_ANON && vma->vm_file)
1223 : return 1;
1224 0 : if (cp->type == CLEAR_REFS_MAPPED && !vma->vm_file)
1225 : return 1;
1226 0 : return 0;
1227 : }
1228 :
1229 : static const struct mm_walk_ops clear_refs_walk_ops = {
1230 : .pmd_entry = clear_refs_pte_range,
1231 : .test_walk = clear_refs_test_walk,
1232 : };
1233 :
1234 0 : static ssize_t clear_refs_write(struct file *file, const char __user *buf,
1235 : size_t count, loff_t *ppos)
1236 : {
1237 : struct task_struct *task;
1238 : char buffer[PROC_NUMBUF];
1239 : struct mm_struct *mm;
1240 : struct vm_area_struct *vma;
1241 : enum clear_refs_types type;
1242 : int itype;
1243 : int rv;
1244 :
1245 0 : memset(buffer, 0, sizeof(buffer));
1246 0 : if (count > sizeof(buffer) - 1)
1247 0 : count = sizeof(buffer) - 1;
1248 0 : if (copy_from_user(buffer, buf, count))
1249 : return -EFAULT;
1250 0 : rv = kstrtoint(strstrip(buffer), 10, &itype);
1251 0 : if (rv < 0)
1252 0 : return rv;
1253 0 : type = (enum clear_refs_types)itype;
1254 0 : if (type < CLEAR_REFS_ALL || type >= CLEAR_REFS_LAST)
1255 : return -EINVAL;
1256 :
1257 0 : task = get_proc_task(file_inode(file));
1258 0 : if (!task)
1259 : return -ESRCH;
1260 0 : mm = get_task_mm(task);
1261 0 : if (mm) {
1262 : struct mmu_notifier_range range;
1263 0 : struct clear_refs_private cp = {
1264 : .type = type,
1265 : };
1266 :
1267 0 : if (mmap_write_lock_killable(mm)) {
1268 : count = -EINTR;
1269 : goto out_mm;
1270 : }
1271 0 : if (type == CLEAR_REFS_MM_HIWATER_RSS) {
1272 : /*
1273 : * Writing 5 to /proc/pid/clear_refs resets the peak
1274 : * resident set size to this mm's current rss value.
1275 : */
1276 : reset_mm_hiwater_rss(mm);
1277 : goto out_unlock;
1278 : }
1279 :
1280 0 : if (type == CLEAR_REFS_SOFT_DIRTY) {
1281 0 : for (vma = mm->mmap; vma; vma = vma->vm_next) {
1282 : if (!(vma->vm_flags & VM_SOFTDIRTY))
1283 0 : continue;
1284 : vma->vm_flags &= ~VM_SOFTDIRTY;
1285 : vma_set_page_prot(vma);
1286 : }
1287 :
1288 0 : inc_tlb_flush_pending(mm);
1289 : mmu_notifier_range_init(&range, MMU_NOTIFY_SOFT_DIRTY,
1290 : 0, NULL, mm, 0, -1UL);
1291 : mmu_notifier_invalidate_range_start(&range);
1292 : }
1293 0 : walk_page_range(mm, 0, mm->highest_vm_end, &clear_refs_walk_ops,
1294 : &cp);
1295 0 : if (type == CLEAR_REFS_SOFT_DIRTY) {
1296 0 : mmu_notifier_invalidate_range_end(&range);
1297 0 : flush_tlb_mm(mm);
1298 : dec_tlb_flush_pending(mm);
1299 : }
1300 : out_unlock:
1301 : mmap_write_unlock(mm);
1302 : out_mm:
1303 0 : mmput(mm);
1304 : }
1305 0 : put_task_struct(task);
1306 :
1307 0 : return count;
1308 : }
1309 :
1310 : const struct file_operations proc_clear_refs_operations = {
1311 : .write = clear_refs_write,
1312 : .llseek = noop_llseek,
1313 : };
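As an illustration of the interface implemented above (and described in Documentation/admin-guide/mm/soft-dirty.rst), a userspace soft-dirty tracking cycle might look like the following sketch. It assumes CONFIG_MEM_SOFT_DIRTY is enabled and omits error handling.

/*
 * Userspace sketch (not kernel code): write "4" to clear_refs to clear the
 * soft-dirty bits, re-dirty a page, then check bit 55 of its pagemap entry.
 */
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

int main(void)
{
	long page = sysconf(_SC_PAGESIZE);
	char *buf = aligned_alloc(page, page);
	int clear_fd = open("/proc/self/clear_refs", O_WRONLY);
	int pagemap_fd = open("/proc/self/pagemap", O_RDONLY);
	uint64_t entry;

	buf[0] = 1;				/* make sure the page is mapped */
	write(clear_fd, "4", 1);		/* CLEAR_REFS_SOFT_DIRTY */
	buf[0] = 2;				/* re-dirty the page */

	pread(pagemap_fd, &entry, sizeof(entry),
	      ((uintptr_t)buf / page) * sizeof(entry));
	printf("soft-dirty: %d\n", (int)((entry >> 55) & 1));

	close(pagemap_fd);
	close(clear_fd);
	free(buf);
	return 0;
}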
1314 :
1315 : typedef struct {
1316 : u64 pme;
1317 : } pagemap_entry_t;
1318 :
1319 : struct pagemapread {
1320 : int pos, len; /* units: PM_ENTRY_BYTES, not bytes */
1321 : pagemap_entry_t *buffer;
1322 : bool show_pfn;
1323 : };
1324 :
1325 : #define PAGEMAP_WALK_SIZE (PMD_SIZE)
1326 : #define PAGEMAP_WALK_MASK (PMD_MASK)
1327 :
1328 : #define PM_ENTRY_BYTES sizeof(pagemap_entry_t)
1329 : #define PM_PFRAME_BITS 55
1330 : #define PM_PFRAME_MASK GENMASK_ULL(PM_PFRAME_BITS - 1, 0)
1331 : #define PM_SOFT_DIRTY BIT_ULL(55)
1332 : #define PM_MMAP_EXCLUSIVE BIT_ULL(56)
1333 : #define PM_UFFD_WP BIT_ULL(57)
1334 : #define PM_FILE BIT_ULL(61)
1335 : #define PM_SWAP BIT_ULL(62)
1336 : #define PM_PRESENT BIT_ULL(63)
1337 :
1338 : #define PM_END_OF_BUFFER 1
1339 :
1340 : static inline pagemap_entry_t make_pme(u64 frame, u64 flags)
1341 : {
1342 0 : return (pagemap_entry_t) { .pme = (frame & PM_PFRAME_MASK) | flags };
1343 : }
1344 :
1345 : static int add_to_pagemap(unsigned long addr, pagemap_entry_t *pme,
1346 : struct pagemapread *pm)
1347 : {
1348 0 : pm->buffer[pm->pos++] = *pme;
1349 0 : if (pm->pos >= pm->len)
1350 : return PM_END_OF_BUFFER;
1351 : return 0;
1352 : }
1353 :
1354 0 : static int pagemap_pte_hole(unsigned long start, unsigned long end,
1355 : __always_unused int depth, struct mm_walk *walk)
1356 : {
1357 0 : struct pagemapread *pm = walk->private;
1358 0 : unsigned long addr = start;
1359 0 : int err = 0;
1360 :
1361 0 : while (addr < end) {
1362 0 : struct vm_area_struct *vma = find_vma(walk->mm, addr);
1363 0 : pagemap_entry_t pme = make_pme(0, 0);
1364 : /* End of address space hole, which we mark as non-present. */
1365 : unsigned long hole_end;
1366 :
1367 0 : if (vma)
1368 0 : hole_end = min(end, vma->vm_start);
1369 : else
1370 : hole_end = end;
1371 :
1372 0 : for (; addr < hole_end; addr += PAGE_SIZE) {
1373 0 : err = add_to_pagemap(addr, &pme, pm);
1374 0 : if (err)
1375 : goto out;
1376 : }
1377 :
1378 0 : if (!vma)
1379 : break;
1380 :
1381 : /* Addresses in the VMA. */
1382 : if (vma->vm_flags & VM_SOFTDIRTY)
1383 : pme = make_pme(0, PM_SOFT_DIRTY);
1384 0 : for (; addr < min(end, vma->vm_end); addr += PAGE_SIZE) {
1385 0 : err = add_to_pagemap(addr, &pme, pm);
1386 0 : if (err)
1387 : goto out;
1388 : }
1389 : }
1390 : out:
1391 0 : return err;
1392 : }
1393 :
1394 0 : static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm,
1395 : struct vm_area_struct *vma, unsigned long addr, pte_t pte)
1396 : {
1397 0 : u64 frame = 0, flags = 0;
1398 0 : struct page *page = NULL;
1399 0 : bool migration = false;
1400 :
1401 0 : if (pte_present(pte)) {
1402 0 : if (pm->show_pfn)
1403 0 : frame = pte_pfn(pte);
1404 0 : flags |= PM_PRESENT;
1405 0 : page = vm_normal_page(vma, addr, pte);
1406 0 : if (pte_soft_dirty(pte))
1407 : flags |= PM_SOFT_DIRTY;
1408 : if (pte_uffd_wp(pte))
1409 : flags |= PM_UFFD_WP;
1410 0 : } else if (is_swap_pte(pte)) {
1411 : swp_entry_t entry;
1412 0 : if (pte_swp_soft_dirty(pte))
1413 : flags |= PM_SOFT_DIRTY;
1414 : if (pte_swp_uffd_wp(pte))
1415 : flags |= PM_UFFD_WP;
1416 0 : entry = pte_to_swp_entry(pte);
1417 0 : if (pm->show_pfn)
1418 0 : frame = swp_type(entry) |
1419 0 : (swp_offset(entry) << MAX_SWAPFILES_SHIFT);
1420 0 : flags |= PM_SWAP;
1421 0 : migration = is_migration_entry(entry);
1422 0 : if (is_pfn_swap_entry(entry))
1423 0 : page = pfn_swap_entry_to_page(entry);
1424 : }
1425 :
1426 0 : if (page && !PageAnon(page))
1427 0 : flags |= PM_FILE;
1428 0 : if (page && !migration && page_mapcount(page) == 1)
1429 0 : flags |= PM_MMAP_EXCLUSIVE;
1430 : if (vma->vm_flags & VM_SOFTDIRTY)
1431 : flags |= PM_SOFT_DIRTY;
1432 :
1433 0 : return make_pme(frame, flags);
1434 : }
1435 :
1436 0 : static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end,
1437 : struct mm_walk *walk)
1438 : {
1439 0 : struct vm_area_struct *vma = walk->vma;
1440 0 : struct pagemapread *pm = walk->private;
1441 : spinlock_t *ptl;
1442 : pte_t *pte, *orig_pte;
1443 0 : int err = 0;
1444 : #ifdef CONFIG_TRANSPARENT_HUGEPAGE
1445 : bool migration = false;
1446 :
1447 : ptl = pmd_trans_huge_lock(pmdp, vma);
1448 : if (ptl) {
1449 : u64 flags = 0, frame = 0;
1450 : pmd_t pmd = *pmdp;
1451 : struct page *page = NULL;
1452 :
1453 : if (vma->vm_flags & VM_SOFTDIRTY)
1454 : flags |= PM_SOFT_DIRTY;
1455 :
1456 : if (pmd_present(pmd)) {
1457 : page = pmd_page(pmd);
1458 :
1459 : flags |= PM_PRESENT;
1460 : if (pmd_soft_dirty(pmd))
1461 : flags |= PM_SOFT_DIRTY;
1462 : if (pmd_uffd_wp(pmd))
1463 : flags |= PM_UFFD_WP;
1464 : if (pm->show_pfn)
1465 : frame = pmd_pfn(pmd) +
1466 : ((addr & ~PMD_MASK) >> PAGE_SHIFT);
1467 : }
1468 : #ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
1469 : else if (is_swap_pmd(pmd)) {
1470 : swp_entry_t entry = pmd_to_swp_entry(pmd);
1471 : unsigned long offset;
1472 :
1473 : if (pm->show_pfn) {
1474 : offset = swp_offset(entry) +
1475 : ((addr & ~PMD_MASK) >> PAGE_SHIFT);
1476 : frame = swp_type(entry) |
1477 : (offset << MAX_SWAPFILES_SHIFT);
1478 : }
1479 : flags |= PM_SWAP;
1480 : if (pmd_swp_soft_dirty(pmd))
1481 : flags |= PM_SOFT_DIRTY;
1482 : if (pmd_swp_uffd_wp(pmd))
1483 : flags |= PM_UFFD_WP;
1484 : VM_BUG_ON(!is_pmd_migration_entry(pmd));
1485 : migration = is_migration_entry(entry);
1486 : page = pfn_swap_entry_to_page(entry);
1487 : }
1488 : #endif
1489 :
1490 : if (page && !migration && page_mapcount(page) == 1)
1491 : flags |= PM_MMAP_EXCLUSIVE;
1492 :
1493 : for (; addr != end; addr += PAGE_SIZE) {
1494 : pagemap_entry_t pme = make_pme(frame, flags);
1495 :
1496 : err = add_to_pagemap(addr, &pme, pm);
1497 : if (err)
1498 : break;
1499 : if (pm->show_pfn) {
1500 : if (flags & PM_PRESENT)
1501 : frame++;
1502 : else if (flags & PM_SWAP)
1503 : frame += (1 << MAX_SWAPFILES_SHIFT);
1504 : }
1505 : }
1506 : spin_unlock(ptl);
1507 : return err;
1508 : }
1509 :
1510 : if (pmd_trans_unstable(pmdp))
1511 : return 0;
1512 : #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
1513 :
1514 : /*
1515 : * We can assume that @vma always points to a valid VMA and @end never
1516 : * goes beyond vma->vm_end.
1517 : */
1518 0 : orig_pte = pte = pte_offset_map_lock(walk->mm, pmdp, addr, &ptl);
1519 0 : for (; addr < end; pte++, addr += PAGE_SIZE) {
1520 : pagemap_entry_t pme;
1521 :
1522 0 : pme = pte_to_pagemap_entry(pm, vma, addr, *pte);
1523 0 : err = add_to_pagemap(addr, &pme, pm);
1524 0 : if (err)
1525 : break;
1526 : }
1527 0 : pte_unmap_unlock(orig_pte, ptl);
1528 :
1529 0 : cond_resched();
1530 :
1531 0 : return err;
1532 : }
1533 :
1534 : #ifdef CONFIG_HUGETLB_PAGE
1535 : /* This function walks within one hugetlb entry in a single call */
1536 : static int pagemap_hugetlb_range(pte_t *ptep, unsigned long hmask,
1537 : unsigned long addr, unsigned long end,
1538 : struct mm_walk *walk)
1539 : {
1540 : struct pagemapread *pm = walk->private;
1541 : struct vm_area_struct *vma = walk->vma;
1542 : u64 flags = 0, frame = 0;
1543 : int err = 0;
1544 : pte_t pte;
1545 :
1546 : if (vma->vm_flags & VM_SOFTDIRTY)
1547 : flags |= PM_SOFT_DIRTY;
1548 :
1549 : pte = huge_ptep_get(ptep);
1550 : if (pte_present(pte)) {
1551 : struct page *page = pte_page(pte);
1552 :
1553 : if (!PageAnon(page))
1554 : flags |= PM_FILE;
1555 :
1556 : if (page_mapcount(page) == 1)
1557 : flags |= PM_MMAP_EXCLUSIVE;
1558 :
1559 : flags |= PM_PRESENT;
1560 : if (pm->show_pfn)
1561 : frame = pte_pfn(pte) +
1562 : ((addr & ~hmask) >> PAGE_SHIFT);
1563 : }
1564 :
1565 : for (; addr != end; addr += PAGE_SIZE) {
1566 : pagemap_entry_t pme = make_pme(frame, flags);
1567 :
1568 : err = add_to_pagemap(addr, &pme, pm);
1569 : if (err)
1570 : return err;
1571 : if (pm->show_pfn && (flags & PM_PRESENT))
1572 : frame++;
1573 : }
1574 :
1575 : cond_resched();
1576 :
1577 : return err;
1578 : }
1579 : #else
1580 : #define pagemap_hugetlb_range NULL
1581 : #endif /* HUGETLB_PAGE */
1582 :
1583 : static const struct mm_walk_ops pagemap_ops = {
1584 : .pmd_entry = pagemap_pmd_range,
1585 : .pte_hole = pagemap_pte_hole,
1586 : .hugetlb_entry = pagemap_hugetlb_range,
1587 : };
1588 :
1589 : /*
1590 : * /proc/pid/pagemap - an array mapping virtual pages to pfns
1591 : *
1592 : * For each page in the address space, this file contains one 64-bit entry
1593 : * consisting of the following:
1594 : *
1595 : * Bits 0-54 page frame number (PFN) if present
1596 : * Bits 0-4 swap type if swapped
1597 : * Bits 5-54 swap offset if swapped
1598 : * Bit 55 pte is soft-dirty (see Documentation/admin-guide/mm/soft-dirty.rst)
1599 : * Bit 56 page exclusively mapped
1600 : * Bit 57 pte is uffd-wp write-protected
1601 : * Bits 58-60 zero
1602 : * Bit 61 page is file-page or shared-anon
1603 : * Bit 62 page swapped
1604 : * Bit 63 page present
1605 : *
1606 : * If the page is not present but in swap, then the PFN contains an
1607 : * encoding of the swap file number and the page's offset into the
1608 : * swap. Unmapped pages return a null PFN. This allows determining
1609 : * precisely which pages are mapped (or in swap) and comparing mapped
1610 : * pages between processes.
1611 : *
1612 : * Efficient users of this interface will use /proc/pid/maps to
1613 : * determine which areas of memory are actually mapped and llseek to
1614 : * skip over unmapped regions.
1615 : */
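For illustration, a userspace reader might decode one entry of the format documented above as follows. This is a sketch only; note that the PFN field reads back as zero without CAP_SYS_ADMIN, as enforced in pagemap_read() below.

/* Minimal userspace sketch: decode one /proc/self/pagemap entry for an address. */
#include <fcntl.h>
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

int main(int argc, char **argv)
{
	uintptr_t vaddr = (uintptr_t)&argc;	/* any mapped address */
	long page = sysconf(_SC_PAGESIZE);
	int fd = open("/proc/self/pagemap", O_RDONLY);
	uint64_t pme;

	(void)argv;
	pread(fd, &pme, sizeof(pme), (vaddr / page) * sizeof(pme));

	printf("present=%d swap=%d file=%d exclusive=%d soft-dirty=%d pfn=0x%" PRIx64 "\n",
	       (int)(pme >> 63 & 1), (int)(pme >> 62 & 1), (int)(pme >> 61 & 1),
	       (int)(pme >> 56 & 1), (int)(pme >> 55 & 1),
	       pme & ((1ULL << 55) - 1));	/* bits 0-54: PFN (0 without CAP_SYS_ADMIN) */

	close(fd);
	return 0;
}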
1616 0 : static ssize_t pagemap_read(struct file *file, char __user *buf,
1617 : size_t count, loff_t *ppos)
1618 : {
1619 0 : struct mm_struct *mm = file->private_data;
1620 : struct pagemapread pm;
1621 : unsigned long src;
1622 : unsigned long svpfn;
1623 : unsigned long start_vaddr;
1624 : unsigned long end_vaddr;
1625 0 : int ret = 0, copied = 0;
1626 :
1627 0 : if (!mm || !mmget_not_zero(mm))
1628 : goto out;
1629 :
1630 0 : ret = -EINVAL;
1631 : /* file position must be aligned */
1632 0 : if ((*ppos % PM_ENTRY_BYTES) || (count % PM_ENTRY_BYTES))
1633 : goto out_mm;
1634 :
1635 0 : ret = 0;
1636 0 : if (!count)
1637 : goto out_mm;
1638 :
1639 : /* do not disclose physical addresses: attack vector */
1640 0 : pm.show_pfn = file_ns_capable(file, &init_user_ns, CAP_SYS_ADMIN);
1641 :
1642 0 : pm.len = (PAGEMAP_WALK_SIZE >> PAGE_SHIFT);
1643 0 : pm.buffer = kmalloc_array(pm.len, PM_ENTRY_BYTES, GFP_KERNEL);
1644 0 : ret = -ENOMEM;
1645 0 : if (!pm.buffer)
1646 : goto out_mm;
1647 :
1648 0 : src = *ppos;
1649 0 : svpfn = src / PM_ENTRY_BYTES;
1650 0 : end_vaddr = mm->task_size;
1651 :
1652 : /* watch out for wraparound */
1653 0 : start_vaddr = end_vaddr;
1654 0 : if (svpfn <= (ULONG_MAX >> PAGE_SHIFT))
1655 0 : start_vaddr = untagged_addr(svpfn << PAGE_SHIFT);
1656 :
1657 : /* Ensure the address is inside the task */
1658 0 : if (start_vaddr > mm->task_size)
1659 0 : start_vaddr = end_vaddr;
1660 :
1661 : /*
1662 : * The odds are that this will stop walking way
1663 : * before end_vaddr, because the length of the
1664 : * user buffer is tracked in "pm", and the walk
1665 : * will stop when we hit the end of the buffer.
1666 : */
1667 : ret = 0;
1668 0 : while (count && (start_vaddr < end_vaddr)) {
1669 : int len;
1670 : unsigned long end;
1671 :
1672 0 : pm.pos = 0;
1673 0 : end = (start_vaddr + PAGEMAP_WALK_SIZE) & PAGEMAP_WALK_MASK;
1674 : /* overflow ? */
1675 0 : if (end < start_vaddr || end > end_vaddr)
1676 0 : end = end_vaddr;
1677 0 : ret = mmap_read_lock_killable(mm);
1678 0 : if (ret)
1679 : goto out_free;
1680 0 : ret = walk_page_range(mm, start_vaddr, end, &pagemap_ops, &pm);
1681 0 : mmap_read_unlock(mm);
1682 0 : start_vaddr = end;
1683 :
1684 0 : len = min(count, PM_ENTRY_BYTES * pm.pos);
1685 0 : if (copy_to_user(buf, pm.buffer, len)) {
1686 : ret = -EFAULT;
1687 : goto out_free;
1688 : }
1689 0 : copied += len;
1690 0 : buf += len;
1691 0 : count -= len;
1692 : }
1693 0 : *ppos += copied;
1694 0 : if (!ret || ret == PM_END_OF_BUFFER)
1695 0 : ret = copied;
1696 :
1697 : out_free:
1698 0 : kfree(pm.buffer);
1699 : out_mm:
1700 0 : mmput(mm);
1701 : out:
1702 0 : return ret;
1703 : }
1704 :
1705 0 : static int pagemap_open(struct inode *inode, struct file *file)
1706 : {
1707 : struct mm_struct *mm;
1708 :
1709 0 : mm = proc_mem_open(inode, PTRACE_MODE_READ);
1710 0 : if (IS_ERR(mm))
1711 0 : return PTR_ERR(mm);
1712 0 : file->private_data = mm;
1713 0 : return 0;
1714 : }
1715 :
1716 0 : static int pagemap_release(struct inode *inode, struct file *file)
1717 : {
1718 0 : struct mm_struct *mm = file->private_data;
1719 :
1720 0 : if (mm)
1721 : mmdrop(mm);
1722 0 : return 0;
1723 : }
1724 :
1725 : const struct file_operations proc_pagemap_operations = {
1726 : .llseek = mem_lseek, /* borrow this */
1727 : .read = pagemap_read,
1728 : .open = pagemap_open,
1729 : .release = pagemap_release,
1730 : };
1731 : #endif /* CONFIG_PROC_PAGE_MONITOR */
1732 :
1733 : #ifdef CONFIG_NUMA
1734 :
1735 : struct numa_maps {
1736 : unsigned long pages;
1737 : unsigned long anon;
1738 : unsigned long active;
1739 : unsigned long writeback;
1740 : unsigned long mapcount_max;
1741 : unsigned long dirty;
1742 : unsigned long swapcache;
1743 : unsigned long node[MAX_NUMNODES];
1744 : };
1745 :
1746 : struct numa_maps_private {
1747 : struct proc_maps_private proc_maps;
1748 : struct numa_maps md;
1749 : };
1750 :
1751 : static void gather_stats(struct page *page, struct numa_maps *md, int pte_dirty,
1752 : unsigned long nr_pages)
1753 : {
1754 : int count = page_mapcount(page);
1755 :
1756 : md->pages += nr_pages;
1757 : if (pte_dirty || PageDirty(page))
1758 : md->dirty += nr_pages;
1759 :
1760 : if (PageSwapCache(page))
1761 : md->swapcache += nr_pages;
1762 :
1763 : if (PageActive(page) || PageUnevictable(page))
1764 : md->active += nr_pages;
1765 :
1766 : if (PageWriteback(page))
1767 : md->writeback += nr_pages;
1768 :
1769 : if (PageAnon(page))
1770 : md->anon += nr_pages;
1771 :
1772 : if (count > md->mapcount_max)
1773 : md->mapcount_max = count;
1774 :
1775 : md->node[page_to_nid(page)] += nr_pages;
1776 : }
1777 :
1778 : static struct page *can_gather_numa_stats(pte_t pte, struct vm_area_struct *vma,
1779 : unsigned long addr)
1780 : {
1781 : struct page *page;
1782 : int nid;
1783 :
1784 : if (!pte_present(pte))
1785 : return NULL;
1786 :
1787 : page = vm_normal_page(vma, addr, pte);
1788 : if (!page)
1789 : return NULL;
1790 :
1791 : if (PageReserved(page))
1792 : return NULL;
1793 :
1794 : nid = page_to_nid(page);
1795 : if (!node_isset(nid, node_states[N_MEMORY]))
1796 : return NULL;
1797 :
1798 : return page;
1799 : }
1800 :
1801 : #ifdef CONFIG_TRANSPARENT_HUGEPAGE
1802 : static struct page *can_gather_numa_stats_pmd(pmd_t pmd,
1803 : struct vm_area_struct *vma,
1804 : unsigned long addr)
1805 : {
1806 : struct page *page;
1807 : int nid;
1808 :
1809 : if (!pmd_present(pmd))
1810 : return NULL;
1811 :
1812 : page = vm_normal_page_pmd(vma, addr, pmd);
1813 : if (!page)
1814 : return NULL;
1815 :
1816 : if (PageReserved(page))
1817 : return NULL;
1818 :
1819 : nid = page_to_nid(page);
1820 : if (!node_isset(nid, node_states[N_MEMORY]))
1821 : return NULL;
1822 :
1823 : return page;
1824 : }
1825 : #endif
1826 :
1827 : static int gather_pte_stats(pmd_t *pmd, unsigned long addr,
1828 : unsigned long end, struct mm_walk *walk)
1829 : {
1830 : struct numa_maps *md = walk->private;
1831 : struct vm_area_struct *vma = walk->vma;
1832 : spinlock_t *ptl;
1833 : pte_t *orig_pte;
1834 : pte_t *pte;
1835 :
1836 : #ifdef CONFIG_TRANSPARENT_HUGEPAGE
1837 : ptl = pmd_trans_huge_lock(pmd, vma);
1838 : if (ptl) {
1839 : struct page *page;
1840 :
1841 : page = can_gather_numa_stats_pmd(*pmd, vma, addr);
1842 : if (page)
1843 : gather_stats(page, md, pmd_dirty(*pmd),
1844 : HPAGE_PMD_SIZE/PAGE_SIZE);
1845 : spin_unlock(ptl);
1846 : return 0;
1847 : }
1848 :
1849 : if (pmd_trans_unstable(pmd))
1850 : return 0;
1851 : #endif
1852 : orig_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
1853 : do {
1854 : struct page *page = can_gather_numa_stats(*pte, vma, addr);
1855 : if (!page)
1856 : continue;
1857 : gather_stats(page, md, pte_dirty(*pte), 1);
1858 :
1859 : } while (pte++, addr += PAGE_SIZE, addr != end);
1860 : pte_unmap_unlock(orig_pte, ptl);
1861 : cond_resched();
1862 : return 0;
1863 : }
1864 : #ifdef CONFIG_HUGETLB_PAGE
1865 : static int gather_hugetlb_stats(pte_t *pte, unsigned long hmask,
1866 : unsigned long addr, unsigned long end, struct mm_walk *walk)
1867 : {
1868 : pte_t huge_pte = huge_ptep_get(pte);
1869 : struct numa_maps *md;
1870 : struct page *page;
1871 :
1872 : if (!pte_present(huge_pte))
1873 : return 0;
1874 :
1875 : page = pte_page(huge_pte);
1876 : if (!page)
1877 : return 0;
1878 :
1879 : md = walk->private;
1880 : gather_stats(page, md, pte_dirty(huge_pte), 1);
1881 : return 0;
1882 : }
1883 :
1884 : #else
1885 : static int gather_hugetlb_stats(pte_t *pte, unsigned long hmask,
1886 : unsigned long addr, unsigned long end, struct mm_walk *walk)
1887 : {
1888 : return 0;
1889 : }
1890 : #endif
1891 :
1892 : static const struct mm_walk_ops show_numa_ops = {
1893 : .hugetlb_entry = gather_hugetlb_stats,
1894 : .pmd_entry = gather_pte_stats,
1895 : };
1896 :
1897 : /*
1898 : * Display pages allocated per node and memory policy via /proc.
1899 : */
1900 : static int show_numa_map(struct seq_file *m, void *v)
1901 : {
1902 : struct numa_maps_private *numa_priv = m->private;
1903 : struct proc_maps_private *proc_priv = &numa_priv->proc_maps;
1904 : struct vm_area_struct *vma = v;
1905 : struct numa_maps *md = &numa_priv->md;
1906 : struct file *file = vma->vm_file;
1907 : struct mm_struct *mm = vma->vm_mm;
1908 : struct mempolicy *pol;
1909 : char buffer[64];
1910 : int nid;
1911 :
1912 : if (!mm)
1913 : return 0;
1914 :
1915 : /* Ensure we start with an empty set of numa_maps statistics. */
1916 : memset(md, 0, sizeof(*md));
1917 :
1918 : pol = __get_vma_policy(vma, vma->vm_start);
1919 : if (pol) {
1920 : mpol_to_str(buffer, sizeof(buffer), pol);
1921 : mpol_cond_put(pol);
1922 : } else {
1923 : mpol_to_str(buffer, sizeof(buffer), proc_priv->task_mempolicy);
1924 : }
1925 :
1926 : seq_printf(m, "%08lx %s", vma->vm_start, buffer);
1927 :
1928 : if (file) {
1929 : seq_puts(m, " file=");
1930 : seq_file_path(m, file, "\n\t= ");
1931 : } else if (vma->vm_start <= mm->brk && vma->vm_end >= mm->start_brk) {
1932 : seq_puts(m, " heap");
1933 : } else if (is_stack(vma)) {
1934 : seq_puts(m, " stack");
1935 : }
1936 :
1937 : if (is_vm_hugetlb_page(vma))
1938 : seq_puts(m, " huge");
1939 :
1940 : /* mmap_lock is held by m_start */
1941 : walk_page_vma(vma, &show_numa_ops, md);
1942 :
1943 : if (!md->pages)
1944 : goto out;
1945 :
1946 : if (md->anon)
1947 : seq_printf(m, " anon=%lu", md->anon);
1948 :
1949 : if (md->dirty)
1950 : seq_printf(m, " dirty=%lu", md->dirty);
1951 :
1952 : if (md->pages != md->anon && md->pages != md->dirty)
1953 : seq_printf(m, " mapped=%lu", md->pages);
1954 :
1955 : if (md->mapcount_max > 1)
1956 : seq_printf(m, " mapmax=%lu", md->mapcount_max);
1957 :
1958 : if (md->swapcache)
1959 : seq_printf(m, " swapcache=%lu", md->swapcache);
1960 :
1961 : if (md->active < md->pages && !is_vm_hugetlb_page(vma))
1962 : seq_printf(m, " active=%lu", md->active);
1963 :
1964 : if (md->writeback)
1965 : seq_printf(m, " writeback=%lu", md->writeback);
1966 :
1967 : for_each_node_state(nid, N_MEMORY)
1968 : if (md->node[nid])
1969 : seq_printf(m, " N%d=%lu", nid, md->node[nid]);
1970 :
1971 : seq_printf(m, " kernelpagesize_kB=%lu", vma_kernel_pagesize(vma) >> 10);
1972 : out:
1973 : seq_putc(m, '\n');
1974 : return 0;
1975 : }
1976 :
1977 : static const struct seq_operations proc_pid_numa_maps_op = {
1978 : .start = m_start,
1979 : .next = m_next,
1980 : .stop = m_stop,
1981 : .show = show_numa_map,
1982 : };
1983 :
1984 : static int pid_numa_maps_open(struct inode *inode, struct file *file)
1985 : {
1986 : return proc_maps_open(inode, file, &proc_pid_numa_maps_op,
1987 : sizeof(struct numa_maps_private));
1988 : }
1989 :
1990 : const struct file_operations proc_pid_numa_maps_operations = {
1991 : .open = pid_numa_maps_open,
1992 : .read = seq_read,
1993 : .llseek = seq_lseek,
1994 : .release = proc_map_release,
1995 : };
1996 :
1997 : #endif /* CONFIG_NUMA */
|