Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0
2 : /*
3 : * mm/fadvise.c
4 : *
5 : * Copyright (C) 2002, Linus Torvalds
6 : *
7 : * 11Jan2003 Andrew Morton
8 : * Initial version.
9 : */
10 :
11 : #include <linux/kernel.h>
12 : #include <linux/file.h>
13 : #include <linux/fs.h>
14 : #include <linux/mm.h>
15 : #include <linux/pagemap.h>
16 : #include <linux/backing-dev.h>
17 : #include <linux/pagevec.h>
18 : #include <linux/fadvise.h>
19 : #include <linux/writeback.h>
20 : #include <linux/syscalls.h>
21 : #include <linux/swap.h>
22 :
23 : #include <asm/unistd.h>
24 :
25 : #include "internal.h"
26 :
27 : /*
28 : * POSIX_FADV_WILLNEED could set PG_Referenced, and POSIX_FADV_NOREUSE could
29 : * deactivate the pages and clear PG_Referenced.
30 : */
31 :
32 0 : int generic_fadvise(struct file *file, loff_t offset, loff_t len, int advice)
33 : {
34 : struct inode *inode;
35 : struct address_space *mapping;
36 : struct backing_dev_info *bdi;
37 : loff_t endbyte; /* inclusive */
38 : pgoff_t start_index;
39 : pgoff_t end_index;
40 : unsigned long nrpages;
41 :
42 0 : inode = file_inode(file);
43 0 : if (S_ISFIFO(inode->i_mode))
44 : return -ESPIPE;
45 :
46 0 : mapping = file->f_mapping;
47 0 : if (!mapping || len < 0)
48 : return -EINVAL;
49 :
50 0 : bdi = inode_to_bdi(mapping->host);
51 :
52 0 : if (IS_DAX(inode) || (bdi == &noop_backing_dev_info)) {
53 0 : switch (advice) {
54 : case POSIX_FADV_NORMAL:
55 : case POSIX_FADV_RANDOM:
56 : case POSIX_FADV_SEQUENTIAL:
57 : case POSIX_FADV_WILLNEED:
58 : case POSIX_FADV_NOREUSE:
59 : case POSIX_FADV_DONTNEED:
60 : /* no bad return value, but ignore advice */
61 : break;
62 : default:
63 : return -EINVAL;
64 : }
65 0 : return 0;
66 : }
67 :
68 : /*
69 : * Careful about overflows. Len == 0 means "as much as possible". Use
70 : * unsigned math because signed overflows are undefined and UBSan
71 : * complains.
72 : */
73 0 : endbyte = (u64)offset + (u64)len;
74 0 : if (!len || endbyte < len)
75 : endbyte = -1;
76 : else
77 0 : endbyte--; /* inclusive */
78 :
79 0 : switch (advice) {
80 : case POSIX_FADV_NORMAL:
81 0 : file->f_ra.ra_pages = bdi->ra_pages;
82 0 : spin_lock(&file->f_lock);
83 0 : file->f_mode &= ~FMODE_RANDOM;
84 0 : spin_unlock(&file->f_lock);
85 : break;
86 : case POSIX_FADV_RANDOM:
87 0 : spin_lock(&file->f_lock);
88 0 : file->f_mode |= FMODE_RANDOM;
89 0 : spin_unlock(&file->f_lock);
90 : break;
91 : case POSIX_FADV_SEQUENTIAL:
92 0 : file->f_ra.ra_pages = bdi->ra_pages * 2;
93 0 : spin_lock(&file->f_lock);
94 0 : file->f_mode &= ~FMODE_RANDOM;
95 0 : spin_unlock(&file->f_lock);
96 : break;
97 : case POSIX_FADV_WILLNEED:
98 : /* First and last PARTIAL page! */
99 0 : start_index = offset >> PAGE_SHIFT;
100 0 : end_index = endbyte >> PAGE_SHIFT;
101 :
102 : /* Careful about overflow on the "+1" */
103 0 : nrpages = end_index - start_index + 1;
104 0 : if (!nrpages)
105 0 : nrpages = ~0UL;
106 :
107 : force_page_cache_readahead(mapping, file, start_index, nrpages);
108 : break;
109 : case POSIX_FADV_NOREUSE:
110 : break;
111 : case POSIX_FADV_DONTNEED:
112 0 : __filemap_fdatawrite_range(mapping, offset, endbyte,
113 : WB_SYNC_NONE);
114 :
115 : /*
116 : * First and last FULL page! Partial pages are deliberately
117 : * preserved on the expectation that it is better to preserve
118 : * needed memory than to discard unneeded memory.
119 : */
120 0 : start_index = (offset+(PAGE_SIZE-1)) >> PAGE_SHIFT;
121 0 : end_index = (endbyte >> PAGE_SHIFT);
122 : /*
123 : * The page at end_index will be inclusively discarded according
124 : * by invalidate_mapping_pages(), so subtracting 1 from
125 : * end_index means we will skip the last page. But if endbyte
126 : * is page aligned or is at the end of file, we should not skip
127 : * that page - discarding the last page is safe enough.
128 : */
129 0 : if ((endbyte & ~PAGE_MASK) != ~PAGE_MASK &&
130 0 : endbyte != inode->i_size - 1) {
131 : /* First page is tricky as 0 - 1 = -1, but pgoff_t
132 : * is unsigned, so the end_index >= start_index
133 : * check below would be true and we'll discard the whole
134 : * file cache which is not what was asked.
135 : */
136 0 : if (end_index == 0)
137 : break;
138 :
139 0 : end_index--;
140 : }
141 :
142 0 : if (end_index >= start_index) {
143 0 : unsigned long nr_pagevec = 0;
144 :
145 : /*
146 : * It's common to FADV_DONTNEED right after
147 : * the read or write that instantiates the
148 : * pages, in which case there will be some
149 : * sitting on the local LRU cache. Try to
150 : * avoid the expensive remote drain and the
151 : * second cache tree walk below by flushing
152 : * them out right away.
153 : */
154 0 : lru_add_drain();
155 :
156 0 : invalidate_mapping_pagevec(mapping,
157 : start_index, end_index,
158 : &nr_pagevec);
159 :
160 : /*
161 : * If fewer pages were invalidated than expected then
162 : * it is possible that some of the pages were on
163 : * a per-cpu pagevec for a remote CPU. Drain all
164 : * pagevecs and try again.
165 : */
166 0 : if (nr_pagevec) {
167 0 : lru_add_drain_all();
168 0 : invalidate_mapping_pages(mapping, start_index,
169 : end_index);
170 : }
171 : }
172 : break;
173 : default:
174 : return -EINVAL;
175 : }
176 : return 0;
177 : }
178 : EXPORT_SYMBOL(generic_fadvise);
179 :
180 0 : int vfs_fadvise(struct file *file, loff_t offset, loff_t len, int advice)
181 : {
182 0 : if (file->f_op->fadvise)
183 0 : return file->f_op->fadvise(file, offset, len, advice);
184 :
185 0 : return generic_fadvise(file, offset, len, advice);
186 : }
187 : EXPORT_SYMBOL(vfs_fadvise);
188 :
189 : #ifdef CONFIG_ADVISE_SYSCALLS
190 :
191 0 : int ksys_fadvise64_64(int fd, loff_t offset, loff_t len, int advice)
192 : {
193 0 : struct fd f = fdget(fd);
194 : int ret;
195 :
196 0 : if (!f.file)
197 : return -EBADF;
198 :
199 0 : ret = vfs_fadvise(f.file, offset, len, advice);
200 :
201 0 : fdput(f);
202 : return ret;
203 : }
204 :
205 0 : SYSCALL_DEFINE4(fadvise64_64, int, fd, loff_t, offset, loff_t, len, int, advice)
206 : {
207 0 : return ksys_fadvise64_64(fd, offset, len, advice);
208 : }
209 :
210 : #ifdef __ARCH_WANT_SYS_FADVISE64
211 :
212 0 : SYSCALL_DEFINE4(fadvise64, int, fd, loff_t, offset, size_t, len, int, advice)
213 : {
214 0 : return ksys_fadvise64_64(fd, offset, len, advice);
215 : }
216 :
217 : #endif
218 : #endif
|