#include <linux/gfp.h>
#include <linux/highmem.h>
#include <linux/kernel.h>
#include <linux/mmdebug.h>
#include <linux/mm_types.h>
#include <linux/mm_inline.h>
#include <linux/pagemap.h>
#include <linux/rcupdate.h>
#include <linux/smp.h>
#include <linux/swap.h>

#include <asm/pgalloc.h>
#include <asm/tlb.h>

#ifndef CONFIG_MMU_GATHER_NO_GATHER

static bool tlb_next_batch(struct mmu_gather *tlb)
{
	struct mmu_gather_batch *batch;

	batch = tlb->active;
	if (batch->next) {
		tlb->active = batch->next;
		return true;
	}

	if (tlb->batch_count == MAX_GATHER_BATCH_COUNT)
		return false;

	batch = (void *)__get_free_pages(GFP_NOWAIT | __GFP_NOWARN, 0);
	if (!batch)
		return false;

	tlb->batch_count++;
	batch->next = NULL;
	batch->nr = 0;
	batch->max = MAX_GATHER_BATCH;

	tlb->active->next = batch;
	tlb->active = batch;

	return true;
}

static void tlb_batch_pages_flush(struct mmu_gather *tlb)
{
	struct mmu_gather_batch *batch;

	for (batch = &tlb->local; batch && batch->nr; batch = batch->next) {
		free_pages_and_swap_cache(batch->pages, batch->nr);
		batch->nr = 0;
	}
	tlb->active = &tlb->local;
}

static void tlb_batch_list_free(struct mmu_gather *tlb)
{
	struct mmu_gather_batch *batch, *next;

	for (batch = tlb->local.next; batch; batch = next) {
		next = batch->next;
		free_pages((unsigned long)batch, 0);
	}
	tlb->local.next = NULL;
}

bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, int page_size)
{
	struct mmu_gather_batch *batch;

	VM_BUG_ON(!tlb->end);

#ifdef CONFIG_MMU_GATHER_PAGE_SIZE
	VM_WARN_ON(tlb->page_size != page_size);
#endif

	batch = tlb->active;
	/*
	 * Add the page and check if we are full. If so
	 * force a flush.
	 */
	batch->pages[batch->nr++] = page;
	if (batch->nr == batch->max) {
		if (!tlb_next_batch(tlb))
			return true;
		batch = tlb->active;
	}
	VM_BUG_ON_PAGE(batch->nr > batch->max, page);

	return false;
}

#endif /* MMU_GATHER_NO_GATHER */
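/*
 * Caller-side sketch (illustrative only, not code from this file): the
 * "return true when the batch is full" contract above is what lets a zap
 * loop such as zap_pte_range() in mm/memory.c stop gathering and flush
 * before it continues, roughly:
 *
 *	if (__tlb_remove_page(tlb, page)) {
 *		force_flush = 1;
 *		break;
 *	}
 *	...
 *	if (force_flush)
 *		tlb_flush_mmu(tlb);
 *
 * The exact caller logic lives elsewhere; the snippet is only a sketch of
 * the contract implied by __tlb_remove_page_size().
 */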
#ifdef CONFIG_MMU_GATHER_TABLE_FREE

static void __tlb_remove_table_free(struct mmu_table_batch *batch)
{
	int i;

	for (i = 0; i < batch->nr; i++)
		__tlb_remove_table(batch->tables[i]);

	free_page((unsigned long)batch);
}

#ifdef CONFIG_MMU_GATHER_RCU_TABLE_FREE

/*
 * Semi RCU freeing of the page directories.
 *
 * This is needed by some architectures to implement software pagetable walkers.
 *
 * gup_fast() and other software pagetable walkers do a lockless page-table
 * walk and therefore need some synchronization with the freeing of the page
 * directories. The chosen means to accomplish that is by disabling IRQs over
 * the walk.
 *
 * Architectures that use IPIs to flush TLBs will then automagically DTRT,
 * since we unlink the page, flush TLBs, then free the page. Because the
 * disabling of IRQs delays the completion of the TLB flush, we can never
 * observe an already freed page.
 *
 * Architectures that do not have this property (PPC) need to delay the
 * freeing by some other means; this batching is that means.
 *
 * What we do is batch the freed directory pages (tables) and RCU free them.
 * We use the sched RCU variant, as that guarantees that IRQ/preempt disabling
 * holds off grace periods.
 *
 * However, in order to batch these pages we need to allocate storage; this
 * allocation is deep inside the MM code and can thus easily fail on memory
 * pressure. To guarantee progress we fall back to single table freeing, see
 * the implementation of tlb_remove_table_one().
 */

static void tlb_remove_table_smp_sync(void *arg)
{
	/* Simply deliver the interrupt */
}

static void tlb_remove_table_sync_one(void)
{
	/*
	 * This isn't an RCU grace period and hence the page-tables cannot be
	 * assumed to be actually RCU-freed.
	 *
	 * It is however sufficient for software page-table walkers that rely
	 * on IRQ disabling.
	 */
	smp_call_function(tlb_remove_table_smp_sync, NULL, 1);
}

static void tlb_remove_table_rcu(struct rcu_head *head)
{
	__tlb_remove_table_free(container_of(head, struct mmu_table_batch, rcu));
}

static void tlb_remove_table_free(struct mmu_table_batch *batch)
{
	call_rcu(&batch->rcu, tlb_remove_table_rcu);
}

#else /* !CONFIG_MMU_GATHER_RCU_TABLE_FREE */

static void tlb_remove_table_sync_one(void) { }

static void tlb_remove_table_free(struct mmu_table_batch *batch)
{
	__tlb_remove_table_free(batch);
}

#endif /* CONFIG_MMU_GATHER_RCU_TABLE_FREE */

/*
 * If we want tlb_remove_table() to imply TLB invalidates.
 */
static inline void tlb_table_invalidate(struct mmu_gather *tlb)
{
	if (tlb_needs_table_invalidate()) {
		/*
		 * Invalidate page-table caches used by hardware walkers. Then
		 * we still need to RCU-sched wait while freeing the pages
		 * because software walkers can still be in-flight.
		 */
		tlb_flush_mmu_tlbonly(tlb);
	}
}

static void tlb_remove_table_one(void *table)
{
	tlb_remove_table_sync_one();
	__tlb_remove_table(table);
}

static void tlb_table_flush(struct mmu_gather *tlb)
{
	struct mmu_table_batch **batch = &tlb->batch;

	if (*batch) {
		tlb_table_invalidate(tlb);
		tlb_remove_table_free(*batch);
		*batch = NULL;
	}
}

void tlb_remove_table(struct mmu_gather *tlb, void *table)
{
	struct mmu_table_batch **batch = &tlb->batch;

	if (*batch == NULL) {
		*batch = (struct mmu_table_batch *)__get_free_page(GFP_NOWAIT | __GFP_NOWARN);
		if (*batch == NULL) {
			tlb_table_invalidate(tlb);
			tlb_remove_table_one(table);
			return;
		}
		(*batch)->nr = 0;
	}

	(*batch)->tables[(*batch)->nr++] = table;
	if ((*batch)->nr == MAX_TABLE_BATCH)
		tlb_table_flush(tlb);
}

static inline void tlb_table_init(struct mmu_gather *tlb)
{
	tlb->batch = NULL;
}

#else /* !CONFIG_MMU_GATHER_TABLE_FREE */

static inline void tlb_table_flush(struct mmu_gather *tlb) { }
static inline void tlb_table_init(struct mmu_gather *tlb) { }

#endif /* CONFIG_MMU_GATHER_TABLE_FREE */
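/*
 * Illustrative flow (a sketch, not code from this file): architectures that
 * select CONFIG_MMU_GATHER_TABLE_FREE are expected to funnel their
 * page-table freeing helpers into tlb_remove_table(), roughly:
 *
 *	pXX_free_tlb(tlb, table, addr)
 *		-> tlb_remove_table(tlb, table)
 *			batches the table for deferred freeing, or, if the
 *			batch page cannot be allocated, falls back to
 *			tlb_remove_table_one(), which synchronizes against
 *			lockless walkers before calling __tlb_remove_table().
 *
 * The helper name on the first line is per-architecture and only indicative.
 */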
static void tlb_flush_mmu_free(struct mmu_gather *tlb)
{
	tlb_table_flush(tlb);
#ifndef CONFIG_MMU_GATHER_NO_GATHER
	tlb_batch_pages_flush(tlb);
#endif
}

void tlb_flush_mmu(struct mmu_gather *tlb)
{
	tlb_flush_mmu_tlbonly(tlb);
	tlb_flush_mmu_free(tlb);
}

static void __tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
			     bool fullmm)
{
	tlb->mm = mm;
	tlb->fullmm = fullmm;

#ifndef CONFIG_MMU_GATHER_NO_GATHER
	tlb->need_flush_all = 0;
	tlb->local.next = NULL;
	tlb->local.nr = 0;
	tlb->local.max = ARRAY_SIZE(tlb->__pages);
	tlb->active = &tlb->local;
	tlb->batch_count = 0;
#endif

	tlb_table_init(tlb);
#ifdef CONFIG_MMU_GATHER_PAGE_SIZE
	tlb->page_size = 0;
#endif

	__tlb_reset_range(tlb);
	inc_tlb_flush_pending(tlb->mm);
}

/**
 * tlb_gather_mmu - initialize an mmu_gather structure for page-table tear-down
 * @tlb: the mmu_gather structure to initialize
 * @mm: the mm_struct of the target address space
 *
 * Called to initialize an (on-stack) mmu_gather structure for page-table
 * tear-down from @mm.
 */
void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm)
{
	__tlb_gather_mmu(tlb, mm, false);
}
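/*
 * Illustrative (hedged) usage: a range-unmap caller such as unmap_region()
 * in mm/mmap.c is expected to drive this API roughly as follows; the helper
 * names are those of typical callers, not definitions from this file:
 *
 *	struct mmu_gather tlb;
 *
 *	tlb_gather_mmu(&tlb, mm);
 *	unmap_vmas(&tlb, vma, start, end);	gathers the pages
 *	free_pgtables(&tlb, ...);		gathers freed page tables
 *	tlb_finish_mmu(&tlb);			flushes TLBs, frees everything
 */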
/**
 * tlb_gather_mmu_fullmm - initialize an mmu_gather structure for page-table tear-down
 * @tlb: the mmu_gather structure to initialize
 * @mm: the mm_struct of the target address space
 *
 * In this case, @mm is without users and we're going to destroy the
 * full address space (exit/execve).
 *
 * Called to initialize an (on-stack) mmu_gather structure for page-table
 * tear-down from @mm.
 */
void tlb_gather_mmu_fullmm(struct mmu_gather *tlb, struct mm_struct *mm)
{
	__tlb_gather_mmu(tlb, mm, true);
}

/**
 * tlb_finish_mmu - finish an mmu_gather structure
 * @tlb: the mmu_gather structure to finish
 *
 * Called at the end of the shootdown operation to free up any resources that
 * were required.
 */
void tlb_finish_mmu(struct mmu_gather *tlb)
{
	/*
	 * If parallel threads are doing PTE changes on the same range under a
	 * non-exclusive lock (e.g., mmap_lock read-side) but defer the TLB
	 * flush by batching, one thread may end up seeing inconsistent PTEs
	 * and be left with stale TLB entries. So flush the TLB forcefully if
	 * we detect parallel PTE batching threads.
	 *
	 * However, some syscalls, e.g. munmap(), may free page tables; this
	 * needs to force-flush everything in the given range. Otherwise we
	 * may be left with stale TLB entries on architectures, e.g. aarch64,
	 * whose TLB invalidations can target a specific page-table level.
	 */
	if (mm_tlb_flush_nested(tlb->mm)) {
		/*
		 * aarch64 yields better performance with fullmm by avoiding
		 * multiple CPUs spamming TLBI messages at the same time.
		 *
		 * On x86, non-fullmm doesn't yield a significant difference
		 * against fullmm.
		 */
		tlb->fullmm = 1;
		__tlb_reset_range(tlb);
		tlb->freed_tables = 1;
	}

	tlb_flush_mmu(tlb);

#ifndef CONFIG_MMU_GATHER_NO_GATHER
	tlb_batch_list_free(tlb);
#endif
	dec_tlb_flush_pending(tlb->mm);
}
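/*
 * Illustrative (hedged) fullmm teardown, in the style of exit_mmap(); the
 * surrounding helper names are indicative of typical callers, not
 * definitions from this file:
 *
 *	struct mmu_gather tlb;
 *
 *	tlb_gather_mmu_fullmm(&tlb, mm);	the whole mm goes away
 *	unmap_vmas(&tlb, vma, 0, -1);
 *	free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, USER_PGTABLES_CEILING);
 *	tlb_finish_mmu(&tlb);
 */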