11da177e4SLinus Torvalds /* 21da177e4SLinus Torvalds * mm/truncate.c - code for taking down pages from address_spaces 31da177e4SLinus Torvalds * 41da177e4SLinus Torvalds * Copyright (C) 2002, Linus Torvalds 51da177e4SLinus Torvalds * 61da177e4SLinus Torvalds * 10Sep2002 akpm@zip.com.au 71da177e4SLinus Torvalds * Initial version. 81da177e4SLinus Torvalds */ 91da177e4SLinus Torvalds 101da177e4SLinus Torvalds #include <linux/kernel.h> 111da177e4SLinus Torvalds #include <linux/mm.h> 121da177e4SLinus Torvalds #include <linux/module.h> 131da177e4SLinus Torvalds #include <linux/pagemap.h> 141da177e4SLinus Torvalds #include <linux/pagevec.h> 151da177e4SLinus Torvalds #include <linux/buffer_head.h> /* grr. try_to_release_page, 161da177e4SLinus Torvalds block_invalidatepage */ 171da177e4SLinus Torvalds 181da177e4SLinus Torvalds 191da177e4SLinus Torvalds static int do_invalidatepage(struct page *page, unsigned long offset) 201da177e4SLinus Torvalds { 211da177e4SLinus Torvalds int (*invalidatepage)(struct page *, unsigned long); 221da177e4SLinus Torvalds invalidatepage = page->mapping->a_ops->invalidatepage; 231da177e4SLinus Torvalds if (invalidatepage == NULL) 241da177e4SLinus Torvalds invalidatepage = block_invalidatepage; 251da177e4SLinus Torvalds return (*invalidatepage)(page, offset); 261da177e4SLinus Torvalds } 271da177e4SLinus Torvalds 281da177e4SLinus Torvalds static inline void truncate_partial_page(struct page *page, unsigned partial) 291da177e4SLinus Torvalds { 301da177e4SLinus Torvalds memclear_highpage_flush(page, partial, PAGE_CACHE_SIZE-partial); 311da177e4SLinus Torvalds if (PagePrivate(page)) 321da177e4SLinus Torvalds do_invalidatepage(page, partial); 331da177e4SLinus Torvalds } 341da177e4SLinus Torvalds 351da177e4SLinus Torvalds /* 361da177e4SLinus Torvalds * If truncate cannot remove the fs-private metadata from the page, the page 371da177e4SLinus Torvalds * becomes anonymous. It will be left on the LRU and may even be mapped into 381da177e4SLinus Torvalds * user pagetables if we're racing with filemap_nopage(). 391da177e4SLinus Torvalds * 401da177e4SLinus Torvalds * We need to bale out if page->mapping is no longer equal to the original 411da177e4SLinus Torvalds * mapping. This happens a) when the VM reclaimed the page while we waited on 421da177e4SLinus Torvalds * its lock, b) when a concurrent invalidate_inode_pages got there first and 431da177e4SLinus Torvalds * c) when tmpfs swizzles a page between a tmpfs inode and swapper_space. 441da177e4SLinus Torvalds */ 451da177e4SLinus Torvalds static void 461da177e4SLinus Torvalds truncate_complete_page(struct address_space *mapping, struct page *page) 471da177e4SLinus Torvalds { 481da177e4SLinus Torvalds if (page->mapping != mapping) 491da177e4SLinus Torvalds return; 501da177e4SLinus Torvalds 511da177e4SLinus Torvalds if (PagePrivate(page)) 521da177e4SLinus Torvalds do_invalidatepage(page, 0); 531da177e4SLinus Torvalds 541da177e4SLinus Torvalds clear_page_dirty(page); 551da177e4SLinus Torvalds ClearPageUptodate(page); 561da177e4SLinus Torvalds ClearPageMappedToDisk(page); 571da177e4SLinus Torvalds remove_from_page_cache(page); 581da177e4SLinus Torvalds page_cache_release(page); /* pagecache ref */ 591da177e4SLinus Torvalds } 601da177e4SLinus Torvalds 611da177e4SLinus Torvalds /* 621da177e4SLinus Torvalds * This is for invalidate_inode_pages(). That function can be called at 631da177e4SLinus Torvalds * any time, and is not supposed to throw away dirty pages. But pages can 641da177e4SLinus Torvalds * be marked dirty at any time too. So we re-check the dirtiness inside 651da177e4SLinus Torvalds * ->tree_lock. That provides exclusion against the __set_page_dirty 661da177e4SLinus Torvalds * functions. 671da177e4SLinus Torvalds * 681da177e4SLinus Torvalds * Returns non-zero if the page was successfully invalidated. 691da177e4SLinus Torvalds */ 701da177e4SLinus Torvalds static int 711da177e4SLinus Torvalds invalidate_complete_page(struct address_space *mapping, struct page *page) 721da177e4SLinus Torvalds { 731da177e4SLinus Torvalds if (page->mapping != mapping) 741da177e4SLinus Torvalds return 0; 751da177e4SLinus Torvalds 761da177e4SLinus Torvalds if (PagePrivate(page) && !try_to_release_page(page, 0)) 771da177e4SLinus Torvalds return 0; 781da177e4SLinus Torvalds 791da177e4SLinus Torvalds write_lock_irq(&mapping->tree_lock); 801da177e4SLinus Torvalds if (PageDirty(page)) { 811da177e4SLinus Torvalds write_unlock_irq(&mapping->tree_lock); 821da177e4SLinus Torvalds return 0; 831da177e4SLinus Torvalds } 841da177e4SLinus Torvalds 851da177e4SLinus Torvalds BUG_ON(PagePrivate(page)); 861da177e4SLinus Torvalds __remove_from_page_cache(page); 871da177e4SLinus Torvalds write_unlock_irq(&mapping->tree_lock); 881da177e4SLinus Torvalds ClearPageUptodate(page); 891da177e4SLinus Torvalds page_cache_release(page); /* pagecache ref */ 901da177e4SLinus Torvalds return 1; 911da177e4SLinus Torvalds } 921da177e4SLinus Torvalds 931da177e4SLinus Torvalds /** 941da177e4SLinus Torvalds * truncate_inode_pages - truncate *all* the pages from an offset 951da177e4SLinus Torvalds * @mapping: mapping to truncate 961da177e4SLinus Torvalds * @lstart: offset from which to truncate 971da177e4SLinus Torvalds * 981da177e4SLinus Torvalds * Truncate the page cache at a set offset, removing the pages that are beyond 991da177e4SLinus Torvalds * that offset (and zeroing out partial pages). 1001da177e4SLinus Torvalds * 1011da177e4SLinus Torvalds * Truncate takes two passes - the first pass is nonblocking. It will not 1021da177e4SLinus Torvalds * block on page locks and it will not block on writeback. The second pass 1031da177e4SLinus Torvalds * will wait. This is to prevent as much IO as possible in the affected region. 1041da177e4SLinus Torvalds * The first pass will remove most pages, so the search cost of the second pass 1051da177e4SLinus Torvalds * is low. 1061da177e4SLinus Torvalds * 1071da177e4SLinus Torvalds * When looking at page->index outside the page lock we need to be careful to 1081da177e4SLinus Torvalds * copy it into a local to avoid races (it could change at any time). 1091da177e4SLinus Torvalds * 1101da177e4SLinus Torvalds * We pass down the cache-hot hint to the page freeing code. Even if the 1111da177e4SLinus Torvalds * mapping is large, it is probably the case that the final pages are the most 1121da177e4SLinus Torvalds * recently touched, and freeing happens in ascending file offset order. 1131da177e4SLinus Torvalds * 1141da177e4SLinus Torvalds * Called under (and serialised by) inode->i_sem. 1151da177e4SLinus Torvalds */ 1161da177e4SLinus Torvalds void truncate_inode_pages(struct address_space *mapping, loff_t lstart) 1171da177e4SLinus Torvalds { 1181da177e4SLinus Torvalds const pgoff_t start = (lstart + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT; 1191da177e4SLinus Torvalds const unsigned partial = lstart & (PAGE_CACHE_SIZE - 1); 1201da177e4SLinus Torvalds struct pagevec pvec; 1211da177e4SLinus Torvalds pgoff_t next; 1221da177e4SLinus Torvalds int i; 1231da177e4SLinus Torvalds 1241da177e4SLinus Torvalds if (mapping->nrpages == 0) 1251da177e4SLinus Torvalds return; 1261da177e4SLinus Torvalds 1271da177e4SLinus Torvalds pagevec_init(&pvec, 0); 1281da177e4SLinus Torvalds next = start; 1291da177e4SLinus Torvalds while (pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) { 1301da177e4SLinus Torvalds for (i = 0; i < pagevec_count(&pvec); i++) { 1311da177e4SLinus Torvalds struct page *page = pvec.pages[i]; 1321da177e4SLinus Torvalds pgoff_t page_index = page->index; 1331da177e4SLinus Torvalds 1341da177e4SLinus Torvalds if (page_index > next) 1351da177e4SLinus Torvalds next = page_index; 1361da177e4SLinus Torvalds next++; 1371da177e4SLinus Torvalds if (TestSetPageLocked(page)) 1381da177e4SLinus Torvalds continue; 1391da177e4SLinus Torvalds if (PageWriteback(page)) { 1401da177e4SLinus Torvalds unlock_page(page); 1411da177e4SLinus Torvalds continue; 1421da177e4SLinus Torvalds } 1431da177e4SLinus Torvalds truncate_complete_page(mapping, page); 1441da177e4SLinus Torvalds unlock_page(page); 1451da177e4SLinus Torvalds } 1461da177e4SLinus Torvalds pagevec_release(&pvec); 1471da177e4SLinus Torvalds cond_resched(); 1481da177e4SLinus Torvalds } 1491da177e4SLinus Torvalds 1501da177e4SLinus Torvalds if (partial) { 1511da177e4SLinus Torvalds struct page *page = find_lock_page(mapping, start - 1); 1521da177e4SLinus Torvalds if (page) { 1531da177e4SLinus Torvalds wait_on_page_writeback(page); 1541da177e4SLinus Torvalds truncate_partial_page(page, partial); 1551da177e4SLinus Torvalds unlock_page(page); 1561da177e4SLinus Torvalds page_cache_release(page); 1571da177e4SLinus Torvalds } 1581da177e4SLinus Torvalds } 1591da177e4SLinus Torvalds 1601da177e4SLinus Torvalds next = start; 1611da177e4SLinus Torvalds for ( ; ; ) { 1621da177e4SLinus Torvalds cond_resched(); 1631da177e4SLinus Torvalds if (!pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) { 1641da177e4SLinus Torvalds if (next == start) 1651da177e4SLinus Torvalds break; 1661da177e4SLinus Torvalds next = start; 1671da177e4SLinus Torvalds continue; 1681da177e4SLinus Torvalds } 1691da177e4SLinus Torvalds for (i = 0; i < pagevec_count(&pvec); i++) { 1701da177e4SLinus Torvalds struct page *page = pvec.pages[i]; 1711da177e4SLinus Torvalds 1721da177e4SLinus Torvalds lock_page(page); 1731da177e4SLinus Torvalds wait_on_page_writeback(page); 1741da177e4SLinus Torvalds if (page->index > next) 1751da177e4SLinus Torvalds next = page->index; 1761da177e4SLinus Torvalds next++; 1771da177e4SLinus Torvalds truncate_complete_page(mapping, page); 1781da177e4SLinus Torvalds unlock_page(page); 1791da177e4SLinus Torvalds } 1801da177e4SLinus Torvalds pagevec_release(&pvec); 1811da177e4SLinus Torvalds } 1821da177e4SLinus Torvalds } 1831da177e4SLinus Torvalds 1841da177e4SLinus Torvalds EXPORT_SYMBOL(truncate_inode_pages); 1851da177e4SLinus Torvalds 1861da177e4SLinus Torvalds /** 1871da177e4SLinus Torvalds * invalidate_mapping_pages - Invalidate all the unlocked pages of one inode 1881da177e4SLinus Torvalds * @mapping: the address_space which holds the pages to invalidate 1891da177e4SLinus Torvalds * @start: the offset 'from' which to invalidate 1901da177e4SLinus Torvalds * @end: the offset 'to' which to invalidate (inclusive) 1911da177e4SLinus Torvalds * 1921da177e4SLinus Torvalds * This function only removes the unlocked pages, if you want to 1931da177e4SLinus Torvalds * remove all the pages of one inode, you must call truncate_inode_pages. 1941da177e4SLinus Torvalds * 1951da177e4SLinus Torvalds * invalidate_mapping_pages() will not block on IO activity. It will not 1961da177e4SLinus Torvalds * invalidate pages which are dirty, locked, under writeback or mapped into 1971da177e4SLinus Torvalds * pagetables. 1981da177e4SLinus Torvalds */ 1991da177e4SLinus Torvalds unsigned long invalidate_mapping_pages(struct address_space *mapping, 2001da177e4SLinus Torvalds pgoff_t start, pgoff_t end) 2011da177e4SLinus Torvalds { 2021da177e4SLinus Torvalds struct pagevec pvec; 2031da177e4SLinus Torvalds pgoff_t next = start; 2041da177e4SLinus Torvalds unsigned long ret = 0; 2051da177e4SLinus Torvalds int i; 2061da177e4SLinus Torvalds 2071da177e4SLinus Torvalds pagevec_init(&pvec, 0); 2081da177e4SLinus Torvalds while (next <= end && 2091da177e4SLinus Torvalds pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) { 2101da177e4SLinus Torvalds for (i = 0; i < pagevec_count(&pvec); i++) { 2111da177e4SLinus Torvalds struct page *page = pvec.pages[i]; 2121da177e4SLinus Torvalds 2131da177e4SLinus Torvalds if (TestSetPageLocked(page)) { 2141da177e4SLinus Torvalds next++; 2151da177e4SLinus Torvalds continue; 2161da177e4SLinus Torvalds } 2171da177e4SLinus Torvalds if (page->index > next) 2181da177e4SLinus Torvalds next = page->index; 2191da177e4SLinus Torvalds next++; 2201da177e4SLinus Torvalds if (PageDirty(page) || PageWriteback(page)) 2211da177e4SLinus Torvalds goto unlock; 2221da177e4SLinus Torvalds if (page_mapped(page)) 2231da177e4SLinus Torvalds goto unlock; 2241da177e4SLinus Torvalds ret += invalidate_complete_page(mapping, page); 2251da177e4SLinus Torvalds unlock: 2261da177e4SLinus Torvalds unlock_page(page); 2271da177e4SLinus Torvalds if (next > end) 2281da177e4SLinus Torvalds break; 2291da177e4SLinus Torvalds } 2301da177e4SLinus Torvalds pagevec_release(&pvec); 2311da177e4SLinus Torvalds cond_resched(); 2321da177e4SLinus Torvalds } 2331da177e4SLinus Torvalds return ret; 2341da177e4SLinus Torvalds } 2351da177e4SLinus Torvalds 2361da177e4SLinus Torvalds unsigned long invalidate_inode_pages(struct address_space *mapping) 2371da177e4SLinus Torvalds { 2381da177e4SLinus Torvalds return invalidate_mapping_pages(mapping, 0, ~0UL); 2391da177e4SLinus Torvalds } 2401da177e4SLinus Torvalds 2411da177e4SLinus Torvalds EXPORT_SYMBOL(invalidate_inode_pages); 2421da177e4SLinus Torvalds 2431da177e4SLinus Torvalds /** 2441da177e4SLinus Torvalds * invalidate_inode_pages2_range - remove range of pages from an address_space 245*67be2dd1SMartin Waitz * @mapping: the address_space 2461da177e4SLinus Torvalds * @start: the page offset 'from' which to invalidate 2471da177e4SLinus Torvalds * @end: the page offset 'to' which to invalidate (inclusive) 2481da177e4SLinus Torvalds * 2491da177e4SLinus Torvalds * Any pages which are found to be mapped into pagetables are unmapped prior to 2501da177e4SLinus Torvalds * invalidation. 2511da177e4SLinus Torvalds * 2521da177e4SLinus Torvalds * Returns -EIO if any pages could not be invalidated. 2531da177e4SLinus Torvalds */ 2541da177e4SLinus Torvalds int invalidate_inode_pages2_range(struct address_space *mapping, 2551da177e4SLinus Torvalds pgoff_t start, pgoff_t end) 2561da177e4SLinus Torvalds { 2571da177e4SLinus Torvalds struct pagevec pvec; 2581da177e4SLinus Torvalds pgoff_t next; 2591da177e4SLinus Torvalds int i; 2601da177e4SLinus Torvalds int ret = 0; 2611da177e4SLinus Torvalds int did_range_unmap = 0; 2621da177e4SLinus Torvalds int wrapped = 0; 2631da177e4SLinus Torvalds 2641da177e4SLinus Torvalds pagevec_init(&pvec, 0); 2651da177e4SLinus Torvalds next = start; 2661da177e4SLinus Torvalds while (next <= end && !ret && !wrapped && 2671da177e4SLinus Torvalds pagevec_lookup(&pvec, mapping, next, 2681da177e4SLinus Torvalds min(end - next, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) { 2691da177e4SLinus Torvalds for (i = 0; !ret && i < pagevec_count(&pvec); i++) { 2701da177e4SLinus Torvalds struct page *page = pvec.pages[i]; 2711da177e4SLinus Torvalds pgoff_t page_index; 2721da177e4SLinus Torvalds int was_dirty; 2731da177e4SLinus Torvalds 2741da177e4SLinus Torvalds lock_page(page); 2751da177e4SLinus Torvalds if (page->mapping != mapping) { 2761da177e4SLinus Torvalds unlock_page(page); 2771da177e4SLinus Torvalds continue; 2781da177e4SLinus Torvalds } 2791da177e4SLinus Torvalds page_index = page->index; 2801da177e4SLinus Torvalds next = page_index + 1; 2811da177e4SLinus Torvalds if (next == 0) 2821da177e4SLinus Torvalds wrapped = 1; 2831da177e4SLinus Torvalds if (page_index > end) { 2841da177e4SLinus Torvalds unlock_page(page); 2851da177e4SLinus Torvalds break; 2861da177e4SLinus Torvalds } 2871da177e4SLinus Torvalds wait_on_page_writeback(page); 2881da177e4SLinus Torvalds while (page_mapped(page)) { 2891da177e4SLinus Torvalds if (!did_range_unmap) { 2901da177e4SLinus Torvalds /* 2911da177e4SLinus Torvalds * Zap the rest of the file in one hit. 2921da177e4SLinus Torvalds */ 2931da177e4SLinus Torvalds unmap_mapping_range(mapping, 2941da177e4SLinus Torvalds page_index << PAGE_CACHE_SHIFT, 2951da177e4SLinus Torvalds (end - page_index + 1) 2961da177e4SLinus Torvalds << PAGE_CACHE_SHIFT, 2971da177e4SLinus Torvalds 0); 2981da177e4SLinus Torvalds did_range_unmap = 1; 2991da177e4SLinus Torvalds } else { 3001da177e4SLinus Torvalds /* 3011da177e4SLinus Torvalds * Just zap this page 3021da177e4SLinus Torvalds */ 3031da177e4SLinus Torvalds unmap_mapping_range(mapping, 3041da177e4SLinus Torvalds page_index << PAGE_CACHE_SHIFT, 3051da177e4SLinus Torvalds PAGE_CACHE_SIZE, 0); 3061da177e4SLinus Torvalds } 3071da177e4SLinus Torvalds } 3081da177e4SLinus Torvalds was_dirty = test_clear_page_dirty(page); 3091da177e4SLinus Torvalds if (!invalidate_complete_page(mapping, page)) { 3101da177e4SLinus Torvalds if (was_dirty) 3111da177e4SLinus Torvalds set_page_dirty(page); 3121da177e4SLinus Torvalds ret = -EIO; 3131da177e4SLinus Torvalds } 3141da177e4SLinus Torvalds unlock_page(page); 3151da177e4SLinus Torvalds } 3161da177e4SLinus Torvalds pagevec_release(&pvec); 3171da177e4SLinus Torvalds cond_resched(); 3181da177e4SLinus Torvalds } 3191da177e4SLinus Torvalds return ret; 3201da177e4SLinus Torvalds } 3211da177e4SLinus Torvalds EXPORT_SYMBOL_GPL(invalidate_inode_pages2_range); 3221da177e4SLinus Torvalds 3231da177e4SLinus Torvalds /** 3241da177e4SLinus Torvalds * invalidate_inode_pages2 - remove all pages from an address_space 325*67be2dd1SMartin Waitz * @mapping: the address_space 3261da177e4SLinus Torvalds * 3271da177e4SLinus Torvalds * Any pages which are found to be mapped into pagetables are unmapped prior to 3281da177e4SLinus Torvalds * invalidation. 3291da177e4SLinus Torvalds * 3301da177e4SLinus Torvalds * Returns -EIO if any pages could not be invalidated. 3311da177e4SLinus Torvalds */ 3321da177e4SLinus Torvalds int invalidate_inode_pages2(struct address_space *mapping) 3331da177e4SLinus Torvalds { 3341da177e4SLinus Torvalds return invalidate_inode_pages2_range(mapping, 0, -1); 3351da177e4SLinus Torvalds } 3361da177e4SLinus Torvalds EXPORT_SYMBOL_GPL(invalidate_inode_pages2); 337