/*
 * linux/mm/page_isolation.c
 */

#include <linux/mm.h>
#include <linux/page-isolation.h>
#include <linux/pageblock-flags.h>
#include <linux/memory.h>
#include <linux/hugetlb.h>
#include "internal.h"

static int set_migratetype_isolate(struct page *page,
				bool skip_hwpoisoned_pages)
{
	struct zone *zone;
	unsigned long flags, pfn;
	struct memory_isolate_notify arg;
	int notifier_ret;
	int ret = -EBUSY;

	zone = page_zone(page);

	spin_lock_irqsave(&zone->lock, flags);

	pfn = page_to_pfn(page);
	arg.start_pfn = pfn;
	arg.nr_pages = pageblock_nr_pages;
	arg.pages_found = 0;

	/*
	 * It may be possible to isolate a pageblock even if the
	 * migratetype is not MIGRATE_MOVABLE. The memory isolation
	 * notifier chain is used by balloon drivers to return the
	 * number of pages in a range that are held by the balloon
	 * driver to shrink memory. If all the pages are accounted for
	 * by balloons, are free, or are on the LRU, isolation can continue.
	 * Later, for example, when the memory hotplug notifier runs, the
	 * pages reported as "can be isolated" should be isolated (freed)
	 * by the balloon driver through the memory notifier chain.
	 */
	notifier_ret = memory_isolate_notify(MEM_ISOLATE_COUNT, &arg);
	notifier_ret = notifier_to_errno(notifier_ret);
	if (notifier_ret)
		goto out;
	/*
	 * FIXME: Now, memory hotplug doesn't call shrink_slab() by itself.
	 * We just check MOVABLE pages.
	 */
	if (!has_unmovable_pages(zone, page, arg.pages_found,
				 skip_hwpoisoned_pages))
		ret = 0;

	/*
	 * immobile means "not-on-LRU" pages. If immobile is larger than
	 * the removable-by-driver pages reported by the notifier, we'll fail.
	 */

out:
	if (!ret) {
		unsigned long nr_pages;
		int migratetype = get_pageblock_migratetype(page);

		set_pageblock_migratetype(page, MIGRATE_ISOLATE);
		zone->nr_isolate_pageblock++;
		nr_pages = move_freepages_block(zone, page, MIGRATE_ISOLATE);

		__mod_zone_freepage_state(zone, -nr_pages, migratetype);
	}

	spin_unlock_irqrestore(&zone->lock, flags);
	if (!ret)
		drain_all_pages(zone);
	return ret;
}
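
/*
 * For illustration: the MEM_ISOLATE_COUNT notification above is how a
 * balloon-style driver reports pages it can give back. A minimal sketch of
 * such a callback (hypothetical driver code; balloon_pages_in_range() is an
 * assumed helper, not part of this file):
 *
 *	static int balloon_isolate_notify(struct notifier_block *self,
 *					  unsigned long action, void *arg)
 *	{
 *		struct memory_isolate_notify *mi = arg;
 *
 *		if (action == MEM_ISOLATE_COUNT)
 *			mi->pages_found += balloon_pages_in_range(mi->start_pfn,
 *								  mi->nr_pages);
 *		return NOTIFY_OK;
 *	}
 *
 * The accumulated pages_found is then handed to has_unmovable_pages() as the
 * number of not-on-LRU pages that may be tolerated in the pageblock.
 */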

static void unset_migratetype_isolate(struct page *page, unsigned migratetype)
{
	struct zone *zone;
	unsigned long flags, nr_pages;
	struct page *isolated_page = NULL;
	unsigned int order;
	unsigned long page_idx, buddy_idx;
	struct page *buddy;

	zone = page_zone(page);
	spin_lock_irqsave(&zone->lock, flags);
	if (get_pageblock_migratetype(page) != MIGRATE_ISOLATE)
		goto out;

	/*
	 * Because a free page with order >= pageblock_order on an isolated
	 * pageblock is restricted from merging due to the freepage counting
	 * problem, a mergeable free buddy page may still exist.
	 * move_freepages_block() does not handle merging, so we need another
	 * approach: isolating and then freeing the page lets the buddy
	 * allocator merge them.
	 */
	if (PageBuddy(page)) {
		order = page_order(page);
		if (order >= pageblock_order) {
			page_idx = page_to_pfn(page) & ((1 << MAX_ORDER) - 1);
			buddy_idx = __find_buddy_index(page_idx, order);
			buddy = page + (buddy_idx - page_idx);

			if (pfn_valid_within(page_to_pfn(buddy)) &&
			    !is_migrate_isolate_page(buddy)) {
				__isolate_free_page(page, order);
				kernel_map_pages(page, (1 << order), 1);
				set_page_refcounted(page);
				isolated_page = page;
			}
		}
	}

	/*
	 * If we isolated a free page with order >= pageblock_order, there
	 * should be no other free page in the range, so we can skip the
	 * costly pageblock scan for free pages to move.
	 */
	if (!isolated_page) {
		nr_pages = move_freepages_block(zone, page, migratetype);
		__mod_zone_freepage_state(zone, nr_pages, migratetype);
	}
	set_pageblock_migratetype(page, migratetype);
	zone->nr_isolate_pageblock--;
out:
	spin_unlock_irqrestore(&zone->lock, flags);
	if (isolated_page)
		__free_pages(isolated_page, order);
}
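
/*
 * The buddy lookup above is plain XOR arithmetic: __find_buddy_index(page_idx,
 * order) evaluates to page_idx ^ (1 << order). A worked example, with values
 * chosen purely for illustration (MAX_ORDER == 11, pageblock_order == 9, a
 * free page of order 9 at pfn 0x10200):
 *
 *	page_idx  = 0x10200 & ((1 << 11) - 1) = 0x200
 *	buddy_idx = 0x200 ^ (1 << 9)          = 0x000
 *	buddy     = page + (0x000 - 0x200)    = page - 512 pages
 *
 * i.e. the buddy is the order-9 chunk immediately below this one; if it is
 * free and not isolated, isolating and re-freeing our page lets the buddy
 * allocator merge the pair to order 10.
 */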

static inline struct page *
__first_valid_page(unsigned long pfn, unsigned long nr_pages)
{
	int i;

	for (i = 0; i < nr_pages; i++)
		if (pfn_valid_within(pfn + i))
			break;
	if (unlikely(i == nr_pages))
		return NULL;
	return pfn_to_page(pfn + i);
}

/*
 * start_isolate_page_range() -- make the page-allocation-type of a range of
 * pages MIGRATE_ISOLATE.
 * @start_pfn: The lower PFN of the range to be isolated.
 * @end_pfn: The upper PFN of the range to be isolated.
 * @migratetype: Migrate type to set in error recovery.
 * @skip_hwpoisoned_pages: Set to true so that hwpoisoned pages do not
 * prevent isolation.
 *
 * Making the page-allocation-type MIGRATE_ISOLATE means free pages in
 * the range will never be allocated. Any free pages and pages freed in the
 * future will not be allocated again.
 *
 * start_pfn/end_pfn must be aligned to pageblock_order.
 * Returns 0 on success and -EBUSY if any part of the range cannot be isolated.
 */
int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
			     unsigned migratetype, bool skip_hwpoisoned_pages)
{
	unsigned long pfn;
	unsigned long undo_pfn;
	struct page *page;

	BUG_ON((start_pfn) & (pageblock_nr_pages - 1));
	BUG_ON((end_pfn) & (pageblock_nr_pages - 1));

	for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) {
		page = __first_valid_page(pfn, pageblock_nr_pages);
		if (page &&
		    set_migratetype_isolate(page, skip_hwpoisoned_pages)) {
			undo_pfn = pfn;
			goto undo;
		}
	}
	return 0;
undo:
	for (pfn = start_pfn; pfn < undo_pfn; pfn += pageblock_nr_pages)
		unset_migratetype_isolate(pfn_to_page(pfn), migratetype);

	return -EBUSY;
}
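
/*
 * A minimal usage sketch (error handling trimmed, values illustrative) of
 * how callers such as memory hotplug or the contiguous allocator are
 * expected to drive this API together with the other entry points in this
 * file:
 *
 *	ret = start_isolate_page_range(start_pfn, end_pfn,
 *				       MIGRATE_MOVABLE, true);
 *	if (ret)
 *		return ret;
 *	... migrate the in-use pages out of [start_pfn, end_pfn) ...
 *	ret = test_pages_isolated(start_pfn, end_pfn, true);
 *	undo_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE);
 *
 * Isolation only stops new allocations from the range; migrating pages that
 * are already in use is the caller's responsibility.
 */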

/*
 * Make isolated pages available again.
 */
int undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
			    unsigned migratetype)
{
	unsigned long pfn;
	struct page *page;

	BUG_ON((start_pfn) & (pageblock_nr_pages - 1));
	BUG_ON((end_pfn) & (pageblock_nr_pages - 1));
	for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) {
		page = __first_valid_page(pfn, pageblock_nr_pages);
		if (!page || get_pageblock_migratetype(page) != MIGRATE_ISOLATE)
			continue;
		unset_migratetype_isolate(page, migratetype);
	}
	return 0;
}

/*
 * Test whether all pages in the range are free (i.e. isolated).
 * All pages in [start_pfn...end_pfn) must be in the same zone.
 * zone->lock must be held before calling this.
 *
 * Returns 1 if all pages in the range are isolated.
 */
static int
__test_page_isolated_in_pageblock(unsigned long pfn, unsigned long end_pfn,
				  bool skip_hwpoisoned_pages)
{
	struct page *page;

	while (pfn < end_pfn) {
		if (!pfn_valid_within(pfn)) {
			pfn++;
			continue;
		}
		page = pfn_to_page(pfn);
		if (PageBuddy(page))
			/*
			 * If the page is on a free list, it has to be on
			 * the correct MIGRATE_ISOLATE freelist. There is no
			 * simple way to verify that as VM_BUG_ON(), though.
			 */
			pfn += 1 << page_order(page);
		else if (skip_hwpoisoned_pages && PageHWPoison(page))
			/* A HWPoisoned page cannot also be PageBuddy */
			pfn++;
		else
			break;
	}
	if (pfn < end_pfn)
		return 0;
	return 1;
}
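
/*
 * The scan above steps over whole buddy chunks rather than individual pages.
 * An illustrative trace for a fully free, isolated pageblock of 512 pages
 * sitting on the free list as a single order-9 buddy: the loop body runs
 * once, PageBuddy() is true, pfn advances by 1 << 9 == 512, and the block is
 * verified in one iteration. If instead some page in the block is still in
 * use, neither the PageBuddy() nor the PageHWPoison() branch fires, the loop
 * breaks early, and the function returns 0.
 */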

int test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn,
			bool skip_hwpoisoned_pages)
{
	unsigned long pfn, flags;
	struct page *page;
	struct zone *zone;
	int ret;

	/*
	 * Note: pageblock_nr_pages != MAX_ORDER_NR_PAGES, so chunks of free
	 * pages are not necessarily aligned to pageblock_nr_pages.
	 * Check the migratetype of each pageblock first.
	 */
	for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) {
		page = __first_valid_page(pfn, pageblock_nr_pages);
		if (page && get_pageblock_migratetype(page) != MIGRATE_ISOLATE)
			break;
	}
	page = __first_valid_page(start_pfn, end_pfn - start_pfn);
	if ((pfn < end_pfn) || !page)
		return -EBUSY;
	/* Check that all pages are free or marked as ISOLATED */
	zone = page_zone(page);
	spin_lock_irqsave(&zone->lock, flags);
	ret = __test_page_isolated_in_pageblock(start_pfn, end_pfn,
						skip_hwpoisoned_pages);
	spin_unlock_irqrestore(&zone->lock, flags);
	return ret ? 0 : -EBUSY;
}

struct page *alloc_migrate_target(struct page *page, unsigned long private,
				  int **resultp)
{
	gfp_t gfp_mask = GFP_USER | __GFP_MOVABLE;

	/*
	 * TODO: allocate a destination hugepage from the nearest neighbor
	 * node, in accordance with the memory policy of the user process if
	 * possible. For now, as a simple work-around, we use the next node
	 * for the destination.
	 */
	if (PageHuge(page)) {
		nodemask_t src = nodemask_of_node(page_to_nid(page));
		nodemask_t dst;

		nodes_complement(dst, src);
		return alloc_huge_page_node(page_hstate(compound_head(page)),
					    next_node(page_to_nid(page), dst));
	}

	if (PageHighMem(page))
		gfp_mask |= __GFP_HIGHMEM;

	return alloc_page(gfp_mask);
}
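
/*
 * alloc_migrate_target() is meant to be passed to migrate_pages() as the
 * new-page allocator when draining an isolated range. A sketch of such a
 * call site (mirroring memory hotplug's do_migrate_range(); "source" is a
 * caller-built list of the pages to move):
 *
 *	ret = migrate_pages(&source, alloc_migrate_target, NULL, 0,
 *			    MIGRATE_SYNC, MR_MEMORY_HOTPLUG);
 *	if (ret)
 *		putback_movable_pages(&source);
 */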