Lines Matching "oc-delay-us"

1 // SPDX-License-Identifier: GPL-2.0-only
38 #include <linux/fault-inject.h>
61 /* Free Page Internal flags: for internal, non-pcp variants of free_pages(). */
70 * reporting it and marking it "reported" - it only skips notifying
79 * page shuffling (relevant code - e.g., memory onlining - is expected to
82 * Note: No code should rely on this flag for correctness - it's purely
89 /* prevent >1 _updater_ of zone percpu pageset ->high and ->batch fields */
102 /* UP spin_trylock always succeeds so disable IRQs to prevent re-entrancy. */
124 * Generic helper to look up a per-cpu variable with an embedded spinlock.
132 spin_lock(&_ret->member); \
141 if (!spin_trylock(&_ret->member)) { \
150 spin_unlock(&ptr->member); \
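/*
 * Editor's sketch (assumed flow, simplified from the macros above):
 * look up this CPU's instance, then take the spinlock embedded in it.
 * The trylock variant backs off instead of spinning, so callers can
 * fall back to the slower zone->lock path under contention.
 */
static struct page *sketch_pcp_path(struct zone *zone)
{
	struct per_cpu_pages *pcp = pcp_spin_trylock(zone->per_cpu_pageset);

	if (!pcp)
		return NULL;	/* contended: take the zone->lock path instead */
	/* ... pull a page off pcp->lists under the embedded lock ... */
	pcp_spin_unlock(pcp);
	return NULL;		/* placeholder: a real caller returns the page */
}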
214 * other index - this ensures that it will be put on the correct CMA freelist.
218 return page->index; in get_pcppage_migratetype()
223 page->index = migratetype; in set_pcppage_migratetype()
235 * 1G machine -> (16M dma, 800M-16M normal, 1G-800M high)
236 * 1G machine -> (16M dma, 784M normal, 224M high)
241 * TBD: should special case ZONE_DMA32 machines here - in those we normally
289 int user_min_free_kbytes = -1;
314 * During boot we initialize deferred pages on-demand, as needed, but once
350 return page_zone(page)->pageblock_flags; in get_pageblock_bitmap()
357 pfn &= (PAGES_PER_SECTION-1); in pfn_to_bitidx()
359 pfn = pfn - pageblock_start_pfn(page_zone(page)->zone_start_pfn); in pfn_to_bitidx()
365  * get_pfnblock_flags_mask - Return the requested group of flags for the pageblock_nr_pages block of pages
382 bitidx &= (BITS_PER_LONG-1); in get_pfnblock_flags_mask()
399  * set_pfnblock_flags_mask - Set the requested group of flags for a pageblock_nr_pages block of pages
419 bitidx &= (BITS_PER_LONG-1); in set_pfnblock_flags_mask()
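/*
 * Sketch of the bitmap addressing both helpers above share
 * (illustrative, standalone): because BITS_PER_LONG is a power of
 * two, word index and in-word shift reduce to a shift and a mask
 * rather than a division and a modulo.
 */
static unsigned long sketch_get_pb_flags(const unsigned long *bitmap,
					 unsigned long bitidx,
					 unsigned long mask)
{
	unsigned long word_bitidx = bitidx / BITS_PER_LONG; /* which long */

	bitidx &= (BITS_PER_LONG - 1);	/* == bitidx % BITS_PER_LONG */
	return (bitmap[word_bitidx] >> bitidx) & mask;
}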
451 start_pfn = zone->zone_start_pfn; in page_outside_zone_boundaries()
452 sp = zone->spanned_pages; in page_outside_zone_boundaries()
457 pr_err("page 0x%lx outside node %d zone %s [ 0x%lx - 0x%lx ]\n", in page_outside_zone_boundaries()
458 pfn, zone_to_nid(zone), zone->name, in page_outside_zone_boundaries()
510 current->comm, page_to_pfn(page)); in bad_page()
569 * Higher-order pages are called "compound pages". They are structured as follows:
574 * in bit 0 of page->compound_head. The rest of the bits point to the head page.
576 * The first tail page's ->compound_order holds the order of allocation.
577 * This usage means that zero-order pages may not be compound.
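/*
 * Illustrative decoding of that encoding (a sketch of what the
 * kernel's compound_head() does, not a new helper here): bit 0 of
 * ->compound_head marks a tail page, and the remaining bits are the
 * head page pointer.
 */
static inline struct page *sketch_compound_head(struct page *page)
{
	unsigned long head = READ_ONCE(page->compound_head);

	if (head & 1)
		return (struct page *)(head - 1);
	return page;
}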
603 free_the_page(&folio->page, folio_order(folio)); in destroy_large_folio()
615 struct capture_control *capc = current->capture_control; in task_capc()
618 !(current->flags & PF_KTHREAD) && in task_capc()
619 !capc->page && in task_capc()
620 capc->cc->zone == zone ? capc : NULL; in task_capc()
627 if (!capc || order != capc->cc->order) in compaction_capture()
638 * and vice-versa but no more than normal fallback logic which can in compaction_capture()
639 * have trouble finding a high-order free page. in compaction_capture()
644 capc->page = page; in compaction_capture()
666 struct free_area *area = &zone->free_area[order]; in add_to_free_list()
668 list_add(&page->buddy_list, &area->free_list[migratetype]); in add_to_free_list()
669 area->nr_free++; in add_to_free_list()
676 struct free_area *area = &zone->free_area[order]; in add_to_free_list_tail()
678 list_add_tail(&page->buddy_list, &area->free_list[migratetype]); in add_to_free_list_tail()
679 area->nr_free++; in add_to_free_list_tail()
684 * of the list - so the moved pages won't immediately be considered for
690 struct free_area *area = &zone->free_area[order]; in move_to_free_list()
692 list_move_tail(&page->buddy_list, &area->free_list[migratetype]); in move_to_free_list()
702 list_del(&page->buddy_list); in del_page_from_free_list()
705 zone->free_area[order].nr_free--; in del_page_from_free_list()
711 return list_first_entry_or_null(&area->free_list[migratetype], in get_page_from_free_area()
717 * of the next-highest order is free. If it is, it's possible
730 if (order >= MAX_PAGE_ORDER - 1) in buddy_merge_likely()
734 higher_page = page + (higher_page_pfn - pfn); in buddy_merge_likely()
743 * The concept of a buddy system is to maintain a direct-mapped table
761 * -- nyc
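/*
 * The buddy arithmetic that description relies on, as a standalone
 * sketch (hypothetical helper, cf. the kernel's __find_buddy_pfn()):
 * a block's buddy differs from it only in bit 'order' of the PFN,
 * and the merged parent PFN is the bitwise AND of the two.
 */
static unsigned long sketch_buddy_pfn(unsigned long pfn, unsigned int order)
{
	return pfn ^ (1UL << order);
}
/* e.g. pfn 8 at order 1: buddy = 8 ^ 2 = 10, merged block starts at 8 & 10 = 8 */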
776 VM_BUG_ON_PAGE(page->flags & PAGE_FLAGS_CHECK_AT_PREP, page); in __free_one_page()
778 VM_BUG_ON(migratetype == -1); in __free_one_page()
782 VM_BUG_ON_PAGE(pfn & ((1 << order) - 1), page); in __free_one_page()
787 __mod_zone_freepage_state(zone, -(1 << order), in __free_one_page()
820 page = page + (combined_pfn - pfn); in __free_one_page()
846 * split_free_page() -- split a free page at split_pfn_offset
851 * Return -ENOENT if the free page is changed, otherwise 0
872 spin_lock_irqsave(&zone->lock, flags); in split_free_page()
875 ret = -ENOENT; in split_free_page()
881 __mod_zone_freepage_state(zone, -(1UL << order), mt); in split_free_page()
894 split_pfn_offset -= (1UL << free_page_order); in split_free_page()
897 split_pfn_offset = (1UL << order) - (pfn - free_page_pfn); in split_free_page()
900 spin_unlock_irqrestore(&zone->lock, flags); in split_free_page()
911 if (unlikely(atomic_read(&page->_mapcount) != -1)) in page_expected_state()
914 if (unlikely((unsigned long)page->mapping | in page_expected_state()
917 page->memcg_data | in page_expected_state()
920 ((page->pp_magic & ~0x3UL) == PP_SIGNATURE) | in page_expected_state()
922 (page->flags & check_flags))) in page_expected_state()
932 if (unlikely(atomic_read(&page->_mapcount) != -1)) in page_bad_reason()
934 if (unlikely(page->mapping != NULL)) in page_bad_reason()
935 bad_reason = "non-NULL mapping"; in page_bad_reason()
938 if (unlikely(page->flags & flags)) { in page_bad_reason()
945 if (unlikely(page->memcg_data)) in page_bad_reason()
949 if (unlikely((page->pp_magic & ~0x3UL) == PP_SIGNATURE)) in page_bad_reason()
982 * We rely on page->lru.next never having bit 0 set, unless the page in free_tail_page_prepare()
983 * is PageTail(). Let's make sure that's true even for poisoned ->lru. in free_tail_page_prepare()
991 switch (page - head_page) { in free_tail_page_prepare()
993 /* the first tail page: these may be in place of ->mapping */ in free_tail_page_prepare()
998 if (unlikely(atomic_read(&folio->_nr_pages_mapped))) { in free_tail_page_prepare()
1002 if (unlikely(atomic_read(&folio->_pincount))) { in free_tail_page_prepare()
1009 * the second tail page: ->mapping is in free_tail_page_prepare()
1010 * deferred_list.next -- ignore value. in free_tail_page_prepare()
1014 if (page->mapping != TAIL_MAPPING) { in free_tail_page_prepare()
1030 page->mapping = NULL; in free_tail_page_prepare()
1039 * Tag-based KASAN modes skip pages freed via deferred memory initialization
1041 * 2. For tag-based KASAN modes: the page has a match-all KASAN tag, indicating
1044 * Pages will have match-all tags in the following circumstances:
1061 * on-demand allocation and then freed again before the deferred pages
1110 * avoid checking PageCompound for order-0 pages. in free_pages_prepare()
1126 (page + i)->flags &= ~PAGE_FLAGS_CHECK_AT_PREP; in free_pages_prepare()
1130 page->mapping = NULL; in free_pages_prepare()
1139 page->flags &= ~PAGE_FLAGS_CHECK_AT_PREP; in free_pages_prepare()
1157 * With hardware tag-based KASAN, memory tags must be set before the in free_pages_prepare()
1200 count = min(pcp->count, count); in free_pcppages_bulk()
1203 pindex = pindex - 1; in free_pcppages_bulk()
1205 spin_lock_irqsave(&zone->lock, flags); in free_pcppages_bulk()
1212 /* Remove pages from lists in a round-robin fashion. */ in free_pcppages_bulk()
1214 if (++pindex > NR_PCP_LISTS - 1) in free_pcppages_bulk()
1216 list = &pcp->lists[pindex]; in free_pcppages_bulk()
1228 list_del(&page->pcp_list); in free_pcppages_bulk()
1229 count -= nr_pages; in free_pcppages_bulk()
1230 pcp->count -= nr_pages; in free_pcppages_bulk()
1243 spin_unlock_irqrestore(&zone->lock, flags); in free_pcppages_bulk()
1253 spin_lock_irqsave(&zone->lock, flags); in free_one_page()
1259 spin_unlock_irqrestore(&zone->lock, flags); in free_one_page()
1296 for (loop = 0; loop < (nr_pages - 1); loop++, p++) { in __free_pages_core()
1304 atomic_long_add(nr_pages, &page_zone(page)->managed_pages); in __free_pages_core()
1335 * Note: the function may return non-NULL struct page even for a page block
1338 * will fall into 2 sub-sections, and the end pfn of the pageblock may be a hole
1351 end_pfn--; in __pageblock_pfn_to_page()
1384 * -- nyc
1392 high--; in expand()
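/*
 * Simplified model of the loop expand() runs (a sketch, not the full
 * function; set_buddy_order() is the assumed upstream helper name):
 * each pass halves the block and parks the upper half on the free
 * list of the next lower order until the requested order is reached.
 */
static void sketch_expand(struct zone *zone, struct page *page,
			  int low, int high, int migratetype)
{
	unsigned long size = 1UL << high;

	while (high > low) {
		high--;
		size >>= 1;
		/* upper half becomes a free block of order 'high' */
		add_to_free_list(&page[size], zone, high, migratetype);
		set_buddy_order(&page[size], high);
	}
}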
1412 if (unlikely(page->flags & __PG_HWPOISON)) { in check_new_page_bad()
1456 /* Skip, if hardware tag-based KASAN is not enabled. */ in should_skip_kasan_unpoison()
1461 * With hardware tag-based KASAN enabled, skip if this has been in should_skip_kasan_unpoison()
1469 /* Don't skip, if hardware tag-based KASAN is not enabled. */ in should_skip_init()
1473 /* For hardware tag-based KASAN, skip if requested. */ in should_skip_init()
1571 area = &(zone->free_area[current_order]); in __rmqueue_smallest()
1594 static int fallbacks[MIGRATE_TYPES][MIGRATE_PCPTYPES - 1] = {
1663 end_pfn = pageblock_end_pfn(pfn) - 1; in move_freepages_block()
1678 int nr_pageblocks = 1 << (start_order - pageblock_order); in change_pageblock_range()
1680 while (nr_pageblocks--) { in change_pageblock_range()
1734 max_boost = mult_frac(zone->_watermark[WMARK_HIGH], in boost_watermark()
1750 zone->watermark_boost = min(zone->watermark_boost + pageblock_nr_pages, in boost_watermark()
1759 * pageblock to our migratetype and determine how many already-allocated pages
1792 set_bit(ZONE_BOOSTED_WATERMARK, &zone->flags); in steal_suitable_fallback()
1814 * to MOVABLE pageblock, consider all non-movable pages as in steal_suitable_fallback()
1817 * exact migratetype of non-movable pages. in steal_suitable_fallback()
1821 - (free_pages + movable_pages); in steal_suitable_fallback()
1829 if (free_pages + alike_pages >= (1 << (pageblock_order-1)) || in steal_suitable_fallback()
1851 if (area->nr_free == 0) in find_suitable_fallback()
1852 return -1; in find_suitable_fallback()
1855 for (i = 0; i < MIGRATE_PCPTYPES - 1 ; i++) { in find_suitable_fallback()
1870 return -1; in find_suitable_fallback()
1874 * Reserve a pageblock for exclusive use of high-order atomic allocations if
1886 * Check is race-prone but harmless. in reserve_highatomic_pageblock()
1891 if (zone->nr_reserved_highatomic >= max_managed) in reserve_highatomic_pageblock()
1894 spin_lock_irqsave(&zone->lock, flags); in reserve_highatomic_pageblock()
1897 if (zone->nr_reserved_highatomic >= max_managed) in reserve_highatomic_pageblock()
1904 zone->nr_reserved_highatomic += pageblock_nr_pages; in reserve_highatomic_pageblock()
1910 spin_unlock_irqrestore(&zone->lock, flags); in reserve_highatomic_pageblock()
1915 * potentially hurts the reliability of high-order allocations when under
1925 struct zonelist *zonelist = ac->zonelist; in unreserve_highatomic_pageblock()
1933 for_each_zone_zonelist_nodemask(zone, z, zonelist, ac->highest_zoneidx, in unreserve_highatomic_pageblock()
1934 ac->nodemask) { in unreserve_highatomic_pageblock()
1939 if (!force && zone->nr_reserved_highatomic <= in unreserve_highatomic_pageblock()
1943 spin_lock_irqsave(&zone->lock, flags); in unreserve_highatomic_pageblock()
1945 struct free_area *area = &(zone->free_area[order]); in unreserve_highatomic_pageblock()
1955 * from highatomic to ac->migratetype. So we should in unreserve_highatomic_pageblock()
1961 * locking could inadvertently allow a per-cpu in unreserve_highatomic_pageblock()
1966 zone->nr_reserved_highatomic -= min( in unreserve_highatomic_pageblock()
1968 zone->nr_reserved_highatomic); in unreserve_highatomic_pageblock()
1972 * Convert to ac->migratetype and avoid the normal in unreserve_highatomic_pageblock()
1980 set_pageblock_migratetype(page, ac->migratetype); in unreserve_highatomic_pageblock()
1981 ret = move_freepages_block(zone, page, ac->migratetype, in unreserve_highatomic_pageblock()
1984 spin_unlock_irqrestore(&zone->lock, flags); in unreserve_highatomic_pageblock()
1988 spin_unlock_irqrestore(&zone->lock, flags); in unreserve_highatomic_pageblock()
2029 --current_order) { in __rmqueue_fallback()
2030 area = &(zone->free_area[current_order]); in __rmqueue_fallback()
2033 if (fallback_mt == -1) in __rmqueue_fallback()
2055 area = &(zone->free_area[current_order]); in __rmqueue_fallback()
2058 if (fallback_mt != -1) in __rmqueue_fallback()
2063 * This should not happen - we already found a suitable fallback in __rmqueue_fallback()
2083 * Call me with the zone->lock already held.
2130 spin_lock_irqsave(&zone->lock, flags); in rmqueue_bulk()
2147 list_add_tail(&page->pcp_list, list); in rmqueue_bulk()
2150 -(1 << order)); in rmqueue_bulk()
2153 __mod_zone_page_state(zone, NR_FREE_PAGES, -(i << order)); in rmqueue_bulk()
2154 spin_unlock_irqrestore(&zone->lock, flags); in rmqueue_bulk()
2168 high_min = READ_ONCE(pcp->high_min); in decay_pcp_high()
2169 batch = READ_ONCE(pcp->batch); in decay_pcp_high()
2171 * Decrease pcp->high periodically to try to free possible in decay_pcp_high()
2173 * control latency. This caps pcp->high decrement too. in decay_pcp_high()
2175 if (pcp->high > high_min) { in decay_pcp_high()
2176 pcp->high = max3(pcp->count - (batch << CONFIG_PCP_BATCH_SCALE_MAX), in decay_pcp_high()
2177 pcp->high - (pcp->high >> 3), high_min); in decay_pcp_high()
2178 if (pcp->high > high_min) in decay_pcp_high()
2182 to_drain = pcp->count - pcp->high; in decay_pcp_high()
2184 spin_lock(&pcp->lock); in decay_pcp_high()
2186 spin_unlock(&pcp->lock); in decay_pcp_high()
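/*
 * Standalone model of the decay above (illustrative only): shave
 * roughly 1/8 off 'high' per invocation, floored both by high_min and
 * by what a maximally scaled batch flush could still require, which
 * is the max3() expression quoted above.
 */
static int sketch_decay_high(int high, int count, int batch,
			     int high_min, int scale_max)
{
	int batch_floor = count - (batch << scale_max);
	int decayed = high - (high >> 3);
	int v = decayed > batch_floor ? decayed : batch_floor;

	return v > high_min ? v : high_min;
}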
2203 batch = READ_ONCE(pcp->batch); in drain_zone_pages()
2204 to_drain = min(pcp->count, batch); in drain_zone_pages()
2206 spin_lock(&pcp->lock); in drain_zone_pages()
2208 spin_unlock(&pcp->lock); in drain_zone_pages()
2220 pcp = per_cpu_ptr(zone->per_cpu_pageset, cpu); in drain_pages_zone()
2221 if (pcp->count) { in drain_pages_zone()
2222 spin_lock(&pcp->lock); in drain_pages_zone()
2223 free_pcppages_bulk(zone, pcp->count, pcp, 0); in drain_pages_zone()
2224 spin_unlock(&pcp->lock); in drain_pages_zone()
2241 * Spill all of this CPU's per-cpu pages back into the buddy allocator.
2258 * not empty. The check for non-emptiness can however race with a free to
2259 * pcplist that has not yet increased the pcp->count from 0 to 1. Callers
2302 pcp = per_cpu_ptr(zone->per_cpu_pageset, cpu); in __drain_all_pages()
2303 if (pcp->count) in __drain_all_pages()
2307 pcp = per_cpu_ptr(z->per_cpu_pageset, cpu); in __drain_all_pages()
2308 if (pcp->count) { in __drain_all_pages()
2332 * Spill all the per-cpu pages from all CPUs back into the buddy allocator.
2334 * When zone parameter is non-NULL, spill just the single zone's pages.
2358 /* Free as much as possible if batch freeing high-order pages. */ in nr_pcp_free()
2360 return min(pcp->count, batch << CONFIG_PCP_BATCH_SCALE_MAX); in nr_pcp_free()
2366 /* Leave at least pcp->batch pages on the list */ in nr_pcp_free()
2368 max_nr_free = high - batch; in nr_pcp_free()
2374 batch = clamp_t(int, pcp->free_count, min_nr_free, max_nr_free); in nr_pcp_free()
2384 high_min = READ_ONCE(pcp->high_min); in nr_pcp_high()
2385 high_max = READ_ONCE(pcp->high_max); in nr_pcp_high()
2386 high = pcp->high = clamp(pcp->high, high_min, high_max); in nr_pcp_high()
2392 pcp->high = max(high - (batch << CONFIG_PCP_BATCH_SCALE_MAX), in nr_pcp_high()
2401 if (test_bit(ZONE_RECLAIM_ACTIVE, &zone->flags)) { in nr_pcp_high()
2402 int free_count = max_t(int, pcp->free_count, batch); in nr_pcp_high()
2404 pcp->high = max(high - free_count, high_min); in nr_pcp_high()
2405 return min(batch << 2, pcp->high); in nr_pcp_high()
2411 if (test_bit(ZONE_BELOW_HIGH, &zone->flags)) { in nr_pcp_high()
2412 int free_count = max_t(int, pcp->free_count, batch); in nr_pcp_high()
2414 pcp->high = max(high - free_count, high_min); in nr_pcp_high()
2415 high = max(pcp->count, high_min); in nr_pcp_high()
2416 } else if (pcp->count >= high) { in nr_pcp_high()
2417 int need_high = pcp->free_count + batch; in nr_pcp_high()
2419 /* pcp->high should be large enough to hold batch freed pages */ in nr_pcp_high()
2420 if (pcp->high < need_high) in nr_pcp_high()
2421 pcp->high = clamp(need_high, high_min, high_max); in nr_pcp_high()
2440 pcp->alloc_factor >>= 1; in free_unref_page_commit()
2443 list_add(&page->pcp_list, &pcp->lists[pindex]); in free_unref_page_commit()
2444 pcp->count += 1 << order; in free_unref_page_commit()
2446 batch = READ_ONCE(pcp->batch); in free_unref_page_commit()
2448 * As high-order pages other than THPs stored on PCP can contribute in free_unref_page_commit()
2454 free_high = (pcp->free_count >= batch && in free_unref_page_commit()
2455 (pcp->flags & PCPF_PREV_FREE_HIGH_ORDER) && in free_unref_page_commit()
2456 (!(pcp->flags & PCPF_FREE_HIGH_BATCH) || in free_unref_page_commit()
2457 pcp->count >= READ_ONCE(batch))); in free_unref_page_commit()
2458 pcp->flags |= PCPF_PREV_FREE_HIGH_ORDER; in free_unref_page_commit()
2459 } else if (pcp->flags & PCPF_PREV_FREE_HIGH_ORDER) { in free_unref_page_commit()
2460 pcp->flags &= ~PCPF_PREV_FREE_HIGH_ORDER; in free_unref_page_commit()
2462 if (pcp->free_count < (batch << CONFIG_PCP_BATCH_SCALE_MAX)) in free_unref_page_commit()
2463 pcp->free_count += (1 << order); in free_unref_page_commit()
2465 if (pcp->count >= high) { in free_unref_page_commit()
2468 if (test_bit(ZONE_BELOW_HIGH, &zone->flags) && in free_unref_page_commit()
2471 clear_bit(ZONE_BELOW_HIGH, &zone->flags); in free_unref_page_commit()
2507 pcp = pcp_spin_trylock(zone->per_cpu_pageset); in free_unref_page()
2518 * Free a list of 0-order pages
2533 list_del(&page->lru); in free_unref_page_list()
2543 list_del(&page->lru); in free_unref_page_list()
2552 list_del(&page->lru); in free_unref_page_list()
2573 pcp = pcp_spin_trylock(zone->per_cpu_pageset); in free_unref_page_list()
2585 * Non-isolated types over MIGRATE_PCPTYPES get added in free_unref_page_list()
2603 * split_page takes a non-compound higher-order page, and splits it into
2604 * n (1<<order) sub-pages: page[0] .. page[n-1]
2605 * Each sub-page must be freed individually.
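/*
 * Hedged usage sketch: an order-2 allocation split into four
 * independently refcounted order-0 pages, each freed on its own.
 */
static void sketch_split_and_free(void)
{
	struct page *page = alloc_pages(GFP_KERNEL, 2);	/* 4 contiguous pages */
	int i;

	if (!page)
		return;
	split_page(page, 2);	/* now four independent order-0 pages */
	for (i = 0; i < 4; i++)
		__free_page(page + i);
}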
2633 * emulate a high-order watermark check with a raised order-0 in __isolate_free_page()
2634 * watermark, because we already know our high-order page in __isolate_free_page()
2637 watermark = zone->_watermark[WMARK_MIN] + (1UL << order); in __isolate_free_page()
2641 __mod_zone_freepage_state(zone, -(1UL << order), mt); in __isolate_free_page()
2650 if (order >= pageblock_order - 1) { in __isolate_free_page()
2651 struct page *endpage = page + (1 << order) - 1; in __isolate_free_page()
2668 * __putback_isolated_page - Return a now-isolated page back where we got it
2681 lockdep_assert_held(&zone->lock); in __putback_isolated_page()
2724 spin_lock_irqsave(&zone->lock, flags); in rmqueue_buddy()
2733 * failing a high-order atomic allocation in the in rmqueue_buddy()
2740 spin_unlock_irqrestore(&zone->lock, flags); in rmqueue_buddy()
2744 __mod_zone_freepage_state(zone, -(1 << order), in rmqueue_buddy()
2746 spin_unlock_irqrestore(&zone->lock, flags); in rmqueue_buddy()
2760 base_batch = READ_ONCE(pcp->batch); in nr_pcp_alloc()
2761 high_min = READ_ONCE(pcp->high_min); in nr_pcp_alloc()
2762 high_max = READ_ONCE(pcp->high_max); in nr_pcp_alloc()
2763 high = pcp->high = clamp(pcp->high, high_min, high_max); in nr_pcp_alloc()
2772 batch = (base_batch << pcp->alloc_factor); in nr_pcp_alloc()
2775 * If we had larger pcp->high, we could avoid to allocate from in nr_pcp_alloc()
2778 if (high_min != high_max && !test_bit(ZONE_BELOW_HIGH, &zone->flags)) in nr_pcp_alloc()
2779 high = pcp->high = min(high + batch, high_max); in nr_pcp_alloc()
2782 max_nr_alloc = max(high - pcp->count - base_batch, base_batch); in nr_pcp_alloc()
2785 * subsequent allocation of order-0 pages without any freeing. in nr_pcp_alloc()
2788 pcp->alloc_factor < CONFIG_PCP_BATCH_SCALE_MAX) in nr_pcp_alloc()
2789 pcp->alloc_factor++; in nr_pcp_alloc()
2805 /* Remove page from the per-cpu list, caller must protect the list */
2824 pcp->count += alloced << order; in __rmqueue_pcplist()
2830 list_del(&page->pcp_list); in __rmqueue_pcplist()
2831 pcp->count -= 1 << order; in __rmqueue_pcplist()
2837 /* Lock and remove page from the per-cpu list */
2849 pcp = pcp_spin_trylock(zone->per_cpu_pageset); in rmqueue_pcplist()
2860 pcp->free_count >>= 1; in rmqueue_pcplist()
2861 list = &pcp->lists[order_to_pindex(migratetype, order)]; in rmqueue_pcplist()
2874 * Use pcplists for THP or "cheap" high-order allocations.
2894 * allocate greater than order-1 page units with __GFP_NOFAIL. in rmqueue()
2911 unlikely(test_bit(ZONE_BOOSTED_WATERMARK, &zone->flags))) { in rmqueue()
2912 clear_bit(ZONE_BOOSTED_WATERMARK, &zone->flags); in rmqueue()
2929 long unusable_free = (1 << order) - 1; in __zone_watermark_unusable_free()
2933 * watermark then subtract the high-atomic reserves. This will in __zone_watermark_unusable_free()
2934 * over-estimate the size of the atomic reserve but it avoids a search. in __zone_watermark_unusable_free()
2937 unusable_free += z->nr_reserved_highatomic; in __zone_watermark_unusable_free()
2952 * Return true if free base pages are above 'mark'. For high-order checks it
2953 * will return true if the order-0 watermark is reached and there is at least
2964 /* free_pages may go negative - that's OK */ in __zone_watermark_ok()
2965 free_pages -= __zone_watermark_unusable_free(z, order, alloc_flags); in __zone_watermark_ok()
2973 min -= min / 2; in __zone_watermark_ok()
2976 * Non-blocking allocations (e.g. GFP_ATOMIC) can in __zone_watermark_ok()
2978 * non-blocking allocations requests such as GFP_NOWAIT in __zone_watermark_ok()
2983 min -= min / 4; in __zone_watermark_ok()
2990 * makes during the free path will be small and short-lived. in __zone_watermark_ok()
2993 min -= min / 2; in __zone_watermark_ok()
2997 * Check watermarks for an order-0 allocation request. If these in __zone_watermark_ok()
2998 * are not met, then a high-order request also cannot go ahead in __zone_watermark_ok()
3001 if (free_pages <= min + z->lowmem_reserve[highest_zoneidx]) in __zone_watermark_ok()
3004 /* If this is an order-0 request then the watermark is fine */ in __zone_watermark_ok()
3008 /* For a high-order request, check at least one suitable page is free */ in __zone_watermark_ok()
3010 struct free_area *area = &z->free_area[o]; in __zone_watermark_ok()
3013 if (!area->nr_free) in __zone_watermark_ok()
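/*
 * Condensed model of the two-stage check above (simplified sketch,
 * not the kernel function): first prove the order-0 cushion holds,
 * then, for order > 0, prove at least one suitable block exists.
 */
static bool sketch_watermark_ok(long free_pages, long min, long lowmem_reserve,
				unsigned int order,
				const unsigned long *nr_free_by_order,
				unsigned int nr_orders)
{
	unsigned int o;

	if (free_pages <= min + lowmem_reserve)
		return false;	/* even order-0 would dip below the mark */
	if (!order)
		return true;	/* order-0: the watermark alone decides */
	for (o = order; o < nr_orders; o++)
		if (nr_free_by_order[o])
			return true;
	return false;
}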
3051 * Fast check for order-0 only. If this fails then the reserves in zone_watermark_fast()
3061 /* reserved may over estimate high-atomic reserves. */ in zone_watermark_fast()
3062 usable_free -= min(usable_free, reserved); in zone_watermark_fast()
3063 if (usable_free > mark + z->lowmem_reserve[highest_zoneidx]) in zone_watermark_fast()
3072 * Ignore watermark boosting for __GFP_HIGH order-0 allocations in zone_watermark_fast()
3077 if (unlikely(!order && (alloc_flags & ALLOC_MIN_RESERVE) && z->watermark_boost in zone_watermark_fast()
3079 mark = z->_watermark[WMARK_MIN]; in zone_watermark_fast()
3092 if (z->percpu_drift_mark && free_pages < z->percpu_drift_mark) in zone_watermark_ok_safe()
3142 * the pointer is within zone->zone_pgdat->node_zones[]. Also assume in alloc_flags_nofragment()
3145 BUILD_BUG_ON(ZONE_NORMAL - ZONE_DMA32 != 1); in alloc_flags_nofragment()
3146 if (nr_online_nodes > 1 && !populated_zone(--zone)) in alloc_flags_nofragment()
3185 z = ac->preferred_zoneref; in get_page_from_freelist()
3186 for_next_zone_zonelist_nodemask(zone, z, ac->highest_zoneidx, in get_page_from_freelist()
3187 ac->nodemask) { in get_page_from_freelist()
3206 * exceed the per-node dirty limit in the slowpath in get_page_from_freelist()
3212 * dirty-throttling and the flusher threads. in get_page_from_freelist()
3214 if (ac->spread_dirty_pages) { in get_page_from_freelist()
3215 if (last_pgdat != zone->zone_pgdat) { in get_page_from_freelist()
3216 last_pgdat = zone->zone_pgdat; in get_page_from_freelist()
3217 last_pgdat_dirty_ok = node_dirty_ok(zone->zone_pgdat); in get_page_from_freelist()
3225 zone != ac->preferred_zoneref->zone) { in get_page_from_freelist()
3233 local_nid = zone_to_nid(ac->preferred_zoneref->zone); in get_page_from_freelist()
3242 * watermark. If so, we will decrease pcp->high and free in get_page_from_freelist()
3247 if (test_bit(ZONE_BELOW_HIGH, &zone->flags)) in get_page_from_freelist()
3252 ac->highest_zoneidx, alloc_flags, in get_page_from_freelist()
3256 set_bit(ZONE_BELOW_HIGH, &zone->flags); in get_page_from_freelist()
3261 ac->highest_zoneidx, alloc_flags, in get_page_from_freelist()
3286 !zone_allows_reclaim(ac->preferred_zoneref->zone, zone)) in get_page_from_freelist()
3289 ret = node_reclaim(zone->zone_pgdat, gfp_mask, order); in get_page_from_freelist()
3300 ac->highest_zoneidx, alloc_flags)) in get_page_from_freelist()
3308 page = rmqueue(ac->preferred_zoneref->zone, zone, order, in get_page_from_freelist()
3309 gfp_mask, alloc_flags, ac->migratetype); in get_page_from_freelist()
3314 * If this is a high-order atomic allocation then check in get_page_from_freelist()
3360 (current->flags & (PF_MEMALLOC | PF_EXITING))) in warn_alloc_show_mem()
3383 current->comm, &vaf, gfp_mask, &gfp_mask, in warn_alloc()
3417 struct oom_control oc = { in __alloc_pages_may_oom()
3418 .zonelist = ac->zonelist, in __alloc_pages_may_oom()
3419 .nodemask = ac->nodemask, in __alloc_pages_may_oom()
3430 * making progress for us. in __alloc_pages_may_oom()
3452 if (current->flags & PF_DUMPCORE) in __alloc_pages_may_oom()
3468 if (ac->highest_zoneidx < ZONE_NORMAL) in __alloc_pages_may_oom()
3483 if (out_of_memory(&oc) || in __alloc_pages_may_oom()
3488 * Help non-failing allocations by giving them access to memory in __alloc_pages_may_oom()
3507 /* Try memory compaction for high-order allocations before reclaim */
3550 zone->compact_blockskip_flush = false; in __alloc_pages_direct_compact()
3586 * Compaction was skipped due to a lack of free order-0 in should_compact_retry()
3624 (*compact_priority)--; in should_compact_retry()
3657 * Let's give them a good hope and keep retrying while the order-0 in should_compact_retry()
3660 for_each_zone_zonelist_nodemask(zone, z, ac->zonelist, in should_compact_retry()
3661 ac->highest_zoneidx, ac->nodemask) { in should_compact_retry()
3663 ac->highest_zoneidx, alloc_flags)) in should_compact_retry()
3681 if (current->flags & PF_MEMALLOC) in __need_reclaim()
3768 progress = try_to_free_pages(ac->zonelist, order, gfp_mask, in __perform_reclaim()
3769 ac->nodemask); in __perform_reclaim()
3799 * pages are pinned on the per-cpu lists or in high alloc reserves. in __alloc_pages_direct_reclaim()
3820 enum zone_type highest_zoneidx = ac->highest_zoneidx; in wake_all_kswapds()
3822 for_each_zone_zonelist_nodemask(zone, z, ac->zonelist, highest_zoneidx, in wake_all_kswapds()
3823 ac->nodemask) { in wake_all_kswapds()
3826 if (last_pgdat != zone->zone_pgdat) { in wake_all_kswapds()
3828 last_pgdat = zone->zone_pgdat; in wake_all_kswapds()
3868 * Ignore cpuset mems for non-blocking __GFP_HIGH (probably in gfp_to_alloc_flags()
3907 if (in_serving_softirq() && (current->flags & PF_MEMALLOC)) in __gfp_pfmemalloc_flags()
3910 if (current->flags & PF_MEMALLOC) in __gfp_pfmemalloc_flags()
3963 for_each_zone_zonelist_nodemask(zone, z, ac->zonelist, in should_reclaim_retry()
3964 ac->highest_zoneidx, ac->nodemask) { in should_reclaim_retry()
3978 ac->highest_zoneidx, alloc_flags, available); in should_reclaim_retry()
3994 if (current->flags & PF_WQ_WORKER) in should_reclaim_retry()
4015 * This assumes that for all allocations, ac->nodemask can come only in check_retry_cpuset()
4020 if (cpusets_enabled() && ac->nodemask && in check_retry_cpuset()
4021 !cpuset_nodemask_valid_mems_allowed(ac->nodemask)) { in check_retry_cpuset()
4022 ac->nodemask = NULL; in check_retry_cpuset()
4074 * there was a cpuset modification and we are retrying - otherwise we in __alloc_pages_slowpath()
4075 * could end up iterating over non-eligible zones endlessly. in __alloc_pages_slowpath()
4077 ac->preferred_zoneref = first_zones_zonelist(ac->zonelist, in __alloc_pages_slowpath()
4078 ac->highest_zoneidx, ac->nodemask); in __alloc_pages_slowpath()
4079 if (!ac->preferred_zoneref->zone) in __alloc_pages_slowpath()
4084 * any suitable zone to satisfy the request - e.g. non-movable in __alloc_pages_slowpath()
4088 struct zoneref *z = first_zones_zonelist(ac->zonelist, in __alloc_pages_slowpath()
4089 ac->highest_zoneidx, in __alloc_pages_slowpath()
4091 if (!z->zone) in __alloc_pages_slowpath()
4108 * that we have enough base pages and don't need to reclaim. For non- in __alloc_pages_slowpath()
4109 * movable high-order allocations, do that as well, as compaction will in __alloc_pages_slowpath()
4117 (order > 0 && ac->migratetype != MIGRATE_MOVABLE)) in __alloc_pages_slowpath()
4139 * - potentially very expensive because zones are far in __alloc_pages_slowpath()
4142 * - not guaranteed to help because isolate_freepages() in __alloc_pages_slowpath()
4145 * - unlikely to make entire pageblocks free on its in __alloc_pages_slowpath()
4177 ac->nodemask = NULL; in __alloc_pages_slowpath()
4178 ac->preferred_zoneref = first_zones_zonelist(ac->zonelist, in __alloc_pages_slowpath()
4179 ac->highest_zoneidx, ac->nodemask); in __alloc_pages_slowpath()
4192 if (current->flags & PF_MEMALLOC) in __alloc_pages_slowpath()
4224 * It doesn't make any sense to retry for the compaction if the order-0 in __alloc_pages_slowpath()
4244 /* Reclaim has failed us, start killing things */ in __alloc_pages_slowpath()
4285 * for somebody to do the work for us in __alloc_pages_slowpath()
4287 WARN_ON_ONCE_GFP(current->flags & PF_MEMALLOC, gfp_mask); in __alloc_pages_slowpath()
4298 * Help non-failing allocations by giving some access to memory in __alloc_pages_slowpath()
4299 * reserves normally used for high priority non-blocking in __alloc_pages_slowpath()
4312 warn_alloc(gfp_mask, ac->nodemask, in __alloc_pages_slowpath()
4323 ac->highest_zoneidx = gfp_zone(gfp_mask); in prepare_alloc_pages()
4324 ac->zonelist = node_zonelist(preferred_nid, gfp_mask); in prepare_alloc_pages()
4325 ac->nodemask = nodemask; in prepare_alloc_pages()
4326 ac->migratetype = gfp_migratetype(gfp_mask); in prepare_alloc_pages()
4334 if (in_task() && !ac->nodemask) in prepare_alloc_pages()
4335 ac->nodemask = &cpuset_current_mems_allowed; in prepare_alloc_pages()
4348 ac->spread_dirty_pages = (gfp_mask & __GFP_WRITE); in prepare_alloc_pages()
4355 ac->preferred_zoneref = first_zones_zonelist(ac->zonelist, in prepare_alloc_pages()
4356 ac->highest_zoneidx, ac->nodemask); in prepare_alloc_pages()
4362 * __alloc_pages_bulk - Allocate a number of order-0 pages to a list or array
4409 if (unlikely(page_array && nr_pages - nr_populated == 0)) in __alloc_pages_bulk()
4417 if (nr_pages - nr_populated == 1) in __alloc_pages_bulk()
4448 if (nr_online_nodes > 1 && zone != ac.preferred_zoneref->zone && in __alloc_pages_bulk()
4449 zone_to_nid(zone) != zone_to_nid(ac.preferred_zoneref->zone)) { in __alloc_pages_bulk()
4470 pcp = pcp_spin_trylock(zone->per_cpu_pageset); in __alloc_pages_bulk()
4475 pcp_list = &pcp->lists[order_to_pindex(ac.migratetype, 0)]; in __alloc_pages_bulk()
4498 list_add(&page->lru, page_list); in __alloc_pages_bulk()
4508 zone_statistics(ac.preferred_zoneref->zone, zone, nr_account); in __alloc_pages_bulk()
4520 list_add(&page->lru, page_list); in __alloc_pages_bulk()
4566 alloc_flags |= alloc_flags_nofragment(ac.preferred_zoneref->zone, gfp); in __alloc_pages()
4578 * &cpuset_current_mems_allowed to optimize the fast-path attempt. in __alloc_pages()
4630 * __free_pages - Free pages allocated with alloc_pages().
4634 * This function can free multi-page allocations that are not compound
4640 * by put_page() which only frees the first page of a non-compound
4657 while (order-- > 0) in __free_pages()
4674 * An arbitrary-length arbitrary-offset area of memory which resides
4681 * sk_buff->head, or to be used in the "frags" portion of skb_shared_info.
4694 nc->size = page ? PAGE_FRAG_CACHE_MAX_SIZE : PAGE_SIZE; in __page_frag_cache_refill()
4699 nc->va = page ? page_address(page) : NULL; in __page_frag_cache_refill()
4721 if (unlikely(!nc->va)) { in page_frag_alloc_align()
4729 size = nc->size; in page_frag_alloc_align()
4737 nc->pfmemalloc = page_is_pfmemalloc(page); in page_frag_alloc_align()
4738 nc->pagecnt_bias = PAGE_FRAG_CACHE_MAX_SIZE + 1; in page_frag_alloc_align()
4739 nc->offset = size; in page_frag_alloc_align()
4742 offset = nc->offset - fragsz; in page_frag_alloc_align()
4744 page = virt_to_page(nc->va); in page_frag_alloc_align()
4746 if (!page_ref_sub_and_test(page, nc->pagecnt_bias)) in page_frag_alloc_align()
4749 if (unlikely(nc->pfmemalloc)) { in page_frag_alloc_align()
4756 size = nc->size; in page_frag_alloc_align()
4762 nc->pagecnt_bias = PAGE_FRAG_CACHE_MAX_SIZE + 1; in page_frag_alloc_align()
4763 offset = size - fragsz; in page_frag_alloc_align()
4778 nc->pagecnt_bias--; in page_frag_alloc_align()
4780 nc->offset = offset; in page_frag_alloc_align()
4782 return nc->va + offset; in page_frag_alloc_align()
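/*
 * Standalone model of the bump-down fragment allocator above
 * (illustrative; the struct and names here are assumptions): carve
 * fragments from the end of the page toward offset 0, and let the
 * pagecnt_bias trick pay one real refcount operation per page rather
 * than one per fragment.
 */
struct sketch_frag_cache {
	char *va;
	unsigned int offset;
	unsigned int pagecnt_bias;
};

static void *sketch_frag_alloc(struct sketch_frag_cache *nc, unsigned int fragsz)
{
	if (nc->offset < fragsz)
		return NULL;		/* refill/recycle path elided */
	nc->offset -= fragsz;
	nc->pagecnt_bias--;		/* consume one pre-charged reference */
	return nc->va + nc->offset;
}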
4808 while (page < --last) in make_alloc_exact()
4819 * alloc_pages_exact - allocate an exact number of physically-contiguous pages.
4825 * allocate memory in power-of-two pages.
4847 * alloc_pages_exact_nid - allocate an exact number of physically-contiguous
4873 * free_pages_exact - release memory allocated via alloc_pages_exact()
4892 * nr_free_zone_pages - count number of pages beyond high watermark
4899 * nr_free_zone_pages = managed_pages - high_pages
4917 sum += size - high; in nr_free_zone_pages()
4924 * nr_free_buffer_pages - count number of pages beyond high watermark
4940 zoneref->zone = zone; in zoneref_set_zone()
4941 zoneref->zone_idx = zone_idx(zone); in zoneref_set_zone()
4956 zone_type--; in build_zonerefs_node()
4957 zone = pgdat->node_zones + zone_type; in build_zonerefs_node()
4979 return -EINVAL; in __parse_numa_zonelist_order()
5000 * find_next_best_node - find the next node that should appear in a given node's fallback list
5038 /* Penalize nodes under us ("prefer the next node") */ in find_next_best_node()
5064 * This results in maximum locality--normal zone overflows into local
5065 * DMA zone, if any--but risks exhausting DMA zone.
5073 zonerefs = pgdat->node_zonelists[ZONELIST_FALLBACK]._zonerefs; in build_zonelists_in_node_order()
5083 zonerefs->zone = NULL; in build_zonelists_in_node_order()
5084 zonerefs->zone_idx = 0; in build_zonelists_in_node_order()
5095 zonerefs = pgdat->node_zonelists[ZONELIST_NOFALLBACK]._zonerefs; in build_thisnode_zonelists()
5098 zonerefs->zone = NULL; in build_thisnode_zonelists()
5099 zonerefs->zone_idx = 0; in build_thisnode_zonelists()
5116 /* NUMA-aware ordering of nodes */ in build_zonelists()
5117 local_node = pgdat->node_id; in build_zonelists()
5125 * distance group to make it round-robin. in build_zonelists()
5157 return zone_to_nid(z->zone); in local_memory_node()
5171 local_node = pgdat->node_id; in build_zonelists()
5173 zonerefs = pgdat->node_zonelists[ZONELIST_FALLBACK]._zonerefs; in build_zonelists()
5198 zonerefs->zone = NULL; in build_zonelists()
5199 zonerefs->zone_idx = 0; in build_zonelists()
5240 * trying to hold port->lock, for in __build_all_zonelists()
5242 * calling kmalloc(GFP_ATOMIC | __GFP_NOWARN) with port->lock held. in __build_all_zonelists()
5252 * building zonelists is fine - no need to touch other nodes. in __build_all_zonelists()
5254 if (self && !node_online(self->node_id)) { in __build_all_zonelists()
5269 * We now know the "local memory node" for each node-- in __build_all_zonelists()
5271 * Set up numa_mem percpu variable for on-line cpus. During in __build_all_zonelists()
5272 * boot, only the boot cpu should be on-line; we'll init the in __build_all_zonelists()
5273 * secondary cpus' numa_mem as they come on-line. During in __build_all_zonelists()
5274 * node/memory hotplug, we'll fixup all on-line cpus. in __build_all_zonelists()
5303 * (a chicken-and-egg dilemma). in build_all_zonelists_init()
5333 * more accurate, but expensive to check per-zone. This check is in build_all_zonelists()
5334 * made on memory-hotadd so a system can start with mobility in build_all_zonelists()
5368 * Clamp the batch to a 2^n - 1 value. Having a power in zone_batchsize()
5377 batch = rounddown_pow_of_two(batch + batch/2) - 1; in zone_batchsize()
5391 * can be a significant delay between the individual batches being in zone_batchsize()
5393 * fragmented and becoming unavailable for high-order allocations. in zone_batchsize()
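/*
 * Worked example of that clamp: batch = 100
 *   batch + batch/2           = 150
 *   rounddown_pow_of_two(150) = 128
 *   final batch               = 127  (a 2^n - 1 value)
 * so concurrent CPUs' batches don't all start and stop on the same
 * power-of-two boundaries.
 */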
5450 * pcp->high and pcp->batch values are related and generally batch is lower
5451 * than high. They are also related to pcp->count such that count is lower
5457 * store tearing. Any new users of pcp->batch, pcp->high_min and pcp->high_max
5459 * fully trust only the pcp->count field on the local CPU with interrupts
5469 WRITE_ONCE(pcp->batch, batch); in pageset_update()
5470 WRITE_ONCE(pcp->high_min, high_min); in pageset_update()
5471 WRITE_ONCE(pcp->high_max, high_max); in pageset_update()
5481 spin_lock_init(&pcp->lock); in per_cpu_pages_init()
5483 INIT_LIST_HEAD(&pcp->lists[pindex]); in per_cpu_pages_init()
5491 pcp->high_min = BOOT_PAGESET_HIGH; in per_cpu_pages_init()
5492 pcp->high_max = BOOT_PAGESET_HIGH; in per_cpu_pages_init()
5493 pcp->batch = BOOT_PAGESET_BATCH; in per_cpu_pages_init()
5494 pcp->free_count = 0; in per_cpu_pages_init()
5504 pcp = per_cpu_ptr(zone->per_cpu_pageset, cpu); in __zone_set_pageset_high_and_batch()
5510 * Calculate and set new high and batch values for all per-cpu pagesets of a
5522 * PCP high is tuned manually, disable auto-tuning via in zone_set_pageset_high_and_batch()
5532 if (zone->pageset_high_min == new_high_min && in zone_set_pageset_high_and_batch()
5533 zone->pageset_high_max == new_high_max && in zone_set_pageset_high_and_batch()
5534 zone->pageset_batch == new_batch) in zone_set_pageset_high_and_batch()
5537 zone->pageset_high_min = new_high_min; in zone_set_pageset_high_and_batch()
5538 zone->pageset_high_max = new_high_max; in zone_set_pageset_high_and_batch()
5539 zone->pageset_batch = new_batch; in zone_set_pageset_high_and_batch()
5551 zone->per_cpu_zonestats = alloc_percpu(struct per_cpu_zonestat); in setup_zone_pageset()
5553 zone->per_cpu_pageset = alloc_percpu(struct per_cpu_pages); in setup_zone_pageset()
5558 pcp = per_cpu_ptr(zone->per_cpu_pageset, cpu); in setup_zone_pageset()
5559 pzstats = per_cpu_ptr(zone->per_cpu_zonestats, cpu); in setup_zone_pageset()
5584 pcp = per_cpu_ptr(zone->per_cpu_pageset, cpu); in zone_pcp_update_cacheinfo()
5587 * If data cache slice of CPU is large enough, "pcp->batch" in zone_pcp_update_cacheinfo()
5589 * consecutive high-order pages freeing without allocation. in zone_pcp_update_cacheinfo()
5591 * cache-hot pages sharing. in zone_pcp_update_cacheinfo()
5593 spin_lock(&pcp->lock); in zone_pcp_update_cacheinfo()
5594 if ((cci->per_cpu_data_slice_size >> PAGE_SHIFT) > 3 * pcp->batch) in zone_pcp_update_cacheinfo()
5595 pcp->flags |= PCPF_FREE_HIGH_BATCH; in zone_pcp_update_cacheinfo()
5597 pcp->flags &= ~PCPF_FREE_HIGH_BATCH; in zone_pcp_update_cacheinfo()
5598 spin_unlock(&pcp->lock); in zone_pcp_update_cacheinfo()
5632 memset(pzstats->vm_numa_event, 0, in setup_per_cpu_pageset()
5633 sizeof(pzstats->vm_numa_event)); in setup_per_cpu_pageset()
5638 pgdat->per_cpu_nodestats = in setup_per_cpu_pageset()
5649 zone->per_cpu_pageset = &boot_pageset; in zone_pcp_init()
5650 zone->per_cpu_zonestats = &boot_zonestats; in zone_pcp_init()
5651 zone->pageset_high_min = BOOT_PAGESET_HIGH; in zone_pcp_init()
5652 zone->pageset_high_max = BOOT_PAGESET_HIGH; in zone_pcp_init()
5653 zone->pageset_batch = BOOT_PAGESET_BATCH; in zone_pcp_init()
5656 pr_debug(" %s zone: %lu pages, LIFO batch:%u\n", zone->name, in zone_pcp_init()
5657 zone->present_pages, zone_batchsize(zone)); in zone_pcp_init()
5662 atomic_long_add(count, &page_zone(page)->managed_pages); in adjust_managed_page_count()
5691 * Perform a kasan-unchecked memset() since this memory in free_reserved_area()
5759 * calculate_totalreserve_pages - called when sysctl_lowmem_reserve_ratio
5770 pgdat->totalreserve_pages = 0; in calculate_totalreserve_pages()
5773 struct zone *zone = pgdat->node_zones + i; in calculate_totalreserve_pages()
5779 if (zone->lowmem_reserve[j] > max) in calculate_totalreserve_pages()
5780 max = zone->lowmem_reserve[j]; in calculate_totalreserve_pages()
5789 pgdat->totalreserve_pages += max; in calculate_totalreserve_pages()
5798 * setup_per_zone_lowmem_reserve - called whenever
5809 for (i = 0; i < MAX_NR_ZONES - 1; i++) { in setup_per_zone_lowmem_reserve()
5810 struct zone *zone = &pgdat->node_zones[i]; in setup_per_zone_lowmem_reserve()
5816 struct zone *upper_zone = &pgdat->node_zones[j]; in setup_per_zone_lowmem_reserve()
5821 zone->lowmem_reserve[j] = 0; in setup_per_zone_lowmem_reserve()
5823 zone->lowmem_reserve[j] = managed_pages / ratio; in setup_per_zone_lowmem_reserve()
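/*
 * Worked example of the ratio above: with the default
 * sysctl_lowmem_reserve_ratio value of 256 for ZONE_DMA and 4 GiB of
 * managed pages in the zones above it, ZONE_DMA holds back
 * 4 GiB / 256 = 16 MiB worth of pages from allocations that could
 * have been satisfied by those higher zones.
 */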
5834 unsigned long pages_min = min_free_kbytes >> (PAGE_SHIFT - 10); in __setup_per_zone_wmarks()
5848 spin_lock_irqsave(&zone->lock, flags); in __setup_per_zone_wmarks()
5857 * The WMARK_HIGH-WMARK_LOW and (WMARK_LOW-WMARK_MIN) in __setup_per_zone_wmarks()
5865 zone->_watermark[WMARK_MIN] = min_pages; in __setup_per_zone_wmarks()
5871 zone->_watermark[WMARK_MIN] = tmp; in __setup_per_zone_wmarks()
5883 zone->watermark_boost = 0; in __setup_per_zone_wmarks()
5884 zone->_watermark[WMARK_LOW] = min_wmark_pages(zone) + tmp; in __setup_per_zone_wmarks()
5885 zone->_watermark[WMARK_HIGH] = low_wmark_pages(zone) + tmp; in __setup_per_zone_wmarks()
5886 zone->_watermark[WMARK_PROMO] = high_wmark_pages(zone) + tmp; in __setup_per_zone_wmarks()
5888 spin_unlock_irqrestore(&zone->lock, flags); in __setup_per_zone_wmarks()
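/*
 * Worked example of the scaling above (assuming PAGE_SHIFT == 12):
 *   min_free_kbytes = 65536
 *   pages_min = 65536 >> (12 - 10) = 16384 pages, i.e. 64 MiB
 * Each zone receives a WMARK_MIN share proportional to its managed
 * pages; WMARK_LOW and WMARK_HIGH are then stepped up from it by
 * 'tmp' as shown, which upstream derives from the zone size (via
 * watermark_scale_factor, an assumption not shown in this listing).
 */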
5896 * setup_per_zone_wmarks - called when min_free_kbytes changes
5897 * or when memory is hot-{added|removed}
5978 * min_free_kbytes_sysctl_handler - just a wrapper around proc_dointvec() so in postcore_initcall()
6020 pgdat->min_unmapped_pages = 0; in setup_min_unmapped_ratio()
6023 zone->zone_pgdat->min_unmapped_pages += (zone_managed_pages(zone) * in setup_min_unmapped_ratio()
6048 pgdat->min_slab_pages = 0; in setup_min_slab_ratio()
6051 zone->zone_pgdat->min_slab_pages += (zone_managed_pages(zone) * in setup_min_slab_ratio()
6071 * lowmem_reserve_ratio_sysctl_handler - just a wrapper around
6096 * percpu_pagelist_high_fraction - changes the pcp->high for each zone on each
6118 ret = -EINVAL; in percpu_pagelist_high_fraction_sysctl_handler()
6210 /* Usage: See admin-guide/dynamic-debug-howto.rst */
6234 .nid = zone_to_nid(cc->zone), in __alloc_contig_migrate_range()
6240 while (pfn < end || !list_empty(&cc->migratepages)) { in __alloc_contig_migrate_range()
6242 ret = -EINTR; in __alloc_contig_migrate_range()
6246 if (list_empty(&cc->migratepages)) { in __alloc_contig_migrate_range()
6247 cc->nr_migratepages = 0; in __alloc_contig_migrate_range()
6249 if (ret && ret != -EAGAIN) in __alloc_contig_migrate_range()
6251 pfn = cc->migrate_pfn; in __alloc_contig_migrate_range()
6254 ret = -EBUSY; in __alloc_contig_migrate_range()
6258 nr_reclaimed = reclaim_clean_pages_from_list(cc->zone, in __alloc_contig_migrate_range()
6259 &cc->migratepages); in __alloc_contig_migrate_range()
6260 cc->nr_migratepages -= nr_reclaimed; in __alloc_contig_migrate_range()
6262 ret = migrate_pages(&cc->migratepages, alloc_migration_target, in __alloc_contig_migrate_range()
6263 NULL, (unsigned long)&mtc, cc->mode, MR_CONTIG_RANGE, NULL); in __alloc_contig_migrate_range()
6266 * On -ENOMEM, migrate_pages() bails out right away. It is pointless in __alloc_contig_migrate_range()
6269 if (ret == -ENOMEM) in __alloc_contig_migrate_range()
6275 if (!(cc->gfp_mask & __GFP_NOWARN) && ret == -EBUSY) in __alloc_contig_migrate_range()
6276 alloc_contig_dump_pages(&cc->migratepages); in __alloc_contig_migrate_range()
6277 putback_movable_pages(&cc->migratepages); in __alloc_contig_migrate_range()
6284 * alloc_contig_range() -- tries to allocate given range of pages
6286 * @end: one-past-the-last PFN to allocate
6313 .order = -1, in alloc_contig_range()
6338 * This lets us mark the pageblocks back as in alloc_contig_range()
6351 * In case of -EBUSY, we'd like to know which page causes the problem. in alloc_contig_range()
6358 * -EBUSY is not accidentally used or returned to caller. in alloc_contig_range()
6361 if (ret && ret != -EBUSY) in alloc_contig_range()
6378 * We don't have to hold zone->lock here because the pages are in alloc_contig_range()
6407 ret = -EBUSY; in alloc_contig_range()
6414 ret = -EBUSY; in alloc_contig_range()
6420 free_contig_range(outer_start, start - outer_start); in alloc_contig_range()
6422 free_contig_range(end, outer_end - end); in alloc_contig_range()
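/*
 * Hedged usage sketch of the API above: claim 1024 physically
 * contiguous movable pages starting at start_pfn, then return them.
 */
static int sketch_grab_contig(unsigned long start_pfn)
{
	int ret = alloc_contig_range(start_pfn, start_pfn + 1024,
				     MIGRATE_MOVABLE, GFP_KERNEL);
	if (!ret)
		free_contig_range(start_pfn, 1024);
	return ret;
}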
6465 unsigned long last_pfn = start_pfn + nr_pages - 1; in zone_spans_last_pfn()
6471 * alloc_contig_pages() -- tries to find and allocate contiguous range of pages
6502 spin_lock_irqsave(&zone->lock, flags); in alloc_contig_pages()
6504 pfn = ALIGN(zone->zone_start_pfn, nr_pages); in alloc_contig_pages()
6514 spin_unlock_irqrestore(&zone->lock, flags); in alloc_contig_pages()
6519 spin_lock_irqsave(&zone->lock, flags); in alloc_contig_pages()
6523 spin_unlock_irqrestore(&zone->lock, flags); in alloc_contig_pages()
6533 for (; nr_pages--; pfn++) { in free_contig_range()
6560 __zone_set_pageset_high_and_batch(zone, zone->pageset_high_min, in zone_pcp_enable()
6561 zone->pageset_high_max, zone->pageset_batch); in zone_pcp_enable()
6570 if (zone->per_cpu_pageset != &boot_pageset) { in zone_pcp_reset()
6572 pzstats = per_cpu_ptr(zone->per_cpu_zonestats, cpu); in zone_pcp_reset()
6575 free_percpu(zone->per_cpu_pageset); in zone_pcp_reset()
6576 zone->per_cpu_pageset = &boot_pageset; in zone_pcp_reset()
6577 if (zone->per_cpu_zonestats != &boot_zonestats) { in zone_pcp_reset()
6578 free_percpu(zone->per_cpu_zonestats); in zone_pcp_reset()
6579 zone->per_cpu_zonestats = &boot_zonestats; in zone_pcp_reset()
6599 spin_lock_irqsave(&zone->lock, flags); in __offline_isolated_pages()
6627 spin_unlock_irqrestore(&zone->lock, flags); in __offline_isolated_pages()
6640 struct page *page_head = page - (pfn & ((1 << order) - 1)); in is_free_buddy_page()
6653 * Break down a higher-order page into sub-pages, and keep our target out of
6664 high--; in break_down_buddy_pages()
6693 spin_lock_irqsave(&zone->lock, flags); in take_page_off_buddy()
6695 struct page *page_head = page - (pfn & ((1 << order) - 1)); in take_page_off_buddy()
6708 __mod_zone_freepage_state(zone, -1, migratetype); in take_page_off_buddy()
6715 spin_unlock_irqrestore(&zone->lock, flags); in take_page_off_buddy()
6730 spin_lock_irqsave(&zone->lock, flags); in put_page_back_buddy()
6738 spin_unlock_irqrestore(&zone->lock, flags); in put_page_back_buddy()
6750 struct zone *zone = &pgdat->node_zones[ZONE_DMA]; in has_managed_dma()
6775 return -EINVAL; in accept_memory_parse()
6801 if (list_empty(&zone->unaccepted_pages)) in try_to_accept_memory_one()
6804 spin_lock_irqsave(&zone->lock, flags); in try_to_accept_memory_one()
6805 page = list_first_entry_or_null(&zone->unaccepted_pages, in try_to_accept_memory_one()
6808 spin_unlock_irqrestore(&zone->lock, flags); in try_to_accept_memory_one()
6812 list_del(&page->lru); in try_to_accept_memory_one()
6813 last = list_empty(&zone->unaccepted_pages); in try_to_accept_memory_one()
6815 __mod_zone_freepage_state(zone, -MAX_ORDER_NR_PAGES, MIGRATE_MOVABLE); in try_to_accept_memory_one()
6816 __mod_zone_page_state(zone, NR_UNACCEPTED, -MAX_ORDER_NR_PAGES); in try_to_accept_memory_one()
6817 spin_unlock_irqrestore(&zone->lock, flags); in try_to_accept_memory_one()
6835 to_accept = high_wmark_pages(zone) - in try_to_accept_memory()
6836 (zone_page_state(zone, NR_FREE_PAGES) - in try_to_accept_memory()
6844 to_accept -= MAX_ORDER_NR_PAGES; in try_to_accept_memory()
6864 spin_lock_irqsave(&zone->lock, flags); in __free_unaccepted()
6865 first = list_empty(&zone->unaccepted_pages); in __free_unaccepted()
6866 list_add_tail(&page->lru, &zone->unaccepted_pages); in __free_unaccepted()
6869 spin_unlock_irqrestore(&zone->lock, flags); in __free_unaccepted()