Blame - mm/memory_hotplug.c - yocto/kernel/common

blob: 248e2ba4ac5900b6d347854ac989a1f9f71343c2 [file] [log] [blame]

Thomas Gleixner	457c899	2019-05-19 13:08:55 +0100	[diff] [blame]	1	// SPDX-License-Identifier: GPL-2.0-only
Dave Hansen	3947be1	2005-10-29 18:16:54 -0700	[diff] [blame]	2	/*
				3	* linux/mm/memory_hotplug.c
				4	*
				5	* Copyright (C)
				6	*/
				7
Dave Hansen	3947be1	2005-10-29 18:16:54 -0700	[diff] [blame]	8	#include <linux/stddef.h>
				9	#include <linux/mm.h>
Ingo Molnar	174cd4b	2017-02-02 19:15:33 +0100	[diff] [blame]	10	#include <linux/sched/signal.h>
Dave Hansen	3947be1	2005-10-29 18:16:54 -0700	[diff] [blame]	11	#include <linux/swap.h>
				12	#include <linux/interrupt.h>
				13	#include <linux/pagemap.h>
Dave Hansen	3947be1	2005-10-29 18:16:54 -0700	[diff] [blame]	14	#include <linux/compiler.h>
Paul Gortmaker	b95f1b31	2011-10-16 02:01:52 -0400	[diff] [blame]	15	#include <linux/export.h>
Dave Hansen	3947be1	2005-10-29 18:16:54 -0700	[diff] [blame]	16	#include <linux/pagevec.h>
Chandra Seetharaman	2d1d43f	2006-09-29 02:01:25 -0700	[diff] [blame]	17	#include <linux/writeback.h>
Dave Hansen	3947be1	2005-10-29 18:16:54 -0700	[diff] [blame]	18	#include <linux/slab.h>
				19	#include <linux/sysctl.h>
				20	#include <linux/cpu.h>
				21	#include <linux/memory.h>
Dan Williams	4b94ffd	2016-01-15 16:56:22 -0800	[diff] [blame]	22	#include <linux/memremap.h>
Dave Hansen	3947be1	2005-10-29 18:16:54 -0700	[diff] [blame]	23	#include <linux/memory_hotplug.h>
				24	#include <linux/highmem.h>
				25	#include <linux/vmalloc.h>
KAMEZAWA Hiroyuki	0a54703	2006-06-27 02:53:35 -0700	[diff] [blame]	26	#include <linux/ioport.h>
KAMEZAWA Hiroyuki	0c0e619	2007-10-16 01:26:12 -0700	[diff] [blame]	27	#include <linux/delay.h>
				28	#include <linux/migrate.h>
				29	#include <linux/page-isolation.h>
Badari Pulavarty	7108878	2008-10-18 20:25:58 -0700	[diff] [blame]	30	#include <linux/pfn.h>
Andi Kleen	6ad696d	2009-11-17 14:06:22 -0800	[diff] [blame]	31	#include <linux/suspend.h>
KOSAKI Motohiro	6d9c285	2009-12-14 17:58:11 -0800	[diff] [blame]	32	#include <linux/mm_inline.h>
akpm@linux-foundation.org	d96ae53	2010-03-05 13:41:58 -0800	[diff] [blame]	33	#include <linux/firmware-map.h>
Tang Chen	60a5a19	2013-02-22 16:33:14 -0800	[diff] [blame]	34	#include <linux/stop_machine.h>
Naoya Horiguchi	c8721bb	2013-09-11 14:22:09 -0700	[diff] [blame]	35	#include <linux/hugetlb.h>
Tang Chen	c532092	2013-11-12 15:08:10 -0800	[diff] [blame]	36	#include <linux/memblock.h>
Vlastimil Babka	698b1b3	2016-03-17 14:18:08 -0700	[diff] [blame]	37	#include <linux/compaction.h>
Michal Hocko	b15c872	2018-12-28 00:38:01 -0800	[diff] [blame]	38	#include <linux/rmap.h>
Dave Hansen	3947be1	2005-10-29 18:16:54 -0700	[diff] [blame]	39
				40	#include <asm/tlbflush.h>
				41
Adrian Bunk	1e5ad9a	2008-04-28 20:40:08 +0300	[diff] [blame]	42	#include "internal.h"
Dan Williams	e900a91	2019-05-14 15:41:28 -0700	[diff] [blame]	43	#include "shuffle.h"
Adrian Bunk	1e5ad9a	2008-04-28 20:40:08 +0300	[diff] [blame]	44
Oscar Salvador	e3a9d9f	2021-05-04 18:39:48 -0700	[diff] [blame]	45
				46	/*
				47	* memory_hotplug.memmap_on_memory parameter
				48	*/
				49	static bool memmap_on_memory __ro_after_init;
				50	#ifdef CONFIG_MHP_MEMMAP_ON_MEMORY
				51	module_param(memmap_on_memory, bool, 0444);
				52	MODULE_PARM_DESC(memmap_on_memory, "Enable memmap on memory for memory hotplug");
				53	#endif
Oscar Salvador	a08a2ae	2021-05-04 18:39:42 -0700	[diff] [blame]	54
David Hildenbrand	e83a437	2021-09-07 19:55:23 -0700	[diff] [blame]	55	enum {
				56	ONLINE_POLICY_CONTIG_ZONES = 0,
				57	ONLINE_POLICY_AUTO_MOVABLE,
				58	};
				59
				60	const char *online_policy_to_str[] = {
				61	[ONLINE_POLICY_CONTIG_ZONES] = "contig-zones",
				62	[ONLINE_POLICY_AUTO_MOVABLE] = "auto-movable",
				63	};
				64
				65	static int set_online_policy(const char val, const struct kernel_param kp)
				66	{
				67	int ret = sysfs_match_string(online_policy_to_str, val);
				68
				69	if (ret < 0)
				70	return ret;
				71	((int )kp->arg) = ret;
				72	return 0;
				73	}
				74
				75	static int get_online_policy(char buffer, const struct kernel_param kp)
				76	{
				77	return sprintf(buffer, "%s\n", online_policy_to_str[((int )kp->arg)]);
				78	}
				79
				80	/*
				81	* memory_hotplug.online_policy: configure online behavior when onlining without
				82	* specifying a zone (MMOP_ONLINE)
				83	*
				84	* "contig-zones": keep zone contiguous
				85	* "auto-movable": online memory to ZONE_MOVABLE if the configuration
				86	* (auto_movable_ratio, auto_movable_numa_aware) allows for it
				87	*/
				88	static int online_policy __read_mostly = ONLINE_POLICY_CONTIG_ZONES;
				89	static const struct kernel_param_ops online_policy_ops = {
				90	.set = set_online_policy,
				91	.get = get_online_policy,
				92	};
				93	module_param_cb(online_policy, &online_policy_ops, &online_policy, 0644);
				94	MODULE_PARM_DESC(online_policy,
				95	"Set the online policy (\"contig-zones\", \"auto-movable\") "
				96	"Default: \"contig-zones\"");
				97
				98	/*
				99	* memory_hotplug.auto_movable_ratio: specify maximum MOVABLE:KERNEL ratio
				100	*
				101	* The ratio represents an upper limit and the kernel might decide to not
				102	* online some memory to ZONE_MOVABLE -- e.g., because hotplugged KERNEL memory
				103	* doesn't allow for more MOVABLE memory.
				104	*/
				105	static unsigned int auto_movable_ratio __read_mostly = 301;
				106	module_param(auto_movable_ratio, uint, 0644);
				107	MODULE_PARM_DESC(auto_movable_ratio,
				108	"Set the maximum ratio of MOVABLE:KERNEL memory in the system "
				109	"in percent for \"auto-movable\" online policy. Default: 301");
				110
				111	/*
				112	* memory_hotplug.auto_movable_numa_aware: consider numa node stats
				113	*/
				114	#ifdef CONFIG_NUMA
				115	static bool auto_movable_numa_aware __read_mostly = true;
				116	module_param(auto_movable_numa_aware, bool, 0644);
				117	MODULE_PARM_DESC(auto_movable_numa_aware,
				118	"Consider numa node stats in addition to global stats in "
				119	"\"auto-movable\" online policy. Default: true");
				120	#endif /* CONFIG_NUMA */
				121
Daniel Kiper	9d0ad8c	2011-07-25 17:12:05 -0700	[diff] [blame]	122	/*
				123	* online_page_callback contains pointer to current page onlining function.
				124	* Initially it is generic_online_page(). If it is required it could be
				125	* changed by calling set_online_page_callback() for callback registration
				126	* and restore_online_page_callback() for generic callback restore.
				127	*/
				128
Daniel Kiper	9d0ad8c	2011-07-25 17:12:05 -0700	[diff] [blame]	129	static online_page_callback_t online_page_callback = generic_online_page;
Vladimir Davydov	bfc8c90	2014-06-04 16:07:18 -0700	[diff] [blame]	130	static DEFINE_MUTEX(online_page_callback_lock);
Daniel Kiper	9d0ad8c	2011-07-25 17:12:05 -0700	[diff] [blame]	131
Thomas Gleixner	3f906ba	2017-07-10 15:50:09 -0700	[diff] [blame]	132	DEFINE_STATIC_PERCPU_RWSEM(mem_hotplug_lock);
KOSAKI Motohiro	20d6c96	2010-12-02 14:31:19 -0800	[diff] [blame]	133
Thomas Gleixner	3f906ba	2017-07-10 15:50:09 -0700	[diff] [blame]	134	void get_online_mems(void)
				135	{
				136	percpu_down_read(&mem_hotplug_lock);
				137	}
Vladimir Davydov	bfc8c90	2014-06-04 16:07:18 -0700	[diff] [blame]	138
Thomas Gleixner	3f906ba	2017-07-10 15:50:09 -0700	[diff] [blame]	139	void put_online_mems(void)
				140	{
				141	percpu_up_read(&mem_hotplug_lock);
				142	}
Vladimir Davydov	bfc8c90	2014-06-04 16:07:18 -0700	[diff] [blame]	143
Michal Hocko	4932381	2017-07-06 15:41:05 -0700	[diff] [blame]	144	bool movable_node_enabled = false;
				145
Vitaly Kuznetsov	8604d9e	2016-05-19 17:13:03 -0700	[diff] [blame]	146	#ifndef CONFIG_MEMORY_HOTPLUG_DEFAULT_ONLINE
Anshuman Khandual	1adf8b4	2021-02-25 17:17:13 -0800	[diff] [blame]	147	int mhp_default_online_type = MMOP_OFFLINE;
Vitaly Kuznetsov	8604d9e	2016-05-19 17:13:03 -0700	[diff] [blame]	148	#else
Anshuman Khandual	1adf8b4	2021-02-25 17:17:13 -0800	[diff] [blame]	149	int mhp_default_online_type = MMOP_ONLINE;
Vitaly Kuznetsov	8604d9e	2016-05-19 17:13:03 -0700	[diff] [blame]	150	#endif
Vitaly Kuznetsov	31bc385	2016-03-15 14:56:48 -0700	[diff] [blame]	151
Vitaly Kuznetsov	86dd995	2016-05-19 17:13:06 -0700	[diff] [blame]	152	static int __init setup_memhp_default_state(char *str)
				153	{
Anshuman Khandual	1adf8b4	2021-02-25 17:17:13 -0800	[diff] [blame]	154	const int online_type = mhp_online_type_from_str(str);
David Hildenbrand	5f47adf	2020-04-06 20:07:44 -0700	[diff] [blame]	155
				156	if (online_type >= 0)
Anshuman Khandual	1adf8b4	2021-02-25 17:17:13 -0800	[diff] [blame]	157	mhp_default_online_type = online_type;
Vitaly Kuznetsov	86dd995	2016-05-19 17:13:06 -0700	[diff] [blame]	158
				159	return 1;
				160	}
				161	__setup("memhp_default_state=", setup_memhp_default_state);
				162
David Rientjes	30467e0	2015-04-14 15:45:11 -0700	[diff] [blame]	163	void mem_hotplug_begin(void)
Vladimir Davydov	bfc8c90	2014-06-04 16:07:18 -0700	[diff] [blame]	164	{
Thomas Gleixner	3f906ba	2017-07-10 15:50:09 -0700	[diff] [blame]	165	cpus_read_lock();
				166	percpu_down_write(&mem_hotplug_lock);
Vladimir Davydov	bfc8c90	2014-06-04 16:07:18 -0700	[diff] [blame]	167	}
				168
David Rientjes	30467e0	2015-04-14 15:45:11 -0700	[diff] [blame]	169	void mem_hotplug_done(void)
Vladimir Davydov	bfc8c90	2014-06-04 16:07:18 -0700	[diff] [blame]	170	{
Thomas Gleixner	3f906ba	2017-07-10 15:50:09 -0700	[diff] [blame]	171	percpu_up_write(&mem_hotplug_lock);
				172	cpus_read_unlock();
Vladimir Davydov	bfc8c90	2014-06-04 16:07:18 -0700	[diff] [blame]	173	}
KOSAKI Motohiro	20d6c96	2010-12-02 14:31:19 -0800	[diff] [blame]	174
Juergen Gross	357b4da	2019-02-14 11:42:39 +0100	[diff] [blame]	175	u64 max_mem_size = U64_MAX;
				176
Keith Mannthey	45e0b78	2006-09-30 23:27:09 -0700	[diff] [blame]	177	/* add this memory to iomem resource */
David Hildenbrand	7b7b272	2020-06-04 16:48:41 -0700	[diff] [blame]	178	static struct resource *register_memory_resource(u64 start, u64 size,
				179	const char *resource_name)
Keith Mannthey	45e0b78	2006-09-30 23:27:09 -0700	[diff] [blame]	180	{
Dave Hansen	2794129	2019-02-25 10:57:36 -0800	[diff] [blame]	181	struct resource *res;
				182	unsigned long flags = IORESOURCE_SYSTEM_RAM \| IORESOURCE_BUSY;
David Hildenbrand	7b7b272	2020-06-04 16:48:41 -0700	[diff] [blame]	183
				184	if (strcmp(resource_name, "System RAM"))
David Hildenbrand	7cf603d	2020-10-15 20:08:33 -0700	[diff] [blame]	185	flags \|= IORESOURCE_SYSRAM_DRIVER_MANAGED;
Juergen Gross	357b4da	2019-02-14 11:42:39 +0100	[diff] [blame]	186
Anshuman Khandual	bca3fea	2021-02-25 17:17:33 -0800	[diff] [blame]	187	if (!mhp_range_allowed(start, size, true))
				188	return ERR_PTR(-E2BIG);
				189
Baoquan He	f3cd4c8	2020-04-06 20:06:50 -0700	[diff] [blame]	190	/*
				191	* Make sure value parsed from 'mem=' only restricts memory adding
				192	* while booting, so that memory hotplug won't be impacted. Please
				193	* refer to document of 'mem=' in kernel-parameters.txt for more
				194	* details.
				195	*/
				196	if (start + size > max_mem_size && system_state < SYSTEM_RUNNING)
Juergen Gross	357b4da	2019-02-14 11:42:39 +0100	[diff] [blame]	197	return ERR_PTR(-E2BIG);
				198
Dave Hansen	2794129	2019-02-25 10:57:36 -0800	[diff] [blame]	199	/*
				200	* Request ownership of the new memory range. This might be
				201	* a child of an existing resource that was present but
				202	* not marked as busy.
				203	*/
				204	res = __request_region(&iomem_resource, start, size,
				205	resource_name, flags);
Keith Mannthey	45e0b78	2006-09-30 23:27:09 -0700	[diff] [blame]	206
Dave Hansen	2794129	2019-02-25 10:57:36 -0800	[diff] [blame]	207	if (!res) {
				208	pr_debug("Unable to reserve System RAM region: %016llx->%016llx\n",
				209	start, start + size);
Vitaly Kuznetsov	6f754ba	2016-01-14 15:21:55 -0800	[diff] [blame]	210	return ERR_PTR(-EEXIST);
Keith Mannthey	45e0b78	2006-09-30 23:27:09 -0700	[diff] [blame]	211	}
				212	return res;
				213	}
				214
				215	static void release_memory_resource(struct resource *res)
				216	{
				217	if (!res)
				218	return;
				219	release_resource(res);
				220	kfree(res);
Keith Mannthey	45e0b78	2006-09-30 23:27:09 -0700	[diff] [blame]	221	}
				222
Keith Mannthey	5394702	2006-09-30 23:27:08 -0700	[diff] [blame]	223	#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
Dan Williams	7ea6216	2019-07-18 15:58:22 -0700	[diff] [blame]	224	static int check_pfn_span(unsigned long pfn, unsigned long nr_pages,
				225	const char *reason)
				226	{
				227	/*
				228	* Disallow all operations smaller than a sub-section and only
				229	* allow operations smaller than a section for
				230	* SPARSEMEM_VMEMMAP. Note that check_hotplug_memory_range()
				231	* enforces a larger memory_block_size_bytes() granularity for
				232	* memory that will be marked online, so this check should only
				233	* fire for direct arch_{add,remove}_memory() users outside of
				234	* add_memory_resource().
				235	*/
				236	unsigned long min_align;
				237
				238	if (IS_ENABLED(CONFIG_SPARSEMEM_VMEMMAP))
				239	min_align = PAGES_PER_SUBSECTION;
				240	else
				241	min_align = PAGES_PER_SECTION;
				242	if (!IS_ALIGNED(pfn, min_align)
				243	\|\| !IS_ALIGNED(nr_pages, min_align)) {
				244	WARN(1, "Misaligned __%s_pages start: %#lx end: #%lx\n",
				245	reason, pfn, pfn + nr_pages - 1);
				246	return -EINVAL;
				247	}
				248	return 0;
				249	}
				250
David Rientjes	4edd7ce	2013-04-29 15:08:22 -0700	[diff] [blame]	251	/*
Dan Williams	9f605f2	2021-02-25 17:16:57 -0800	[diff] [blame]	252	* Return page for the valid pfn only if the page is online. All pfn
				253	* walkers which rely on the fully initialized page->flags and others
				254	* should use this rather than pfn_valid && pfn_to_page
				255	*/
				256	struct page *pfn_to_online_page(unsigned long pfn)
				257	{
				258	unsigned long nr = pfn_to_section_nr(pfn);
Dan Williams	1f90a34	2021-02-25 17:17:05 -0800	[diff] [blame]	259	struct dev_pagemap *pgmap;
Dan Williams	9f9b02e	2021-02-25 17:17:01 -0800	[diff] [blame]	260	struct mem_section *ms;
Dan Williams	9f605f2	2021-02-25 17:16:57 -0800	[diff] [blame]	261
Dan Williams	9f9b02e	2021-02-25 17:17:01 -0800	[diff] [blame]	262	if (nr >= NR_MEM_SECTIONS)
				263	return NULL;
				264
				265	ms = __nr_to_section(nr);
				266	if (!online_section(ms))
				267	return NULL;
				268
				269	/*
				270	* Save some code text when online_section() +
				271	* pfn_section_valid() are sufficient.
				272	*/
				273	if (IS_ENABLED(CONFIG_HAVE_ARCH_PFN_VALID) && !pfn_valid(pfn))
				274	return NULL;
				275
				276	if (!pfn_section_valid(ms, pfn))
				277	return NULL;
				278
Dan Williams	1f90a34	2021-02-25 17:17:05 -0800	[diff] [blame]	279	if (!online_device_section(ms))
				280	return pfn_to_page(pfn);
				281
				282	/*
				283	* Slowpath: when ZONE_DEVICE collides with
				284	* ZONE_{NORMAL,MOVABLE} within the same section some pfns in
				285	* the section may be 'offline' but 'valid'. Only
				286	* get_dev_pagemap() can determine sub-section online status.
				287	*/
				288	pgmap = get_dev_pagemap(pfn, NULL);
				289	put_dev_pagemap(pgmap);
				290
				291	/* The presence of a pgmap indicates ZONE_DEVICE offline pfn */
				292	if (pgmap)
				293	return NULL;
				294
Dan Williams	9f9b02e	2021-02-25 17:17:01 -0800	[diff] [blame]	295	return pfn_to_page(pfn);
Dan Williams	9f605f2	2021-02-25 17:16:57 -0800	[diff] [blame]	296	}
				297	EXPORT_SYMBOL_GPL(pfn_to_online_page);
				298
				299	/*
David Rientjes	4edd7ce	2013-04-29 15:08:22 -0700	[diff] [blame]	300	* Reasonably generic function for adding memory. It is
				301	* expected that archs that support memory hotplug will
				302	* call this function after deciding the zone to which to
				303	* add the new pages.
				304	*/
Dan Williams	7ea6216	2019-07-18 15:58:22 -0700	[diff] [blame]	305	int __ref __add_pages(int nid, unsigned long pfn, unsigned long nr_pages,
Logan Gunthorpe	f5637d3	2020-04-10 14:33:21 -0700	[diff] [blame]	306	struct mhp_params *params)
David Rientjes	4edd7ce	2013-04-29 15:08:22 -0700	[diff] [blame]	307	{
David Hildenbrand	6cdd0b3	2020-04-06 20:06:56 -0700	[diff] [blame]	308	const unsigned long end_pfn = pfn + nr_pages;
				309	unsigned long cur_nr_pages;
Dan Williams	9a84503	2019-07-18 15:58:43 -0700	[diff] [blame]	310	int err;
Logan Gunthorpe	f5637d3	2020-04-10 14:33:21 -0700	[diff] [blame]	311	struct vmem_altmap *altmap = params->altmap;
Dan Williams	4b94ffd	2016-01-15 16:56:22 -0800	[diff] [blame]	312
Logan Gunthorpe	bfeb022	2020-04-10 14:33:36 -0700	[diff] [blame]	313	if (WARN_ON_ONCE(!params->pgprot.pgprot))
				314	return -EINVAL;
				315
Anshuman Khandual	bca3fea	2021-02-25 17:17:33 -0800	[diff] [blame]	316	VM_BUG_ON(!mhp_range_allowed(PFN_PHYS(pfn), nr_pages * PAGE_SIZE, false));
Alastair D'Silva	dca4436	2019-11-30 17:53:48 -0800	[diff] [blame]	317
Dan Williams	4b94ffd	2016-01-15 16:56:22 -0800	[diff] [blame]	318	if (altmap) {
				319	/*
				320	* Validate altmap is within bounds of the total request
				321	*/
Dan Williams	7ea6216	2019-07-18 15:58:22 -0700	[diff] [blame]	322	if (altmap->base_pfn != pfn
Dan Williams	4b94ffd	2016-01-15 16:56:22 -0800	[diff] [blame]	323	\|\| vmem_altmap_offset(altmap) > nr_pages) {
				324	pr_warn_once("memory add fail, invalid altmap\n");
Dan Williams	7ea6216	2019-07-18 15:58:22 -0700	[diff] [blame]	325	return -EINVAL;
Dan Williams	4b94ffd	2016-01-15 16:56:22 -0800	[diff] [blame]	326	}
				327	altmap->alloc = 0;
				328	}
				329
Dan Williams	7ea6216	2019-07-18 15:58:22 -0700	[diff] [blame]	330	err = check_pfn_span(pfn, nr_pages, "add");
				331	if (err)
				332	return err;
				333
David Hildenbrand	6cdd0b3	2020-04-06 20:06:56 -0700	[diff] [blame]	334	for (; pfn < end_pfn; pfn += cur_nr_pages) {
				335	/* Select all remaining pages up to the next section boundary */
				336	cur_nr_pages = min(end_pfn - pfn,
				337	SECTION_ALIGN_UP(pfn + 1) - pfn);
				338	err = sparse_add_section(nid, pfn, cur_nr_pages, altmap);
Dan Williams	ba72b4c	2019-07-18 15:58:26 -0700	[diff] [blame]	339	if (err)
				340	break;
Michal Hocko	f64ac5e	2017-10-03 16:16:16 -0700	[diff] [blame]	341	cond_resched();
David Rientjes	4edd7ce	2013-04-29 15:08:22 -0700	[diff] [blame]	342	}
Zhu Guihua	c435a39	2015-06-24 16:58:42 -0700	[diff] [blame]	343	vmemmap_populate_print_last();
David Rientjes	4edd7ce	2013-04-29 15:08:22 -0700	[diff] [blame]	344	return err;
				345	}
David Rientjes	4edd7ce	2013-04-29 15:08:22 -0700	[diff] [blame]	346
Yasuaki Ishimatsu	815121d	2013-02-22 16:33:12 -0800	[diff] [blame]	347	/* find the smallest valid pfn in the range [start_pfn, end_pfn) */
YASUAKI ISHIMATSU	d09b013	2017-10-03 16:16:32 -0700	[diff] [blame]	348	static unsigned long find_smallest_section_pfn(int nid, struct zone *zone,
Yasuaki Ishimatsu	815121d	2013-02-22 16:33:12 -0800	[diff] [blame]	349	unsigned long start_pfn,
				350	unsigned long end_pfn)
				351	{
Dan Williams	49ba3c6	2019-07-18 15:58:07 -0700	[diff] [blame]	352	for (; start_pfn < end_pfn; start_pfn += PAGES_PER_SUBSECTION) {
David Hildenbrand	7ce700b	2019-11-21 17:53:56 -0800	[diff] [blame]	353	if (unlikely(!pfn_to_online_page(start_pfn)))
Yasuaki Ishimatsu	815121d	2013-02-22 16:33:12 -0800	[diff] [blame]	354	continue;
				355
				356	if (unlikely(pfn_to_nid(start_pfn) != nid))
				357	continue;
				358
David Hildenbrand	9b05158	2020-02-03 17:34:12 -0800	[diff] [blame]	359	if (zone != page_zone(pfn_to_page(start_pfn)))
Yasuaki Ishimatsu	815121d	2013-02-22 16:33:12 -0800	[diff] [blame]	360	continue;
				361
				362	return start_pfn;
				363	}
				364
				365	return 0;
				366	}
				367
				368	/* find the biggest valid pfn in the range [start_pfn, end_pfn). */
YASUAKI ISHIMATSU	d09b013	2017-10-03 16:16:32 -0700	[diff] [blame]	369	static unsigned long find_biggest_section_pfn(int nid, struct zone *zone,
Yasuaki Ishimatsu	815121d	2013-02-22 16:33:12 -0800	[diff] [blame]	370	unsigned long start_pfn,
				371	unsigned long end_pfn)
				372	{
Yasuaki Ishimatsu	815121d	2013-02-22 16:33:12 -0800	[diff] [blame]	373	unsigned long pfn;
				374
				375	/* pfn is the end pfn of a memory section. */
				376	pfn = end_pfn - 1;
Dan Williams	49ba3c6	2019-07-18 15:58:07 -0700	[diff] [blame]	377	for (; pfn >= start_pfn; pfn -= PAGES_PER_SUBSECTION) {
David Hildenbrand	7ce700b	2019-11-21 17:53:56 -0800	[diff] [blame]	378	if (unlikely(!pfn_to_online_page(pfn)))
Yasuaki Ishimatsu	815121d	2013-02-22 16:33:12 -0800	[diff] [blame]	379	continue;
				380
				381	if (unlikely(pfn_to_nid(pfn) != nid))
				382	continue;
				383
David Hildenbrand	9b05158	2020-02-03 17:34:12 -0800	[diff] [blame]	384	if (zone != page_zone(pfn_to_page(pfn)))
Yasuaki Ishimatsu	815121d	2013-02-22 16:33:12 -0800	[diff] [blame]	385	continue;
				386
				387	return pfn;
				388	}
				389
				390	return 0;
				391	}
				392
				393	static void shrink_zone_span(struct zone *zone, unsigned long start_pfn,
				394	unsigned long end_pfn)
				395	{
Yasuaki Ishimatsu	815121d	2013-02-22 16:33:12 -0800	[diff] [blame]	396	unsigned long pfn;
Yasuaki Ishimatsu	815121d	2013-02-22 16:33:12 -0800	[diff] [blame]	397	int nid = zone_to_nid(zone);
				398
David Hildenbrand	5d12071	2020-02-03 17:34:19 -0800	[diff] [blame]	399	if (zone->zone_start_pfn == start_pfn) {
Yasuaki Ishimatsu	815121d	2013-02-22 16:33:12 -0800	[diff] [blame]	400	/*
				401	* If the section is smallest section in the zone, it need
				402	* shrink zone->zone_start_pfn and zone->zone_spanned_pages.
				403	* In this case, we find second smallest valid mem_section
				404	* for shrinking zone.
				405	*/
				406	pfn = find_smallest_section_pfn(nid, zone, end_pfn,
David Hildenbrand	5d12071	2020-02-03 17:34:19 -0800	[diff] [blame]	407	zone_end_pfn(zone));
Yasuaki Ishimatsu	815121d	2013-02-22 16:33:12 -0800	[diff] [blame]	408	if (pfn) {
David Hildenbrand	5d12071	2020-02-03 17:34:19 -0800	[diff] [blame]	409	zone->spanned_pages = zone_end_pfn(zone) - pfn;
Yasuaki Ishimatsu	815121d	2013-02-22 16:33:12 -0800	[diff] [blame]	410	zone->zone_start_pfn = pfn;
David Hildenbrand	950b68d	2020-02-03 17:34:16 -0800	[diff] [blame]	411	} else {
				412	zone->zone_start_pfn = 0;
				413	zone->spanned_pages = 0;
Yasuaki Ishimatsu	815121d	2013-02-22 16:33:12 -0800	[diff] [blame]	414	}
David Hildenbrand	5d12071	2020-02-03 17:34:19 -0800	[diff] [blame]	415	} else if (zone_end_pfn(zone) == end_pfn) {
Yasuaki Ishimatsu	815121d	2013-02-22 16:33:12 -0800	[diff] [blame]	416	/*
				417	* If the section is biggest section in the zone, it need
				418	* shrink zone->spanned_pages.
				419	* In this case, we find second biggest valid mem_section for
				420	* shrinking zone.
				421	*/
David Hildenbrand	5d12071	2020-02-03 17:34:19 -0800	[diff] [blame]	422	pfn = find_biggest_section_pfn(nid, zone, zone->zone_start_pfn,
Yasuaki Ishimatsu	815121d	2013-02-22 16:33:12 -0800	[diff] [blame]	423	start_pfn);
				424	if (pfn)
David Hildenbrand	5d12071	2020-02-03 17:34:19 -0800	[diff] [blame]	425	zone->spanned_pages = pfn - zone->zone_start_pfn + 1;
David Hildenbrand	950b68d	2020-02-03 17:34:16 -0800	[diff] [blame]	426	else {
				427	zone->zone_start_pfn = 0;
				428	zone->spanned_pages = 0;
				429	}
Yasuaki Ishimatsu	815121d	2013-02-22 16:33:12 -0800	[diff] [blame]	430	}
Yasuaki Ishimatsu	815121d	2013-02-22 16:33:12 -0800	[diff] [blame]	431	}
				432
David Hildenbrand	00d6c01	2019-10-18 20:19:33 -0700	[diff] [blame]	433	static void update_pgdat_span(struct pglist_data *pgdat)
Yasuaki Ishimatsu	815121d	2013-02-22 16:33:12 -0800	[diff] [blame]	434	{
David Hildenbrand	00d6c01	2019-10-18 20:19:33 -0700	[diff] [blame]	435	unsigned long node_start_pfn = 0, node_end_pfn = 0;
				436	struct zone *zone;
Yasuaki Ishimatsu	815121d	2013-02-22 16:33:12 -0800	[diff] [blame]	437
David Hildenbrand	00d6c01	2019-10-18 20:19:33 -0700	[diff] [blame]	438	for (zone = pgdat->node_zones;
				439	zone < pgdat->node_zones + MAX_NR_ZONES; zone++) {
Miaohe Lin	6c922cf	2021-02-25 17:17:21 -0800	[diff] [blame]	440	unsigned long end_pfn = zone_end_pfn(zone);
David Hildenbrand	00d6c01	2019-10-18 20:19:33 -0700	[diff] [blame]	441
				442	/* No need to lock the zones, they can't change. */
David Hildenbrand	656d571	2019-11-05 21:17:10 -0800	[diff] [blame]	443	if (!zone->spanned_pages)
				444	continue;
				445	if (!node_end_pfn) {
				446	node_start_pfn = zone->zone_start_pfn;
Miaohe Lin	6c922cf	2021-02-25 17:17:21 -0800	[diff] [blame]	447	node_end_pfn = end_pfn;
David Hildenbrand	656d571	2019-11-05 21:17:10 -0800	[diff] [blame]	448	continue;
				449	}
				450
Miaohe Lin	6c922cf	2021-02-25 17:17:21 -0800	[diff] [blame]	451	if (end_pfn > node_end_pfn)
				452	node_end_pfn = end_pfn;
David Hildenbrand	00d6c01	2019-10-18 20:19:33 -0700	[diff] [blame]	453	if (zone->zone_start_pfn < node_start_pfn)
				454	node_start_pfn = zone->zone_start_pfn;
Yasuaki Ishimatsu	815121d	2013-02-22 16:33:12 -0800	[diff] [blame]	455	}
				456
David Hildenbrand	00d6c01	2019-10-18 20:19:33 -0700	[diff] [blame]	457	pgdat->node_start_pfn = node_start_pfn;
				458	pgdat->node_spanned_pages = node_end_pfn - node_start_pfn;
Yasuaki Ishimatsu	815121d	2013-02-22 16:33:12 -0800	[diff] [blame]	459	}
				460
David Hildenbrand	feee6b2	2020-01-04 12:59:33 -0800	[diff] [blame]	461	void __ref remove_pfn_range_from_zone(struct zone *zone,
				462	unsigned long start_pfn,
				463	unsigned long nr_pages)
Yasuaki Ishimatsu	815121d	2013-02-22 16:33:12 -0800	[diff] [blame]	464	{
Ben Widawsky	b7e3deb	2020-06-25 20:30:51 -0700	[diff] [blame]	465	const unsigned long end_pfn = start_pfn + nr_pages;
Yasuaki Ishimatsu	815121d	2013-02-22 16:33:12 -0800	[diff] [blame]	466	struct pglist_data *pgdat = zone->zone_pgdat;
Oscar Salvador	27cacaa	2021-06-30 18:52:46 -0700	[diff] [blame]	467	unsigned long pfn, cur_nr_pages;
Yasuaki Ishimatsu	815121d	2013-02-22 16:33:12 -0800	[diff] [blame]	468
David Hildenbrand	d33695b	2020-02-03 17:34:09 -0800	[diff] [blame]	469	/* Poison struct pages because they are now uninitialized again. */
Ben Widawsky	b7e3deb	2020-06-25 20:30:51 -0700	[diff] [blame]	470	for (pfn = start_pfn; pfn < end_pfn; pfn += cur_nr_pages) {
				471	cond_resched();
				472
				473	/* Select all remaining pages up to the next section boundary */
				474	cur_nr_pages =
				475	min(end_pfn - pfn, SECTION_ALIGN_UP(pfn + 1) - pfn);
				476	page_init_poison(pfn_to_page(pfn),
				477	sizeof(struct page) * cur_nr_pages);
				478	}
David Hildenbrand	d33695b	2020-02-03 17:34:09 -0800	[diff] [blame]	479
David Hildenbrand	7ce700b	2019-11-21 17:53:56 -0800	[diff] [blame]	480	#ifdef CONFIG_ZONE_DEVICE
				481	/*
				482	* Zone shrinking code cannot properly deal with ZONE_DEVICE. So
				483	* we will not try to shrink the zones - which is okay as
				484	* set_zone_contiguous() cannot deal with ZONE_DEVICE either way.
				485	*/
				486	if (zone_idx(zone) == ZONE_DEVICE)
				487	return;
				488	#endif
				489
David Hildenbrand	feee6b2	2020-01-04 12:59:33 -0800	[diff] [blame]	490	clear_zone_contiguous(zone);
				491
Yasuaki Ishimatsu	815121d	2013-02-22 16:33:12 -0800	[diff] [blame]	492	shrink_zone_span(zone, start_pfn, start_pfn + nr_pages);
David Hildenbrand	00d6c01	2019-10-18 20:19:33 -0700	[diff] [blame]	493	update_pgdat_span(pgdat);
David Hildenbrand	feee6b2	2020-01-04 12:59:33 -0800	[diff] [blame]	494
				495	set_zone_contiguous(zone);
Yasuaki Ishimatsu	815121d	2013-02-22 16:33:12 -0800	[diff] [blame]	496	}
				497
/*
 * Remove @nr_pages pages starting at @pfn from the memory section that
 * contains @pfn. Warns once and does nothing if that section is not valid.
 */
static void __remove_section(unsigned long pfn, unsigned long nr_pages,
			     unsigned long map_offset,
			     struct vmem_altmap *altmap)
{
	struct mem_section *section = __pfn_to_section(pfn);

	if (!WARN_ON_ONCE(!valid_section(section)))
		sparse_remove_section(section, pfn, nr_pages, map_offset,
				      altmap);
}
				509
/**
 * __remove_pages() - remove sections of pages
 * @pfn: starting pageframe (must be aligned to start of a section)
 * @nr_pages: number of pages to remove (must be multiple of section size)
 * @altmap: alternative device page map or %NULL if default memmap is used
 *
 * Generic helper function to remove section mappings and sysfs entries
 * for the section of the memory we are removing. Caller needs to make
 * sure that pages are marked reserved and zones are adjusted properly by
 * calling offline_pages().
 */
void __remove_pages(unsigned long pfn, unsigned long nr_pages,
		    struct vmem_altmap *altmap)
{
	const unsigned long end_pfn = pfn + nr_pages;
	unsigned long map_offset = vmem_altmap_offset(altmap);
	unsigned long chunk;

	if (check_pfn_span(pfn, nr_pages, "remove"))
		return;

	while (pfn < end_pfn) {
		cond_resched();

		/* Select all remaining pages up to the next section boundary */
		chunk = min(end_pfn - pfn, SECTION_ALIGN_UP(pfn + 1) - pfn);
		__remove_section(pfn, chunk, map_offset, altmap);

		/* The altmap offset is only passed for the first chunk. */
		map_offset = 0;
		pfn += chunk;
	}
}
Badari Pulavarty	ea01ea9	2008-04-28 02:12:01 -0700	[diff] [blame]	542
Daniel Kiper	9d0ad8c	2011-07-25 17:12:05 -0700	[diff] [blame]	543	int set_online_page_callback(online_page_callback_t callback)
				544	{
				545	int rc = -EINVAL;
				546
Vladimir Davydov	bfc8c90	2014-06-04 16:07:18 -0700	[diff] [blame]	547	get_online_mems();
				548	mutex_lock(&online_page_callback_lock);
Daniel Kiper	9d0ad8c	2011-07-25 17:12:05 -0700	[diff] [blame]	549
				550	if (online_page_callback == generic_online_page) {
				551	online_page_callback = callback;
				552	rc = 0;
				553	}
				554
Vladimir Davydov	bfc8c90	2014-06-04 16:07:18 -0700	[diff] [blame]	555	mutex_unlock(&online_page_callback_lock);
				556	put_online_mems();
Daniel Kiper	9d0ad8c	2011-07-25 17:12:05 -0700	[diff] [blame]	557
				558	return rc;
				559	}
				560	EXPORT_SYMBOL_GPL(set_online_page_callback);
				561
				562	int restore_online_page_callback(online_page_callback_t callback)
				563	{
				564	int rc = -EINVAL;
				565
Vladimir Davydov	bfc8c90	2014-06-04 16:07:18 -0700	[diff] [blame]	566	get_online_mems();
				567	mutex_lock(&online_page_callback_lock);
Daniel Kiper	9d0ad8c	2011-07-25 17:12:05 -0700	[diff] [blame]	568
				569	if (online_page_callback == callback) {
				570	online_page_callback = generic_online_page;
				571	rc = 0;
				572	}
				573
Vladimir Davydov	bfc8c90	2014-06-04 16:07:18 -0700	[diff] [blame]	574	mutex_unlock(&online_page_callback_lock);
				575	put_online_mems();
Daniel Kiper	9d0ad8c	2011-07-25 17:12:05 -0700	[diff] [blame]	576
				577	return rc;
				578	}
				579	EXPORT_SYMBOL_GPL(restore_online_page_callback);
				580
/*
 * Default online_page_callback: pass the 2^@order pages starting at @page
 * to __free_pages_core() and add them to the total RAM page count (and to
 * the highmem page count when @page is a highmem page).
 */
void generic_online_page(struct page *page, unsigned int order)
{
	const unsigned long nr_onlined = 1UL << order;

	/*
	 * With debug_pagealloc enabled, freeing the page will try to unmap
	 * it, so map it beforehand. Doing it here avoids a special case in
	 * the page freeing fast path.
	 */
	debug_pagealloc_map_pages(page, 1 << order);
	__free_pages_core(page, order);

	totalram_pages_add(nr_onlined);
#ifdef CONFIG_HIGHMEM
	if (PageHighMem(page))
		totalhigh_pages_add(nr_onlined);
#endif
}
EXPORT_SYMBOL_GPL(generic_online_page);
Arun KS	a9cd410	2019-03-05 15:42:14 -0800	[diff] [blame]	597
David Hildenbrand	aac6532	2020-10-15 20:08:11 -0700	[diff] [blame]	598	static void online_pages_range(unsigned long start_pfn, unsigned long nr_pages)
Dave Hansen	3947be1	2005-10-29 18:16:54 -0700	[diff] [blame]	599	{
David Hildenbrand	b2c2ab2	2019-09-23 15:36:02 -0700	[diff] [blame]	600	const unsigned long end_pfn = start_pfn + nr_pages;
				601	unsigned long pfn;
Michal Hocko	2d070ea	2017-07-06 15:37:56 -0700	[diff] [blame]	602
David Hildenbrand	b2c2ab2	2019-09-23 15:36:02 -0700	[diff] [blame]	603	/*
David Hildenbrand	aac6532	2020-10-15 20:08:11 -0700	[diff] [blame]	604	* Online the pages in MAX_ORDER - 1 aligned chunks. The callback might
				605	* decide to not expose all pages to the buddy (e.g., expose them
				606	* later). We account all pages as being online and belonging to this
				607	* zone ("present").
Oscar Salvador	a08a2ae	2021-05-04 18:39:42 -0700	[diff] [blame]	608	* When using memmap_on_memory, the range might not be aligned to
				609	* MAX_ORDER_NR_PAGES - 1, but pageblock aligned. __ffs() will detect
				610	* this and the first chunk to online will be pageblock_nr_pages.
David Hildenbrand	b2c2ab2	2019-09-23 15:36:02 -0700	[diff] [blame]	611	*/
Oscar Salvador	a08a2ae	2021-05-04 18:39:42 -0700	[diff] [blame]	612	for (pfn = start_pfn; pfn < end_pfn;) {
				613	int order = min(MAX_ORDER - 1UL, __ffs(pfn));
				614
				615	(*online_page_callback)(pfn_to_page(pfn), order);
				616	pfn += (1UL << order);
				617	}
Michal Hocko	2d070ea	2017-07-06 15:37:56 -0700	[diff] [blame]	618
David Hildenbrand	b2c2ab2	2019-09-23 15:36:02 -0700	[diff] [blame]	619	/* mark all involved sections as online */
				620	online_mem_sections(start_pfn, end_pfn);
KAMEZAWA Hiroyuki	75884fb	2007-10-16 01:26:10 -0700	[diff] [blame]	621	}
				622
Lai Jiangshan	d971367	2012-12-11 16:01:03 -0800	[diff] [blame]	623	/* check which state of node_states will be changed when online memory */
				624	static void node_states_check_changes_online(unsigned long nr_pages,
				625	struct zone zone, struct memory_notify arg)
				626	{
				627	int nid = zone_to_nid(zone);
Lai Jiangshan	d971367	2012-12-11 16:01:03 -0800	[diff] [blame]	628
Anshuman Khandual	98fa15f	2019-03-05 15:42:58 -0800	[diff] [blame]	629	arg->status_change_nid = NUMA_NO_NODE;
				630	arg->status_change_nid_normal = NUMA_NO_NODE;
				631	arg->status_change_nid_high = NUMA_NO_NODE;
Lai Jiangshan	d971367	2012-12-11 16:01:03 -0800	[diff] [blame]	632
Lai Jiangshan	6715ddf	2012-12-12 13:51:49 -0800	[diff] [blame]	633	if (!node_state(nid, N_MEMORY))
Lai Jiangshan	d971367	2012-12-11 16:01:03 -0800	[diff] [blame]	634	arg->status_change_nid = nid;
Oscar Salvador	8efe33f	2018-10-26 15:07:34 -0700	[diff] [blame]	635	if (zone_idx(zone) <= ZONE_NORMAL && !node_state(nid, N_NORMAL_MEMORY))
				636	arg->status_change_nid_normal = nid;
				637	#ifdef CONFIG_HIGHMEM
Baoquan He	d3ba3ae	2019-05-13 17:17:35 -0700	[diff] [blame]	638	if (zone_idx(zone) <= ZONE_HIGHMEM && !node_state(nid, N_HIGH_MEMORY))
Oscar Salvador	8efe33f	2018-10-26 15:07:34 -0700	[diff] [blame]	639	arg->status_change_nid_high = nid;
				640	#endif
Lai Jiangshan	d971367	2012-12-11 16:01:03 -0800	[diff] [blame]	641	}
				642
				643	static void node_states_set_node(int node, struct memory_notify *arg)
				644	{
				645	if (arg->status_change_nid_normal >= 0)
				646	node_set_state(node, N_NORMAL_MEMORY);
				647
Lai Jiangshan	6715ddf	2012-12-12 13:51:49 -0800	[diff] [blame]	648	if (arg->status_change_nid_high >= 0)
				649	node_set_state(node, N_HIGH_MEMORY);
				650
Oscar Salvador	83d8361	2018-10-26 15:07:25 -0700	[diff] [blame]	651	if (arg->status_change_nid >= 0)
				652	node_set_state(node, N_MEMORY);
Lai Jiangshan	d971367	2012-12-11 16:01:03 -0800	[diff] [blame]	653	}
				654
Michal Hocko	f1dd2cd	2017-07-06 15:38:11 -0700	[diff] [blame]	655	static void __meminit resize_zone_range(struct zone *zone, unsigned long start_pfn,
				656	unsigned long nr_pages)
				657	{
				658	unsigned long old_end_pfn = zone_end_pfn(zone);
				659
				660	if (zone_is_empty(zone) \|\| start_pfn < zone->zone_start_pfn)
				661	zone->zone_start_pfn = start_pfn;
				662
				663	zone->spanned_pages = max(start_pfn + nr_pages, old_end_pfn) - zone->zone_start_pfn;
				664	}
				665
				666	static void __meminit resize_pgdat_range(struct pglist_data *pgdat, unsigned long start_pfn,
				667	unsigned long nr_pages)
				668	{
				669	unsigned long old_end_pfn = pgdat_end_pfn(pgdat);
				670
				671	if (!pgdat->node_spanned_pages \|\| start_pfn < pgdat->node_start_pfn)
				672	pgdat->node_start_pfn = start_pfn;
				673
				674	pgdat->node_spanned_pages = max(start_pfn + nr_pages, old_end_pfn) - pgdat->node_start_pfn;
Michal Hocko	f1dd2cd	2017-07-06 15:38:11 -0700	[diff] [blame]	675
David Hildenbrand	3fccb74	2019-09-23 15:35:37 -0700	[diff] [blame]	676	}
Dan Williams	1f90a34	2021-02-25 17:17:05 -0800	[diff] [blame]	677
				678	static void section_taint_zone_device(unsigned long pfn)
				679	{
				680	struct mem_section *ms = __pfn_to_section(pfn);
				681
				682	ms->section_mem_map \|= SECTION_TAINT_ZONE_DEVICE;
				683	}
				684
David Hildenbrand	3fccb74	2019-09-23 15:35:37 -0700	[diff] [blame]	685	/*
				686	* Associate the pfn range with the given zone, initializing the memmaps
				687	* and resizing the pgdat/zone data to span the added pages. After this
				688	* call, all affected pages are PG_reserved.
David Hildenbrand	d882c00	2020-10-15 20:08:19 -0700	[diff] [blame]	689	*
				690	* All aligned pageblocks are initialized to the specified migratetype
				691	* (usually MIGRATE_MOVABLE). Besides setting the migratetype, no related
				692	* zone stats (e.g., nr_isolate_pageblock) are touched.
David Hildenbrand	3fccb74	2019-09-23 15:35:37 -0700	[diff] [blame]	693	*/
Christoph Hellwig	a99583e	2017-12-29 08:53:57 +0100	[diff] [blame]	694	void __ref move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn,
David Hildenbrand	d882c00	2020-10-15 20:08:19 -0700	[diff] [blame]	695	unsigned long nr_pages,
				696	struct vmem_altmap *altmap, int migratetype)
Michal Hocko	f1dd2cd	2017-07-06 15:38:11 -0700	[diff] [blame]	697	{
				698	struct pglist_data *pgdat = zone->zone_pgdat;
				699	int nid = pgdat->node_id;
Michal Hocko	f1dd2cd	2017-07-06 15:38:11 -0700	[diff] [blame]	700
Michal Hocko	f1dd2cd	2017-07-06 15:38:11 -0700	[diff] [blame]	701	clear_zone_contiguous(zone);
				702
Wei Yang	fa004ab	2018-12-28 00:37:10 -0800	[diff] [blame]	703	if (zone_is_empty(zone))
				704	init_currently_empty_zone(zone, start_pfn, nr_pages);
Michal Hocko	f1dd2cd	2017-07-06 15:38:11 -0700	[diff] [blame]	705	resize_zone_range(zone, start_pfn, nr_pages);
Michal Hocko	f1dd2cd	2017-07-06 15:38:11 -0700	[diff] [blame]	706	resize_pgdat_range(pgdat, start_pfn, nr_pages);
Michal Hocko	f1dd2cd	2017-07-06 15:38:11 -0700	[diff] [blame]	707
				708	/*
Dan Williams	1f90a34	2021-02-25 17:17:05 -0800	[diff] [blame]	709	* Subsection population requires care in pfn_to_online_page().
				710	* Set the taint to enable the slow path detection of
				711	* ZONE_DEVICE pages in an otherwise ZONE_{NORMAL,MOVABLE}
				712	* section.
				713	*/
				714	if (zone_is_zone_device(zone)) {
				715	if (!IS_ALIGNED(start_pfn, PAGES_PER_SECTION))
				716	section_taint_zone_device(start_pfn);
				717	if (!IS_ALIGNED(start_pfn + nr_pages, PAGES_PER_SECTION))
				718	section_taint_zone_device(start_pfn + nr_pages);
				719	}
				720
				721	/*
Michal Hocko	f1dd2cd	2017-07-06 15:38:11 -0700	[diff] [blame]	722	* TODO now we have a visible range of pages which are not associated
				723	* with their zone properly. Not nice but set_pfnblock_flags_mask
				724	* expects the zone spans the pfn range. All the pages in the range
				725	* are reserved so nobody should be touching them so we should be safe
				726	*/
Baoquan He	ab28cb6	2021-02-24 12:06:14 -0800	[diff] [blame]	727	memmap_init_range(nr_pages, nid, zone_idx(zone), start_pfn, 0,
David Hildenbrand	d882c00	2020-10-15 20:08:19 -0700	[diff] [blame]	728	MEMINIT_HOTPLUG, altmap, migratetype);
Michal Hocko	f1dd2cd	2017-07-06 15:38:11 -0700	[diff] [blame]	729
				730	set_zone_contiguous(zone);
				731	}
				732
/*
 * Page counters collected while walking zones for the auto-movable online
 * policy.
 */
struct auto_movable_stats {
	/* Sum of present_early_pages of non-MOVABLE zones, minus CMA pages. */
	unsigned long kernel_early_pages;
	/* Sum of present_pages of ZONE_MOVABLE zones, plus CMA pages. */
	unsigned long movable_pages;
};
				737
				738	static void auto_movable_stats_account_zone(struct auto_movable_stats *stats,
				739	struct zone *zone)
				740	{
				741	if (zone_idx(zone) == ZONE_MOVABLE) {
				742	stats->movable_pages += zone->present_pages;
				743	} else {
				744	stats->kernel_early_pages += zone->present_early_pages;
				745	#ifdef CONFIG_CMA
				746	/*
				747	* CMA pages (never on hotplugged memory) behave like
				748	* ZONE_MOVABLE.
				749	*/
				750	stats->movable_pages += zone->cma_pages;
				751	stats->kernel_early_pages -= zone->cma_pages;
				752	#endif /* CONFIG_CMA */
				753	}
				754	}
				755
				756	static bool auto_movable_can_online_movable(int nid, unsigned long nr_pages)
				757	{
				758	struct auto_movable_stats stats = {};
				759	unsigned long kernel_early_pages, movable_pages;
				760	pg_data_t *pgdat = NODE_DATA(nid);
				761	struct zone *zone;
				762	int i;
				763
				764	/* Walk all relevant zones and collect MOVABLE vs. KERNEL stats. */
				765	if (nid == NUMA_NO_NODE) {
				766	/* TODO: cache values */
				767	for_each_populated_zone(zone)
				768	auto_movable_stats_account_zone(&stats, zone);
				769	} else {
				770	for (i = 0; i < MAX_NR_ZONES; i++) {
				771	zone = pgdat->node_zones + i;
				772	if (populated_zone(zone))
				773	auto_movable_stats_account_zone(&stats, zone);
				774	}
				775	}
				776
				777	kernel_early_pages = stats.kernel_early_pages;
				778	movable_pages = stats.movable_pages;
				779
				780	/*
				781	* Test if we could online the given number of pages to ZONE_MOVABLE
				782	* and still stay in the configured ratio.
				783	*/
				784	movable_pages += nr_pages;
				785	return movable_pages <= (auto_movable_ratio * kernel_early_pages) / 100;
				786	}
				787
Michal Hocko	f1dd2cd	2017-07-06 15:38:11 -0700	[diff] [blame]	788	/*
Michal Hocko	c246a21	2017-07-06 15:38:18 -0700	[diff] [blame]	789	* Returns a default kernel memory zone for the given pfn range.
				790	* If no kernel zone covers this pfn range it will automatically go
				791	* to the ZONE_NORMAL.
				792	*/
Michal Hocko	c6f03e2	2017-09-06 16:19:40 -0700	[diff] [blame]	793	static struct zone *default_kernel_zone_for_pfn(int nid, unsigned long start_pfn,
Michal Hocko	c246a21	2017-07-06 15:38:18 -0700	[diff] [blame]	794	unsigned long nr_pages)
				795	{
				796	struct pglist_data *pgdat = NODE_DATA(nid);
				797	int zid;
				798
				799	for (zid = 0; zid <= ZONE_NORMAL; zid++) {
				800	struct zone *zone = &pgdat->node_zones[zid];
				801
				802	if (zone_intersects(zone, start_pfn, nr_pages))
				803	return zone;
				804	}
				805
				806	return &pgdat->node_zones[ZONE_NORMAL];
				807	}
				808
David Hildenbrand	e83a437	2021-09-07 19:55:23 -0700	[diff] [blame]	809	/*
				810	* Determine to which zone to online memory dynamically based on user
				811	* configuration and system stats. We care about the following ratio:
				812	*
				813	* MOVABLE : KERNEL
				814	*
				815	* Whereby MOVABLE is memory in ZONE_MOVABLE and KERNEL is memory in
				816	* one of the kernel zones. CMA pages inside one of the kernel zones really
				817	* behaves like ZONE_MOVABLE, so we treat them accordingly.
				818	*
				819	* We don't allow for hotplugged memory in a KERNEL zone to increase the
				820	* amount of MOVABLE memory we can have, so we end up with:
				821	*
				822	* MOVABLE : KERNEL_EARLY
				823	*
				824	* Whereby KERNEL_EARLY is memory in one of the kernel zones, available sinze
				825	* boot. We base our calculation on KERNEL_EARLY internally, because:
				826	*
				827	* a) Hotplugged memory in one of the kernel zones can sometimes still get
				828	* hotunplugged, especially when hot(un)plugging individual memory blocks.
				829	* There is no coordination across memory devices, therefore "automatic"
				830	* hotunplugging, as implemented in hypervisors, could result in zone
				831	* imbalances.
				832	* b) Early/boot memory in one of the kernel zones can usually not get
				833	* hotunplugged again (e.g., no firmware interface to unplug, fragmented
				834	* with unmovable allocations). While there are corner cases where it might
				835	* still work, it is barely relevant in practice.
				836	*
				837	* We rely on "present pages" instead of "managed pages", as the latter is
				838	* highly unreliable and dynamic in virtualized environments, and does not
				839	* consider boot time allocations. For example, memory ballooning adjusts the
				840	* managed pages when inflating/deflating the balloon, and balloon compaction
				841	* can even migrate inflated pages between zones.
				842	*
				843	* Using "present pages" is better but some things to keep in mind are:
				844	*
				845	* a) Some memblock allocations, such as for the crashkernel area, are
				846	* effectively unused by the kernel, yet they account to "present pages".
				847	* Fortunately, these allocations are comparatively small in relevant setups
				848	* (e.g., fraction of system memory).
				849	* b) Some hotplugged memory blocks in virtualized environments, esecially
				850	* hotplugged by virtio-mem, look like they are completely present, however,
				851	* only parts of the memory block are actually currently usable.
				852	* "present pages" is an upper limit that can get reached at runtime. As
				853	* we base our calculations on KERNEL_EARLY, this is not an issue.
				854	*/
David Hildenbrand	445fcf7	2021-09-07 19:55:45 -0700	[diff] [blame^]	855	static struct zone *auto_movable_zone_for_pfn(int nid,
				856	struct memory_group *group,
				857	unsigned long pfn,
David Hildenbrand	e83a437	2021-09-07 19:55:23 -0700	[diff] [blame]	858	unsigned long nr_pages)
				859	{
David Hildenbrand	445fcf7	2021-09-07 19:55:45 -0700	[diff] [blame^]	860	unsigned long online_pages = 0, max_pages, end_pfn;
				861	struct page *page;
				862
David Hildenbrand	e83a437	2021-09-07 19:55:23 -0700	[diff] [blame]	863	if (!auto_movable_ratio)
				864	goto kernel_zone;
				865
David Hildenbrand	445fcf7	2021-09-07 19:55:45 -0700	[diff] [blame^]	866	if (group && !group->is_dynamic) {
				867	max_pages = group->s.max_pages;
				868	online_pages = group->present_movable_pages;
				869
				870	/* If anything is !MOVABLE online the rest !MOVABLE. */
				871	if (group->present_kernel_pages)
				872	goto kernel_zone;
				873	} else if (!group \|\| group->d.unit_pages == nr_pages) {
				874	max_pages = nr_pages;
				875	} else {
				876	max_pages = group->d.unit_pages;
				877	/*
				878	* Take a look at all online sections in the current unit.
				879	* We can safely assume that all pages within a section belong
				880	* to the same zone, because dynamic memory groups only deal
				881	* with hotplugged memory.
				882	*/
				883	pfn = ALIGN_DOWN(pfn, group->d.unit_pages);
				884	end_pfn = pfn + group->d.unit_pages;
				885	for (; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
				886	page = pfn_to_online_page(pfn);
				887	if (!page)
				888	continue;
				889	/* If anything is !MOVABLE online the rest !MOVABLE. */
				890	if (page_zonenum(page) != ZONE_MOVABLE)
				891	goto kernel_zone;
				892	online_pages += PAGES_PER_SECTION;
				893	}
				894	}
				895
				896	/*
				897	* Online MOVABLE if we could currently online all remaining parts
				898	* MOVABLE. We expect to (add+) online them immediately next, so if
				899	* nobody interferes, all will be MOVABLE if possible.
				900	*/
				901	nr_pages = max_pages - online_pages;
David Hildenbrand	e83a437	2021-09-07 19:55:23 -0700	[diff] [blame]	902	if (!auto_movable_can_online_movable(NUMA_NO_NODE, nr_pages))
				903	goto kernel_zone;
				904
				905	#ifdef CONFIG_NUMA
				906	if (auto_movable_numa_aware &&
				907	!auto_movable_can_online_movable(nid, nr_pages))
				908	goto kernel_zone;
				909	#endif /* CONFIG_NUMA */
				910
				911	return &NODE_DATA(nid)->node_zones[ZONE_MOVABLE];
				912	kernel_zone:
				913	return default_kernel_zone_for_pfn(nid, pfn, nr_pages);
				914	}
				915
Michal Hocko	c6f03e2	2017-09-06 16:19:40 -0700	[diff] [blame]	916	static inline struct zone *default_zone_for_pfn(int nid, unsigned long start_pfn,
				917	unsigned long nr_pages)
Michal Hocko	e5e6893	2017-09-06 16:19:37 -0700	[diff] [blame]	918	{
Michal Hocko	c6f03e2	2017-09-06 16:19:40 -0700	[diff] [blame]	919	struct zone *kernel_zone = default_kernel_zone_for_pfn(nid, start_pfn,
				920	nr_pages);
				921	struct zone *movable_zone = &NODE_DATA(nid)->node_zones[ZONE_MOVABLE];
				922	bool in_kernel = zone_intersects(kernel_zone, start_pfn, nr_pages);
				923	bool in_movable = zone_intersects(movable_zone, start_pfn, nr_pages);
Michal Hocko	e5e6893	2017-09-06 16:19:37 -0700	[diff] [blame]	924
				925	/*
Michal Hocko	c6f03e2	2017-09-06 16:19:40 -0700	[diff] [blame]	926	* We inherit the existing zone in a simple case where zones do not
				927	* overlap in the given range
Michal Hocko	e5e6893	2017-09-06 16:19:37 -0700	[diff] [blame]	928	*/
Michal Hocko	c6f03e2	2017-09-06 16:19:40 -0700	[diff] [blame]	929	if (in_kernel ^ in_movable)
				930	return (in_kernel) ? kernel_zone : movable_zone;
Michal Hocko	e5e6893	2017-09-06 16:19:37 -0700	[diff] [blame]	931
Michal Hocko	c6f03e2	2017-09-06 16:19:40 -0700	[diff] [blame]	932	/*
				933	* If the range doesn't belong to any zone or two zones overlap in the
				934	* given range then we use movable zone only if movable_node is
				935	* enabled because we always online to a kernel zone by default.
				936	*/
				937	return movable_node_enabled ? movable_zone : kernel_zone;
Michal Hocko	9f123ab	2017-07-10 15:48:37 -0700	[diff] [blame]	938	}
				939
David Hildenbrand	7cf209b	2021-09-07 19:54:59 -0700	[diff] [blame]	940	struct zone *zone_for_pfn_range(int online_type, int nid,
David Hildenbrand	445fcf7	2021-09-07 19:55:45 -0700	[diff] [blame^]	941	struct memory_group *group, unsigned long start_pfn,
				942	unsigned long nr_pages)
Michal Hocko	f1dd2cd	2017-07-06 15:38:11 -0700	[diff] [blame]	943	{
Michal Hocko	c6f03e2	2017-09-06 16:19:40 -0700	[diff] [blame]	944	if (online_type == MMOP_ONLINE_KERNEL)
				945	return default_kernel_zone_for_pfn(nid, start_pfn, nr_pages);
Michal Hocko	f1dd2cd	2017-07-06 15:38:11 -0700	[diff] [blame]	946
Michal Hocko	c6f03e2	2017-09-06 16:19:40 -0700	[diff] [blame]	947	if (online_type == MMOP_ONLINE_MOVABLE)
				948	return &NODE_DATA(nid)->node_zones[ZONE_MOVABLE];
Reza Arbab	df429ac	2016-07-26 15:22:23 -0700	[diff] [blame]	949
David Hildenbrand	e83a437	2021-09-07 19:55:23 -0700	[diff] [blame]	950	if (online_policy == ONLINE_POLICY_AUTO_MOVABLE)
David Hildenbrand	445fcf7	2021-09-07 19:55:45 -0700	[diff] [blame^]	951	return auto_movable_zone_for_pfn(nid, group, start_pfn, nr_pages);
David Hildenbrand	e83a437	2021-09-07 19:55:23 -0700	[diff] [blame]	952
Michal Hocko	c6f03e2	2017-09-06 16:19:40 -0700	[diff] [blame]	953	return default_zone_for_pfn(nid, start_pfn, nr_pages);
Michal Hocko	e5e6893	2017-09-06 16:19:37 -0700	[diff] [blame]	954	}
				955
Oscar Salvador	a08a2ae	2021-05-04 18:39:42 -0700	[diff] [blame]	956	/*
				957	* This function should only be called by memory_block_{online,offline},
				958	* and {online,offline}_pages.
				959	*/
David Hildenbrand	836809e	2021-09-07 19:55:30 -0700	[diff] [blame]	960	void adjust_present_page_count(struct page page, struct memory_group group,
				961	long nr_pages)
David Hildenbrand	f990114	2021-05-04 18:39:39 -0700	[diff] [blame]	962	{
David Hildenbrand	4b09700	2021-09-07 19:55:19 -0700	[diff] [blame]	963	struct zone *zone = page_zone(page);
David Hildenbrand	836809e	2021-09-07 19:55:30 -0700	[diff] [blame]	964	const bool movable = zone_idx(zone) == ZONE_MOVABLE;
David Hildenbrand	4b09700	2021-09-07 19:55:19 -0700	[diff] [blame]	965
				966	/*
				967	* We only support onlining/offlining/adding/removing of complete
				968	* memory blocks; therefore, either all is either early or hotplugged.
				969	*/
				970	if (early_section(__pfn_to_section(page_to_pfn(page))))
				971	zone->present_early_pages += nr_pages;
David Hildenbrand	f990114	2021-05-04 18:39:39 -0700	[diff] [blame]	972	zone->present_pages += nr_pages;
David Hildenbrand	f990114	2021-05-04 18:39:39 -0700	[diff] [blame]	973	zone->zone_pgdat->node_present_pages += nr_pages;
David Hildenbrand	836809e	2021-09-07 19:55:30 -0700	[diff] [blame]	974
				975	if (group && movable)
				976	group->present_movable_pages += nr_pages;
				977	else if (group && !movable)
				978	group->present_kernel_pages += nr_pages;
David Hildenbrand	f990114	2021-05-04 18:39:39 -0700	[diff] [blame]	979	}
				980
Oscar Salvador	a08a2ae	2021-05-04 18:39:42 -0700	[diff] [blame]	981	int mhp_init_memmap_on_memory(unsigned long pfn, unsigned long nr_pages,
				982	struct zone *zone)
				983	{
				984	unsigned long end_pfn = pfn + nr_pages;
				985	int ret;
				986
				987	ret = kasan_add_zero_shadow(__va(PFN_PHYS(pfn)), PFN_PHYS(nr_pages));
				988	if (ret)
				989	return ret;
				990
				991	move_pfn_range_to_zone(zone, pfn, nr_pages, NULL, MIGRATE_UNMOVABLE);
				992
				993	/*
				994	* It might be that the vmemmap_pages fully span sections. If that is
				995	* the case, mark those sections online here as otherwise they will be
				996	* left offline.
				997	*/
				998	if (nr_pages >= PAGES_PER_SECTION)
				999	online_mem_sections(pfn, ALIGN_DOWN(end_pfn, PAGES_PER_SECTION));
				1000
				1001	return ret;
				1002	}
				1003
				1004	void mhp_deinit_memmap_on_memory(unsigned long pfn, unsigned long nr_pages)
				1005	{
				1006	unsigned long end_pfn = pfn + nr_pages;
				1007
				1008	/*
				1009	* It might be that the vmemmap_pages fully span sections. If that is
				1010	* the case, mark those sections offline here as otherwise they will be
				1011	* left online.
				1012	*/
				1013	if (nr_pages >= PAGES_PER_SECTION)
				1014	offline_mem_sections(pfn, ALIGN_DOWN(end_pfn, PAGES_PER_SECTION));
				1015
				1016	/*
				1017	* The pages associated with this vmemmap have been offlined, so
				1018	* we can reset its state here.
				1019	*/
				1020	remove_pfn_range_from_zone(page_zone(pfn_to_page(pfn)), pfn, nr_pages);
				1021	kasan_remove_zero_shadow(__va(PFN_PHYS(pfn)), PFN_PHYS(nr_pages));
				1022	}
				1023
David Hildenbrand	836809e	2021-09-07 19:55:30 -0700	[diff] [blame]	1024	int __ref online_pages(unsigned long pfn, unsigned long nr_pages,
				1025	struct zone zone, struct memory_group group)
KAMEZAWA Hiroyuki	75884fb	2007-10-16 01:26:10 -0700	[diff] [blame]	1026	{
Cody P Schafer	aa47228	2013-07-03 15:02:10 -0700	[diff] [blame]	1027	unsigned long flags;
Yasunori Goto	6811378	2006-06-23 02:03:11 -0700	[diff] [blame]	1028	int need_zonelists_rebuild = 0;
Oscar Salvador	a08a2ae	2021-05-04 18:39:42 -0700	[diff] [blame]	1029	const int nid = zone_to_nid(zone);
Yasunori Goto	7b78d33	2007-10-21 16:41:36 -0700	[diff] [blame]	1030	int ret;
				1031	struct memory_notify arg;
Dave Hansen	3947be1	2005-10-29 18:16:54 -0700	[diff] [blame]	1032
Oscar Salvador	dd8e2f2	2021-05-04 18:39:36 -0700	[diff] [blame]	1033	/*
				1034	* {on,off}lining is constrained to full memory sections (or more
Zhen Lei	041711c	2021-06-30 18:53:17 -0700	[diff] [blame]	1035	* precisely to memory blocks from the user space POV).
Oscar Salvador	dd8e2f2	2021-05-04 18:39:36 -0700	[diff] [blame]	1036	* memmap_on_memory is an exception because it reserves initial part
				1037	* of the physical memory space for vmemmaps. That space is pageblock
				1038	* aligned.
				1039	*/
David Hildenbrand	4986fac	2020-10-15 20:07:50 -0700	[diff] [blame]	1040	if (WARN_ON_ONCE(!nr_pages \|\|
Oscar Salvador	dd8e2f2	2021-05-04 18:39:36 -0700	[diff] [blame]	1041	!IS_ALIGNED(pfn, pageblock_nr_pages) \|\|
				1042	!IS_ALIGNED(pfn + nr_pages, PAGES_PER_SECTION)))
David Hildenbrand	4986fac	2020-10-15 20:07:50 -0700	[diff] [blame]	1043	return -EINVAL;
				1044
David Hildenbrand	381eab4	2018-10-30 15:10:29 -0700	[diff] [blame]	1045	mem_hotplug_begin();
				1046
Michal Hocko	f1dd2cd	2017-07-06 15:38:11 -0700	[diff] [blame]	1047	/* associate pfn range with the zone */
David Hildenbrand	b30c592	2020-10-15 20:08:23 -0700	[diff] [blame]	1048	move_pfn_range_to_zone(zone, pfn, nr_pages, NULL, MIGRATE_ISOLATE);
Michal Hocko	f1dd2cd	2017-07-06 15:38:11 -0700	[diff] [blame]	1049
Yasunori Goto	7b78d33	2007-10-21 16:41:36 -0700	[diff] [blame]	1050	arg.start_pfn = pfn;
				1051	arg.nr_pages = nr_pages;
Lai Jiangshan	d971367	2012-12-11 16:01:03 -0800	[diff] [blame]	1052	node_states_check_changes_online(nr_pages, zone, &arg);
Yasunori Goto	7b78d33	2007-10-21 16:41:36 -0700	[diff] [blame]	1053
Yasunori Goto	7b78d33	2007-10-21 16:41:36 -0700	[diff] [blame]	1054	ret = memory_notify(MEM_GOING_ONLINE, &arg);
				1055	ret = notifier_to_errno(ret);
Chen Yucong	e33e33b	2016-03-17 14:19:35 -0700	[diff] [blame]	1056	if (ret)
				1057	goto failed_addition;
				1058
Dave Hansen	3947be1	2005-10-29 18:16:54 -0700	[diff] [blame]	1059	/*
David Hildenbrand	b30c592	2020-10-15 20:08:23 -0700	[diff] [blame]	1060	* Fixup the number of isolated pageblocks before marking the sections
				1061	* onlining, such that undo_isolate_page_range() works correctly.
				1062	*/
				1063	spin_lock_irqsave(&zone->lock, flags);
				1064	zone->nr_isolate_pageblock += nr_pages / pageblock_nr_pages;
				1065	spin_unlock_irqrestore(&zone->lock, flags);
				1066
				1067	/*
Yasunori Goto	6811378	2006-06-23 02:03:11 -0700	[diff] [blame]	1068	* If this zone is not populated, then it is not in zonelist.
				1069	* This means the page allocator ignores this zone.
				1070	* So, zonelist must be updated after online.
				1071	*/
Wen Congyang	6dcd73d	2012-12-11 16:01:01 -0800	[diff] [blame]	1072	if (!populated_zone(zone)) {
Yasunori Goto	6811378	2006-06-23 02:03:11 -0700	[diff] [blame]	1073	need_zonelists_rebuild = 1;
Michal Hocko	72675e1	2017-09-06 16:20:24 -0700	[diff] [blame]	1074	setup_zone_pageset(zone);
Wen Congyang	6dcd73d	2012-12-11 16:01:01 -0800	[diff] [blame]	1075	}
Yasunori Goto	6811378	2006-06-23 02:03:11 -0700	[diff] [blame]	1076
David Hildenbrand	aac6532	2020-10-15 20:08:11 -0700	[diff] [blame]	1077	online_pages_range(pfn, nr_pages);
David Hildenbrand	836809e	2021-09-07 19:55:30 -0700	[diff] [blame]	1078	adjust_present_page_count(pfn_to_page(pfn), group, nr_pages);
Cody P Schafer	aa47228	2013-07-03 15:02:10 -0700	[diff] [blame]	1079
David Hildenbrand	b30c592	2020-10-15 20:08:23 -0700	[diff] [blame]	1080	node_states_set_node(nid, &arg);
				1081	if (need_zonelists_rebuild)
				1082	build_all_zonelists(NULL);
David Hildenbrand	b30c592	2020-10-15 20:08:23 -0700	[diff] [blame]	1083
				1084	/* Basic onlining is complete, allow allocation of onlined pages. */
				1085	undo_isolate_page_range(pfn, pfn + nr_pages, MIGRATE_MOVABLE);
				1086
David Hildenbrand	93146d9	2020-08-06 23:25:35 -0700	[diff] [blame]	1087	/*
David Hildenbrand	b86c5fc	2020-10-15 20:09:39 -0700	[diff] [blame]	1088	* Freshly onlined pages aren't shuffled (e.g., all pages are placed to
				1089	* the tail of the freelist when undoing isolation). Shuffle the whole
				1090	* zone to make sure the just onlined pages are properly distributed
				1091	* across the whole freelist - to create an initial shuffle.
David Hildenbrand	93146d9	2020-08-06 23:25:35 -0700	[diff] [blame]	1092	*/
Dan Williams	e900a91	2019-05-14 15:41:28 -0700	[diff] [blame]	1093	shuffle_zone(zone);
				1094
Mel Gorman	b92ca18	2021-06-28 19:42:12 -0700	[diff] [blame]	1095	/* reinitialise watermarks and update pcp limits */
KOSAKI Motohiro	1b79acc	2011-05-24 17:11:32 -0700	[diff] [blame]	1096	init_per_zone_wmark_min();
				1097
David Hildenbrand	ca9a46f	2019-09-23 15:36:08 -0700	[diff] [blame]	1098	kswapd_run(nid);
				1099	kcompactd_run(nid);
Dave Hansen	61b1399	2005-10-29 18:16:56 -0700	[diff] [blame]	1100
Chandra Seetharaman	2d1d43f	2006-09-29 02:01:25 -0700	[diff] [blame]	1101	writeback_set_ratelimit();
Yasunori Goto	7b78d33	2007-10-21 16:41:36 -0700	[diff] [blame]	1102
David Hildenbrand	ca9a46f	2019-09-23 15:36:08 -0700	[diff] [blame]	1103	memory_notify(MEM_ONLINE, &arg);
David Hildenbrand	381eab4	2018-10-30 15:10:29 -0700	[diff] [blame]	1104	mem_hotplug_done();
David Rientjes	30467e0	2015-04-14 15:45:11 -0700	[diff] [blame]	1105	return 0;
Chen Yucong	e33e33b	2016-03-17 14:19:35 -0700	[diff] [blame]	1106
				1107	failed_addition:
				1108	pr_debug("online_pages [mem %#010llx-%#010llx] failed\n",
				1109	(unsigned long long) pfn << PAGE_SHIFT,
				1110	(((unsigned long long) pfn + nr_pages) << PAGE_SHIFT) - 1);
				1111	memory_notify(MEM_CANCEL_ONLINE, &arg);
David Hildenbrand	feee6b2	2020-01-04 12:59:33 -0800	[diff] [blame]	1112	remove_pfn_range_from_zone(zone, pfn, nr_pages);
David Hildenbrand	381eab4	2018-10-30 15:10:29 -0700	[diff] [blame]	1113	mem_hotplug_done();
Chen Yucong	e33e33b	2016-03-17 14:19:35 -0700	[diff] [blame]	1114	return ret;
Dave Hansen	3947be1	2005-10-29 18:16:54 -0700	[diff] [blame]	1115	}
Keith Mannthey	5394702	2006-09-30 23:27:08 -0700	[diff] [blame]	1116	#endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */
Yasunori Goto	bc02af9	2006-06-27 02:53:30 -0700	[diff] [blame]	1117
Tang Chen	0bd8542	2014-11-13 15:19:41 -0800	[diff] [blame]	1118	static void reset_node_present_pages(pg_data_t *pgdat)
				1119	{
				1120	struct zone *z;
				1121
				1122	for (z = pgdat->node_zones; z < pgdat->node_zones + MAX_NR_ZONES; z++)
				1123	z->present_pages = 0;
				1124
				1125	pgdat->node_present_pages = 0;
				1126	}
				1127
Hidetoshi Seto	e131933	2009-11-17 14:06:18 -0800	[diff] [blame]	1128	/* we are OK calling __meminit stuff here - we have CONFIG_MEMORY_HOTPLUG */
David Hildenbrand	c68ab18	2020-06-04 16:48:35 -0700	[diff] [blame]	1129	static pg_data_t __ref *hotadd_new_pgdat(int nid)
Yasunori Goto	9af3c2d	2006-06-27 02:53:34 -0700	[diff] [blame]	1130	{
				1131	struct pglist_data *pgdat;
Yasunori Goto	9af3c2d	2006-06-27 02:53:34 -0700	[diff] [blame]	1132
Tang Chen	a1e565a	2013-02-22 16:33:18 -0800	[diff] [blame]	1133	pgdat = NODE_DATA(nid);
				1134	if (!pgdat) {
				1135	pgdat = arch_alloc_nodedata(nid);
				1136	if (!pgdat)
				1137	return NULL;
Yasunori Goto	9af3c2d	2006-06-27 02:53:34 -0700	[diff] [blame]	1138
Wei Yang	33fce01	2019-09-23 15:35:52 -0700	[diff] [blame]	1139	pgdat->per_cpu_nodestats =
				1140	alloc_percpu(struct per_cpu_nodestat);
Tang Chen	a1e565a	2013-02-22 16:33:18 -0800	[diff] [blame]	1141	arch_refresh_nodedata(nid, pgdat);
Gu Zheng	b0dc3a3	2015-03-25 15:55:20 -0700	[diff] [blame]	1142	} else {
Wei Yang	33fce01	2019-09-23 15:35:52 -0700	[diff] [blame]	1143	int cpu;
Mel Gorman	e716f2e	2017-05-03 14:53:45 -0700	[diff] [blame]	1144	/*
Joonsoo Kim	97a225e	2020-06-03 15:59:01 -0700	[diff] [blame]	1145	* Reset the nr_zones, order and highest_zoneidx before reuse.
				1146	* Note that kswapd will init kswapd_highest_zoneidx properly
Mel Gorman	e716f2e	2017-05-03 14:53:45 -0700	[diff] [blame]	1147	* when it starts in the near future.
				1148	*/
Gu Zheng	b0dc3a3	2015-03-25 15:55:20 -0700	[diff] [blame]	1149	pgdat->nr_zones = 0;
Mel Gorman	38087d9	2016-07-28 15:45:49 -0700	[diff] [blame]	1150	pgdat->kswapd_order = 0;
Joonsoo Kim	97a225e	2020-06-03 15:59:01 -0700	[diff] [blame]	1151	pgdat->kswapd_highest_zoneidx = 0;
Wei Yang	33fce01	2019-09-23 15:35:52 -0700	[diff] [blame]	1152	for_each_online_cpu(cpu) {
				1153	struct per_cpu_nodestat *p;
				1154
				1155	p = per_cpu_ptr(pgdat->per_cpu_nodestats, cpu);
				1156	memset(p, 0, sizeof(*p));
				1157	}
Tang Chen	a1e565a	2013-02-22 16:33:18 -0800	[diff] [blame]	1158	}
Yasunori Goto	9af3c2d	2006-06-27 02:53:34 -0700	[diff] [blame]	1159
				1160	/* we can use NODE_DATA(nid) from here */
Oscar Salvador	03e85f9	2018-08-21 21:53:43 -0700	[diff] [blame]	1161	pgdat->node_id = nid;
David Hildenbrand	c68ab18	2020-06-04 16:48:35 -0700	[diff] [blame]	1162	pgdat->node_start_pfn = 0;
Oscar Salvador	03e85f9	2018-08-21 21:53:43 -0700	[diff] [blame]	1163
Yasunori Goto	9af3c2d	2006-06-27 02:53:34 -0700	[diff] [blame]	1164	/* init node's zones as empty zones, we don't have any present pages.*/
Oscar Salvador	03e85f9	2018-08-21 21:53:43 -0700	[diff] [blame]	1165	free_area_init_core_hotplug(nid);
Yasunori Goto	9af3c2d	2006-06-27 02:53:34 -0700	[diff] [blame]	1166
KAMEZAWA Hiroyuki	959ecc4	2011-06-15 15:08:38 -0700	[diff] [blame]	1167	/*
				1168	* The node we allocated has no zone fallback lists. To avoid
				1169	* accessing an uninitialized zonelist, build it here.
				1170	*/
Michal Hocko	72675e1	2017-09-06 16:20:24 -0700	[diff] [blame]	1171	build_all_zonelists(pgdat);
KAMEZAWA Hiroyuki	959ecc4	2011-06-15 15:08:38 -0700	[diff] [blame]	1172
Tang Chen	f784a3f	2014-11-13 15:19:39 -0800	[diff] [blame]	1173	/*
Tang Chen	0bd8542	2014-11-13 15:19:41 -0800	[diff] [blame]	1174	* When memory is hot-added, all the memory is in offline state. So
				1175	* clear all zones' present_pages because they will be updated in
				1176	* online_pages() and offline_pages().
				1177	*/
Oscar Salvador	03e85f9	2018-08-21 21:53:43 -0700	[diff] [blame]	1178	reset_node_managed_pages(pgdat);
Tang Chen	0bd8542	2014-11-13 15:19:41 -0800	[diff] [blame]	1179	reset_node_present_pages(pgdat);
				1180
Yasunori Goto	9af3c2d	2006-06-27 02:53:34 -0700	[diff] [blame]	1181	return pgdat;
				1182	}
				1183
Oscar Salvador	b9ff036	2018-08-17 15:46:15 -0700	[diff] [blame]	1184	static void rollback_node_hotadd(int nid)
Yasunori Goto	9af3c2d	2006-06-27 02:53:34 -0700	[diff] [blame]	1185	{
Oscar Salvador	b9ff036	2018-08-17 15:46:15 -0700	[diff] [blame]	1186	pg_data_t *pgdat = NODE_DATA(nid);
				1187
Yasunori Goto	9af3c2d	2006-06-27 02:53:34 -0700	[diff] [blame]	1188	arch_refresh_nodedata(nid, NULL);
Reza Arbab	5830169	2016-08-11 15:33:12 -0700	[diff] [blame]	1189	free_percpu(pgdat->per_cpu_nodestats);
Yasunori Goto	9af3c2d	2006-06-27 02:53:34 -0700	[diff] [blame]	1190	arch_free_nodedata(pgdat);
Yasunori Goto	9af3c2d	2006-06-27 02:53:34 -0700	[diff] [blame]	1191	}
				1192
KAMEZAWA Hiroyuki	0a54703	2006-06-27 02:53:35 -0700	[diff] [blame]	1193
Mel Gorman	ba2d266	2021-06-30 18:53:35 -0700	[diff] [blame]	1194	/*
				1195	* __try_online_node - online a node if offlined
Mike Rapoport	e8b098f	2018-04-05 16:24:57 -0700	[diff] [blame]	1196	* @nid: the node ID
Oscar Salvador	b9ff036	2018-08-17 15:46:15 -0700	[diff] [blame]	1197	* @set_node_online: Whether we want to online the node
minskey guo	cf23422	2010-05-24 14:32:41 -0700	[diff] [blame]	1198	* called by cpu_up() to online a node without onlined memory.
Oscar Salvador	b9ff036	2018-08-17 15:46:15 -0700	[diff] [blame]	1199	*
				1200	* Returns:
				1201	* 1 -> a new node has been allocated
				1202	* 0 -> the node is already online
				1203	* -ENOMEM -> the node could not be allocated
minskey guo	cf23422	2010-05-24 14:32:41 -0700	[diff] [blame]	1204	*/
David Hildenbrand	c68ab18	2020-06-04 16:48:35 -0700	[diff] [blame]	1205	static int __try_online_node(int nid, bool set_node_online)
minskey guo	cf23422	2010-05-24 14:32:41 -0700	[diff] [blame]	1206	{
Oscar Salvador	b9ff036	2018-08-17 15:46:15 -0700	[diff] [blame]	1207	pg_data_t *pgdat;
				1208	int ret = 1;
minskey guo	cf23422	2010-05-24 14:32:41 -0700	[diff] [blame]	1209
Toshi Kani	01b0f19	2013-11-12 15:07:25 -0800	[diff] [blame]	1210	if (node_online(nid))
				1211	return 0;
				1212
David Hildenbrand	c68ab18	2020-06-04 16:48:35 -0700	[diff] [blame]	1213	pgdat = hotadd_new_pgdat(nid);
David Rientjes	7553e8f	2011-06-22 18:13:01 -0700	[diff] [blame]	1214	if (!pgdat) {
Toshi Kani	01b0f19	2013-11-12 15:07:25 -0800	[diff] [blame]	1215	pr_err("Cannot online node %d due to NULL pgdat\n", nid);
minskey guo	cf23422	2010-05-24 14:32:41 -0700	[diff] [blame]	1216	ret = -ENOMEM;
				1217	goto out;
				1218	}
Oscar Salvador	b9ff036	2018-08-17 15:46:15 -0700	[diff] [blame]	1219
				1220	if (set_node_online) {
				1221	node_set_online(nid);
				1222	ret = register_one_node(nid);
				1223	BUG_ON(ret);
				1224	}
minskey guo	cf23422	2010-05-24 14:32:41 -0700	[diff] [blame]	1225	out:
Oscar Salvador	b9ff036	2018-08-17 15:46:15 -0700	[diff] [blame]	1226	return ret;
				1227	}
				1228
				1229	/*
				1230	* Users of this function always want to online/register the node
				1231	*/
				1232	int try_online_node(int nid)
				1233	{
				1234	int ret;
				1235
				1236	mem_hotplug_begin();
David Hildenbrand	c68ab18	2020-06-04 16:48:35 -0700	[diff] [blame]	1237	ret = __try_online_node(nid, true);
Vladimir Davydov	bfc8c90	2014-06-04 16:07:18 -0700	[diff] [blame]	1238	mem_hotplug_done();
minskey guo	cf23422	2010-05-24 14:32:41 -0700	[diff] [blame]	1239	return ret;
				1240	}
				1241
Toshi Kani	27356f5	2013-09-11 14:21:49 -0700	[diff] [blame]	1242	static int check_hotplug_memory_range(u64 start, u64 size)
				1243	{
Pavel Tatashin	ba32558	2018-04-05 16:22:39 -0700	[diff] [blame]	1244	/* memory range must be block size aligned */
David Hildenbrand	cec3ebd	2019-07-18 15:56:25 -0700	[diff] [blame]	1245	if (!size || !IS_ALIGNED(start, memory_block_size_bytes()) ||
				1246	!IS_ALIGNED(size, memory_block_size_bytes())) {
Pavel Tatashin	ba32558	2018-04-05 16:22:39 -0700	[diff] [blame]	1247	pr_err("Block size [%#lx] unaligned hotplug range: start %#llx, size %#llx",
David Hildenbrand	cec3ebd	2019-07-18 15:56:25 -0700	[diff] [blame]	1248	memory_block_size_bytes(), start, size);
Toshi Kani	27356f5	2013-09-11 14:21:49 -0700	[diff] [blame]	1249	return -EINVAL;
				1250	}
				1251
				1252	return 0;
				1253	}
				1254
Vitaly Kuznetsov	31bc385	2016-03-15 14:56:48 -0700	[diff] [blame]	1255	static int online_memory_block(struct memory_block *mem, void *arg)
				1256	{
Anshuman Khandual	1adf8b4	2021-02-25 17:17:13 -0800	[diff] [blame]	1257	mem->online_type = mhp_default_online_type;
Nathan Fontenot	dc18d70	2017-02-24 15:00:02 -0800	[diff] [blame]	1258	return device_online(&mem->dev);
Vitaly Kuznetsov	31bc385	2016-03-15 14:56:48 -0700	[diff] [blame]	1259	}
				1260
Oscar Salvador	a08a2ae	2021-05-04 18:39:42 -0700	[diff] [blame]	1261	bool mhp_supports_memmap_on_memory(unsigned long size)
				1262	{
				1263	unsigned long nr_vmemmap_pages = size / PAGE_SIZE;
				1264	unsigned long vmemmap_size = nr_vmemmap_pages * sizeof(struct page);
				1265	unsigned long remaining_size = size - vmemmap_size;
				1266
				1267	/*
				1268	* Besides having arch support and the feature enabled at runtime, we
				1269	* need a few more assumptions to hold true:
				1270	*
				1271	* a) We span a single memory block: memory onlining/offlining happens
				1272	* in memory block granularity. We don't want the vmemmap of online
				1273	* memory blocks to reside on offline memory blocks. In the future,
				1274	* we might want to support variable-sized memory blocks to make the
				1275	* feature more versatile.
				1276	*
				1277	* b) The vmemmap pages span complete PMDs: We don't want vmemmap code
				1278	* to populate memory from the altmap for unrelated parts (i.e.,
				1279	* other memory blocks)
				1280	*
				1281	* c) The vmemmap pages (and thereby the pages that will be exposed to
				1282	* the buddy) have to cover full pageblocks: memory onlining/offlining
				1283	* code requires applicable ranges to be pageblock-aligned, for example, to
				1284	* set the migratetypes properly.
				1285	*
				1286	* TODO: Although we have a check here to make sure that vmemmap pages
				1287	* fully populate a PMD, it is not the right place to check for
				1288	* this. A much better solution involves improving vmemmap code
				1289	* to fallback to base pages when trying to populate vmemmap using
				1290	* altmap as an alternative source of memory, and we do not exactly
				1291	* populate a single PMD.
				1292	*/
				1293	return memmap_on_memory &&
Muchun Song	2d7a217	2021-06-30 18:48:25 -0700	[diff] [blame]	1294	!hugetlb_free_vmemmap_enabled &&
Oscar Salvador	a08a2ae	2021-05-04 18:39:42 -0700	[diff] [blame]	1295	IS_ENABLED(CONFIG_MHP_MEMMAP_ON_MEMORY) &&
				1296	size == memory_block_size_bytes() &&
				1297	IS_ALIGNED(vmemmap_size, PMD_SIZE) &&
				1298	IS_ALIGNED(remaining_size, (pageblock_nr_pages << PAGE_SHIFT));
				1299	}
				1300
David Hildenbrand	8df1d0e	2018-10-30 15:10:24 -0700	[diff] [blame]	1301	/*
				1302	* NOTE: The caller must call lock_device_hotplug() to serialize hotplug
				1303	* and online/offline operations (triggered e.g. by sysfs).
				1304	*
				1305	* we are OK calling __meminit stuff here - we have CONFIG_MEMORY_HOTPLUG
				1306	*/
David Hildenbrand	b611719	2020-10-15 20:08:44 -0700	[diff] [blame]	1307	int __ref add_memory_resource(int nid, struct resource *res, mhp_t mhp_flags)
Yasunori Goto	bc02af9	2006-06-27 02:53:30 -0700	[diff] [blame]	1308	{
Catalin Marinas	d15dfd3	2021-03-09 12:26:01 +0000	[diff] [blame]	1309	struct mhp_params params = { .pgprot = pgprot_mhp(PAGE_KERNEL) };
Oscar Salvador	a08a2ae	2021-05-04 18:39:42 -0700	[diff] [blame]	1310	struct vmem_altmap mhp_altmap = {};
David Hildenbrand	028fc57	2021-09-07 19:55:26 -0700	[diff] [blame]	1311	struct memory_group *group = NULL;
David Vrabel	62cedb9	2015-06-25 16:35:49 +0100	[diff] [blame]	1312	u64 start, size;
Oscar Salvador	b9ff036	2018-08-17 15:46:15 -0700	[diff] [blame]	1313	bool new_node = false;
Yasunori Goto	bc02af9	2006-06-27 02:53:30 -0700	[diff] [blame]	1314	int ret;
				1315
David Vrabel	62cedb9	2015-06-25 16:35:49 +0100	[diff] [blame]	1316	start = res->start;
				1317	size = resource_size(res);
				1318
Toshi Kani	27356f5	2013-09-11 14:21:49 -0700	[diff] [blame]	1319	ret = check_hotplug_memory_range(start, size);
				1320	if (ret)
				1321	return ret;
				1322
David Hildenbrand	028fc57	2021-09-07 19:55:26 -0700	[diff] [blame]	1323	if (mhp_flags & MHP_NID_IS_MGID) {
				1324	group = memory_group_find_by_id(nid);
				1325	if (!group)
				1326	return -EINVAL;
				1327	nid = group->nid;
				1328	}
				1329
Vishal Verma	fa6d9ec	2020-06-04 16:48:25 -0700	[diff] [blame]	1330	if (!node_possible(nid)) {
				1331	WARN(1, "node %d was absent from the node_possible_map\n", nid);
				1332	return -EINVAL;
				1333	}
				1334
Vladimir Davydov	bfc8c90	2014-06-04 16:07:18 -0700	[diff] [blame]	1335	mem_hotplug_begin();
Nathan Zimmer	ac13c46	2014-01-23 15:53:26 -0800	[diff] [blame]	1336
David Hildenbrand	52219ae	2020-06-04 16:48:38 -0700	[diff] [blame]	1337	if (IS_ENABLED(CONFIG_ARCH_KEEP_MEMBLOCK))
				1338	memblock_add_node(start, size, nid);
Tang Chen	7f36e3e	2015-09-04 15:42:32 -0700	[diff] [blame]	1339
David Hildenbrand	c68ab18	2020-06-04 16:48:35 -0700	[diff] [blame]	1340	ret = __try_online_node(nid, false);
Oscar Salvador	b9ff036	2018-08-17 15:46:15 -0700	[diff] [blame]	1341	if (ret < 0)
				1342	goto error;
				1343	new_node = ret;
Yasunori Goto	9af3c2d	2006-06-27 02:53:34 -0700	[diff] [blame]	1344
Oscar Salvador	a08a2ae	2021-05-04 18:39:42 -0700	[diff] [blame]	1345	/*
				1346	* Self hosted memmap array
				1347	*/
				1348	if (mhp_flags & MHP_MEMMAP_ON_MEMORY) {
				1349	if (!mhp_supports_memmap_on_memory(size)) {
				1350	ret = -EINVAL;
				1351	goto error;
				1352	}
				1353	mhp_altmap.free = PHYS_PFN(size);
				1354	mhp_altmap.base_pfn = PHYS_PFN(start);
				1355	params.altmap = &mhp_altmap;
				1356	}
				1357
Yasunori Goto	bc02af9	2006-06-27 02:53:30 -0700	[diff] [blame]	1358	/* call arch's memory hotadd */
Logan Gunthorpe	f5637d3	2020-04-10 14:33:21 -0700	[diff] [blame]	1359	ret = arch_add_memory(nid, start, size, &params);
Yasunori Goto	9af3c2d	2006-06-27 02:53:34 -0700	[diff] [blame]	1360	if (ret < 0)
				1361	goto error;
				1362
David Hildenbrand	db051a0	2019-07-18 15:56:56 -0700	[diff] [blame]	1363	/* create memory block devices after memory was added */
David Hildenbrand	028fc57	2021-09-07 19:55:26 -0700	[diff] [blame]	1364	ret = create_memory_block_devices(start, size, mhp_altmap.alloc,
				1365	group);
David Hildenbrand	db051a0	2019-07-18 15:56:56 -0700	[diff] [blame]	1366	if (ret) {
David Hildenbrand	65a2aa5	2021-09-07 19:55:04 -0700	[diff] [blame]	1367	arch_remove_memory(start, size, NULL);
David Hildenbrand	db051a0	2019-07-18 15:56:56 -0700	[diff] [blame]	1368	goto error;
				1369	}
				1370
Tang Chen	a1e565a	2013-02-22 16:33:18 -0800	[diff] [blame]	1371	if (new_node) {
Oscar Salvador	d5b6f6a3	2018-08-17 15:46:18 -0700	[diff] [blame]	1372	/* If sysfs file of new node can't be created, cpu on the node
Yasunori Goto	0fc4415	2006-06-27 02:53:38 -0700	[diff] [blame]	1373	* can't be hot-added. There is no rollback way now.
				1374	* So, check by BUG_ON() to catch it reluctantly..
Oscar Salvador	d5b6f6a3	2018-08-17 15:46:18 -0700	[diff] [blame]	1375	* We online node here. We can't roll back from here.
Yasunori Goto	0fc4415	2006-06-27 02:53:38 -0700	[diff] [blame]	1376	*/
Oscar Salvador	d5b6f6a3	2018-08-17 15:46:18 -0700	[diff] [blame]	1377	node_set_online(nid);
				1378	ret = __register_one_node(nid);
Yasunori Goto	0fc4415	2006-06-27 02:53:38 -0700	[diff] [blame]	1379	BUG_ON(ret);
				1380	}
				1381
Oscar Salvador	d5b6f6a3	2018-08-17 15:46:18 -0700	[diff] [blame]	1382	/* link memory sections under this node.*/
Laurent Dufour	90c7eae	2020-10-15 20:09:15 -0700	[diff] [blame]	1383	link_mem_sections(nid, PFN_DOWN(start), PFN_UP(start + size - 1),
				1384	MEMINIT_HOTPLUG);
Oscar Salvador	d5b6f6a3	2018-08-17 15:46:18 -0700	[diff] [blame]	1385
akpm@linux-foundation.org	d96ae53	2010-03-05 13:41:58 -0800	[diff] [blame]	1386	/* create new memmap entry */
David Hildenbrand	7b7b272	2020-06-04 16:48:41 -0700	[diff] [blame]	1387	if (!strcmp(res->name, "System RAM"))
				1388	firmware_map_add_hotplug(start, start + size, "System RAM");
akpm@linux-foundation.org	d96ae53	2010-03-05 13:41:58 -0800	[diff] [blame]	1389
David Hildenbrand	381eab4	2018-10-30 15:10:29 -0700	[diff] [blame]	1390	/* device_online() will take the lock when calling online_pages() */
				1391	mem_hotplug_done();
				1392
David Hildenbrand	9ca6551	2020-10-15 20:08:49 -0700	[diff] [blame]	1393	/*
				1394	* In case we're allowed to merge the resource, flag it and trigger
				1395	* merging now that adding succeeded.
				1396	*/
David Hildenbrand	2601126	2021-02-25 17:17:17 -0800	[diff] [blame]	1397	if (mhp_flags & MHP_MERGE_RESOURCE)
David Hildenbrand	9ca6551	2020-10-15 20:08:49 -0700	[diff] [blame]	1398	merge_system_ram_resource(res);
				1399
Vitaly Kuznetsov	31bc385	2016-03-15 14:56:48 -0700	[diff] [blame]	1400	/* online pages if requested */
Anshuman Khandual	1adf8b4	2021-02-25 17:17:13 -0800	[diff] [blame]	1401	if (mhp_default_online_type != MMOP_OFFLINE)
David Hildenbrand	fbcf73c	2019-07-18 15:57:46 -0700	[diff] [blame]	1402	walk_memory_blocks(start, size, NULL, online_memory_block);
Vitaly Kuznetsov	31bc385	2016-03-15 14:56:48 -0700	[diff] [blame]	1403
David Hildenbrand	381eab4	2018-10-30 15:10:29 -0700	[diff] [blame]	1404	return ret;
Yasunori Goto	9af3c2d	2006-06-27 02:53:34 -0700	[diff] [blame]	1405	error:
				1406	/* rollback pgdat allocation and others */
Oscar Salvador	b9ff036	2018-08-17 15:46:15 -0700	[diff] [blame]	1407	if (new_node)
				1408	rollback_node_hotadd(nid);
David Hildenbrand	52219ae	2020-06-04 16:48:38 -0700	[diff] [blame]	1409	if (IS_ENABLED(CONFIG_ARCH_KEEP_MEMBLOCK))
				1410	memblock_remove(start, size);
Vladimir Davydov	bfc8c90	2014-06-04 16:07:18 -0700	[diff] [blame]	1411	mem_hotplug_done();
Yasunori Goto	bc02af9	2006-06-27 02:53:30 -0700	[diff] [blame]	1412	return ret;
				1413	}
David Vrabel	62cedb9	2015-06-25 16:35:49 +0100	[diff] [blame]	1414
David Hildenbrand	8df1d0e	2018-10-30 15:10:24 -0700	[diff] [blame]	1415	/* requires device_hotplug_lock, see add_memory_resource() */
David Hildenbrand	b611719	2020-10-15 20:08:44 -0700	[diff] [blame]	1416	int __ref __add_memory(int nid, u64 start, u64 size, mhp_t mhp_flags)
David Vrabel	62cedb9	2015-06-25 16:35:49 +0100	[diff] [blame]	1417	{
				1418	struct resource *res;
				1419	int ret;
				1420
David Hildenbrand	7b7b272	2020-06-04 16:48:41 -0700	[diff] [blame]	1421	res = register_memory_resource(start, size, "System RAM");
Vitaly Kuznetsov	6f754ba	2016-01-14 15:21:55 -0800	[diff] [blame]	1422	if (IS_ERR(res))
				1423	return PTR_ERR(res);
David Vrabel	62cedb9	2015-06-25 16:35:49 +0100	[diff] [blame]	1424
David Hildenbrand	b611719	2020-10-15 20:08:44 -0700	[diff] [blame]	1425	ret = add_memory_resource(nid, res, mhp_flags);
David Vrabel	62cedb9	2015-06-25 16:35:49 +0100	[diff] [blame]	1426	if (ret < 0)
				1427	release_memory_resource(res);
				1428	return ret;
				1429	}
David Hildenbrand	8df1d0e	2018-10-30 15:10:24 -0700	[diff] [blame]	1430
David Hildenbrand	b611719	2020-10-15 20:08:44 -0700	[diff] [blame]	1431	int add_memory(int nid, u64 start, u64 size, mhp_t mhp_flags)
David Hildenbrand	8df1d0e	2018-10-30 15:10:24 -0700	[diff] [blame]	1432	{
				1433	int rc;
				1434
				1435	lock_device_hotplug();
David Hildenbrand	b611719	2020-10-15 20:08:44 -0700	[diff] [blame]	1436	rc = __add_memory(nid, start, size, mhp_flags);
David Hildenbrand	8df1d0e	2018-10-30 15:10:24 -0700	[diff] [blame]	1437	unlock_device_hotplug();
				1438
				1439	return rc;
				1440	}
Yasunori Goto	bc02af9	2006-06-27 02:53:30 -0700	[diff] [blame]	1441	EXPORT_SYMBOL_GPL(add_memory);
KAMEZAWA Hiroyuki	0c0e619	2007-10-16 01:26:12 -0700	[diff] [blame]	1442
David Hildenbrand	7b7b272	2020-06-04 16:48:41 -0700	[diff] [blame]	1443	/*
				1444	* Add special, driver-managed memory to the system as system RAM. Such
				1445	* memory is not exposed via the raw firmware-provided memmap as system
				1446	* RAM, instead, it is detected and added by a driver - during cold boot,
				1447	* after a reboot, and after kexec.
				1448	*
				1449	* Reasons why this memory should not be used for the initial memmap of a
				1450	* kexec kernel or for placing kexec images:
				1451	* - The booting kernel is in charge of determining how this memory will be
				1452	* used (e.g., use persistent memory as system RAM)
				1453	* - Coordination with a hypervisor is required before this memory
				1454	* can be used (e.g., inaccessible parts).
				1455	*
				1456	* For this memory, no entries in /sys/firmware/memmap ("raw firmware-provided
				1457	* memory map") are created. Also, the created memory resource is flagged
David Hildenbrand	7cf603d	2020-10-15 20:08:33 -0700	[diff] [blame]	1458	* with IORESOURCE_SYSRAM_DRIVER_MANAGED, so in-kernel users can special-case
David Hildenbrand	7b7b272	2020-06-04 16:48:41 -0700	[diff] [blame]	1459	* this memory as well (esp., not place kexec images onto it).
				1460	*
				1461	* The resource_name (visible via /proc/iomem) has to have the format
				1462	* "System RAM ($DRIVER)".
				1463	*/
				1464	int add_memory_driver_managed(int nid, u64 start, u64 size,
David Hildenbrand	b611719	2020-10-15 20:08:44 -0700	[diff] [blame]	1465	const char *resource_name, mhp_t mhp_flags)
David Hildenbrand	7b7b272	2020-06-04 16:48:41 -0700	[diff] [blame]	1466	{
				1467	struct resource *res;
				1468	int rc;
				1469
				1470	if (!resource_name ||
				1471	strstr(resource_name, "System RAM (") != resource_name ||
				1472	resource_name[strlen(resource_name) - 1] != ')')
				1473	return -EINVAL;
				1474
				1475	lock_device_hotplug();
				1476
				1477	res = register_memory_resource(start, size, resource_name);
				1478	if (IS_ERR(res)) {
				1479	rc = PTR_ERR(res);
				1480	goto out_unlock;
				1481	}
				1482
David Hildenbrand	b611719	2020-10-15 20:08:44 -0700	[diff] [blame]	1483	rc = add_memory_resource(nid, res, mhp_flags);
David Hildenbrand	7b7b272	2020-06-04 16:48:41 -0700	[diff] [blame]	1484	if (rc < 0)
				1485	release_memory_resource(res);
				1486
				1487	out_unlock:
				1488	unlock_device_hotplug();
				1489	return rc;
				1490	}
				1491	EXPORT_SYMBOL_GPL(add_memory_driver_managed);
				1492
Anshuman Khandual	bca3fea	2021-02-25 17:17:33 -0800	[diff] [blame]	1493	/*
				1494	* Platforms should define arch_get_mappable_range() that provides
				1495	* maximum possible addressable physical memory range for which the
				1496	* linear mapping could be created. The platform returned address
				1497	* range must adhere to these following semantics.
				1498	*
				1499	* - range.start <= range.end
				1500	* - Range includes both end points [range.start..range.end]
				1501	*
				1502	* There is also a fallback definition provided here, allowing the
				1503	* entire possible physical address range in case any platform does
				1504	* not define arch_get_mappable_range().
				1505	*/
				1506	struct range __weak arch_get_mappable_range(void)
				1507	{
				1508	struct range mhp_range = {
				1509	.start = 0UL,
				1510	.end = -1ULL,
				1511	};
				1512	return mhp_range;
				1513	}
				1514
				1515	struct range mhp_get_pluggable_range(bool need_mapping)
				1516	{
				1517	const u64 max_phys = (1ULL << MAX_PHYSMEM_BITS) - 1;
				1518	struct range mhp_range;
				1519
				1520	if (need_mapping) {
				1521	mhp_range = arch_get_mappable_range();
				1522	if (mhp_range.start > max_phys) {
				1523	mhp_range.start = 0;
				1524	mhp_range.end = 0;
				1525	}
				1526	mhp_range.end = min_t(u64, mhp_range.end, max_phys);
				1527	} else {
				1528	mhp_range.start = 0;
				1529	mhp_range.end = max_phys;
				1530	}
				1531	return mhp_range;
				1532	}
				1533	EXPORT_SYMBOL_GPL(mhp_get_pluggable_range);
				1534
				1535	bool mhp_range_allowed(u64 start, u64 size, bool need_mapping)
				1536	{
				1537	struct range mhp_range = mhp_get_pluggable_range(need_mapping);
				1538	u64 end = start + size;
				1539
				1540	if (start < end && start >= mhp_range.start && (end - 1) <= mhp_range.end)
				1541	return true;
				1542
				1543	pr_warn("Hotplug memory [%#llx-%#llx] exceeds maximum addressable range [%#llx-%#llx]\n",
				1544	start, end, mhp_range.start, mhp_range.end);
				1545	return false;
				1546	}
				1547
KAMEZAWA Hiroyuki	0c0e619	2007-10-16 01:26:12 -0700	[diff] [blame]	1548	#ifdef CONFIG_MEMORY_HOTREMOVE
				1549	/*
David Hildenbrand	9291799	2020-02-03 17:34:26 -0800	[diff] [blame]	1550	* Confirm all pages in a range [start, end) belong to the same zone (skipping
				1551	* memory holes). When true, return the zone.
KAMEZAWA Hiroyuki	0c0e619	2007-10-16 01:26:12 -0700	[diff] [blame]	1552	*/
David Hildenbrand	9291799	2020-02-03 17:34:26 -0800	[diff] [blame]	1553	struct zone *test_pages_in_a_zone(unsigned long start_pfn,
				1554	unsigned long end_pfn)
KAMEZAWA Hiroyuki	0c0e619	2007-10-16 01:26:12 -0700	[diff] [blame]	1555	{
Andrew Banman	5f0f288	2015-12-29 14:54:25 -0800	[diff] [blame]	1556	unsigned long pfn, sec_end_pfn;
KAMEZAWA Hiroyuki	0c0e619	2007-10-16 01:26:12 -0700	[diff] [blame]	1557	struct zone *zone = NULL;
				1558	struct page *page;
Mike Rapoport	673d40c	2021-09-07 19:54:55 -0700	[diff] [blame]	1559
Toshi Kani	deb88a2	2017-02-03 13:13:20 -0800	[diff] [blame]	1560	for (pfn = start_pfn, sec_end_pfn = SECTION_ALIGN_UP(start_pfn + 1);
KAMEZAWA Hiroyuki	0c0e619	2007-10-16 01:26:12 -0700	[diff] [blame]	1561	pfn < end_pfn;
Toshi Kani	deb88a2	2017-02-03 13:13:20 -0800	[diff] [blame]	1562	pfn = sec_end_pfn, sec_end_pfn += PAGES_PER_SECTION) {
Andrew Banman	5f0f288	2015-12-29 14:54:25 -0800	[diff] [blame]	1563	/* Make sure the memory section is present first */
				1564	if (!present_section_nr(pfn_to_section_nr(pfn)))
KAMEZAWA Hiroyuki	0c0e619	2007-10-16 01:26:12 -0700	[diff] [blame]	1565	continue;
Andrew Banman	5f0f288	2015-12-29 14:54:25 -0800	[diff] [blame]	1566	for (; pfn < sec_end_pfn && pfn < end_pfn;
				1567	pfn += MAX_ORDER_NR_PAGES) {
Mikhail Zaslonko	24feb47	2019-02-01 14:20:38 -0800	[diff] [blame]	1568	/* Check if we got outside of the zone */
Mike Rapoport	673d40c	2021-09-07 19:54:55 -0700	[diff] [blame]	1569	if (zone && !zone_spans_pfn(zone, pfn))
David Hildenbrand	9291799	2020-02-03 17:34:26 -0800	[diff] [blame]	1570	return NULL;
Mike Rapoport	673d40c	2021-09-07 19:54:55 -0700	[diff] [blame]	1571	page = pfn_to_page(pfn);
Andrew Banman	5f0f288	2015-12-29 14:54:25 -0800	[diff] [blame]	1572	if (zone && page_zone(page) != zone)
David Hildenbrand	9291799	2020-02-03 17:34:26 -0800	[diff] [blame]	1573	return NULL;
Andrew Banman	5f0f288	2015-12-29 14:54:25 -0800	[diff] [blame]	1574	zone = page_zone(page);
				1575	}
KAMEZAWA Hiroyuki	0c0e619	2007-10-16 01:26:12 -0700	[diff] [blame]	1576	}
Toshi Kani	deb88a2	2017-02-03 13:13:20 -0800	[diff] [blame]	1577
David Hildenbrand	9291799	2020-02-03 17:34:26 -0800	[diff] [blame]	1578	return zone;
KAMEZAWA Hiroyuki	0c0e619	2007-10-16 01:26:12 -0700	[diff] [blame]	1579	}
				1580
				1581	/*
Yisheng Xie	0efadf4	2017-02-24 14:57:39 -0800	[diff] [blame]	1582	* Scan pfn range [start,end) to find movable/migratable pages (LRU pages,
David Hildenbrand	aa21879	2020-05-07 16:01:30 +0200	[diff] [blame]	1583	* non-lru movable pages and hugepages). Will skip over most unmovable
				1584	* pages (esp., pages that can be skipped when offlining), but bail out on
				1585	* definitely unmovable pages.
				1586	*
				1587	* Returns:
				1588	* 0 in case a movable page is found and movable_pfn was updated.
				1589	* -ENOENT in case no movable page was found.
				1590	* -EBUSY in case a definitely unmovable page was found.
KAMEZAWA Hiroyuki	0c0e619	2007-10-16 01:26:12 -0700	[diff] [blame]	1591	*/
David Hildenbrand	aa21879	2020-05-07 16:01:30 +0200	[diff] [blame]	1592	static int scan_movable_pages(unsigned long start, unsigned long end,
				1593	unsigned long *movable_pfn)
KAMEZAWA Hiroyuki	0c0e619	2007-10-16 01:26:12 -0700	[diff] [blame]	1594	{
				1595	unsigned long pfn;
Oscar Salvador	eeb0efd	2019-02-01 14:20:47 -0800	[diff] [blame]	1596
KAMEZAWA Hiroyuki	0c0e619	2007-10-16 01:26:12 -0700	[diff] [blame]	1597	for (pfn = start; pfn < end; pfn++) {
Oscar Salvador	eeb0efd	2019-02-01 14:20:47 -0800	[diff] [blame]	1598	struct page *page, *head;
				1599	unsigned long skip;
				1600
				1601	if (!pfn_valid(pfn))
				1602	continue;
				1603	page = pfn_to_page(pfn);
				1604	if (PageLRU(page))
David Hildenbrand	aa21879	2020-05-07 16:01:30 +0200	[diff] [blame]	1605	goto found;
Oscar Salvador	eeb0efd	2019-02-01 14:20:47 -0800	[diff] [blame]	1606	if (__PageMovable(page))
David Hildenbrand	aa21879	2020-05-07 16:01:30 +0200	[diff] [blame]	1607	goto found;
				1608
				1609	/*
				1610	* PageOffline() pages that are not marked __PageMovable() and
				1611	* have a reference count > 0 (after MEM_GOING_OFFLINE) are
				1612	* definitely unmovable. If their reference count would be 0,
				1613	* they could at least be skipped when offlining memory.
				1614	*/
				1615	if (PageOffline(page) && page_count(page))
				1616	return -EBUSY;
Oscar Salvador	eeb0efd	2019-02-01 14:20:47 -0800	[diff] [blame]	1617
				1618	if (!PageHuge(page))
				1619	continue;
				1620	head = compound_head(page);
Mike Kravetz	8f251a3	2021-02-24 12:08:56 -0800	[diff] [blame]	1621	/*
				1622	* This test is racy as we hold no reference or lock. The
				1623	* hugetlb page could have been free'ed and head is no longer
				1624	* a hugetlb page before the following check. In such unlikely
				1625	* cases false positives and negatives are possible. Calling
				1626	* code must deal with these scenarios.
				1627	*/
				1628	if (HPageMigratable(head))
David Hildenbrand	aa21879	2020-05-07 16:01:30 +0200	[diff] [blame]	1629	goto found;
Matthew Wilcox (Oracle)	d8c6546	2019-09-23 15:34:30 -0700	[diff] [blame]	1630	skip = compound_nr(head) - (page - head);
Oscar Salvador	eeb0efd	2019-02-01 14:20:47 -0800	[diff] [blame]	1631	pfn += skip - 1;
KAMEZAWA Hiroyuki	0c0e619	2007-10-16 01:26:12 -0700	[diff] [blame]	1632	}
David Hildenbrand	aa21879	2020-05-07 16:01:30 +0200	[diff] [blame]	1633	return -ENOENT;
				1634	found:
				1635	*movable_pfn = pfn;
KAMEZAWA Hiroyuki	0c0e619	2007-10-16 01:26:12 -0700	[diff] [blame]	1636	return 0;
				1637	}
				1638
KAMEZAWA Hiroyuki	0c0e619	2007-10-16 01:26:12 -0700	[diff] [blame]	1639	static int
				1640	do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
				1641	{
				1642	unsigned long pfn;
Matthew Wilcox (Oracle)	6c35784	2020-08-14 17:30:37 -0700	[diff] [blame]	1643	struct page *page, *head;
KAMEZAWA Hiroyuki	0c0e619	2007-10-16 01:26:12 -0700	[diff] [blame]	1644	int ret = 0;
				1645	LIST_HEAD(source);
Liam Mark	786dee8	2021-06-30 18:52:43 -0700	[diff] [blame]	1646	static DEFINE_RATELIMIT_STATE(migrate_rs, DEFAULT_RATELIMIT_INTERVAL,
				1647	DEFAULT_RATELIMIT_BURST);
KAMEZAWA Hiroyuki	0c0e619	2007-10-16 01:26:12 -0700	[diff] [blame]	1648
Michal Hocko	a85009c	2018-12-28 00:38:29 -0800	[diff] [blame]	1649	for (pfn = start_pfn; pfn < end_pfn; pfn++) {
KAMEZAWA Hiroyuki	0c0e619	2007-10-16 01:26:12 -0700	[diff] [blame]	1650	if (!pfn_valid(pfn))
				1651	continue;
				1652	page = pfn_to_page(pfn);
Matthew Wilcox (Oracle)	6c35784	2020-08-14 17:30:37 -0700	[diff] [blame]	1653	head = compound_head(page);
Naoya Horiguchi	c8721bb	2013-09-11 14:22:09 -0700	[diff] [blame]	1654
				1655	if (PageHuge(page)) {
Matthew Wilcox (Oracle)	d8c6546	2019-09-23 15:34:30 -0700	[diff] [blame]	1656	pfn = page_to_pfn(head) + compound_nr(head) - 1;
Oscar Salvador	daf3538	2019-03-05 15:48:53 -0800	[diff] [blame]	1657	isolate_huge_page(head, &source);
Naoya Horiguchi	c8721bb	2013-09-11 14:22:09 -0700	[diff] [blame]	1658	continue;
Michal Hocko	94723aa	2018-04-10 16:30:07 -0700	[diff] [blame]	1659	} else if (PageTransHuge(page))
Matthew Wilcox (Oracle)	6c35784	2020-08-14 17:30:37 -0700	[diff] [blame]	1660	pfn = page_to_pfn(head) + thp_nr_pages(page) - 1;
Naoya Horiguchi	c8721bb	2013-09-11 14:22:09 -0700	[diff] [blame]	1661
Michal Hocko	b15c872	2018-12-28 00:38:01 -0800	[diff] [blame]	1662	/*
				1663	* HWPoison pages have elevated reference counts so the migration would
				1664	* fail on them. It also doesn't make any sense to migrate them in the
				1665	* first place. Still try to unmap such a page in case it is still mapped
				1666	* (e.g. current hwpoison implementation doesn't unmap KSM pages but keep
				1667	* the unmap as the catch all safety net).
				1668	*/
				1669	if (PageHWPoison(page)) {
				1670	if (WARN_ON(PageLRU(page)))
				1671	isolate_lru_page(page);
				1672	if (page_mapped(page))
Shakeel Butt	013339d	2020-12-14 19:06:39 -0800	[diff] [blame]	1673	try_to_unmap(page, TTU_IGNORE_MLOCK);
Michal Hocko	b15c872	2018-12-28 00:38:01 -0800	[diff] [blame]	1674	continue;
				1675	}
				1676
Konstantin Khlebnikov	700c2a4	2011-05-24 17:12:19 -0700	[diff] [blame]	1677	if (!get_page_unless_zero(page))
KAMEZAWA Hiroyuki	0c0e619	2007-10-16 01:26:12 -0700	[diff] [blame]	1678	continue;
				1679	/*
Yisheng Xie	0efadf4	2017-02-24 14:57:39 -0800	[diff] [blame]	1680	* We can skip free pages. And we can deal with pages on
				1681	* LRU and non-lru movable pages.
KAMEZAWA Hiroyuki	0c0e619	2007-10-16 01:26:12 -0700	[diff] [blame]	1682	*/
Yisheng Xie	0efadf4	2017-02-24 14:57:39 -0800	[diff] [blame]	1683	if (PageLRU(page))
				1684	ret = isolate_lru_page(page);
				1685	else
				1686	ret = isolate_movable_page(page, ISOLATE_UNEVICTABLE);
KAMEZAWA Hiroyuki	0c0e619	2007-10-16 01:26:12 -0700	[diff] [blame]	1687	if (!ret) { /* Success */
Nick Piggin	62695a8	2008-10-18 20:26:09 -0700	[diff] [blame]	1688	list_add_tail(&page->lru, &source);
Yisheng Xie	0efadf4	2017-02-24 14:57:39 -0800	[diff] [blame]	1689	if (!__PageMovable(page))
				1690	inc_node_page_state(page, NR_ISOLATED_ANON +
Huang Ying	9de4f22	2020-04-06 20:04:41 -0700	[diff] [blame]	1691	page_is_file_lru(page));
KOSAKI Motohiro	6d9c285	2009-12-14 17:58:11 -0800	[diff] [blame]	1692
KAMEZAWA Hiroyuki	0c0e619	2007-10-16 01:26:12 -0700	[diff] [blame]	1693	} else {
Liam Mark	786dee8	2021-06-30 18:52:43 -0700	[diff] [blame]	1694	if (__ratelimit(&migrate_rs)) {
				1695	pr_warn("failed to isolate pfn %lx\n", pfn);
				1696	dump_page(page, "isolation failed");
				1697	}
KAMEZAWA Hiroyuki	0c0e619	2007-10-16 01:26:12 -0700	[diff] [blame]	1698	}
Oscar Salvador	1723058	2019-02-01 14:19:57 -0800	[diff] [blame]	1699	put_page(page);
KAMEZAWA Hiroyuki	0c0e619	2007-10-16 01:26:12 -0700	[diff] [blame]	1700	}
Bob Liu	f3ab263	2010-10-26 14:22:10 -0700	[diff] [blame]	1701	if (!list_empty(&source)) {
Joonsoo Kim	203e6e5	2020-10-17 16:14:00 -0700	[diff] [blame]	1702	nodemask_t nmask = node_states[N_MEMORY];
				1703	struct migration_target_control mtc = {
				1704	.nmask = &nmask,
				1705	.gfp_mask = GFP_USER \| __GFP_MOVABLE \| __GFP_RETRY_MAYFAIL,
				1706	};
				1707
				1708	/*
				1709	* We have checked that migration range is on a single zone so
				1710	* we can use the nid of the first page to all the others.
				1711	*/
				1712	mtc.nid = page_to_nid(list_first_entry(&source, struct page, lru));
				1713
				1714	/*
				1715	* try to allocate from a different node but reuse this node
				1716	* if there are no other online nodes to be used (e.g. we are
				1717	* offlining a part of the only existing node)
				1718	*/
				1719	node_clear(mtc.nid, nmask);
				1720	if (nodes_empty(nmask))
				1721	node_set(mtc.nid, nmask);
				1722	ret = migrate_pages(&source, alloc_migration_target, NULL,
				1723	(unsigned long)&mtc, MIGRATE_SYNC, MR_MEMORY_HOTPLUG);
Michal Hocko	2932c8b	2018-12-28 00:33:53 -0800	[diff] [blame]	1724	if (ret) {
				1725	list_for_each_entry(page, &source, lru) {
Liam Mark	786dee8	2021-06-30 18:52:43 -0700	[diff] [blame]	1726	if (__ratelimit(&migrate_rs)) {
				1727	pr_warn("migrating pfn %lx failed ret:%d\n",
				1728	page_to_pfn(page), ret);
				1729	dump_page(page, "migration failure");
				1730	}
Michal Hocko	2932c8b	2018-12-28 00:33:53 -0800	[diff] [blame]	1731	}
Naoya Horiguchi	c8721bb	2013-09-11 14:22:09 -0700	[diff] [blame]	1732	putback_movable_pages(&source);
Michal Hocko	2932c8b	2018-12-28 00:33:53 -0800	[diff] [blame]	1733	}
KAMEZAWA Hiroyuki	0c0e619	2007-10-16 01:26:12 -0700	[diff] [blame]	1734	}
Oscar Salvador	1723058	2019-02-01 14:19:57 -0800	[diff] [blame]	1735
KAMEZAWA Hiroyuki	0c0e619	2007-10-16 01:26:12 -0700	[diff] [blame]	1736	return ret;
				1737	}
				1738
Tang Chen	c532092	2013-11-12 15:08:10 -0800	[diff] [blame]	1739	static int __init cmdline_parse_movable_node(char *p)
				1740	{
Tang Chen	55ac590	2014-01-21 15:49:35 -0800	[diff] [blame]	1741	movable_node_enabled = true;
Tang Chen	c532092	2013-11-12 15:08:10 -0800	[diff] [blame]	1742	return 0;
				1743	}
				1744	early_param("movable_node", cmdline_parse_movable_node);
				1745
Lai Jiangshan	d971367	2012-12-11 16:01:03 -0800	[diff] [blame]	1746	/* check which state of node_states will be changed when offline memory */
				1747	static void node_states_check_changes_offline(unsigned long nr_pages,
				1748	struct zone zone, struct memory_notify arg)
				1749	{
				1750	struct pglist_data *pgdat = zone->zone_pgdat;
				1751	unsigned long present_pages = 0;
Oscar Salvador	86b27be	2018-10-26 15:07:38 -0700	[diff] [blame]	1752	enum zone_type zt;
				1753
Anshuman Khandual	98fa15f	2019-03-05 15:42:58 -0800	[diff] [blame]	1754	arg->status_change_nid = NUMA_NO_NODE;
				1755	arg->status_change_nid_normal = NUMA_NO_NODE;
				1756	arg->status_change_nid_high = NUMA_NO_NODE;
Lai Jiangshan	d971367	2012-12-11 16:01:03 -0800	[diff] [blame]	1757
				1758	/*
Oscar Salvador	86b27be	2018-10-26 15:07:38 -0700	[diff] [blame]	1759	* Check whether node_states[N_NORMAL_MEMORY] will be changed.
				1760	* If the memory to be offline is within the range
				1761	* [0..ZONE_NORMAL], and it is the last present memory there,
				1762	* the zones in that range will become empty after the offlining,
				1763	* thus we can determine that we need to clear the node from
				1764	* node_states[N_NORMAL_MEMORY].
Lai Jiangshan	d971367	2012-12-11 16:01:03 -0800	[diff] [blame]	1765	*/
Oscar Salvador	86b27be	2018-10-26 15:07:38 -0700	[diff] [blame]	1766	for (zt = 0; zt <= ZONE_NORMAL; zt++)
Lai Jiangshan	d971367	2012-12-11 16:01:03 -0800	[diff] [blame]	1767	present_pages += pgdat->node_zones[zt].present_pages;
Oscar Salvador	86b27be	2018-10-26 15:07:38 -0700	[diff] [blame]	1768	if (zone_idx(zone) <= ZONE_NORMAL && nr_pages >= present_pages)
Lai Jiangshan	d971367	2012-12-11 16:01:03 -0800	[diff] [blame]	1769	arg->status_change_nid_normal = zone_to_nid(zone);
Lai Jiangshan	d971367	2012-12-11 16:01:03 -0800	[diff] [blame]	1770
Lai Jiangshan	6715ddf	2012-12-12 13:51:49 -0800	[diff] [blame]	1771	#ifdef CONFIG_HIGHMEM
				1772	/*
Oscar Salvador	86b27be	2018-10-26 15:07:38 -0700	[diff] [blame]	1773	* node_states[N_HIGH_MEMORY] contains nodes which
				1774	* have normal memory or high memory.
				1775	* Here we add the present_pages belonging to ZONE_HIGHMEM.
				1776	* If the zone is within the range of [0..ZONE_HIGHMEM), and
				1777	* we determine that the zones in that range become empty,
				1778	* we need to clear the node for N_HIGH_MEMORY.
Lai Jiangshan	6715ddf	2012-12-12 13:51:49 -0800	[diff] [blame]	1779	*/
Oscar Salvador	86b27be	2018-10-26 15:07:38 -0700	[diff] [blame]	1780	present_pages += pgdat->node_zones[ZONE_HIGHMEM].present_pages;
				1781	if (zone_idx(zone) <= ZONE_HIGHMEM && nr_pages >= present_pages)
Lai Jiangshan	6715ddf	2012-12-12 13:51:49 -0800	[diff] [blame]	1782	arg->status_change_nid_high = zone_to_nid(zone);
Lai Jiangshan	6715ddf	2012-12-12 13:51:49 -0800	[diff] [blame]	1783	#endif
				1784
Lai Jiangshan	d971367	2012-12-11 16:01:03 -0800	[diff] [blame]	1785	/*
Oscar Salvador	86b27be	2018-10-26 15:07:38 -0700	[diff] [blame]	1786	* We have accounted the pages from [0..ZONE_NORMAL), and
				1787	* in case of CONFIG_HIGHMEM the pages from ZONE_HIGHMEM
				1788	* as well.
				1789	* Here we count the possible pages from ZONE_MOVABLE.
				1790	* If after having accounted all the pages, we see that the nr_pages
				1791	* to be offlined is over or equal to the accounted pages,
				1792	* we know that the node will become empty, and so, we can clear
				1793	* it for N_MEMORY as well.
Lai Jiangshan	d971367	2012-12-11 16:01:03 -0800	[diff] [blame]	1794	*/
Oscar Salvador	86b27be	2018-10-26 15:07:38 -0700	[diff] [blame]	1795	present_pages += pgdat->node_zones[ZONE_MOVABLE].present_pages;
Lai Jiangshan	d971367	2012-12-11 16:01:03 -0800	[diff] [blame]	1796
Lai Jiangshan	d971367	2012-12-11 16:01:03 -0800	[diff] [blame]	1797	if (nr_pages >= present_pages)
				1798	arg->status_change_nid = zone_to_nid(zone);
Lai Jiangshan	d971367	2012-12-11 16:01:03 -0800	[diff] [blame]	1799	}
				1800
				1801	static void node_states_clear_node(int node, struct memory_notify *arg)
				1802	{
				1803	if (arg->status_change_nid_normal >= 0)
				1804	node_clear_state(node, N_NORMAL_MEMORY);
				1805
Oscar Salvador	cf01f6f5	2018-10-26 15:07:28 -0700	[diff] [blame]	1806	if (arg->status_change_nid_high >= 0)
Lai Jiangshan	d971367	2012-12-11 16:01:03 -0800	[diff] [blame]	1807	node_clear_state(node, N_HIGH_MEMORY);
Lai Jiangshan	6715ddf	2012-12-12 13:51:49 -0800	[diff] [blame]	1808
Oscar Salvador	cf01f6f5	2018-10-26 15:07:28 -0700	[diff] [blame]	1809	if (arg->status_change_nid >= 0)
Lai Jiangshan	6715ddf	2012-12-12 13:51:49 -0800	[diff] [blame]	1810	node_clear_state(node, N_MEMORY);
Lai Jiangshan	d971367	2012-12-11 16:01:03 -0800	[diff] [blame]	1811	}
				1812
/*
 * Range-walk callback that sums up page counts.
 *
 * @start_pfn: first pfn of the reported range (unused)
 * @nr_pages:  number of pages in the reported range
 * @data:      points to the unsigned long accumulator to add @nr_pages to
 *
 * Always returns 0.
 */
static int count_system_ram_pages_cb(unsigned long start_pfn,
				     unsigned long nr_pages, void *data)
{
	unsigned long *total = data;

	*total = *total + nr_pages;

	return 0;
}
				1821
David Hildenbrand	836809e	2021-09-07 19:55:30 -0700	[diff] [blame]	1822	int __ref offline_pages(unsigned long start_pfn, unsigned long nr_pages,
				1823	struct memory_group *group)
KAMEZAWA Hiroyuki	0c0e619	2007-10-16 01:26:12 -0700	[diff] [blame]	1824	{
David Hildenbrand	73a11c9	2020-10-15 20:07:46 -0700	[diff] [blame]	1825	const unsigned long end_pfn = start_pfn + nr_pages;
David Hildenbrand	0a1a9a0	2020-10-15 20:07:54 -0700	[diff] [blame]	1826	unsigned long pfn, system_ram_pages = 0;
Cody P Schafer	d702909	2013-07-03 15:02:11 -0700	[diff] [blame]	1827	unsigned long flags;
KAMEZAWA Hiroyuki	0c0e619	2007-10-16 01:26:12 -0700	[diff] [blame]	1828	struct zone *zone;
Yasunori Goto	7b78d33	2007-10-21 16:41:36 -0700	[diff] [blame]	1829	struct memory_notify arg;
David Hildenbrand	ea15153	2020-10-15 20:08:03 -0700	[diff] [blame]	1830	int ret, node;
Michal Hocko	7960509	2018-12-28 00:33:49 -0800	[diff] [blame]	1831	char *reason;
KAMEZAWA Hiroyuki	0c0e619	2007-10-16 01:26:12 -0700	[diff] [blame]	1832
Oscar Salvador	dd8e2f2	2021-05-04 18:39:36 -0700	[diff] [blame]	1833	/*
				1834	* {on,off}lining is constrained to full memory sections (or more
Zhen Lei	041711c	2021-06-30 18:53:17 -0700	[diff] [blame]	1835	* precisely to memory blocks from the user space POV).
Oscar Salvador	dd8e2f2	2021-05-04 18:39:36 -0700	[diff] [blame]	1836	* memmap_on_memory is an exception because it reserves initial part
				1837	* of the physical memory space for vmemmaps. That space is pageblock
				1838	* aligned.
				1839	*/
David Hildenbrand	4986fac	2020-10-15 20:07:50 -0700	[diff] [blame]	1840	if (WARN_ON_ONCE(!nr_pages \|\|
Oscar Salvador	dd8e2f2	2021-05-04 18:39:36 -0700	[diff] [blame]	1841	!IS_ALIGNED(start_pfn, pageblock_nr_pages) \|\|
				1842	!IS_ALIGNED(start_pfn + nr_pages, PAGES_PER_SECTION)))
David Hildenbrand	4986fac	2020-10-15 20:07:50 -0700	[diff] [blame]	1843	return -EINVAL;
				1844
David Hildenbrand	381eab4	2018-10-30 15:10:29 -0700	[diff] [blame]	1845	mem_hotplug_begin();
				1846
David Hildenbrand	c5e79ef	2019-11-30 17:54:17 -0800	[diff] [blame]	1847	/*
				1848	* Don't allow to offline memory blocks that contain holes.
				1849	* Consequently, memory blocks with holes can never get onlined
				1850	* via the hotplug path - online_pages() - as hotplugged memory has
				1851	* no holes. This way, we e.g., don't have to worry about marking
				1852	* memory holes PG_reserved, don't need pfn_valid() checks, and can
				1853	* avoid using walk_system_ram_range() later.
				1854	*/
David Hildenbrand	73a11c9	2020-10-15 20:07:46 -0700	[diff] [blame]	1855	walk_system_ram_range(start_pfn, nr_pages, &system_ram_pages,
David Hildenbrand	c5e79ef	2019-11-30 17:54:17 -0800	[diff] [blame]	1856	count_system_ram_pages_cb);
David Hildenbrand	73a11c9	2020-10-15 20:07:46 -0700	[diff] [blame]	1857	if (system_ram_pages != nr_pages) {
David Hildenbrand	c5e79ef	2019-11-30 17:54:17 -0800	[diff] [blame]	1858	ret = -EINVAL;
				1859	reason = "memory holes";
				1860	goto failed_removal;
				1861	}
				1862
KAMEZAWA Hiroyuki	0c0e619	2007-10-16 01:26:12 -0700	[diff] [blame]	1863	/* This makes hotplug much easier...and readable.
				1864	we assume this for now. .*/
David Hildenbrand	9291799	2020-02-03 17:34:26 -0800	[diff] [blame]	1865	zone = test_pages_in_a_zone(start_pfn, end_pfn);
				1866	if (!zone) {
Michal Hocko	7960509	2018-12-28 00:33:49 -0800	[diff] [blame]	1867	ret = -EINVAL;
				1868	reason = "multizone range";
				1869	goto failed_removal;
David Hildenbrand	381eab4	2018-10-30 15:10:29 -0700	[diff] [blame]	1870	}
Yasunori Goto	7b78d33	2007-10-21 16:41:36 -0700	[diff] [blame]	1871	node = zone_to_nid(zone);
Yasunori Goto	7b78d33	2007-10-21 16:41:36 -0700	[diff] [blame]	1872
Vlastimil Babka	ec6e8c7e	2020-12-14 19:10:59 -0800	[diff] [blame]	1873	/*
				1874	* Disable pcplists so that page isolation cannot race with freeing
				1875	* in a way that pages from isolated pageblock are left on pcplists.
				1876	*/
				1877	zone_pcp_disable(zone);
Minchan Kim	d479960e	2021-05-04 18:36:54 -0700	[diff] [blame]	1878	lru_cache_disable();
Vlastimil Babka	ec6e8c7e	2020-12-14 19:10:59 -0800	[diff] [blame]	1879
KAMEZAWA Hiroyuki	0c0e619	2007-10-16 01:26:12 -0700	[diff] [blame]	1880	/* set above range as isolated */
Wen Congyang	b023f46	2012-12-11 16:00:45 -0800	[diff] [blame]	1881	ret = start_isolate_page_range(start_pfn, end_pfn,
Michal Hocko	d381c54	2018-12-28 00:33:56 -0800	[diff] [blame]	1882	MIGRATE_MOVABLE,
David Hildenbrand	756d25b	2019-11-30 17:54:07 -0800	[diff] [blame]	1883	MEMORY_OFFLINE \| REPORT_FAILURE);
David Hildenbrand	3fa0c7c	2020-10-15 20:08:07 -0700	[diff] [blame]	1884	if (ret) {
Michal Hocko	7960509	2018-12-28 00:33:49 -0800	[diff] [blame]	1885	reason = "failure to isolate range";
Vlastimil Babka	ec6e8c7e	2020-12-14 19:10:59 -0800	[diff] [blame]	1886	goto failed_removal_pcplists_disabled;
David Hildenbrand	381eab4	2018-10-30 15:10:29 -0700	[diff] [blame]	1887	}
Yasunori Goto	7b78d33	2007-10-21 16:41:36 -0700	[diff] [blame]	1888
				1889	arg.start_pfn = start_pfn;
				1890	arg.nr_pages = nr_pages;
Lai Jiangshan	d971367	2012-12-11 16:01:03 -0800	[diff] [blame]	1891	node_states_check_changes_offline(nr_pages, zone, &arg);
Yasunori Goto	7b78d33	2007-10-21 16:41:36 -0700	[diff] [blame]	1892
				1893	ret = memory_notify(MEM_GOING_OFFLINE, &arg);
				1894	ret = notifier_to_errno(ret);
Michal Hocko	7960509	2018-12-28 00:33:49 -0800	[diff] [blame]	1895	if (ret) {
				1896	reason = "notifier failure";
				1897	goto failed_removal_isolated;
				1898	}
Yasunori Goto	7b78d33	2007-10-21 16:41:36 -0700	[diff] [blame]	1899
Michal Hocko	bb8965b	2018-12-28 00:38:32 -0800	[diff] [blame]	1900	do {
David Hildenbrand	aa21879	2020-05-07 16:01:30 +0200	[diff] [blame]	1901	pfn = start_pfn;
				1902	do {
Michal Hocko	bb8965b	2018-12-28 00:38:32 -0800	[diff] [blame]	1903	if (signal_pending(current)) {
				1904	ret = -EINTR;
				1905	reason = "signal backoff";
				1906	goto failed_removal_isolated;
				1907	}
Michal Hocko	72b39cf	2017-11-15 17:33:34 -0800	[diff] [blame]	1908
Michal Hocko	bb8965b	2018-12-28 00:38:32 -0800	[diff] [blame]	1909	cond_resched();
KAMEZAWA Hiroyuki	0c0e619	2007-10-16 01:26:12 -0700	[diff] [blame]	1910
David Hildenbrand	aa21879	2020-05-07 16:01:30 +0200	[diff] [blame]	1911	ret = scan_movable_pages(pfn, end_pfn, &pfn);
				1912	if (!ret) {
Michal Hocko	bb8965b	2018-12-28 00:38:32 -0800	[diff] [blame]	1913	/*
				1914	* TODO: fatal migration failures should bail
				1915	* out
				1916	*/
				1917	do_migrate_range(pfn, end_pfn);
				1918	}
David Hildenbrand	aa21879	2020-05-07 16:01:30 +0200	[diff] [blame]	1919	} while (!ret);
				1920
				1921	if (ret != -ENOENT) {
				1922	reason = "unmovable page";
				1923	goto failed_removal_isolated;
Michal Hocko	bb8965b	2018-12-28 00:38:32 -0800	[diff] [blame]	1924	}
Michal Hocko	72b39cf	2017-11-15 17:33:34 -0800	[diff] [blame]	1925
Michal Hocko	bb8965b	2018-12-28 00:38:32 -0800	[diff] [blame]	1926	/*
				1927	* Dissolve free hugepages in the memory block before doing
				1928	* offlining actually in order to make hugetlbfs's object
				1929	* counting consistent.
				1930	*/
				1931	ret = dissolve_free_huge_pages(start_pfn, end_pfn);
				1932	if (ret) {
				1933	reason = "failure to dissolve huge pages";
				1934	goto failed_removal_isolated;
				1935	}
David Hildenbrand	0a1a9a0	2020-10-15 20:07:54 -0700	[diff] [blame]	1936
David Hildenbrand	0a1a9a0	2020-10-15 20:07:54 -0700	[diff] [blame]	1937	ret = test_pages_isolated(start_pfn, end_pfn, MEMORY_OFFLINE);
Vlastimil Babka	ec6e8c7e	2020-12-14 19:10:59 -0800	[diff] [blame]	1938
Michal Hocko	5557c76	2019-05-13 17:21:24 -0700	[diff] [blame]	1939	} while (ret);
Michal Hocko	bb8965b	2018-12-28 00:38:32 -0800	[diff] [blame]	1940
David Hildenbrand	0a1a9a0	2020-10-15 20:07:54 -0700	[diff] [blame]	1941	/* Mark all sections offline and remove free pages from the buddy. */
				1942	__offline_isolated_pages(start_pfn, end_pfn);
Laurent Dufour	7c33023	2020-12-15 20:42:26 -0800	[diff] [blame]	1943	pr_debug("Offlined Pages %ld\n", nr_pages);
David Hildenbrand	0a1a9a0	2020-10-15 20:07:54 -0700	[diff] [blame]	1944
Qian Cai	9b7ea46	2019-03-28 20:43:34 -0700	[diff] [blame]	1945	/*
David Hildenbrand	b30c592	2020-10-15 20:08:23 -0700	[diff] [blame]	1946	* The memory sections are marked offline, and the pageblock flags
				1947	* effectively stale; nobody should be touching them. Fixup the number
				1948	* of isolated pageblocks, memory onlining will properly revert this.
Qian Cai	9b7ea46	2019-03-28 20:43:34 -0700	[diff] [blame]	1949	*/
				1950	spin_lock_irqsave(&zone->lock, flags);
David Hildenbrand	ea15153	2020-10-15 20:08:03 -0700	[diff] [blame]	1951	zone->nr_isolate_pageblock -= nr_pages / pageblock_nr_pages;
Qian Cai	9b7ea46	2019-03-28 20:43:34 -0700	[diff] [blame]	1952	spin_unlock_irqrestore(&zone->lock, flags);
				1953
Minchan Kim	d479960e	2021-05-04 18:36:54 -0700	[diff] [blame]	1954	lru_cache_enable();
Vlastimil Babka	ec6e8c7e	2020-12-14 19:10:59 -0800	[diff] [blame]	1955	zone_pcp_enable(zone);
				1956
KAMEZAWA Hiroyuki	0c0e619	2007-10-16 01:26:12 -0700	[diff] [blame]	1957	/* removal success */
David Hildenbrand	0a1a9a0	2020-10-15 20:07:54 -0700	[diff] [blame]	1958	adjust_managed_page_count(pfn_to_page(start_pfn), -nr_pages);
David Hildenbrand	836809e	2021-09-07 19:55:30 -0700	[diff] [blame]	1959	adjust_present_page_count(pfn_to_page(start_pfn), group, -nr_pages);
Yasunori Goto	7b78d33	2007-10-21 16:41:36 -0700	[diff] [blame]	1960
Mel Gorman	b92ca18	2021-06-28 19:42:12 -0700	[diff] [blame]	1961	/* reinitialise watermarks and update pcp limits */
KOSAKI Motohiro	1b79acc	2011-05-24 17:11:32 -0700	[diff] [blame]	1962	init_per_zone_wmark_min();
				1963
Xishi Qiu	1e8537b	2012-10-08 16:31:51 -0700	[diff] [blame]	1964	if (!populated_zone(zone)) {
Jiang Liu	340175b	2012-07-31 16:43:32 -0700	[diff] [blame]	1965	zone_pcp_reset(zone);
Michal Hocko	72675e1	2017-09-06 16:20:24 -0700	[diff] [blame]	1966	build_all_zonelists(NULL);
Mel Gorman	b92ca18	2021-06-28 19:42:12 -0700	[diff] [blame]	1967	}
Jiang Liu	340175b	2012-07-31 16:43:32 -0700	[diff] [blame]	1968
Lai Jiangshan	d971367	2012-12-11 16:01:03 -0800	[diff] [blame]	1969	node_states_clear_node(node, &arg);
Vlastimil Babka	698b1b3	2016-03-17 14:18:08 -0700	[diff] [blame]	1970	if (arg.status_change_nid >= 0) {
David Rientjes	8fe23e0	2009-12-14 17:58:33 -0800	[diff] [blame]	1971	kswapd_stop(node);
Vlastimil Babka	698b1b3	2016-03-17 14:18:08 -0700	[diff] [blame]	1972	kcompactd_stop(node);
				1973	}
Minchan Kim	bce7394	2009-06-16 15:32:50 -0700	[diff] [blame]	1974
KAMEZAWA Hiroyuki	0c0e619	2007-10-16 01:26:12 -0700	[diff] [blame]	1975	writeback_set_ratelimit();
Yasunori Goto	7b78d33	2007-10-21 16:41:36 -0700	[diff] [blame]	1976
				1977	memory_notify(MEM_OFFLINE, &arg);
David Hildenbrand	feee6b2	2020-01-04 12:59:33 -0800	[diff] [blame]	1978	remove_pfn_range_from_zone(zone, start_pfn, nr_pages);
David Hildenbrand	381eab4	2018-10-30 15:10:29 -0700	[diff] [blame]	1979	mem_hotplug_done();
KAMEZAWA Hiroyuki	0c0e619	2007-10-16 01:26:12 -0700	[diff] [blame]	1980	return 0;
				1981
Michal Hocko	7960509	2018-12-28 00:33:49 -0800	[diff] [blame]	1982	failed_removal_isolated:
				1983	undo_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE);
Qian Cai	c4efe48	2019-03-28 20:44:16 -0700	[diff] [blame]	1984	memory_notify(MEM_CANCEL_OFFLINE, &arg);
Vlastimil Babka	ec6e8c7e	2020-12-14 19:10:59 -0800	[diff] [blame]	1985	failed_removal_pcplists_disabled:
Miaohe Lin	946746d1	2021-08-25 12:17:55 -0700	[diff] [blame]	1986	lru_cache_enable();
Vlastimil Babka	ec6e8c7e	2020-12-14 19:10:59 -0800	[diff] [blame]	1987	zone_pcp_enable(zone);
KAMEZAWA Hiroyuki	0c0e619	2007-10-16 01:26:12 -0700	[diff] [blame]	1988	failed_removal:
Michal Hocko	7960509	2018-12-28 00:33:49 -0800	[diff] [blame]	1989	pr_debug("memory offlining [mem %#010llx-%#010llx] failed due to %s\n",
Chen Yucong	e33e33b	2016-03-17 14:19:35 -0700	[diff] [blame]	1990	(unsigned long long) start_pfn << PAGE_SHIFT,
Michal Hocko	7960509	2018-12-28 00:33:49 -0800	[diff] [blame]	1991	((unsigned long long) end_pfn << PAGE_SHIFT) - 1,
				1992	reason);
KAMEZAWA Hiroyuki	0c0e619	2007-10-16 01:26:12 -0700	[diff] [blame]	1993	/* pushback to free area */
David Hildenbrand	381eab4	2018-10-30 15:10:29 -0700	[diff] [blame]	1994	mem_hotplug_done();
KAMEZAWA Hiroyuki	0c0e619	2007-10-16 01:26:12 -0700	[diff] [blame]	1995	return ret;
				1996	}
Badari Pulavarty	7108878	2008-10-18 20:25:58 -0700	[diff] [blame]	1997
Xishi Qiu	d6de9d5	2013-11-12 15:07:20 -0800	[diff] [blame]	1998	static int check_memblock_offlined_cb(struct memory_block mem, void arg)
Wen Congyang	bbc76be	2013-02-22 16:32:54 -0800	[diff] [blame]	1999	{
				2000	int ret = !is_memblock_offlined(mem);
David Hildenbrand	e1c158e	2021-09-07 19:55:09 -0700	[diff] [blame]	2001	int *nid = arg;
Wen Congyang	bbc76be	2013-02-22 16:32:54 -0800	[diff] [blame]	2002
David Hildenbrand	e1c158e	2021-09-07 19:55:09 -0700	[diff] [blame]	2003	*nid = mem->nid;
Randy Dunlap	349daa0	2013-04-29 15:08:49 -0700	[diff] [blame]	2004	if (unlikely(ret)) {
				2005	phys_addr_t beginpa, endpa;
				2006
				2007	beginpa = PFN_PHYS(section_nr_to_pfn(mem->start_section_nr));
David Hildenbrand	b6c88d3	2019-09-23 15:35:49 -0700	[diff] [blame]	2008	endpa = beginpa + memory_block_size_bytes() - 1;
Joe Perches	756a025	2016-03-17 14:19:47 -0700	[diff] [blame]	2009	pr_warn("removing memory fails, because memory [%pa-%pa] is onlined\n",
Randy Dunlap	349daa0	2013-04-29 15:08:49 -0700	[diff] [blame]	2010	&beginpa, &endpa);
Wen Congyang	bbc76be	2013-02-22 16:32:54 -0800	[diff] [blame]	2011
Pavel Tatashin	eca499a	2019-07-16 16:30:31 -0700	[diff] [blame]	2012	return -EBUSY;
				2013	}
				2014	return 0;
Wen Congyang	bbc76be	2013-02-22 16:32:54 -0800	[diff] [blame]	2015	}
				2016
Oscar Salvador	a08a2ae	2021-05-04 18:39:42 -0700	[diff] [blame]	2017	static int get_nr_vmemmap_pages_cb(struct memory_block mem, void arg)
				2018	{
				2019	/*
				2020	* If not set, continue with the next block.
				2021	*/
				2022	return mem->nr_vmemmap_pages;
				2023	}
				2024
Toshi Kani	0f1cfe9	2013-09-11 14:21:50 -0700	[diff] [blame]	2025	static int check_cpu_on_node(pg_data_t *pgdat)
Tang Chen	60a5a19	2013-02-22 16:33:14 -0800	[diff] [blame]	2026	{
Tang Chen	60a5a19	2013-02-22 16:33:14 -0800	[diff] [blame]	2027	int cpu;
				2028
				2029	for_each_present_cpu(cpu) {
				2030	if (cpu_to_node(cpu) == pgdat->node_id)
				2031	/*
				2032	* the cpu on this node isn't removed, and we can't
				2033	* offline this node.
				2034	*/
				2035	return -EBUSY;
				2036	}
				2037
				2038	return 0;
				2039	}
				2040
David Hildenbrand	2c91f8f	2019-11-15 17:34:57 -0800	[diff] [blame]	2041	static int check_no_memblock_for_node_cb(struct memory_block mem, void arg)
				2042	{
				2043	int nid = (int )arg;
				2044
				2045	/*
				2046	* If a memory block belongs to multiple nodes, the stored nid is not
				2047	* reliable. However, such blocks are always online (e.g., cannot get
				2048	* offlined) and, therefore, are still spanned by the node.
				2049	*/
				2050	return mem->nid == nid ? -EEXIST : 0;
				2051	}
				2052
Toshi Kani	0f1cfe9	2013-09-11 14:21:50 -0700	[diff] [blame]	2053	/**
				2054	* try_offline_node
Mike Rapoport	e8b098f	2018-04-05 16:24:57 -0700	[diff] [blame]	2055	* @nid: the node ID
Toshi Kani	0f1cfe9	2013-09-11 14:21:50 -0700	[diff] [blame]	2056	*
				2057	* Offline a node if all memory sections and cpus of the node are removed.
				2058	*
				2059	* NOTE: The caller must call lock_device_hotplug() to serialize hotplug
				2060	* and online/offline operations before this call.
				2061	*/
Wen Congyang	90b30cd	2013-02-22 16:33:27 -0800	[diff] [blame]	2062	void try_offline_node(int nid)
Tang Chen	60a5a19	2013-02-22 16:33:14 -0800	[diff] [blame]	2063	{
Wen Congyang	d822b86	2013-02-22 16:33:16 -0800	[diff] [blame]	2064	pg_data_t *pgdat = NODE_DATA(nid);
David Hildenbrand	2c91f8f	2019-11-15 17:34:57 -0800	[diff] [blame]	2065	int rc;
Tang Chen	60a5a19	2013-02-22 16:33:14 -0800	[diff] [blame]	2066
David Hildenbrand	2c91f8f	2019-11-15 17:34:57 -0800	[diff] [blame]	2067	/*
				2068	* If the node still spans pages (especially ZONE_DEVICE), don't
				2069	* offline it. A node spans memory after move_pfn_range_to_zone(),
				2070	* e.g., after the memory block was onlined.
				2071	*/
				2072	if (pgdat->node_spanned_pages)
Tang Chen	60a5a19	2013-02-22 16:33:14 -0800	[diff] [blame]	2073	return;
David Hildenbrand	2c91f8f	2019-11-15 17:34:57 -0800	[diff] [blame]	2074
				2075	/*
				2076	* Especially offline memory blocks might not be spanned by the
				2077	* node. They will get spanned by the node once they get onlined.
				2078	* However, they link to the node in sysfs and can get onlined later.
				2079	*/
				2080	rc = for_each_memory_block(&nid, check_no_memblock_for_node_cb);
				2081	if (rc)
				2082	return;
Tang Chen	60a5a19	2013-02-22 16:33:14 -0800	[diff] [blame]	2083
Michal Hocko	46a3679	2018-12-28 00:34:13 -0800	[diff] [blame]	2084	if (check_cpu_on_node(pgdat))
Tang Chen	60a5a19	2013-02-22 16:33:14 -0800	[diff] [blame]	2085	return;
				2086
				2087	/*
				2088	* all memory/cpu of this node are removed, we can offline this
				2089	* node now.
				2090	*/
				2091	node_set_offline(nid);
				2092	unregister_one_node(nid);
				2093	}
Wen Congyang	90b30cd	2013-02-22 16:33:27 -0800	[diff] [blame]	2094	EXPORT_SYMBOL(try_offline_node);
Tang Chen	60a5a19	2013-02-22 16:33:14 -0800	[diff] [blame]	2095
David Hildenbrand	e1c158e	2021-09-07 19:55:09 -0700	[diff] [blame]	2096	static int __ref try_remove_memory(u64 start, u64 size)
Wen Congyang	bbc76be	2013-02-22 16:32:54 -0800	[diff] [blame]	2097	{
Oscar Salvador	a08a2ae	2021-05-04 18:39:42 -0700	[diff] [blame]	2098	struct vmem_altmap mhp_altmap = {};
				2099	struct vmem_altmap *altmap = NULL;
				2100	unsigned long nr_vmemmap_pages;
David Hildenbrand	e1c158e	2021-09-07 19:55:09 -0700	[diff] [blame]	2101	int rc = 0, nid = NUMA_NO_NODE;
Wen Congyang	993c1aa	2013-02-22 16:32:50 -0800	[diff] [blame]	2102
Toshi Kani	27356f5	2013-09-11 14:21:49 -0700	[diff] [blame]	2103	BUG_ON(check_hotplug_memory_range(start, size));
				2104
Yasuaki Ishimatsu	6677e3e	2013-02-22 16:32:52 -0800	[diff] [blame]	2105	/*
Rafael J. Wysocki	242831e	2013-05-27 12:58:46 +0200	[diff] [blame]	2106	* All memory blocks must be offlined before removing memory. Check
Pavel Tatashin	eca499a	2019-07-16 16:30:31 -0700	[diff] [blame]	2107	* whether all memory blocks in question are offline and return error
Rafael J. Wysocki	242831e	2013-05-27 12:58:46 +0200	[diff] [blame]	2108	* if this is not the case.
David Hildenbrand	e1c158e	2021-09-07 19:55:09 -0700	[diff] [blame]	2109	*
				2110	* While at it, determine the nid. Note that if we'd have mixed nodes,
				2111	* we'd only try to offline the last determined one -- which is good
				2112	* enough for the cases we care about.
Yasuaki Ishimatsu	6677e3e	2013-02-22 16:32:52 -0800	[diff] [blame]	2113	*/
David Hildenbrand	e1c158e	2021-09-07 19:55:09 -0700	[diff] [blame]	2114	rc = walk_memory_blocks(start, size, &nid, check_memblock_offlined_cb);
Pavel Tatashin	eca499a	2019-07-16 16:30:31 -0700	[diff] [blame]	2115	if (rc)
Jia He	b4223a5	2020-08-11 18:32:20 -0700	[diff] [blame]	2116	return rc;
Yasuaki Ishimatsu	6677e3e	2013-02-22 16:32:52 -0800	[diff] [blame]	2117
Oscar Salvador	a08a2ae	2021-05-04 18:39:42 -0700	[diff] [blame]	2118	/*
				2119	* We only support removing memory added with MHP_MEMMAP_ON_MEMORY in
				2120	* the same granularity it was added - a single memory block.
				2121	*/
				2122	if (memmap_on_memory) {
				2123	nr_vmemmap_pages = walk_memory_blocks(start, size, NULL,
				2124	get_nr_vmemmap_pages_cb);
				2125	if (nr_vmemmap_pages) {
				2126	if (size != memory_block_size_bytes()) {
				2127	pr_warn("Refuse to remove %#llx - %#llx,"
				2128	"wrong granularity\n",
				2129	start, start + size);
				2130	return -EINVAL;
				2131	}
				2132
				2133	/*
				2134	* Let remove_pmd_table->free_hugepage_table do the
				2135	* right thing if we used vmem_altmap when hot-adding
				2136	* the range.
				2137	*/
				2138	mhp_altmap.alloc = nr_vmemmap_pages;
				2139	altmap = &mhp_altmap;
				2140	}
				2141	}
				2142
Yasuaki Ishimatsu	46c66c4	2013-02-22 16:32:56 -0800	[diff] [blame]	2143	/* remove memmap entry */
				2144	firmware_map_remove(start, start + size, "System RAM");
				2145
Dan Williams	f1037ec	2020-01-30 22:11:17 -0800	[diff] [blame]	2146	/*
				2147	* Memory block device removal under the device_hotplug_lock is
				2148	* a barrier against racing online attempts.
				2149	*/
David Hildenbrand	4c4b7f9	2019-07-18 15:57:06 -0700	[diff] [blame]	2150	remove_memory_block_devices(start, size);
				2151
Dan Williams	f1037ec	2020-01-30 22:11:17 -0800	[diff] [blame]	2152	mem_hotplug_begin();
				2153
David Hildenbrand	65a2aa5	2021-09-07 19:55:04 -0700	[diff] [blame]	2154	arch_remove_memory(start, size, altmap);
David Hildenbrand	52219ae	2020-06-04 16:48:38 -0700	[diff] [blame]	2155
				2156	if (IS_ENABLED(CONFIG_ARCH_KEEP_MEMBLOCK)) {
				2157	memblock_free(start, size);
				2158	memblock_remove(start, size);
				2159	}
				2160
David Hildenbrand	cb8e3c8	2020-10-15 20:09:12 -0700	[diff] [blame]	2161	release_mem_region_adjustable(start, size);
Wen Congyang	24d335c	2013-02-22 16:32:58 -0800	[diff] [blame]	2162
David Hildenbrand	e1c158e	2021-09-07 19:55:09 -0700	[diff] [blame]	2163	if (nid != NUMA_NO_NODE)
				2164	try_offline_node(nid);
Tang Chen	60a5a19	2013-02-22 16:33:14 -0800	[diff] [blame]	2165
Vladimir Davydov	bfc8c90	2014-06-04 16:07:18 -0700	[diff] [blame]	2166	mem_hotplug_done();
Jia He	b4223a5	2020-08-11 18:32:20 -0700	[diff] [blame]	2167	return 0;
Badari Pulavarty	7108878	2008-10-18 20:25:58 -0700	[diff] [blame]	2168	}
David Hildenbrand	d15e592	2018-10-30 15:10:18 -0700	[diff] [blame]	2169
Pavel Tatashin	eca499a	2019-07-16 16:30:31 -0700	[diff] [blame]	2170	/**
Mel Gorman	5640c9c	2021-06-30 18:53:38 -0700	[diff] [blame]	2171	* __remove_memory - Remove memory if every memory block is offline
Pavel Tatashin	eca499a	2019-07-16 16:30:31 -0700	[diff] [blame]	2172	* @start: physical address of the region to remove
				2173	* @size: size of the region to remove
				2174	*
				2175	* NOTE: The caller must call lock_device_hotplug() to serialize hotplug
				2176	* and online/offline operations before this call, as required by
				2177	* try_offline_node().
				2178	*/
David Hildenbrand	e1c158e	2021-09-07 19:55:09 -0700	[diff] [blame]	2179	void __remove_memory(u64 start, u64 size)
David Hildenbrand	d15e592	2018-10-30 15:10:18 -0700	[diff] [blame]	2180	{
Pavel Tatashin	eca499a	2019-07-16 16:30:31 -0700	[diff] [blame]	2181
				2182	/*
Souptick Joarder	29a90db	2019-09-23 15:36:18 -0700	[diff] [blame]	2183	* trigger BUG() if some memory is not offlined prior to calling this
Pavel Tatashin	eca499a	2019-07-16 16:30:31 -0700	[diff] [blame]	2184	* function
				2185	*/
David Hildenbrand	e1c158e	2021-09-07 19:55:09 -0700	[diff] [blame]	2186	if (try_remove_memory(start, size))
Pavel Tatashin	eca499a	2019-07-16 16:30:31 -0700	[diff] [blame]	2187	BUG();
				2188	}
				2189
				2190	/*
				2191	* Remove memory if every memory block is offline, otherwise return -EBUSY is
				2192	* some memory is not offline
				2193	*/
David Hildenbrand	e1c158e	2021-09-07 19:55:09 -0700	[diff] [blame]	2194	int remove_memory(u64 start, u64 size)
Pavel Tatashin	eca499a	2019-07-16 16:30:31 -0700	[diff] [blame]	2195	{
				2196	int rc;
				2197
David Hildenbrand	d15e592	2018-10-30 15:10:18 -0700	[diff] [blame]	2198	lock_device_hotplug();
David Hildenbrand	e1c158e	2021-09-07 19:55:09 -0700	[diff] [blame]	2199	rc = try_remove_memory(start, size);
David Hildenbrand	d15e592	2018-10-30 15:10:18 -0700	[diff] [blame]	2200	unlock_device_hotplug();
Pavel Tatashin	eca499a	2019-07-16 16:30:31 -0700	[diff] [blame]	2201
				2202	return rc;
David Hildenbrand	d15e592	2018-10-30 15:10:18 -0700	[diff] [blame]	2203	}
Badari Pulavarty	7108878	2008-10-18 20:25:58 -0700	[diff] [blame]	2204	EXPORT_SYMBOL_GPL(remove_memory);
David Hildenbrand	08b3acd	2020-05-07 16:01:32 +0200	[diff] [blame]	2205
David Hildenbrand	8dc4bb5	2020-11-12 14:38:13 +0100	[diff] [blame]	2206	static int try_offline_memory_block(struct memory_block mem, void arg)
				2207	{
				2208	uint8_t online_type = MMOP_ONLINE_KERNEL;
				2209	uint8_t **online_types = arg;
				2210	struct page *page;
				2211	int rc;
				2212
				2213	/*
				2214	* Sense the online_type via the zone of the memory block. Offlining
				2215	* with multiple zones within one memory block will be rejected
				2216	* by offlining code ... so we don't care about that.
				2217	*/
				2218	page = pfn_to_online_page(section_nr_to_pfn(mem->start_section_nr));
				2219	if (page && zone_idx(page_zone(page)) == ZONE_MOVABLE)
				2220	online_type = MMOP_ONLINE_MOVABLE;
				2221
				2222	rc = device_offline(&mem->dev);
				2223	/*
				2224	* Default is MMOP_OFFLINE - change it only if offlining succeeded,
				2225	* so try_reonline_memory_block() can do the right thing.
				2226	*/
				2227	if (!rc)
				2228	**online_types = online_type;
				2229
				2230	(*online_types)++;
				2231	/* Ignore if already offline. */
				2232	return rc < 0 ? rc : 0;
				2233	}
				2234
				2235	static int try_reonline_memory_block(struct memory_block mem, void arg)
				2236	{
				2237	uint8_t **online_types = arg;
				2238	int rc;
				2239
				2240	if (**online_types != MMOP_OFFLINE) {
				2241	mem->online_type = **online_types;
				2242	rc = device_online(&mem->dev);
				2243	if (rc < 0)
				2244	pr_warn("%s: Failed to re-online memory: %d",
				2245	__func__, rc);
				2246	}
				2247
				2248	/* Continue processing all remaining memory blocks. */
				2249	(*online_types)++;
				2250	return 0;
				2251	}
				2252
David Hildenbrand	08b3acd	2020-05-07 16:01:32 +0200	[diff] [blame]	2253	/*
David Hildenbrand	8dc4bb5	2020-11-12 14:38:13 +0100	[diff] [blame]	2254	* Try to offline and remove memory. Might take a long time to finish in case
				2255	* memory is still in use. Primarily useful for memory devices that logically
				2256	* unplugged all memory (so it's no longer in use) and want to offline + remove
				2257	* that memory.
David Hildenbrand	08b3acd	2020-05-07 16:01:32 +0200	[diff] [blame]	2258	*/
David Hildenbrand	e1c158e	2021-09-07 19:55:09 -0700	[diff] [blame]	2259	int offline_and_remove_memory(u64 start, u64 size)
David Hildenbrand	08b3acd	2020-05-07 16:01:32 +0200	[diff] [blame]	2260	{
David Hildenbrand	8dc4bb5	2020-11-12 14:38:13 +0100	[diff] [blame]	2261	const unsigned long mb_count = size / memory_block_size_bytes();
				2262	uint8_t online_types, tmp;
				2263	int rc;
David Hildenbrand	08b3acd	2020-05-07 16:01:32 +0200	[diff] [blame]	2264
				2265	if (!IS_ALIGNED(start, memory_block_size_bytes()) \|\|
David Hildenbrand	8dc4bb5	2020-11-12 14:38:13 +0100	[diff] [blame]	2266	!IS_ALIGNED(size, memory_block_size_bytes()) \|\| !size)
				2267	return -EINVAL;
David Hildenbrand	08b3acd	2020-05-07 16:01:32 +0200	[diff] [blame]	2268
				2269	/*
David Hildenbrand	8dc4bb5	2020-11-12 14:38:13 +0100	[diff] [blame]	2270	* We'll remember the old online type of each memory block, so we can
				2271	* try to revert whatever we did when offlining one memory block fails
				2272	* after offlining some others succeeded.
				2273	*/
				2274	online_types = kmalloc_array(mb_count, sizeof(*online_types),
				2275	GFP_KERNEL);
				2276	if (!online_types)
				2277	return -ENOMEM;
				2278	/*
				2279	* Initialize all states to MMOP_OFFLINE, so when we abort processing in
				2280	* try_offline_memory_block(), we'll skip all unprocessed blocks in
				2281	* try_reonline_memory_block().
				2282	*/
				2283	memset(online_types, MMOP_OFFLINE, mb_count);
				2284
				2285	lock_device_hotplug();
				2286
				2287	tmp = online_types;
				2288	rc = walk_memory_blocks(start, size, &tmp, try_offline_memory_block);
				2289
				2290	/*
				2291	* In case we succeeded to offline all memory, remove it.
David Hildenbrand	08b3acd	2020-05-07 16:01:32 +0200	[diff] [blame]	2292	* This cannot fail as it cannot get onlined in the meantime.
				2293	*/
				2294	if (!rc) {
David Hildenbrand	e1c158e	2021-09-07 19:55:09 -0700	[diff] [blame]	2295	rc = try_remove_memory(start, size);
David Hildenbrand	8dc4bb5	2020-11-12 14:38:13 +0100	[diff] [blame]	2296	if (rc)
				2297	pr_err("%s: Failed to remove memory: %d", __func__, rc);
				2298	}
				2299
				2300	/*
				2301	* Rollback what we did. While memory onlining might theoretically fail
				2302	* (nacked by a notifier), it barely ever happens.
				2303	*/
				2304	if (rc) {
				2305	tmp = online_types;
				2306	walk_memory_blocks(start, size, &tmp,
				2307	try_reonline_memory_block);
David Hildenbrand	08b3acd	2020-05-07 16:01:32 +0200	[diff] [blame]	2308	}
				2309	unlock_device_hotplug();
				2310
David Hildenbrand	8dc4bb5	2020-11-12 14:38:13 +0100	[diff] [blame]	2311	kfree(online_types);
David Hildenbrand	08b3acd	2020-05-07 16:01:32 +0200	[diff] [blame]	2312	return rc;
				2313	}
				2314	EXPORT_SYMBOL_GPL(offline_and_remove_memory);
Rafael J. Wysocki	aba6efc	2013-06-01 22:24:07 +0200	[diff] [blame]	2315	#endif /* CONFIG_MEMORY_HOTREMOVE */