Blame - mm/page_ext.c - linux - Git Browser for ODROID

blob: e5e31ff1adba5cf1751b420e4ab634c50fa22b1d [file] [log] [blame]

Greg Kroah-Hartman	b244131	2017-11-01 15:07:57 +0100	[diff] [blame]	1	// SPDX-License-Identifier: GPL-2.0
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	2	#include <linux/mm.h>
				3	#include <linux/mmzone.h>
Mike Rapoport	57c8a66	2018-10-30 15:09:49 -0700	[diff] [blame]	4	#include <linux/memblock.h>
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	5	#include <linux/page_ext.h>
				6	#include <linux/memory.h>
				7	#include <linux/vmalloc.h>
				8	#include <linux/kmemleak.h>
Joonsoo Kim	48c96a3	2014-12-12 16:56:01 -0800	[diff] [blame]	9	#include <linux/page_owner.h>
Vladimir Davydov	33c3fc7	2015-09-09 15:35:45 -0700	[diff] [blame]	10	#include <linux/page_idle.h>
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	11
				12	/*
				13	* struct page extension
				14	*
				15	* This is the feature to manage memory for extended data per page.
				16	*
				17	* Until now, we must modify struct page itself to store extra data per page.
				18	* This requires rebuilding the kernel, which is a really time-consuming process.
				19	* And, sometimes, a rebuild is impossible due to third-party module dependencies.
				20	* Finally, enlarging struct page could cause unwanted changes in system behaviour.
				21	*
				22	* This feature is intended to overcome above mentioned problems. This feature
				23	* allocates memory for extended data per page in certain place rather than
				24	* the struct page itself. This memory can be accessed by the accessor
				25	* functions provided by this code. During the boot process, it checks whether
				26	* allocation of huge chunk of memory is needed or not. If not, it avoids
				27	* allocating memory at all. With this advantage, we can include this feature
				28	* into the kernel in default and can avoid rebuild and solve related problems.
				29	*
				30	* To help these things to work well, there are two callbacks for clients. One
				31	* is the need callback which is mandatory if user wants to avoid useless
				32	* memory allocation at boot-time. The other is optional, init callback, which
				33	* is used to do proper initialization after memory is allocated.
				34	*
				35	* The need callback is used to decide whether extended memory allocation is
				36	* needed or not. Sometimes users want to deactivate some features in this
				37	* boot and extra memory would be unnecessary. In this case, to avoid
				38	* allocating a huge chunk of memory, each client expresses its need for
				39	* extra memory through the need callback. If one of the need callbacks
				40	* returns true, it means that someone needs extra memory so that
				41	* page extension core should allocate memory for page extension. If
				42	* none of the need callbacks returns true, memory isn't needed at all in this
				43	* boot and page extension core can skip allocating memory. As a result,
				44	* no memory is wasted.
				45	*
Joonsoo Kim	980ac16	2016-10-07 16:58:27 -0700	[diff] [blame]	46	* When need callback returns true, page_ext checks if there is a request for
				47	* extra memory through size in struct page_ext_operations. If it is non-zero,
				48	* extra space is allocated for each page_ext entry and offset is returned to
				49	* user through offset in struct page_ext_operations.
				50	*
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	51	* The init callback is used to do proper initialization after page extension
				52	* is completely initialized. In sparse memory system, extra memory is
				53	* allocated some time later than memmap is allocated. In other words, the lifetime
				54	* of memory for page extension isn't the same as that of memmap for struct page.
				55	* Therefore, clients can't store extra data until page extension is
				56	* initialized, even if pages are allocated and used freely. This could
				57	* cause inadequate state of extra data per page, so, to prevent it, client
				58	* can utilize this callback to initialize the state of it correctly.
				59	*/
				60
#if defined(CONFIG_PAGE_IDLE_FLAG) && !defined(CONFIG_64BIT)
/*
 * Need callback for the page idle flag client. Whenever this configuration
 * is compiled in, the client unconditionally asks for page_ext storage.
 */
static bool need_page_idle(void)
{
	return true;
}

/* page_ext client descriptor for the page idle flag; only .need is set. */
struct page_ext_operations page_idle_ops = {
	.need = need_page_idle,
};
#endif
				70
/*
 * Table of every page_ext client compiled into this kernel. The need and
 * init callbacks of the entries are invoked in array order.
 */
static struct page_ext_operations *page_ext_ops[] = {
#ifdef CONFIG_PAGE_OWNER
	&page_owner_ops,
#endif
#if defined(CONFIG_PAGE_IDLE_FLAG) && !defined(CONFIG_64BIT)
	&page_idle_ops,
#endif
#ifdef CONFIG_PAGE_PINNER
	&page_pinner_ops,
#endif
};
				82
Vlastimil Babka	5556cfe	2019-10-14 14:11:40 -0700	[diff] [blame]	83	unsigned long page_ext_size = sizeof(struct page_ext);
				84
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	85	static unsigned long total_usage;
				86
				87	static bool __init invoke_need_callbacks(void)
				88	{
				89	int i;
				90	int entries = ARRAY_SIZE(page_ext_ops);
Joonsoo Kim	980ac16	2016-10-07 16:58:27 -0700	[diff] [blame]	91	bool need = false;
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	92
				93	for (i = 0; i < entries; i++) {
Joonsoo Kim	980ac16	2016-10-07 16:58:27 -0700	[diff] [blame]	94	if (page_ext_ops[i]->need && page_ext_ops[i]->need()) {
Vlastimil Babka	5556cfe	2019-10-14 14:11:40 -0700	[diff] [blame]	95	page_ext_ops[i]->offset = page_ext_size;
				96	page_ext_size += page_ext_ops[i]->size;
Joonsoo Kim	980ac16	2016-10-07 16:58:27 -0700	[diff] [blame]	97	need = true;
				98	}
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	99	}
				100
Joonsoo Kim	980ac16	2016-10-07 16:58:27 -0700	[diff] [blame]	101	return need;
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	102	}
				103
				104	static void __init invoke_init_callbacks(void)
				105	{
				106	int i;
				107	int entries = ARRAY_SIZE(page_ext_ops);
				108
				109	for (i = 0; i < entries; i++) {
				110	if (page_ext_ops[i]->init)
				111	page_ext_ops[i]->init();
				112	}
				113	}
				114
Joonsoo Kim	980ac16	2016-10-07 16:58:27 -0700	[diff] [blame]	115	static inline struct page_ext get_entry(void base, unsigned long index)
				116	{
Vlastimil Babka	5556cfe	2019-10-14 14:11:40 -0700	[diff] [blame]	117	return base + page_ext_size * index;
Joonsoo Kim	980ac16	2016-10-07 16:58:27 -0700	[diff] [blame]	118	}
				119
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	120	#if !defined(CONFIG_SPARSEMEM)
				121
				122
				123	void __meminit pgdat_page_ext_init(struct pglist_data *pgdat)
				124	{
				125	pgdat->node_page_ext = NULL;
				126	}
				127
Kirill A. Shutemov	10ed634	2018-08-17 15:45:15 -0700	[diff] [blame]	128	struct page_ext lookup_page_ext(const struct page page)
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	129	{
				130	unsigned long pfn = page_to_pfn(page);
Joonsoo Kim	0b06bb3	2016-10-07 16:58:24 -0700	[diff] [blame]	131	unsigned long index;
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	132	struct page_ext *base;
				133
				134	base = NODE_DATA(page_to_nid(page))->node_page_ext;
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	135	/*
				136	* The sanity checks the page allocator does upon freeing a
				137	* page can reach here before the page_ext arrays are
				138	* allocated when feeding a range of pages to the allocator
				139	* for the first time during bootup or memory hotplug.
				140	*/
				141	if (unlikely(!base))
				142	return NULL;
Joonsoo Kim	0b06bb3	2016-10-07 16:58:24 -0700	[diff] [blame]	143	index = pfn - round_down(node_start_pfn(page_to_nid(page)),
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	144	MAX_ORDER_NR_PAGES);
Joonsoo Kim	980ac16	2016-10-07 16:58:27 -0700	[diff] [blame]	145	return get_entry(base, index);
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	146	}
Vijayanand Jitta	0a7166a	2021-01-05 11:27:28 +0530	[diff] [blame]	147	EXPORT_SYMBOL_GPL(lookup_page_ext);
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	148
				149	static int __init alloc_node_page_ext(int nid)
				150	{
				151	struct page_ext *base;
				152	unsigned long table_size;
				153	unsigned long nr_pages;
				154
				155	nr_pages = NODE_DATA(nid)->node_spanned_pages;
				156	if (!nr_pages)
				157	return 0;
				158
				159	/*
				160	* Need extra space if node range is not aligned with
				161	* MAX_ORDER_NR_PAGES. When page allocator's buddy algorithm
				162	* checks buddy's status, range could be out of exact node range.
				163	*/
				164	if (!IS_ALIGNED(node_start_pfn(nid), MAX_ORDER_NR_PAGES) \|\|
				165	!IS_ALIGNED(node_end_pfn(nid), MAX_ORDER_NR_PAGES))
				166	nr_pages += MAX_ORDER_NR_PAGES;
				167
Vlastimil Babka	5556cfe	2019-10-14 14:11:40 -0700	[diff] [blame]	168	table_size = page_ext_size * nr_pages;
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	169
Mike Rapoport	26fb3da	2019-03-11 23:30:42 -0700	[diff] [blame]	170	base = memblock_alloc_try_nid(
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	171	table_size, PAGE_SIZE, __pa(MAX_DMA_ADDRESS),
Mike Rapoport	97ad108	2018-10-30 15:09:44 -0700	[diff] [blame]	172	MEMBLOCK_ALLOC_ACCESSIBLE, nid);
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	173	if (!base)
				174	return -ENOMEM;
				175	NODE_DATA(nid)->node_page_ext = base;
				176	total_usage += table_size;
				177	return 0;
				178	}
				179
				180	void __init page_ext_init_flatmem(void)
				181	{
				182
				183	int nid, fail;
				184
				185	if (!invoke_need_callbacks())
				186	return;
				187
				188	for_each_online_node(nid) {
				189	fail = alloc_node_page_ext(nid);
				190	if (fail)
				191	goto fail;
				192	}
				193	pr_info("allocated %ld bytes of page_ext\n", total_usage);
				194	invoke_init_callbacks();
				195	return;
				196
				197	fail:
				198	pr_crit("allocation of page_ext failed.\n");
				199	panic("Out of memory");
				200	}
				201
				202	#else /* CONFIG_SPARSEMEM */
				203
Kirill A. Shutemov	10ed634	2018-08-17 15:45:15 -0700	[diff] [blame]	204	struct page_ext lookup_page_ext(const struct page page)
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	205	{
				206	unsigned long pfn = page_to_pfn(page);
				207	struct mem_section *section = __pfn_to_section(pfn);
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	208	/*
				209	* The sanity checks the page allocator does upon freeing a
				210	* page can reach here before the page_ext arrays are
				211	* allocated when feeding a range of pages to the allocator
				212	* for the first time during bootup or memory hotplug.
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	213	*/
				214	if (!section->page_ext)
				215	return NULL;
Joonsoo Kim	980ac16	2016-10-07 16:58:27 -0700	[diff] [blame]	216	return get_entry(section->page_ext, pfn);
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	217	}
Vijayanand Jitta	0a7166a	2021-01-05 11:27:28 +0530	[diff] [blame]	218	EXPORT_SYMBOL_GPL(lookup_page_ext);
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	219
				220	static void *__meminit alloc_page_ext(size_t size, int nid)
				221	{
				222	gfp_t flags = GFP_KERNEL \| __GFP_ZERO \| __GFP_NOWARN;
				223	void *addr = NULL;
				224
				225	addr = alloc_pages_exact_nid(nid, size, flags);
				226	if (addr) {
				227	kmemleak_alloc(addr, size, 1, flags);
				228	return addr;
				229	}
				230
Michal Hocko	b95046b	2017-09-06 16:20:41 -0700	[diff] [blame]	231	addr = vzalloc_node(size, nid);
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	232
				233	return addr;
				234	}
				235
				236	static int __meminit init_section_page_ext(unsigned long pfn, int nid)
				237	{
				238	struct mem_section *section;
				239	struct page_ext *base;
				240	unsigned long table_size;
				241
				242	section = __pfn_to_section(pfn);
				243
				244	if (section->page_ext)
				245	return 0;
				246
Vlastimil Babka	5556cfe	2019-10-14 14:11:40 -0700	[diff] [blame]	247	table_size = page_ext_size * PAGES_PER_SECTION;
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	248	base = alloc_page_ext(table_size, nid);
				249
				250	/*
				251	* The value stored in section->page_ext is (base - pfn)
				252	* and it does not point to the memory block allocated above,
				253	* causing kmemleak false positives.
				254	*/
				255	kmemleak_not_leak(base);
				256
				257	if (!base) {
				258	pr_err("page ext allocation failure\n");
				259	return -ENOMEM;
				260	}
				261
				262	/*
				263	* The passed "pfn" may not be aligned to SECTION. For the calculation
				264	* we need to apply a mask.
				265	*/
				266	pfn &= PAGE_SECTION_MASK;
Vlastimil Babka	5556cfe	2019-10-14 14:11:40 -0700	[diff] [blame]	267	section->page_ext = (void )base - page_ext_size pfn;
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	268	total_usage += table_size;
				269	return 0;
				270	}
				271	#ifdef CONFIG_MEMORY_HOTPLUG
				272	static void free_page_ext(void *addr)
				273	{
				274	if (is_vmalloc_addr(addr)) {
				275	vfree(addr);
				276	} else {
				277	struct page *page = virt_to_page(addr);
				278	size_t table_size;
				279
Vlastimil Babka	5556cfe	2019-10-14 14:11:40 -0700	[diff] [blame]	280	table_size = page_ext_size * PAGES_PER_SECTION;
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	281
				282	BUG_ON(PageReserved(page));
Qian Cai	0c81585	2019-03-05 15:49:46 -0800	[diff] [blame]	283	kmemleak_free(addr);
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	284	free_pages_exact(addr, table_size);
				285	}
				286	}
				287
				288	static void __free_page_ext(unsigned long pfn)
				289	{
				290	struct mem_section *ms;
				291	struct page_ext *base;
				292
				293	ms = __pfn_to_section(pfn);
				294	if (!ms \|\| !ms->page_ext)
				295	return;
Joonsoo Kim	980ac16	2016-10-07 16:58:27 -0700	[diff] [blame]	296	base = get_entry(ms->page_ext, pfn);
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	297	free_page_ext(base);
				298	ms->page_ext = NULL;
				299	}
				300
				301	static int __meminit online_page_ext(unsigned long start_pfn,
				302	unsigned long nr_pages,
				303	int nid)
				304	{
				305	unsigned long start, end, pfn;
				306	int fail = 0;
				307
				308	start = SECTION_ALIGN_DOWN(start_pfn);
				309	end = SECTION_ALIGN_UP(start_pfn + nr_pages);
				310
Anshuman Khandual	98fa15f	2019-03-05 15:42:58 -0800	[diff] [blame]	311	if (nid == NUMA_NO_NODE) {
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	312	/*
				313	* In this case, "nid" already exists and contains valid memory.
				314	* "start_pfn" passed to us is a pfn which is an arg for
				315	* online__pages(), and start_pfn should exist.
				316	*/
				317	nid = pfn_to_nid(start_pfn);
				318	VM_BUG_ON(!node_state(nid, N_ONLINE));
				319	}
				320
David Hildenbrand	dccacf8	2020-04-06 20:06:47 -0700	[diff] [blame]	321	for (pfn = start; !fail && pfn < end; pfn += PAGES_PER_SECTION)
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	322	fail = init_section_page_ext(pfn, nid);
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	323	if (!fail)
				324	return 0;
				325
				326	/* rollback */
				327	for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION)
				328	__free_page_ext(pfn);
				329
				330	return -ENOMEM;
				331	}
				332
				333	static int __meminit offline_page_ext(unsigned long start_pfn,
				334	unsigned long nr_pages, int nid)
				335	{
				336	unsigned long start, end, pfn;
				337
				338	start = SECTION_ALIGN_DOWN(start_pfn);
				339	end = SECTION_ALIGN_UP(start_pfn + nr_pages);
				340
				341	for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION)
				342	__free_page_ext(pfn);
				343	return 0;
				344
				345	}
				346
				347	static int __meminit page_ext_callback(struct notifier_block *self,
				348	unsigned long action, void *arg)
				349	{
				350	struct memory_notify *mn = arg;
				351	int ret = 0;
				352
				353	switch (action) {
				354	case MEM_GOING_ONLINE:
				355	ret = online_page_ext(mn->start_pfn,
				356	mn->nr_pages, mn->status_change_nid);
				357	break;
				358	case MEM_OFFLINE:
				359	offline_page_ext(mn->start_pfn,
				360	mn->nr_pages, mn->status_change_nid);
				361	break;
				362	case MEM_CANCEL_ONLINE:
				363	offline_page_ext(mn->start_pfn,
				364	mn->nr_pages, mn->status_change_nid);
				365	break;
				366	case MEM_GOING_OFFLINE:
				367	break;
				368	case MEM_ONLINE:
				369	case MEM_CANCEL_OFFLINE:
				370	break;
				371	}
				372
				373	return notifier_from_errno(ret);
				374	}
				375
				376	#endif
				377
				378	void __init page_ext_init(void)
				379	{
				380	unsigned long pfn;
				381	int nid;
				382
				383	if (!invoke_need_callbacks())
				384	return;
				385
				386	for_each_node_state(nid, N_MEMORY) {
				387	unsigned long start_pfn, end_pfn;
				388
				389	start_pfn = node_start_pfn(nid);
				390	end_pfn = node_end_pfn(nid);
				391	/*
				392	* start_pfn and end_pfn may not be aligned to SECTION and the
				393	* page->flags of out of node pages are not initialized. So we
				394	* scan [start_pfn, the biggest section's pfn < end_pfn) here.
				395	*/
				396	for (pfn = start_pfn; pfn < end_pfn;
				397	pfn = ALIGN(pfn + 1, PAGES_PER_SECTION)) {
				398
				399	if (!pfn_valid(pfn))
				400	continue;
				401	/*
				402	* Nodes's pfns can be overlapping.
				403	* We know some arch can have a nodes layout such as
				404	* -------------pfn-------------->
				405	* N0 \| N1 \| N2 \| N0 \| N1 \| N2\|....
				406	*/
Qian Cai	2f1ee09	2019-02-12 15:36:03 -0800	[diff] [blame]	407	if (pfn_to_nid(pfn) != nid)
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	408	continue;
				409	if (init_section_page_ext(pfn, nid))
				410	goto oom;
Vlastimil Babka	0fc542b	2017-09-06 16:20:48 -0700	[diff] [blame]	411	cond_resched();
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	412	}
				413	}
				414	hotplug_memory_notifier(page_ext_callback, 0);
				415	pr_info("allocated %ld bytes of page_ext\n", total_usage);
				416	invoke_init_callbacks();
				417	return;
				418
				419	oom:
				420	panic("Out of memory");
				421	}
				422
				423	void __meminit pgdat_page_ext_init(struct pglist_data *pgdat)
				424	{
				425	}
				426
				427	#endif