Blame - mm/page_ext.c - linux - Git Browser for ODROID

blob: 7e44726b3549511bda09ef2d66cb35efd3c0208a [file] [log] [blame]

Greg Kroah-Hartman	b244131	2017-11-01 15:07:57 +0100	[diff] [blame]	1	// SPDX-License-Identifier: GPL-2.0
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	2	#include <linux/mm.h>
				3	#include <linux/mmzone.h>
Mike Rapoport	57c8a66	2018-10-30 15:09:49 -0700	[diff] [blame]	4	#include <linux/memblock.h>
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	5	#include <linux/page_ext.h>
				6	#include <linux/memory.h>
				7	#include <linux/vmalloc.h>
				8	#include <linux/kmemleak.h>
Joonsoo Kim	48c96a3	2014-12-12 16:56:01 -0800	[diff] [blame]	9	#include <linux/page_owner.h>
Vladimir Davydov	33c3fc7	2015-09-09 15:35:45 -0700	[diff] [blame]	10	#include <linux/page_idle.h>
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	11
				12	/*
				13	* struct page extension
				14	*
				15	* This is the feature to manage memory for extended data per page.
				16	*
				17	* Until now, we must modify struct page itself to store extra data per page.
				18	* This requires rebuilding the kernel and it is really time consuming process.
				19	* And, sometimes, rebuild is impossible due to third party module dependency.
				20	* At last, enlarging struct page could cause un-wanted system behaviour change.
				21	*
				22	* This feature is intended to overcome above mentioned problems. This feature
				23	* allocates memory for extended data per page in certain place rather than
				24	* the struct page itself. This memory can be accessed by the accessor
				25	* functions provided by this code. During the boot process, it checks whether
				26	* allocation of huge chunk of memory is needed or not. If not, it avoids
				27	* allocating memory at all. With this advantage, we can include this feature
				28	* into the kernel in default and can avoid rebuild and solve related problems.
				29	*
				30	* To help these things to work well, there are two callbacks for clients. One
				31	* is the need callback which is mandatory if user wants to avoid useless
				32	* memory allocation at boot-time. The other is optional, init callback, which
				33	* is used to do proper initialization after memory is allocated.
				34	*
				35	* The need callback is used to decide whether extended memory allocation is
				36	* needed or not. Sometimes users want to deactivate some features in this
				37	* boot and extra memory would be unnecessary. In this case, to avoid
				38	* allocating a huge chunk of memory, each client represents its need of
				39	* extra memory through the need callback. If one of the need callbacks
				40	* returns true, it means that someone needs extra memory so that
				41	* page extension core should allocate memory for page extension. If
				42	* none of the need callbacks returns true, memory isn't needed at all in
				43	* this boot and page extension core can skip allocating memory. As a result,
				44	* no memory is wasted.
				45	*
Joonsoo Kim	980ac16	2016-10-07 16:58:27 -0700	[diff] [blame]	46	* When need callback returns true, page_ext checks if there is a request for
				47	* extra memory through size in struct page_ext_operations. If it is non-zero,
				48	* extra space is allocated for each page_ext entry and offset is returned to
				49	* user through offset in struct page_ext_operations.
				50	*
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	51	* The init callback is used to do proper initialization after page extension
				52	* is completely initialized. In sparse memory system, extra memory is
				53	* allocated some time later than memmap is allocated. In other words, lifetime
				54	* of memory for page extension isn't same with memmap for struct page.
				55	* Therefore, clients can't store extra data until page extension is
				56	* initialized, even if pages are allocated and used freely. This could
				57	* cause inadequate state of extra data per page, so, to prevent it, client
				58	* can utilize this callback to initialize the state of it correctly.
				59	*/
				60
				61	static struct page_ext_operations *page_ext_ops[] = {
Joonsoo Kim	48c96a3	2014-12-12 16:56:01 -0800	[diff] [blame]	62	#ifdef CONFIG_PAGE_OWNER
				63	&page_owner_ops,
				64	#endif
Vladimir Davydov	33c3fc7	2015-09-09 15:35:45 -0700	[diff] [blame]	65	#if defined(CONFIG_IDLE_PAGE_TRACKING) && !defined(CONFIG_64BIT)
				66	&page_idle_ops,
				67	#endif
Minchan Kim	6e12c5b	2021-03-18 09:56:10 -0700	[diff] [blame]	68	#ifdef CONFIG_PAGE_PINNER
				69	&page_pinner_ops,
				70	#endif
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	71	};
				72
Vlastimil Babka	5556cfe	2019-10-14 14:11:40 -0700	[diff] [blame]	73	unsigned long page_ext_size = sizeof(struct page_ext);
				74
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	75	static unsigned long total_usage;
				76
				77	static bool __init invoke_need_callbacks(void)
				78	{
				79	int i;
				80	int entries = ARRAY_SIZE(page_ext_ops);
Joonsoo Kim	980ac16	2016-10-07 16:58:27 -0700	[diff] [blame]	81	bool need = false;
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	82
				83	for (i = 0; i < entries; i++) {
Joonsoo Kim	980ac16	2016-10-07 16:58:27 -0700	[diff] [blame]	84	if (page_ext_ops[i]->need && page_ext_ops[i]->need()) {
Vlastimil Babka	5556cfe	2019-10-14 14:11:40 -0700	[diff] [blame]	85	page_ext_ops[i]->offset = page_ext_size;
				86	page_ext_size += page_ext_ops[i]->size;
Joonsoo Kim	980ac16	2016-10-07 16:58:27 -0700	[diff] [blame]	87	need = true;
				88	}
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	89	}
				90
Joonsoo Kim	980ac16	2016-10-07 16:58:27 -0700	[diff] [blame]	91	return need;
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	92	}
				93
				94	static void __init invoke_init_callbacks(void)
				95	{
				96	int i;
				97	int entries = ARRAY_SIZE(page_ext_ops);
				98
				99	for (i = 0; i < entries; i++) {
				100	if (page_ext_ops[i]->init)
				101	page_ext_ops[i]->init();
				102	}
				103	}
				104
Joonsoo Kim	980ac16	2016-10-07 16:58:27 -0700	[diff] [blame]	105	static inline struct page_ext get_entry(void base, unsigned long index)
				106	{
Vlastimil Babka	5556cfe	2019-10-14 14:11:40 -0700	[diff] [blame]	107	return base + page_ext_size * index;
Joonsoo Kim	980ac16	2016-10-07 16:58:27 -0700	[diff] [blame]	108	}
				109
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	110	#if !defined(CONFIG_SPARSEMEM)
				111
				112
				113	void __meminit pgdat_page_ext_init(struct pglist_data *pgdat)
				114	{
				115	pgdat->node_page_ext = NULL;
				116	}
				117
Kirill A. Shutemov	10ed634	2018-08-17 15:45:15 -0700	[diff] [blame]	118	struct page_ext lookup_page_ext(const struct page page)
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	119	{
				120	unsigned long pfn = page_to_pfn(page);
Joonsoo Kim	0b06bb3	2016-10-07 16:58:24 -0700	[diff] [blame]	121	unsigned long index;
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	122	struct page_ext *base;
				123
				124	base = NODE_DATA(page_to_nid(page))->node_page_ext;
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	125	/*
				126	* The sanity checks the page allocator does upon freeing a
				127	* page can reach here before the page_ext arrays are
				128	* allocated when feeding a range of pages to the allocator
				129	* for the first time during bootup or memory hotplug.
				130	*/
				131	if (unlikely(!base))
				132	return NULL;
Joonsoo Kim	0b06bb3	2016-10-07 16:58:24 -0700	[diff] [blame]	133	index = pfn - round_down(node_start_pfn(page_to_nid(page)),
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	134	MAX_ORDER_NR_PAGES);
Joonsoo Kim	980ac16	2016-10-07 16:58:27 -0700	[diff] [blame]	135	return get_entry(base, index);
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	136	}
Vijayanand Jitta	0a7166a	2021-01-05 11:27:28 +0530	[diff] [blame]	137	EXPORT_SYMBOL_GPL(lookup_page_ext);
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	138
				139	static int __init alloc_node_page_ext(int nid)
				140	{
				141	struct page_ext *base;
				142	unsigned long table_size;
				143	unsigned long nr_pages;
				144
				145	nr_pages = NODE_DATA(nid)->node_spanned_pages;
				146	if (!nr_pages)
				147	return 0;
				148
				149	/*
				150	* Need extra space if node range is not aligned with
				151	* MAX_ORDER_NR_PAGES. When page allocator's buddy algorithm
				152	* checks buddy's status, range could be out of exact node range.
				153	*/
				154	if (!IS_ALIGNED(node_start_pfn(nid), MAX_ORDER_NR_PAGES) \|\|
				155	!IS_ALIGNED(node_end_pfn(nid), MAX_ORDER_NR_PAGES))
				156	nr_pages += MAX_ORDER_NR_PAGES;
				157
Vlastimil Babka	5556cfe	2019-10-14 14:11:40 -0700	[diff] [blame]	158	table_size = page_ext_size * nr_pages;
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	159
Mike Rapoport	26fb3da	2019-03-11 23:30:42 -0700	[diff] [blame]	160	base = memblock_alloc_try_nid(
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	161	table_size, PAGE_SIZE, __pa(MAX_DMA_ADDRESS),
Mike Rapoport	97ad108	2018-10-30 15:09:44 -0700	[diff] [blame]	162	MEMBLOCK_ALLOC_ACCESSIBLE, nid);
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	163	if (!base)
				164	return -ENOMEM;
				165	NODE_DATA(nid)->node_page_ext = base;
				166	total_usage += table_size;
				167	return 0;
				168	}
				169
				170	void __init page_ext_init_flatmem(void)
				171	{
				172
				173	int nid, fail;
				174
				175	if (!invoke_need_callbacks())
				176	return;
				177
				178	for_each_online_node(nid) {
				179	fail = alloc_node_page_ext(nid);
				180	if (fail)
				181	goto fail;
				182	}
				183	pr_info("allocated %ld bytes of page_ext\n", total_usage);
				184	invoke_init_callbacks();
				185	return;
				186
				187	fail:
				188	pr_crit("allocation of page_ext failed.\n");
				189	panic("Out of memory");
				190	}
				191
				192	#else /* CONFIG_SPARSEMEM */
				193
Kirill A. Shutemov	10ed634	2018-08-17 15:45:15 -0700	[diff] [blame]	194	struct page_ext lookup_page_ext(const struct page page)
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	195	{
				196	unsigned long pfn = page_to_pfn(page);
				197	struct mem_section *section = __pfn_to_section(pfn);
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	198	/*
				199	* The sanity checks the page allocator does upon freeing a
				200	* page can reach here before the page_ext arrays are
				201	* allocated when feeding a range of pages to the allocator
				202	* for the first time during bootup or memory hotplug.
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	203	*/
				204	if (!section->page_ext)
				205	return NULL;
Joonsoo Kim	980ac16	2016-10-07 16:58:27 -0700	[diff] [blame]	206	return get_entry(section->page_ext, pfn);
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	207	}
Vijayanand Jitta	0a7166a	2021-01-05 11:27:28 +0530	[diff] [blame]	208	EXPORT_SYMBOL_GPL(lookup_page_ext);
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	209
				210	static void *__meminit alloc_page_ext(size_t size, int nid)
				211	{
				212	gfp_t flags = GFP_KERNEL \| __GFP_ZERO \| __GFP_NOWARN;
				213	void *addr = NULL;
				214
				215	addr = alloc_pages_exact_nid(nid, size, flags);
				216	if (addr) {
				217	kmemleak_alloc(addr, size, 1, flags);
				218	return addr;
				219	}
				220
Michal Hocko	b95046b	2017-09-06 16:20:41 -0700	[diff] [blame]	221	addr = vzalloc_node(size, nid);
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	222
				223	return addr;
				224	}
				225
				226	static int __meminit init_section_page_ext(unsigned long pfn, int nid)
				227	{
				228	struct mem_section *section;
				229	struct page_ext *base;
				230	unsigned long table_size;
				231
				232	section = __pfn_to_section(pfn);
				233
				234	if (section->page_ext)
				235	return 0;
				236
Vlastimil Babka	5556cfe	2019-10-14 14:11:40 -0700	[diff] [blame]	237	table_size = page_ext_size * PAGES_PER_SECTION;
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	238	base = alloc_page_ext(table_size, nid);
				239
				240	/*
				241	* The value stored in section->page_ext is (base - pfn)
				242	* and it does not point to the memory block allocated above,
				243	* causing kmemleak false positives.
				244	*/
				245	kmemleak_not_leak(base);
				246
				247	if (!base) {
				248	pr_err("page ext allocation failure\n");
				249	return -ENOMEM;
				250	}
				251
				252	/*
				253	* The passed "pfn" may not be aligned to SECTION. For the calculation
				254	* we need to apply a mask.
				255	*/
				256	pfn &= PAGE_SECTION_MASK;
Vlastimil Babka	5556cfe	2019-10-14 14:11:40 -0700	[diff] [blame]	257	section->page_ext = (void )base - page_ext_size pfn;
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	258	total_usage += table_size;
				259	return 0;
				260	}
				261	#ifdef CONFIG_MEMORY_HOTPLUG
				262	static void free_page_ext(void *addr)
				263	{
				264	if (is_vmalloc_addr(addr)) {
				265	vfree(addr);
				266	} else {
				267	struct page *page = virt_to_page(addr);
				268	size_t table_size;
				269
Vlastimil Babka	5556cfe	2019-10-14 14:11:40 -0700	[diff] [blame]	270	table_size = page_ext_size * PAGES_PER_SECTION;
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	271
				272	BUG_ON(PageReserved(page));
Qian Cai	0c81585	2019-03-05 15:49:46 -0800	[diff] [blame]	273	kmemleak_free(addr);
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	274	free_pages_exact(addr, table_size);
				275	}
				276	}
				277
				278	static void __free_page_ext(unsigned long pfn)
				279	{
				280	struct mem_section *ms;
				281	struct page_ext *base;
				282
				283	ms = __pfn_to_section(pfn);
				284	if (!ms \|\| !ms->page_ext)
				285	return;
Joonsoo Kim	980ac16	2016-10-07 16:58:27 -0700	[diff] [blame]	286	base = get_entry(ms->page_ext, pfn);
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	287	free_page_ext(base);
				288	ms->page_ext = NULL;
				289	}
				290
				291	static int __meminit online_page_ext(unsigned long start_pfn,
				292	unsigned long nr_pages,
				293	int nid)
				294	{
				295	unsigned long start, end, pfn;
				296	int fail = 0;
				297
				298	start = SECTION_ALIGN_DOWN(start_pfn);
				299	end = SECTION_ALIGN_UP(start_pfn + nr_pages);
				300
Anshuman Khandual	98fa15f	2019-03-05 15:42:58 -0800	[diff] [blame]	301	if (nid == NUMA_NO_NODE) {
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	302	/*
				303	* In this case, "nid" already exists and contains valid memory.
				304	* "start_pfn" passed to us is a pfn which is an arg for
				305	* online__pages(), and start_pfn should exist.
				306	*/
				307	nid = pfn_to_nid(start_pfn);
				308	VM_BUG_ON(!node_state(nid, N_ONLINE));
				309	}
				310
David Hildenbrand	dccacf8	2020-04-06 20:06:47 -0700	[diff] [blame]	311	for (pfn = start; !fail && pfn < end; pfn += PAGES_PER_SECTION)
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	312	fail = init_section_page_ext(pfn, nid);
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	313	if (!fail)
				314	return 0;
				315
				316	/* rollback */
				317	for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION)
				318	__free_page_ext(pfn);
				319
				320	return -ENOMEM;
				321	}
				322
				323	static int __meminit offline_page_ext(unsigned long start_pfn,
				324	unsigned long nr_pages, int nid)
				325	{
				326	unsigned long start, end, pfn;
				327
				328	start = SECTION_ALIGN_DOWN(start_pfn);
				329	end = SECTION_ALIGN_UP(start_pfn + nr_pages);
				330
				331	for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION)
				332	__free_page_ext(pfn);
				333	return 0;
				334
				335	}
				336
				337	static int __meminit page_ext_callback(struct notifier_block *self,
				338	unsigned long action, void *arg)
				339	{
				340	struct memory_notify *mn = arg;
				341	int ret = 0;
				342
				343	switch (action) {
				344	case MEM_GOING_ONLINE:
				345	ret = online_page_ext(mn->start_pfn,
				346	mn->nr_pages, mn->status_change_nid);
				347	break;
				348	case MEM_OFFLINE:
				349	offline_page_ext(mn->start_pfn,
				350	mn->nr_pages, mn->status_change_nid);
				351	break;
				352	case MEM_CANCEL_ONLINE:
				353	offline_page_ext(mn->start_pfn,
				354	mn->nr_pages, mn->status_change_nid);
				355	break;
				356	case MEM_GOING_OFFLINE:
				357	break;
				358	case MEM_ONLINE:
				359	case MEM_CANCEL_OFFLINE:
				360	break;
				361	}
				362
				363	return notifier_from_errno(ret);
				364	}
				365
				366	#endif
				367
				368	void __init page_ext_init(void)
				369	{
				370	unsigned long pfn;
				371	int nid;
				372
				373	if (!invoke_need_callbacks())
				374	return;
				375
				376	for_each_node_state(nid, N_MEMORY) {
				377	unsigned long start_pfn, end_pfn;
				378
				379	start_pfn = node_start_pfn(nid);
				380	end_pfn = node_end_pfn(nid);
				381	/*
				382	* start_pfn and end_pfn may not be aligned to SECTION and the
				383	* page->flags of out of node pages are not initialized. So we
				384	* scan [start_pfn, the biggest section's pfn < end_pfn) here.
				385	*/
				386	for (pfn = start_pfn; pfn < end_pfn;
				387	pfn = ALIGN(pfn + 1, PAGES_PER_SECTION)) {
				388
				389	if (!pfn_valid(pfn))
				390	continue;
				391	/*
				392	* Nodes's pfns can be overlapping.
				393	* We know some arch can have a nodes layout such as
				394	* -------------pfn-------------->
				395	* N0 \| N1 \| N2 \| N0 \| N1 \| N2\|....
				396	*/
Qian Cai	2f1ee09	2019-02-12 15:36:03 -0800	[diff] [blame]	397	if (pfn_to_nid(pfn) != nid)
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	398	continue;
				399	if (init_section_page_ext(pfn, nid))
				400	goto oom;
Vlastimil Babka	0fc542b	2017-09-06 16:20:48 -0700	[diff] [blame]	401	cond_resched();
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	402	}
				403	}
				404	hotplug_memory_notifier(page_ext_callback, 0);
				405	pr_info("allocated %ld bytes of page_ext\n", total_usage);
				406	invoke_init_callbacks();
				407	return;
				408
				409	oom:
				410	panic("Out of memory");
				411	}
				412
				413	void __meminit pgdat_page_ext_init(struct pglist_data *pgdat)
				414	{
				415	}
				416
				417	#endif