Blame - mm/mremap.c - linux - Git Browser for ODROID

blob: 0763b83ef77947fca2438ef1cf8caa86278cb8fe [file] [log] [blame]

Greg Kroah-Hartman	b244131	2017-11-01 15:07:57 +0100	[diff] [blame]	1	// SPDX-License-Identifier: GPL-2.0
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2	/*
				3	* mm/mremap.c
				4	*
				5	* (C) Copyright 1996 Linus Torvalds
				6	*
Alan Cox	046c688	2009-01-05 14:06:29 +0000	[diff] [blame]	7	* Address space accounting code <alan@lxorguk.ukuu.org.uk>
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	8	* (C) Copyright 2002 Red Hat Inc, All Rights Reserved
				9	*/
				10
				11	#include <linux/mm.h>
				12	#include <linux/hugetlb.h>
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	13	#include <linux/shm.h>
Hugh Dickins	1ff82995	2009-09-21 17:02:05 -0700	[diff] [blame]	14	#include <linux/ksm.h>
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	15	#include <linux/mman.h>
				16	#include <linux/swap.h>
Randy.Dunlap	c59ede7	2006-01-11 12:17:46 -0800	[diff] [blame]	17	#include <linux/capability.h>
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	18	#include <linux/fs.h>
Cyrill Gorcunov	6dec97d	2013-08-27 12:37:18 +0400	[diff] [blame]	19	#include <linux/swapops.h>
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	20	#include <linux/highmem.h>
				21	#include <linux/security.h>
				22	#include <linux/syscalls.h>
Andrea Arcangeli	cddb8a5	2008-07-28 15:46:29 -0700	[diff] [blame]	23	#include <linux/mmu_notifier.h>
Paul McQuade	2581d20	2014-10-09 15:29:01 -0700	[diff] [blame]	24	#include <linux/uaccess.h>
Laurent Dufour	4abad2c	2015-06-24 16:56:19 -0700	[diff] [blame]	25	#include <linux/mm-arch-hooks.h>
Pavel Emelyanov	72f8765	2017-02-22 15:42:34 -0800	[diff] [blame]	26	#include <linux/userfaultfd_k.h>
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	27
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	28	#include <asm/cacheflush.h>
				29	#include <asm/tlbflush.h>
				30
Rik van Riel	ba470de	2008-10-18 20:26:50 -0700	[diff] [blame]	31	#include "internal.h"
				32
Kalesh Singh	dcceb19	2020-12-14 19:07:30 -0800	[diff] [blame]	33	static pud_t get_old_pud(struct mm_struct mm, unsigned long addr)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	34	{
				35	pgd_t *pgd;
Kirill A. Shutemov	c2febaf	2017-03-09 17:24:07 +0300	[diff] [blame]	36	p4d_t *p4d;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	37	pud_t *pud;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	38
				39	pgd = pgd_offset(mm, addr);
				40	if (pgd_none_or_clear_bad(pgd))
				41	return NULL;
				42
Kirill A. Shutemov	c2febaf	2017-03-09 17:24:07 +0300	[diff] [blame]	43	p4d = p4d_offset(pgd, addr);
				44	if (p4d_none_or_clear_bad(p4d))
				45	return NULL;
				46
				47	pud = pud_offset(p4d, addr);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	48	if (pud_none_or_clear_bad(pud))
				49	return NULL;
				50
Kalesh Singh	dcceb19	2020-12-14 19:07:30 -0800	[diff] [blame]	51	return pud;
				52	}
				53
				54	static pmd_t get_old_pmd(struct mm_struct mm, unsigned long addr)
				55	{
				56	pud_t *pud;
				57	pmd_t *pmd;
				58
				59	pud = get_old_pud(mm, addr);
				60	if (!pud)
				61	return NULL;
				62
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	63	pmd = pmd_offset(pud, addr);
Andrea Arcangeli	37a1c49	2011-10-31 17:08:30 -0700	[diff] [blame]	64	if (pmd_none(*pmd))
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	65	return NULL;
				66
Hugh Dickins	7be7a54	2005-10-29 18:16:00 -0700	[diff] [blame]	67	return pmd;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	68	}
				69
Kalesh Singh	dcceb19	2020-12-14 19:07:30 -0800	[diff] [blame]	70	static pud_t alloc_new_pud(struct mm_struct mm, struct vm_area_struct *vma,
Andrea Arcangeli	8ac1f83	2011-01-13 15:46:43 -0800	[diff] [blame]	71	unsigned long addr)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	72	{
				73	pgd_t *pgd;
Kirill A. Shutemov	c2febaf	2017-03-09 17:24:07 +0300	[diff] [blame]	74	p4d_t *p4d;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	75
				76	pgd = pgd_offset(mm, addr);
Kirill A. Shutemov	c2febaf	2017-03-09 17:24:07 +0300	[diff] [blame]	77	p4d = p4d_alloc(mm, pgd, addr);
				78	if (!p4d)
				79	return NULL;
Kalesh Singh	dcceb19	2020-12-14 19:07:30 -0800	[diff] [blame]	80
				81	return pud_alloc(mm, p4d, addr);
				82	}
				83
				84	static pmd_t alloc_new_pmd(struct mm_struct mm, struct vm_area_struct *vma,
				85	unsigned long addr)
				86	{
				87	pud_t *pud;
				88	pmd_t *pmd;
				89
				90	pud = alloc_new_pud(mm, vma, addr);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	91	if (!pud)
Hugh Dickins	c74df32	2005-10-29 18:16:23 -0700	[diff] [blame]	92	return NULL;
Hugh Dickins	7be7a54	2005-10-29 18:16:00 -0700	[diff] [blame]	93
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	94	pmd = pmd_alloc(mm, pud, addr);
Hugh Dickins	57a8f0c	2013-10-16 13:47:09 -0700	[diff] [blame]	95	if (!pmd)
Hugh Dickins	c74df32	2005-10-29 18:16:23 -0700	[diff] [blame]	96	return NULL;
Hugh Dickins	7be7a54	2005-10-29 18:16:00 -0700	[diff] [blame]	97
Andrea Arcangeli	8ac1f83	2011-01-13 15:46:43 -0800	[diff] [blame]	98	VM_BUG_ON(pmd_trans_huge(*pmd));
Hugh Dickins	c74df32	2005-10-29 18:16:23 -0700	[diff] [blame]	99
Hugh Dickins	7be7a54	2005-10-29 18:16:00 -0700	[diff] [blame]	100	return pmd;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	101	}
				102
Hugh Dickins	1d069b7	2016-05-19 17:12:57 -0700	[diff] [blame]	103	static void take_rmap_locks(struct vm_area_struct *vma)
				104	{
				105	if (vma->vm_file)
				106	i_mmap_lock_write(vma->vm_file->f_mapping);
				107	if (vma->anon_vma)
				108	anon_vma_lock_write(vma->anon_vma);
				109	}
				110
				111	static void drop_rmap_locks(struct vm_area_struct *vma)
				112	{
				113	if (vma->anon_vma)
				114	anon_vma_unlock_write(vma->anon_vma);
				115	if (vma->vm_file)
				116	i_mmap_unlock_write(vma->vm_file->f_mapping);
				117	}
				118
Cyrill Gorcunov	6dec97d	2013-08-27 12:37:18 +0400	[diff] [blame]	119	static pte_t move_soft_dirty_pte(pte_t pte)
				120	{
				121	/*
				122	* Set soft dirty bit so we can notice
				123	* in userspace the ptes were moved.
				124	*/
				125	#ifdef CONFIG_MEM_SOFT_DIRTY
				126	if (pte_present(pte))
				127	pte = pte_mksoft_dirty(pte);
				128	else if (is_swap_pte(pte))
				129	pte = pte_swp_mksoft_dirty(pte);
Cyrill Gorcunov	6dec97d	2013-08-27 12:37:18 +0400	[diff] [blame]	130	#endif
				131	return pte;
				132	}
				133
Hugh Dickins	7be7a54	2005-10-29 18:16:00 -0700	[diff] [blame]	134	static void move_ptes(struct vm_area_struct vma, pmd_t old_pmd,
				135	unsigned long old_addr, unsigned long old_end,
				136	struct vm_area_struct new_vma, pmd_t new_pmd,
Linus Torvalds	eb66ae0	2018-10-12 15:22:59 -0700	[diff] [blame]	137	unsigned long new_addr, bool need_rmap_locks)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	138	{
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	139	struct mm_struct *mm = vma->vm_mm;
Hugh Dickins	7be7a54	2005-10-29 18:16:00 -0700	[diff] [blame]	140	pte_t old_pte, new_pte, pte;
Hugh Dickins	4c21e2f	2005-10-29 18:16:40 -0700	[diff] [blame]	141	spinlock_t old_ptl, new_ptl;
Aaron Lu	5d19042	2016-11-10 17:16:33 +0800	[diff] [blame]	142	bool force_flush = false;
				143	unsigned long len = old_end - old_addr;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	144
Michel Lespinasse	38a7601	2012-10-08 16:31:50 -0700	[diff] [blame]	145	/*
Davidlohr Bueso	c8c06ef	2014-12-12 16:54:24 -0800	[diff] [blame]	146	* When need_rmap_locks is true, we take the i_mmap_rwsem and anon_vma
Michel Lespinasse	38a7601	2012-10-08 16:31:50 -0700	[diff] [blame]	147	* locks to ensure that rmap will always observe either the old or the
				148	* new ptes. This is the easiest way to avoid races with
				149	* truncate_pagecache(), page migration, etc...
				150	*
				151	* When need_rmap_locks is false, we use other ways to avoid
				152	* such races:
				153	*
				154	* - During exec() shift_arg_pages(), we use a specially tagged vma
Anshuman Khandual	222100e	2020-04-01 21:07:52 -0700	[diff] [blame]	155	* which rmap call sites look for using vma_is_temporary_stack().
Michel Lespinasse	38a7601	2012-10-08 16:31:50 -0700	[diff] [blame]	156	*
				157	* - During mremap(), new_vma is often known to be placed after vma
				158	* in rmap traversal order. This ensures rmap will always observe
				159	* either the old pte, or the new pte, or both (the page table locks
				160	* serialize access to individual ptes, but only rmap traversal
				161	* order guarantees that we won't miss both the old and new ptes).
				162	*/
Hugh Dickins	1d069b7	2016-05-19 17:12:57 -0700	[diff] [blame]	163	if (need_rmap_locks)
				164	take_rmap_locks(vma);
Hugh Dickins	7be7a54	2005-10-29 18:16:00 -0700	[diff] [blame]	165
Hugh Dickins	4c21e2f	2005-10-29 18:16:40 -0700	[diff] [blame]	166	/*
				167	* We don't have to worry about the ordering of src and dst
Michel Lespinasse	c1e8d7c	2020-06-08 21:33:54 -0700	[diff] [blame]	168	* pte locks because exclusive mmap_lock prevents deadlock.
Hugh Dickins	4c21e2f	2005-10-29 18:16:40 -0700	[diff] [blame]	169	*/
Hugh Dickins	c74df32	2005-10-29 18:16:23 -0700	[diff] [blame]	170	old_pte = pte_offset_map_lock(mm, old_pmd, old_addr, &old_ptl);
Peter Zijlstra	ece0e2b	2010-10-26 14:21:52 -0700	[diff] [blame]	171	new_pte = pte_offset_map(new_pmd, new_addr);
Hugh Dickins	4c21e2f	2005-10-29 18:16:40 -0700	[diff] [blame]	172	new_ptl = pte_lockptr(mm, new_pmd);
				173	if (new_ptl != old_ptl)
Ingo Molnar	f20dc5f	2006-07-03 00:25:08 -0700	[diff] [blame]	174	spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
Mel Gorman	3ea2771	2017-08-02 13:31:52 -0700	[diff] [blame]	175	flush_tlb_batched_pending(vma->vm_mm);
Zachary Amsden	6606c3e	2006-09-30 23:29:33 -0700	[diff] [blame]	176	arch_enter_lazy_mmu_mode();
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	177
Hugh Dickins	7be7a54	2005-10-29 18:16:00 -0700	[diff] [blame]	178	for (; old_addr < old_end; old_pte++, old_addr += PAGE_SIZE,
				179	new_pte++, new_addr += PAGE_SIZE) {
				180	if (pte_none(*old_pte))
				181	continue;
Aaron Lu	5d19042	2016-11-10 17:16:33 +0800	[diff] [blame]	182
Andrea Arcangeli	7b6efc2	2011-10-31 17:08:26 -0700	[diff] [blame]	183	pte = ptep_get_and_clear(mm, old_addr, old_pte);
Aaron Lu	a2ce266	2016-11-29 13:27:31 +0800	[diff] [blame]	184	/*
Linus Torvalds	eb66ae0	2018-10-12 15:22:59 -0700	[diff] [blame]	185	* If we are remapping a valid PTE, make sure
Aaron Lu	a2ce266	2016-11-29 13:27:31 +0800	[diff] [blame]	186	* to flush TLB before we drop the PTL for the
Linus Torvalds	eb66ae0	2018-10-12 15:22:59 -0700	[diff] [blame]	187	* PTE.
Aaron Lu	a2ce266	2016-11-29 13:27:31 +0800	[diff] [blame]	188	*
Linus Torvalds	eb66ae0	2018-10-12 15:22:59 -0700	[diff] [blame]	189	* NOTE! Both old and new PTL matter: the old one
				190	* for racing with page_mkclean(), the new one to
				191	* make sure the physical page stays valid until
				192	* the TLB entry for the old mapping has been
				193	* flushed.
Aaron Lu	a2ce266	2016-11-29 13:27:31 +0800	[diff] [blame]	194	*/
Linus Torvalds	eb66ae0	2018-10-12 15:22:59 -0700	[diff] [blame]	195	if (pte_present(pte))
Aaron Lu	a2ce266	2016-11-29 13:27:31 +0800	[diff] [blame]	196	force_flush = true;
Hugh Dickins	7be7a54	2005-10-29 18:16:00 -0700	[diff] [blame]	197	pte = move_pte(pte, new_vma->vm_page_prot, old_addr, new_addr);
Cyrill Gorcunov	6dec97d	2013-08-27 12:37:18 +0400	[diff] [blame]	198	pte = move_soft_dirty_pte(pte);
				199	set_pte_at(mm, new_addr, new_pte, pte);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	200	}
Hugh Dickins	7be7a54	2005-10-29 18:16:00 -0700	[diff] [blame]	201
Zachary Amsden	6606c3e	2006-09-30 23:29:33 -0700	[diff] [blame]	202	arch_leave_lazy_mmu_mode();
Linus Torvalds	eb66ae0	2018-10-12 15:22:59 -0700	[diff] [blame]	203	if (force_flush)
				204	flush_tlb_range(vma, old_end - len, old_end);
Hugh Dickins	4c21e2f	2005-10-29 18:16:40 -0700	[diff] [blame]	205	if (new_ptl != old_ptl)
				206	spin_unlock(new_ptl);
Peter Zijlstra	ece0e2b	2010-10-26 14:21:52 -0700	[diff] [blame]	207	pte_unmap(new_pte - 1);
Hugh Dickins	c74df32	2005-10-29 18:16:23 -0700	[diff] [blame]	208	pte_unmap_unlock(old_pte - 1, old_ptl);
Hugh Dickins	1d069b7	2016-05-19 17:12:57 -0700	[diff] [blame]	209	if (need_rmap_locks)
				210	drop_rmap_locks(vma);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	211	}
				212
#ifdef CONFIG_SPECULATIVE_PAGE_FAULT
DECLARE_WAIT_QUEUE_HEAD(vma_users_wait);
atomic_t vma_user_waiters = ATOMIC_INIT(0);

/*
 * Block until the caller holds the only reference to @vma, then mark the
 * reference count as -1. restore_vma_ref_count() undoes this.
 */
static inline void wait_for_vma_users(struct vm_area_struct *vma)
{
	/*
	 * If we have the only reference, swap the refcount to -1. This
	 * will prevent other concurrent references by get_vma() for SPFs.
	 */
	if (likely(atomic_cmpxchg(&vma->vm_ref_count, 1, -1) == 1))
		return;

	/* Indicate we are waiting for other users of the VMA to finish. */
	atomic_inc(&vma_user_waiters);

	/* Failed atomic_cmpxchg; no implicit barrier, use an explicit one. */
	smp_mb();

	/*
	 * Callers cannot handle failure, sleep uninterruptibly until there
	 * are no other users of this VMA.
	 *
	 * We don't need to worry about references from concurrent waiters,
	 * since this is only used in the context of fast mremaps, with
	 * exclusive mmap write lock held.
	 */
	wait_event(vma_users_wait, atomic_cmpxchg(&vma->vm_ref_count, 1, -1) == 1);

	atomic_dec(&vma_user_waiters);
}

/*
 * Restore the VMA reference count to 1 after a fast mremap.
 *
 * This should only be called after a corresponding wait_for_vma_users().
 */
static inline void restore_vma_ref_count(struct vm_area_struct *vma)
{
	/*
	 * Perform the exchange outside of VM_BUG_ON_VMA(). Without
	 * CONFIG_DEBUG_VM that macro only type-checks its condition and never
	 * evaluates it, so a cmpxchg placed inside it would not run and the
	 * reference count would be left at -1.
	 */
	int old_count = atomic_cmpxchg(&vma->vm_ref_count, -1, 1);

	VM_BUG_ON_VMA(old_count != -1, vma);
}
#else /* !CONFIG_SPECULATIVE_PAGE_FAULT */
/* No speculative page faults: there are no concurrent VMA users to wait for. */
static inline void wait_for_vma_users(struct vm_area_struct *vma)
{
}
static inline void restore_vma_ref_count(struct vm_area_struct *vma)
{
}
#endif /* CONFIG_SPECULATIVE_PAGE_FAULT */
				266
				267	#ifdef CONFIG_HAVE_MOVE_PMD
Joel Fernandes (Google)	2c91bd4	2019-01-03 15:28:38 -0800	[diff] [blame]	268	static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr,
Wei Yang	b8aa9d9	2020-08-06 23:23:40 -0700	[diff] [blame]	269	unsigned long new_addr, pmd_t old_pmd, pmd_t new_pmd)
Joel Fernandes (Google)	2c91bd4	2019-01-03 15:28:38 -0800	[diff] [blame]	270	{
				271	spinlock_t old_ptl, new_ptl;
				272	struct mm_struct *mm = vma->vm_mm;
				273	pmd_t pmd;
Kalesh Singh	134c1aa	2022-12-19 21:07:49 -0800	[diff] [blame]	274	bool ret;
				275
				276	/*
				277	* Wait for concurrent users, since these can potentially be
				278	* speculative page faults.
				279	*/
				280	wait_for_vma_users(vma);
Joel Fernandes (Google)	2c91bd4	2019-01-03 15:28:38 -0800	[diff] [blame]	281
Joel Fernandes (Google)	2c91bd4	2019-01-03 15:28:38 -0800	[diff] [blame]	282	/*
				283	* The destination pmd shouldn't be established, free_pgtables()
Linus Torvalds	f81fdd0	2020-07-13 11:37:39 -0700	[diff] [blame]	284	* should have released it.
				285	*
				286	* However, there's a case during execve() where we use mremap
				287	* to move the initial stack, and in that case the target area
				288	* may overlap the source area (always moving down).
				289	*
				290	* If everything is PMD-aligned, that works fine, as moving
				291	* each pmd down will clear the source pmd. But if we first
				292	* have a few 4kB-only pages that get moved down, and then
				293	* hit the "now the rest is PMD-aligned, let's do everything
				294	* one pmd at a time", we will still have the old (now empty
				295	* of any 4kB pages, but still there) PMD in the page table
				296	* tree.
				297	*
				298	* Warn on it once - because we really should try to figure
				299	* out how to do this better - but then say "I won't move
				300	* this pmd".
				301	*
				302	* One alternative might be to just unmap the target pmd at
				303	* this point, and verify that it really is empty. We'll see.
Joel Fernandes (Google)	2c91bd4	2019-01-03 15:28:38 -0800	[diff] [blame]	304	*/
Kalesh Singh	134c1aa	2022-12-19 21:07:49 -0800	[diff] [blame]	305	if (WARN_ON_ONCE(!pmd_none(*new_pmd))) {
				306	ret = false;
				307	goto out;
				308	}
Joel Fernandes (Google)	2c91bd4	2019-01-03 15:28:38 -0800	[diff] [blame]	309
				310	/*
				311	* We don't have to worry about the ordering of src and dst
Michel Lespinasse	c1e8d7c	2020-06-08 21:33:54 -0700	[diff] [blame]	312	* ptlocks because exclusive mmap_lock prevents deadlock.
Joel Fernandes (Google)	2c91bd4	2019-01-03 15:28:38 -0800	[diff] [blame]	313	*/
				314	old_ptl = pmd_lock(vma->vm_mm, old_pmd);
				315	new_ptl = pmd_lockptr(mm, new_pmd);
				316	if (new_ptl != old_ptl)
				317	spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
				318
				319	/* Clear the pmd */
				320	pmd = *old_pmd;
				321	pmd_clear(old_pmd);
				322
				323	VM_BUG_ON(!pmd_none(*new_pmd));
				324
				325	/* Set the new pmd */
				326	set_pmd_at(mm, new_addr, new_pmd, pmd);
				327	flush_tlb_range(vma, old_addr, old_addr + PMD_SIZE);
				328	if (new_ptl != old_ptl)
				329	spin_unlock(new_ptl);
				330	spin_unlock(old_ptl);
				331
Kalesh Singh	134c1aa	2022-12-19 21:07:49 -0800	[diff] [blame]	332	ret = true;
				333
				334	out:
				335	restore_vma_ref_count(vma);
				336	return ret;
Joel Fernandes (Google)	2c91bd4	2019-01-03 15:28:38 -0800	[diff] [blame]	337	}
Kalesh Singh	dcceb19	2020-12-14 19:07:30 -0800	[diff] [blame]	338	#else
				339	static inline bool move_normal_pmd(struct vm_area_struct *vma,
				340	unsigned long old_addr, unsigned long new_addr, pmd_t *old_pmd,
				341	pmd_t *new_pmd)
				342	{
				343	return false;
				344	}
Joel Fernandes (Google)	2c91bd4	2019-01-03 15:28:38 -0800	[diff] [blame]	345	#endif
				346
Kalesh Singh	134c1aa	2022-12-19 21:07:49 -0800	[diff] [blame]	347	#ifdef CONFIG_HAVE_MOVE_PUD
Kalesh Singh	dcceb19	2020-12-14 19:07:30 -0800	[diff] [blame]	348	static bool move_normal_pud(struct vm_area_struct *vma, unsigned long old_addr,
				349	unsigned long new_addr, pud_t old_pud, pud_t new_pud)
				350	{
				351	spinlock_t old_ptl, new_ptl;
				352	struct mm_struct *mm = vma->vm_mm;
				353	pud_t pud;
Kalesh Singh	134c1aa	2022-12-19 21:07:49 -0800	[diff] [blame]	354	bool ret;
				355
				356	/*
				357	* Wait for concurrent users, since these can potentially be
				358	* speculative page faults.
				359	*/
				360	wait_for_vma_users(vma);
Kalesh Singh	dcceb19	2020-12-14 19:07:30 -0800	[diff] [blame]	361
				362	/*
				363	* The destination pud shouldn't be established, free_pgtables()
				364	* should have released it.
				365	*/
Kalesh Singh	134c1aa	2022-12-19 21:07:49 -0800	[diff] [blame]	366	if (WARN_ON_ONCE(!pud_none(*new_pud))) {
				367	ret = false;
				368	goto out;
				369	}
Kalesh Singh	dcceb19	2020-12-14 19:07:30 -0800	[diff] [blame]	370
				371	/*
				372	* We don't have to worry about the ordering of src and dst
				373	* ptlocks because exclusive mmap_lock prevents deadlock.
				374	*/
				375	old_ptl = pud_lock(vma->vm_mm, old_pud);
				376	new_ptl = pud_lockptr(mm, new_pud);
				377	if (new_ptl != old_ptl)
				378	spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
				379
				380	/* Clear the pud */
				381	pud = *old_pud;
				382	pud_clear(old_pud);
				383
				384	VM_BUG_ON(!pud_none(*new_pud));
				385
				386	/* Set the new pud */
				387	set_pud_at(mm, new_addr, new_pud, pud);
				388	flush_tlb_range(vma, old_addr, old_addr + PUD_SIZE);
				389	if (new_ptl != old_ptl)
				390	spin_unlock(new_ptl);
				391	spin_unlock(old_ptl);
				392
Kalesh Singh	134c1aa	2022-12-19 21:07:49 -0800	[diff] [blame]	393	ret = true;
				394
				395	out:
				396	restore_vma_ref_count(vma);
				397	return ret;
Kalesh Singh	dcceb19	2020-12-14 19:07:30 -0800	[diff] [blame]	398	}
				399	#else
				400	static inline bool move_normal_pud(struct vm_area_struct *vma,
				401	unsigned long old_addr, unsigned long new_addr, pud_t *old_pud,
				402	pud_t *new_pud)
				403	{
				404	return false;
				405	}
				406	#endif
				407
				408	enum pgt_entry {
				409	NORMAL_PMD,
				410	HPAGE_PMD,
				411	NORMAL_PUD,
				412	};
				413
				414	/*
				415	* Returns an extent of the corresponding size for the pgt_entry specified if
				416	* valid. Else returns a smaller extent bounded by the end of the source and
				417	* destination pgt_entry.
				418	*/
Arnd Bergmann	45b1eb7	2021-02-09 13:42:10 -0800	[diff] [blame]	419	static __always_inline unsigned long get_extent(enum pgt_entry entry,
				420	unsigned long old_addr, unsigned long old_end,
				421	unsigned long new_addr)
Kalesh Singh	dcceb19	2020-12-14 19:07:30 -0800	[diff] [blame]	422	{
				423	unsigned long next, extent, mask, size;
				424
				425	switch (entry) {
				426	case HPAGE_PMD:
				427	case NORMAL_PMD:
				428	mask = PMD_MASK;
				429	size = PMD_SIZE;
				430	break;
				431	case NORMAL_PUD:
				432	mask = PUD_MASK;
				433	size = PUD_SIZE;
				434	break;
				435	default:
				436	BUILD_BUG();
				437	break;
				438	}
				439
				440	next = (old_addr + size) & mask;
				441	/* even if next overflowed, extent below will be ok */
Kalesh Singh	62098d3	2020-12-29 15:14:40 -0800	[diff] [blame]	442	extent = next - old_addr;
				443	if (extent > old_end - old_addr)
				444	extent = old_end - old_addr;
Kalesh Singh	dcceb19	2020-12-14 19:07:30 -0800	[diff] [blame]	445	next = (new_addr + size) & mask;
				446	if (extent > next - new_addr)
				447	extent = next - new_addr;
				448	return extent;
				449	}
				450
				451	/*
				452	* Attempts to speedup the move by moving entry at the level corresponding to
				453	* pgt_entry. Returns true if the move was successful, else false.
				454	*/
				455	static bool move_pgt_entry(enum pgt_entry entry, struct vm_area_struct *vma,
				456	unsigned long old_addr, unsigned long new_addr,
				457	void old_entry, void new_entry, bool need_rmap_locks)
				458	{
				459	bool moved = false;
				460
				461	/* See comment in move_ptes() */
				462	if (need_rmap_locks)
				463	take_rmap_locks(vma);
				464
				465	switch (entry) {
				466	case NORMAL_PMD:
				467	moved = move_normal_pmd(vma, old_addr, new_addr, old_entry,
				468	new_entry);
				469	break;
				470	case NORMAL_PUD:
				471	moved = move_normal_pud(vma, old_addr, new_addr, old_entry,
				472	new_entry);
				473	break;
				474	case HPAGE_PMD:
				475	moved = IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) &&
				476	move_huge_pmd(vma, old_addr, new_addr, old_entry,
				477	new_entry);
				478	break;
				479	default:
				480	WARN_ON_ONCE(1);
				481	break;
				482	}
				483
				484	if (need_rmap_locks)
				485	drop_rmap_locks(vma);
				486
				487	return moved;
				488	}
				489
Ollie Wild	b6a2fea	2007-07-19 01:48:16 -0700	[diff] [blame]	490	unsigned long move_page_tables(struct vm_area_struct *vma,
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	491	unsigned long old_addr, struct vm_area_struct *new_vma,
Michel Lespinasse	38a7601	2012-10-08 16:31:50 -0700	[diff] [blame]	492	unsigned long new_addr, unsigned long len,
				493	bool need_rmap_locks)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	494	{
Kalesh Singh	dcceb19	2020-12-14 19:07:30 -0800	[diff] [blame]	495	unsigned long extent, old_end;
Jérôme Glisse	ac46d4f	2018-12-28 00:38:09 -0800	[diff] [blame]	496	struct mmu_notifier_range range;
Hugh Dickins	7be7a54	2005-10-29 18:16:00 -0700	[diff] [blame]	497	pmd_t old_pmd, new_pmd;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	498
Paolo Bonzini	7d659cb	2022-04-08 13:09:04 -0700	[diff] [blame]	499	if (!len)
				500	return 0;
				501
Hugh Dickins	7be7a54	2005-10-29 18:16:00 -0700	[diff] [blame]	502	old_end = old_addr + len;
				503	flush_cache_range(vma, old_addr, old_end);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	504
Jérôme Glisse	6f4f13e	2019-05-13 17:20:49 -0700	[diff] [blame]	505	mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma, vma->vm_mm,
				506	old_addr, old_end);
Jérôme Glisse	ac46d4f	2018-12-28 00:38:09 -0800	[diff] [blame]	507	mmu_notifier_invalidate_range_start(&range);
Andrea Arcangeli	7b6efc2	2011-10-31 17:08:26 -0700	[diff] [blame]	508
Hugh Dickins	7be7a54	2005-10-29 18:16:00 -0700	[diff] [blame]	509	for (; old_addr < old_end; old_addr += extent, new_addr += extent) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	510	cond_resched();
Kalesh Singh	dcceb19	2020-12-14 19:07:30 -0800	[diff] [blame]	511	/*
				512	* If extent is PUD-sized try to speed up the move by moving at the
				513	* PUD level if possible.
				514	*/
				515	extent = get_extent(NORMAL_PUD, old_addr, old_end, new_addr);
				516	if (IS_ENABLED(CONFIG_HAVE_MOVE_PUD) && extent == PUD_SIZE) {
				517	pud_t old_pud, new_pud;
				518
				519	old_pud = get_old_pud(vma->vm_mm, old_addr);
				520	if (!old_pud)
				521	continue;
				522	new_pud = alloc_new_pud(vma->vm_mm, vma, new_addr);
				523	if (!new_pud)
				524	break;
				525	if (move_pgt_entry(NORMAL_PUD, vma, old_addr, new_addr,
Aneesh Kumar K.V	e37cc8a0	2021-07-07 18:10:15 -0700	[diff] [blame]	526	old_pud, new_pud, true))
Kalesh Singh	dcceb19	2020-12-14 19:07:30 -0800	[diff] [blame]	527	continue;
				528	}
				529
				530	extent = get_extent(NORMAL_PMD, old_addr, old_end, new_addr);
Hugh Dickins	7be7a54	2005-10-29 18:16:00 -0700	[diff] [blame]	531	old_pmd = get_old_pmd(vma->vm_mm, old_addr);
				532	if (!old_pmd)
				533	continue;
Andrea Arcangeli	8ac1f83	2011-01-13 15:46:43 -0800	[diff] [blame]	534	new_pmd = alloc_new_pmd(vma->vm_mm, vma, new_addr);
Hugh Dickins	7be7a54	2005-10-29 18:16:00 -0700	[diff] [blame]	535	if (!new_pmd)
				536	break;
Kalesh Singh	dcceb19	2020-12-14 19:07:30 -0800	[diff] [blame]	537	if (is_swap_pmd(old_pmd) \|\| pmd_trans_huge(old_pmd) \|\|
				538	pmd_devmap(*old_pmd)) {
				539	if (extent == HPAGE_PMD_SIZE &&
				540	move_pgt_entry(HPAGE_PMD, vma, old_addr, new_addr,
				541	old_pmd, new_pmd, need_rmap_locks))
				542	continue;
Kirill A. Shutemov	4b471e8	2016-01-15 16:53:39 -0800	[diff] [blame]	543	split_huge_pmd(vma, old_pmd, old_addr);
Naoya Horiguchi	337d9ab	2016-07-26 15:24:03 -0700	[diff] [blame]	544	if (pmd_trans_unstable(old_pmd))
Kirill A. Shutemov	6b9116a	2016-02-11 16:13:03 -0800	[diff] [blame]	545	continue;
Kalesh Singh	dcceb19	2020-12-14 19:07:30 -0800	[diff] [blame]	546	} else if (IS_ENABLED(CONFIG_HAVE_MOVE_PMD) &&
				547	extent == PMD_SIZE) {
Joel Fernandes (Google)	2c91bd4	2019-01-03 15:28:38 -0800	[diff] [blame]	548	/*
				549	* If the extent is PMD-sized, try to speed the move by
				550	* moving at the PMD level if possible.
				551	*/
Kalesh Singh	dcceb19	2020-12-14 19:07:30 -0800	[diff] [blame]	552	if (move_pgt_entry(NORMAL_PMD, vma, old_addr, new_addr,
Aneesh Kumar K.V	e37cc8a0	2021-07-07 18:10:15 -0700	[diff] [blame]	553	old_pmd, new_pmd, true))
Joel Fernandes (Google)	2c91bd4	2019-01-03 15:28:38 -0800	[diff] [blame]	554	continue;
Andrea Arcangeli	37a1c49	2011-10-31 17:08:30 -0700	[diff] [blame]	555	}
Joel Fernandes (Google)	2c91bd4	2019-01-03 15:28:38 -0800	[diff] [blame]	556
Joel Fernandes (Google)	4cf5892	2019-01-03 15:28:34 -0800	[diff] [blame]	557	if (pte_alloc(new_vma->vm_mm, new_pmd))
Andrea Arcangeli	37a1c49	2011-10-31 17:08:30 -0700	[diff] [blame]	558	break;
Aaron Lu	5d19042	2016-11-10 17:16:33 +0800	[diff] [blame]	559	move_ptes(vma, old_pmd, old_addr, old_addr + extent, new_vma,
Linus Torvalds	eb66ae0	2018-10-12 15:22:59 -0700	[diff] [blame]	560	new_pmd, new_addr, need_rmap_locks);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	561	}
Andrea Arcangeli	7b6efc2	2011-10-31 17:08:26 -0700	[diff] [blame]	562
Jérôme Glisse	ac46d4f	2018-12-28 00:38:09 -0800	[diff] [blame]	563	mmu_notifier_invalidate_range_end(&range);
Hugh Dickins	7be7a54	2005-10-29 18:16:00 -0700	[diff] [blame]	564
				565	return len + old_addr - old_end; /* how much done */
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	566	}
				567
				568	static unsigned long move_vma(struct vm_area_struct *vma,
				569	unsigned long old_addr, unsigned long old_len,
Pavel Emelyanov	72f8765	2017-02-22 15:42:34 -0800	[diff] [blame]	570	unsigned long new_len, unsigned long new_addr,
Brian Geffon	e346b38	2020-04-01 21:09:17 -0700	[diff] [blame]	571	bool *locked, unsigned long flags,
				572	struct vm_userfaultfd_ctx uf, struct list_head uf_unmap)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	573	{
				574	struct mm_struct *mm = vma->vm_mm;
				575	struct vm_area_struct *new_vma;
				576	unsigned long vm_flags = vma->vm_flags;
				577	unsigned long new_pgoff;
				578	unsigned long moved_len;
				579	unsigned long excess = 0;
Hugh Dickins	365e9c87	2005-10-29 18:16:18 -0700	[diff] [blame]	580	unsigned long hiwater_vm;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	581	int split = 0;
Hugh Dickins	7103ad3	2009-09-21 17:02:28 -0700	[diff] [blame]	582	int err;
Michel Lespinasse	38a7601	2012-10-08 16:31:50 -0700	[diff] [blame]	583	bool need_rmap_locks;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	584
				585	/*
				586	* We'd prefer to avoid failure later on in do_munmap:
				587	* which may split one vma into three before unmapping.
				588	*/
				589	if (mm->map_count >= sysctl_max_map_count - 3)
				590	return -ENOMEM;
				591
Hugh Dickins	1ff82995	2009-09-21 17:02:05 -0700	[diff] [blame]	592	/*
				593	* Advise KSM to break any KSM pages in the area to be moved:
				594	* it would be confusing if they were to turn up at the new
				595	* location, where they happen to coincide with different KSM
				596	* pages recently unmapped. But leave vma->vm_flags as it was,
				597	* so KSM can come around to merge on vma and new_vma afterwards.
				598	*/
Hugh Dickins	7103ad3	2009-09-21 17:02:28 -0700	[diff] [blame]	599	err = ksm_madvise(vma, old_addr, old_addr + old_len,
				600	MADV_UNMERGEABLE, &vm_flags);
				601	if (err)
				602	return err;
Hugh Dickins	1ff82995	2009-09-21 17:02:05 -0700	[diff] [blame]	603
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	604	new_pgoff = vma->vm_pgoff + ((old_addr - vma->vm_start) >> PAGE_SHIFT);
Michel Lespinasse	38a7601	2012-10-08 16:31:50 -0700	[diff] [blame]	605	new_vma = copy_vma(&vma, new_addr, new_len, new_pgoff,
				606	&need_rmap_locks);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	607	if (!new_vma)
				608	return -ENOMEM;
				609
Laurent Dufour	0525756	2018-04-17 16:33:16 +0200	[diff] [blame]	610	/* new_vma is returned protected by copy_vma, to prevent speculative
				611	* page fault to be done in the destination area before we move the pte.
				612	* Now, we must also protect the source VMA since we don't want pages
				613	* to be mapped in our back while we are copying the PTEs.
				614	*/
				615	if (vma != new_vma)
Vinayak Menon	c920163	2021-01-15 19:52:40 +0530	[diff] [blame]	616	vm_write_begin(vma);
Laurent Dufour	0525756	2018-04-17 16:33:16 +0200	[diff] [blame]	617
Michel Lespinasse	38a7601	2012-10-08 16:31:50 -0700	[diff] [blame]	618	moved_len = move_page_tables(vma, old_addr, new_vma, new_addr, old_len,
				619	need_rmap_locks);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	620	if (moved_len < old_len) {
Oleg Nesterov	df1eab3	2015-09-04 15:48:01 -0700	[diff] [blame]	621	err = -ENOMEM;
Oleg Nesterov	5477e70	2015-09-04 15:48:04 -0700	[diff] [blame]	622	} else if (vma->vm_ops && vma->vm_ops->mremap) {
				623	err = vma->vm_ops->mremap(new_vma);
Oleg Nesterov	df1eab3	2015-09-04 15:48:01 -0700	[diff] [blame]	624	}
				625
				626	if (unlikely(err)) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	627	/*
				628	* On error, move entries back from new area to old,
				629	* which will succeed since page tables still there,
				630	* and then proceed to unmap new area instead of old.
				631	*/
Michel Lespinasse	38a7601	2012-10-08 16:31:50 -0700	[diff] [blame]	632	move_page_tables(new_vma, new_addr, vma, old_addr, moved_len,
				633	true);
Laurent Dufour	0525756	2018-04-17 16:33:16 +0200	[diff] [blame]	634	if (vma != new_vma)
Vinayak Menon	c920163	2021-01-15 19:52:40 +0530	[diff] [blame]	635	vm_write_end(vma);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	636	vma = new_vma;
				637	old_len = new_len;
				638	old_addr = new_addr;
Oleg Nesterov	df1eab3	2015-09-04 15:48:01 -0700	[diff] [blame]	639	new_addr = err;
Laurent Dufour	4abad2c	2015-06-24 16:56:19 -0700	[diff] [blame]	640	} else {
Pavel Emelyanov	72f8765	2017-02-22 15:42:34 -0800	[diff] [blame]	641	mremap_userfaultfd_prep(new_vma, uf);
Laurent Dufour	4abad2c	2015-06-24 16:56:19 -0700	[diff] [blame]	642	arch_remap(mm, old_addr, old_addr + old_len,
				643	new_addr, new_addr + new_len);
Laurent Dufour	0525756	2018-04-17 16:33:16 +0200	[diff] [blame]	644	if (vma != new_vma)
Vinayak Menon	c920163	2021-01-15 19:52:40 +0530	[diff] [blame]	645	vm_write_end(vma);
Al Viro	b2edffd	2015-04-06 17:48:54 -0400	[diff] [blame]	646	}
Vinayak Menon	c920163	2021-01-15 19:52:40 +0530	[diff] [blame]	647	vm_write_end(new_vma);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	648
				649	/* Conceal VM_ACCOUNT so old reservation is not undone */
				650	if (vm_flags & VM_ACCOUNT) {
				651	vma->vm_flags &= ~VM_ACCOUNT;
				652	excess = vma->vm_end - vma->vm_start - old_len;
				653	if (old_addr > vma->vm_start &&
				654	old_addr + old_len < vma->vm_end)
				655	split = 1;
				656	}
				657
Kirill Korotaev	7179906	2005-05-16 21:53:18 -0700	[diff] [blame]	658	/*
Hugh Dickins	365e9c87	2005-10-29 18:16:18 -0700	[diff] [blame]	659	* If we failed to move page tables we still do total_vm increment
				660	* since do_munmap() will decrement it by old_len == new_len.
				661	*
				662	* Since total_vm is about to be raised artificially high for a
				663	* moment, we need to restore high watermark afterwards: if stats
				664	* are taken meanwhile, total_vm and hiwater_vm appear too high.
				665	* If this were a serious issue, we'd add a flag to do_munmap().
Kirill Korotaev	7179906	2005-05-16 21:53:18 -0700	[diff] [blame]	666	*/
Hugh Dickins	365e9c87	2005-10-29 18:16:18 -0700	[diff] [blame]	667	hiwater_vm = mm->hiwater_vm;
Konstantin Khlebnikov	8463833	2016-01-14 15:22:07 -0800	[diff] [blame]	668	vm_stat_account(mm, vma->vm_flags, new_len >> PAGE_SHIFT);
Kirill Korotaev	7179906	2005-05-16 21:53:18 -0700	[diff] [blame]	669
Toshi Kani	d9fe4fab	2015-12-22 17:54:23 -0700	[diff] [blame]	670	/* Tell pfnmap has moved from this vma */
				671	if (unlikely(vma->vm_flags & VM_PFNMAP))
				672	untrack_pfn_moved(vma);
				673
Brian Geffon	e346b38	2020-04-01 21:09:17 -0700	[diff] [blame]	674	if (unlikely(!err && (flags & MREMAP_DONTUNMAP))) {
				675	if (vm_flags & VM_ACCOUNT) {
				676	/* Always put back VM_ACCOUNT since we won't unmap */
				677	vma->vm_flags \|= VM_ACCOUNT;
				678
Brian Geffon	dadbd85	2020-04-17 10:25:56 -0700	[diff] [blame]	679	vm_acct_memory(new_len >> PAGE_SHIFT);
Brian Geffon	e346b38	2020-04-01 21:09:17 -0700	[diff] [blame]	680	}
				681
Brian Geffon	dadbd85	2020-04-17 10:25:56 -0700	[diff] [blame]	682	/*
				683	* VMAs can actually be merged back together in copy_vma
				684	* calling merge_vma. This can happen with anonymous vmas
				685	* which have not yet been faulted, so if we were to consider
				686	* this VMA split we'll end up adding VM_ACCOUNT on the
				687	* next VMA, which is completely unrelated if this VMA
				688	* was re-merged.
				689	*/
				690	if (split && new_vma == vma)
				691	split = 0;
				692
Brian Geffon	e346b38	2020-04-01 21:09:17 -0700	[diff] [blame]	693	/* We always clear VM_LOCKED[ONFAULT] on the old vma */
				694	vma->vm_flags &= VM_LOCKED_CLEAR_MASK;
				695
				696	/* Because we won't unmap we don't need to touch locked_vm */
				697	goto out;
				698	}
				699
Mike Rapoport	897ab3e	2017-02-24 14:58:22 -0800	[diff] [blame]	700	if (do_munmap(mm, old_addr, old_len, uf_unmap) < 0) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	701	/* OOM: unable to split vma, just get accounts right */
				702	vm_unacct_memory(excess >> PAGE_SHIFT);
				703	excess = 0;
				704	}
Brian Geffon	e346b38	2020-04-01 21:09:17 -0700	[diff] [blame]	705
				706	if (vm_flags & VM_LOCKED) {
				707	mm->locked_vm += new_len >> PAGE_SHIFT;
				708	*locked = true;
				709	}
				710	out:
Hugh Dickins	365e9c87	2005-10-29 18:16:18 -0700	[diff] [blame]	711	mm->hiwater_vm = hiwater_vm;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	712
				713	/* Restore VM_ACCOUNT if one or two pieces of vma left */
				714	if (excess) {
				715	vma->vm_flags \|= VM_ACCOUNT;
				716	if (split)
				717	vma->vm_next->vm_flags \|= VM_ACCOUNT;
				718	}
				719
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	720	return new_addr;
				721	}
				722
Al Viro	54f5de7	2009-11-24 07:17:46 -0500	[diff] [blame]	723	static struct vm_area_struct *vma_to_resize(unsigned long addr,
Brian Geffon	e346b38	2020-04-01 21:09:17 -0700	[diff] [blame]	724	unsigned long old_len, unsigned long new_len, unsigned long flags,
				725	unsigned long *p)
Al Viro	54f5de7	2009-11-24 07:17:46 -0500	[diff] [blame]	726	{
				727	struct mm_struct *mm = current->mm;
				728	struct vm_area_struct *vma = find_vma(mm, addr);
Oleg Nesterov	1d39168	2015-09-04 15:48:10 -0700	[diff] [blame]	729	unsigned long pgoff;
Al Viro	54f5de7	2009-11-24 07:17:46 -0500	[diff] [blame]	730
				731	if (!vma \|\| vma->vm_start > addr)
Derek	6cd57613	2015-04-15 16:14:02 -0700	[diff] [blame]	732	return ERR_PTR(-EFAULT);
Al Viro	54f5de7	2009-11-24 07:17:46 -0500	[diff] [blame]	733
Mike Kravetz	dba58d3	2017-09-06 16:20:55 -0700	[diff] [blame]	734	/*
				735	* !old_len is a special case where an attempt is made to 'duplicate'
				736	* a mapping. This makes no sense for private mappings as it will
				737	* instead create a fresh/new mapping unrelated to the original. This
				738	* is contrary to the basic idea of mremap which creates new mappings
				739	* based on the original. There are no known use cases for this
				740	* behavior. As a result, fail such attempts.
				741	*/
				742	if (!old_len && !(vma->vm_flags & (VM_SHARED \| VM_MAYSHARE))) {
				743	pr_warn_once("%s (%d): attempted to duplicate a private mapping with mremap. This is not supported.\n", current->comm, current->pid);
				744	return ERR_PTR(-EINVAL);
				745	}
				746
Brian Geffon	1d695cc	2021-03-23 11:25:17 -0700	[diff] [blame]	747	if ((flags & MREMAP_DONTUNMAP) &&
				748	(vma->vm_flags & (VM_DONTEXPAND \| VM_PFNMAP)))
Brian Geffon	e346b38	2020-04-01 21:09:17 -0700	[diff] [blame]	749	return ERR_PTR(-EINVAL);
				750
Al Viro	54f5de7	2009-11-24 07:17:46 -0500	[diff] [blame]	751	if (is_vm_hugetlb_page(vma))
Derek	6cd57613	2015-04-15 16:14:02 -0700	[diff] [blame]	752	return ERR_PTR(-EINVAL);
Al Viro	54f5de7	2009-11-24 07:17:46 -0500	[diff] [blame]	753
				754	/* We can't remap across vm area boundaries */
				755	if (old_len > vma->vm_end - addr)
Derek	6cd57613	2015-04-15 16:14:02 -0700	[diff] [blame]	756	return ERR_PTR(-EFAULT);
Al Viro	54f5de7	2009-11-24 07:17:46 -0500	[diff] [blame]	757
Oleg Nesterov	1d39168	2015-09-04 15:48:10 -0700	[diff] [blame]	758	if (new_len == old_len)
				759	return vma;
Linus Torvalds	982134b	2011-04-07 07:35:50 -0700	[diff] [blame]	760
Oleg Nesterov	1d39168	2015-09-04 15:48:10 -0700	[diff] [blame]	761	/* Need to be careful about a growing mapping */
				762	pgoff = (addr - vma->vm_start) >> PAGE_SHIFT;
				763	pgoff += vma->vm_pgoff;
				764	if (pgoff + (new_len >> PAGE_SHIFT) < pgoff)
				765	return ERR_PTR(-EINVAL);
				766
				767	if (vma->vm_flags & (VM_DONTEXPAND \| VM_PFNMAP))
				768	return ERR_PTR(-EFAULT);
Al Viro	54f5de7	2009-11-24 07:17:46 -0500	[diff] [blame]	769
				770	if (vma->vm_flags & VM_LOCKED) {
				771	unsigned long locked, lock_limit;
				772	locked = mm->locked_vm << PAGE_SHIFT;
Jiri Slaby	59e99e5	2010-03-05 13:41:44 -0800	[diff] [blame]	773	lock_limit = rlimit(RLIMIT_MEMLOCK);
Al Viro	54f5de7	2009-11-24 07:17:46 -0500	[diff] [blame]	774	locked += new_len - old_len;
				775	if (locked > lock_limit && !capable(CAP_IPC_LOCK))
Derek	6cd57613	2015-04-15 16:14:02 -0700	[diff] [blame]	776	return ERR_PTR(-EAGAIN);
Al Viro	54f5de7	2009-11-24 07:17:46 -0500	[diff] [blame]	777	}
				778
Konstantin Khlebnikov	8463833	2016-01-14 15:22:07 -0800	[diff] [blame]	779	if (!may_expand_vm(mm, vma->vm_flags,
				780	(new_len - old_len) >> PAGE_SHIFT))
Derek	6cd57613	2015-04-15 16:14:02 -0700	[diff] [blame]	781	return ERR_PTR(-ENOMEM);
Al Viro	54f5de7	2009-11-24 07:17:46 -0500	[diff] [blame]	782
				783	if (vma->vm_flags & VM_ACCOUNT) {
				784	unsigned long charged = (new_len - old_len) >> PAGE_SHIFT;
Al Viro	191c542	2012-02-13 03:58:52 +0000	[diff] [blame]	785	if (security_vm_enough_memory_mm(mm, charged))
Derek	6cd57613	2015-04-15 16:14:02 -0700	[diff] [blame]	786	return ERR_PTR(-ENOMEM);
Al Viro	54f5de7	2009-11-24 07:17:46 -0500	[diff] [blame]	787	*p = charged;
				788	}
				789
				790	return vma;
Al Viro	54f5de7	2009-11-24 07:17:46 -0500	[diff] [blame]	791	}
				792
/*
 * Move the mapping at @addr to @new_addr. The mremap() syscall calls this
 * when MREMAP_FIXED or MREMAP_DONTUNMAP is set.
 *
 * Steps, in order: validate @new_addr/@new_len, check the map count
 * headroom, unmap the destination range (MREMAP_FIXED only), unmap the
 * tail of the source if it is shrinking, validate the source vma with
 * vma_to_resize(), pick the destination with get_unmapped_area() and
 * finally call move_vma().
 *
 * @locked, @uf and @uf_unmap are handed through to move_vma();
 * @uf_unmap_early is used for the destination unmap and @uf_unmap for the
 * source tail unmap.
 *
 * Returns the value produced by move_vma() on success, otherwise a
 * negative errno stored in an unsigned long.
 */
static unsigned long mremap_to(unsigned long addr, unsigned long old_len,
		unsigned long new_addr, unsigned long new_len, bool *locked,
		unsigned long flags, struct vm_userfaultfd_ctx *uf,
		struct list_head *uf_unmap_early,
		struct list_head *uf_unmap)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	unsigned long ret = -EINVAL;
	unsigned long charged = 0;
	unsigned long map_flags = 0;

	/* The destination must be page aligned */
	if (offset_in_page(new_addr))
		goto out;

	/* The destination range must fit below TASK_SIZE */
	if (new_len > TASK_SIZE || new_addr > TASK_SIZE - new_len)
		goto out;

	/* Ensure the old/new locations do not overlap */
	if (addr + old_len > new_addr && new_addr + new_len > addr)
		goto out;

	/*
	 * move_vma() needs us to stay 4 maps below the threshold, otherwise
	 * it will bail out at the very beginning.
	 * That is a problem if we have already unmapped the regions here
	 * (new_addr, and old_addr), because userspace will not know the
	 * state of the vmas after it gets -ENOMEM.
	 * So, to avoid such a scenario we pre-compute whether the whole
	 * operation has a high chance of succeeding map-wise.
	 * The worst case is when both vmas (new_addr and old_addr) get
	 * split in 3 before unmapping.
	 * That means 2 more maps (1 for each) on top of the ones we already
	 * hold. Check whether the current map count plus 2 still leaves us
	 * 4 maps below the threshold, otherwise return -ENOMEM here to be
	 * on the safe side.
	 */
	if ((mm->map_count + 2) >= sysctl_max_map_count - 3)
		return -ENOMEM;

	/* With MREMAP_FIXED, whatever occupies the destination is unmapped */
	if (flags & MREMAP_FIXED) {
		ret = do_munmap(mm, new_addr, new_len, uf_unmap_early);
		if (ret)
			goto out;
	}

	/*
	 * Shrinking (or same size): drop the tail of the source first. A
	 * do_munmap() error is only treated as fatal when there really was
	 * a tail to remove (old_len != new_len).
	 */
	if (old_len >= new_len) {
		ret = do_munmap(mm, addr+new_len, old_len - new_len, uf_unmap);
		if (ret && old_len != new_len)
			goto out;
		old_len = new_len;
	}

	/* On success, charged holds any pages accounted by vma_to_resize() */
	vma = vma_to_resize(addr, old_len, new_len, flags, &charged);
	if (IS_ERR(vma)) {
		ret = PTR_ERR(vma);
		goto out;
	}

	/* MREMAP_DONTUNMAP expands by old_len since old_len == new_len */
	if (flags & MREMAP_DONTUNMAP &&
		!may_expand_vm(mm, vma->vm_flags, old_len >> PAGE_SHIFT)) {
		ret = -ENOMEM;
		goto out;
	}

	if (flags & MREMAP_FIXED)
		map_flags |= MAP_FIXED;

	if (vma->vm_flags & VM_MAYSHARE)
		map_flags |= MAP_SHARED;

	/* The page offset passed is that of addr within the source vma */
	ret = get_unmapped_area(vma->vm_file, new_addr, new_len, vma->vm_pgoff +
				((addr - vma->vm_start) >> PAGE_SHIFT),
				map_flags);
	if (IS_ERR_VALUE(ret))
		goto out1;

	/* We got a new mapping */
	if (!(flags & MREMAP_FIXED))
		new_addr = ret;

	ret = move_vma(vma, addr, old_len, new_len, new_addr, locked, flags, uf,
		       uf_unmap);

	/* A page-aligned result is a valid address, i.e. success */
	if (!(offset_in_page(ret)))
		goto out;

out1:
	/* Failure after vma_to_resize(): give back what it charged */
	vm_unacct_memory(charged);

out:
	return ret;
}
				886
Al Viro	1a0ef85	2009-11-24 07:43:18 -0500	[diff] [blame]	887	static int vma_expandable(struct vm_area_struct *vma, unsigned long delta)
				888	{
Al Viro	f106af4	2009-11-24 08:25:18 -0500	[diff] [blame]	889	unsigned long end = vma->vm_end + delta;
Al Viro	9206de9	2009-12-03 15:23:11 -0500	[diff] [blame]	890	if (end < vma->vm_end) /* overflow */
Al Viro	1a0ef85	2009-11-24 07:43:18 -0500	[diff] [blame]	891	return 0;
Al Viro	9206de9	2009-12-03 15:23:11 -0500	[diff] [blame]	892	if (vma->vm_next && vma->vm_next->vm_start < end) /* intersection */
Al Viro	f106af4	2009-11-24 08:25:18 -0500	[diff] [blame]	893	return 0;
				894	if (get_unmapped_area(NULL, vma->vm_start, end - vma->vm_start,
				895	0, MAP_FIXED) & ~PAGE_MASK)
				896	return 0;
Al Viro	1a0ef85	2009-11-24 07:43:18 -0500	[diff] [blame]	897	return 1;
				898	}
				899
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	900	/*
				901	* Expand (or shrink) an existing mapping, potentially moving it at the
				902	* same time (controlled by the MREMAP_MAYMOVE flag and available VM space)
				903	*
				904	* MREMAP_FIXED option added 5-Dec-1999 by Benjamin LaHaise
				905	* This option implies MREMAP_MAYMOVE.
				906	*/
Al Viro	63a81db	2012-05-30 11:32:04 -0400	[diff] [blame]	907	SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
				908	unsigned long, new_len, unsigned long, flags,
				909	unsigned long, new_addr)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	910	{
Hugh Dickins	d0de32d	2005-10-29 18:16:16 -0700	[diff] [blame]	911	struct mm_struct *mm = current->mm;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	912	struct vm_area_struct *vma;
				913	unsigned long ret = -EINVAL;
				914	unsigned long charged = 0;
Michel Lespinasse	81909b8	2013-02-22 16:32:41 -0800	[diff] [blame]	915	bool locked = false;
Yang Shi	85a0683	2018-10-26 15:08:50 -0700	[diff] [blame]	916	bool downgraded = false;
Pavel Emelyanov	72f8765	2017-02-22 15:42:34 -0800	[diff] [blame]	917	struct vm_userfaultfd_ctx uf = NULL_VM_UFFD_CTX;
Mike Rapoport	b228237	2017-08-02 13:31:55 -0700	[diff] [blame]	918	LIST_HEAD(uf_unmap_early);
Mike Rapoport	897ab3e	2017-02-24 14:58:22 -0800	[diff] [blame]	919	LIST_HEAD(uf_unmap);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	920
Will Deacon	b2a84de	2020-03-25 11:13:46 +0000	[diff] [blame]	921	/*
				922	* There is a deliberate asymmetry here: we strip the pointer tag
				923	* from the old address but leave the new address alone. This is
				924	* for consistency with mmap(), where we prevent the creation of
				925	* aliasing mappings in userspace by leaving the tag bits of the
				926	* mapping address intact. A non-zero tag will cause the subsequent
				927	* range checks to reject the address as invalid.
				928	*
				929	* See Documentation/arm64/tagged-address-abi.rst for more information.
				930	*/
Andrey Konovalov	057d3389	2019-09-25 16:48:30 -0700	[diff] [blame]	931	addr = untagged_addr(addr);
				932
Brian Geffon	e346b38	2020-04-01 21:09:17 -0700	[diff] [blame]	933	if (flags & ~(MREMAP_FIXED \| MREMAP_MAYMOVE \| MREMAP_DONTUNMAP))
Rasmus Villemoes	9a2458a	2013-07-08 15:59:48 -0700	[diff] [blame]	934	return ret;
				935
				936	if (flags & MREMAP_FIXED && !(flags & MREMAP_MAYMOVE))
				937	return ret;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	938
Brian Geffon	e346b38	2020-04-01 21:09:17 -0700	[diff] [blame]	939	/*
				940	* MREMAP_DONTUNMAP is always a move and it does not allow resizing
				941	* in the process.
				942	*/
				943	if (flags & MREMAP_DONTUNMAP &&
				944	(!(flags & MREMAP_MAYMOVE) \|\| old_len != new_len))
				945	return ret;
				946
				947
Alexander Kuleshov	f19cb11	2015-11-05 18:46:57 -0800	[diff] [blame]	948	if (offset_in_page(addr))
Rasmus Villemoes	9a2458a	2013-07-08 15:59:48 -0700	[diff] [blame]	949	return ret;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	950
				951	old_len = PAGE_ALIGN(old_len);
				952	new_len = PAGE_ALIGN(new_len);
				953
				954	/*
				955	* We allow a zero old-len as a special case
				956	* for DOS-emu "duplicate shm area" thing. But
				957	* a zero new-len is nonsensical.
				958	*/
				959	if (!new_len)
Rasmus Villemoes	9a2458a	2013-07-08 15:59:48 -0700	[diff] [blame]	960	return ret;
				961
Michel Lespinasse	d8ed45c	2020-06-08 21:33:25 -0700	[diff] [blame]	962	if (mmap_write_lock_killable(current->mm))
Michal Hocko	dc0ef0d	2016-05-23 16:25:27 -0700	[diff] [blame]	963	return -EINTR;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	964
Brian Geffon	e346b38	2020-04-01 21:09:17 -0700	[diff] [blame]	965	if (flags & (MREMAP_FIXED \| MREMAP_DONTUNMAP)) {
Rasmus Villemoes	9a2458a	2013-07-08 15:59:48 -0700	[diff] [blame]	966	ret = mremap_to(addr, old_len, new_addr, new_len,
Brian Geffon	e346b38	2020-04-01 21:09:17 -0700	[diff] [blame]	967	&locked, flags, &uf, &uf_unmap_early,
				968	&uf_unmap);
Al Viro	ecc1a89	2009-11-24 07:28:07 -0500	[diff] [blame]	969	goto out;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	970	}
				971
				972	/*
				973	* Always allow a shrinking remap: that just unmaps
				974	* the unnecessary pages..
Yang Shi	85a0683	2018-10-26 15:08:50 -0700	[diff] [blame]	975	* __do_munmap does all the needed commit accounting, and
Michel Lespinasse	c1e8d7c	2020-06-08 21:33:54 -0700	[diff] [blame]	976	* downgrades mmap_lock to read if so directed.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	977	*/
				978	if (old_len >= new_len) {
Yang Shi	85a0683	2018-10-26 15:08:50 -0700	[diff] [blame]	979	int retval;
				980
				981	retval = __do_munmap(mm, addr+new_len, old_len - new_len,
				982	&uf_unmap, true);
				983	if (retval < 0 && old_len != new_len) {
				984	ret = retval;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	985	goto out;
Michel Lespinasse	c1e8d7c	2020-06-08 21:33:54 -0700	[diff] [blame]	986	/* Returning 1 indicates mmap_lock is downgraded to read. */
Yang Shi	85a0683	2018-10-26 15:08:50 -0700	[diff] [blame]	987	} else if (retval == 1)
				988	downgraded = true;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	989	ret = addr;
Al Viro	ecc1a89	2009-11-24 07:28:07 -0500	[diff] [blame]	990	goto out;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	991	}
				992
				993	/*
Al Viro	ecc1a89	2009-11-24 07:28:07 -0500	[diff] [blame]	994	* Ok, we need to grow..
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	995	*/
Brian Geffon	e346b38	2020-04-01 21:09:17 -0700	[diff] [blame]	996	vma = vma_to_resize(addr, old_len, new_len, flags, &charged);
Al Viro	54f5de7	2009-11-24 07:17:46 -0500	[diff] [blame]	997	if (IS_ERR(vma)) {
				998	ret = PTR_ERR(vma);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	999	goto out;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1000	}
				1001
				1002	/* old_len exactly to the end of the area..
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1003	*/
Al Viro	ecc1a89	2009-11-24 07:28:07 -0500	[diff] [blame]	1004	if (old_len == vma->vm_end - addr) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1005	/* can we just expand the current mapping? */
Al Viro	1a0ef85	2009-11-24 07:43:18 -0500	[diff] [blame]	1006	if (vma_expandable(vma, new_len - old_len)) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1007	int pages = (new_len - old_len) >> PAGE_SHIFT;
				1008
Rik van Riel	5beb493	2010-03-05 13:42:07 -0800	[diff] [blame]	1009	if (vma_adjust(vma, vma->vm_start, addr + new_len,
				1010	vma->vm_pgoff, NULL)) {
				1011	ret = -ENOMEM;
				1012	goto out;
				1013	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1014
Konstantin Khlebnikov	8463833	2016-01-14 15:22:07 -0800	[diff] [blame]	1015	vm_stat_account(mm, vma->vm_flags, pages);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1016	if (vma->vm_flags & VM_LOCKED) {
Hugh Dickins	d0de32d	2005-10-29 18:16:16 -0700	[diff] [blame]	1017	mm->locked_vm += pages;
Michel Lespinasse	81909b8	2013-02-22 16:32:41 -0800	[diff] [blame]	1018	locked = true;
				1019	new_addr = addr;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1020	}
				1021	ret = addr;
				1022	goto out;
				1023	}
				1024	}
				1025
				1026	/*
				1027	* We weren't able to just expand or shrink the area,
				1028	* we need to create a new one and move it..
				1029	*/
				1030	ret = -ENOMEM;
				1031	if (flags & MREMAP_MAYMOVE) {
Al Viro	ecc1a89	2009-11-24 07:28:07 -0500	[diff] [blame]	1032	unsigned long map_flags = 0;
				1033	if (vma->vm_flags & VM_MAYSHARE)
				1034	map_flags \|= MAP_SHARED;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1035
Al Viro	ecc1a89	2009-11-24 07:28:07 -0500	[diff] [blame]	1036	new_addr = get_unmapped_area(vma->vm_file, 0, new_len,
Al Viro	9358741	2009-11-24 08:45:24 -0500	[diff] [blame]	1037	vma->vm_pgoff +
				1038	((addr - vma->vm_start) >> PAGE_SHIFT),
				1039	map_flags);
Gaowei Pu	ff68dac	2019-11-30 17:51:03 -0800	[diff] [blame]	1040	if (IS_ERR_VALUE(new_addr)) {
Al Viro	ecc1a89	2009-11-24 07:28:07 -0500	[diff] [blame]	1041	ret = new_addr;
				1042	goto out;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1043	}
Al Viro	ecc1a89	2009-11-24 07:28:07 -0500	[diff] [blame]	1044
Pavel Emelyanov	72f8765	2017-02-22 15:42:34 -0800	[diff] [blame]	1045	ret = move_vma(vma, addr, old_len, new_len, new_addr,
Brian Geffon	e346b38	2020-04-01 21:09:17 -0700	[diff] [blame]	1046	&locked, flags, &uf, &uf_unmap);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1047	}
				1048	out:
Alexander Kuleshov	f19cb11	2015-11-05 18:46:57 -0800	[diff] [blame]	1049	if (offset_in_page(ret)) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1050	vm_unacct_memory(charged);
Zou Wei	fa1f68c	2020-06-04 16:49:46 -0700	[diff] [blame]	1051	locked = false;
Oleg Nesterov	d456fb9	2015-09-04 15:48:07 -0700	[diff] [blame]	1052	}
Yang Shi	85a0683	2018-10-26 15:08:50 -0700	[diff] [blame]	1053	if (downgraded)
Michel Lespinasse	d8ed45c	2020-06-08 21:33:25 -0700	[diff] [blame]	1054	mmap_read_unlock(current->mm);
Yang Shi	85a0683	2018-10-26 15:08:50 -0700	[diff] [blame]	1055	else
Michel Lespinasse	d8ed45c	2020-06-08 21:33:25 -0700	[diff] [blame]	1056	mmap_write_unlock(current->mm);
Michel Lespinasse	81909b8	2013-02-22 16:32:41 -0800	[diff] [blame]	1057	if (locked && new_len > old_len)
				1058	mm_populate(new_addr + old_len, new_len - old_len);
Mike Rapoport	b228237	2017-08-02 13:31:55 -0700	[diff] [blame]	1059	userfaultfd_unmap_complete(mm, &uf_unmap_early);
Brian Geffon	d1564926	2020-05-13 17:50:44 -0700	[diff] [blame]	1060	mremap_userfaultfd_complete(&uf, addr, ret, old_len);
Mike Rapoport	897ab3e	2017-02-24 14:58:22 -0800	[diff] [blame]	1061	userfaultfd_unmap_complete(mm, &uf_unmap);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1062	return ret;
				1063	}