Commit eb66ae0

torvalds authored and gregkh committed
mremap: properly flush TLB before releasing the page
Jann Horn points out that our TLB flushing was subtly wrong for the mremap() case. What makes mremap() special is that we don't follow the usual "add page to list of pages to be freed, then flush tlb, and then free pages". No, mremap() obviously just _moves_ the page from one page table location to another.

That matters, because mremap() thus doesn't directly control the lifetime of the moved page with a freelist: instead, the lifetime of the page is controlled by the page table locking, that serializes access to the entry.

As a result, we need to flush the TLB not just before releasing the lock for the source location (to avoid any concurrent accesses to the entry), but also before we release the destination page table lock (to avoid the TLB being flushed after somebody else has already done something to that page).

This also makes the whole "need_flush" logic unnecessary, since we now always end up flushing the TLB for every valid entry.

Reported-and-tested-by: Jann Horn <jannh@google.com>
Acked-by: Will Deacon <will.deacon@arm.com>
Tested-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
1 parent 19e6420 · commit eb66ae0
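
The ordering change is easiest to see in the tail of move_ptes(). The following is a condensed before/after sketch, distilled from the mm/mremap.c hunk below; it illustrates the lock/flush ordering only and is not a drop-in fragment:

	/*
	 * Old tail (buggy): new_ptl is dropped before the flush, so the
	 * TLB could be flushed after somebody else has already done
	 * something to the moved page through the new mapping.
	 */
	if (new_ptl != old_ptl)
		spin_unlock(new_ptl);
	pte_unmap(new_pte - 1);
	if (force_flush)
		flush_tlb_range(vma, old_end - len, old_end);
	else
		*need_flush = true;	/* flush deferred to move_page_tables() */

	/*
	 * New tail (fixed): flush while both page table locks are still
	 * held, so the stale TLB entry for the old address is gone before
	 * either the source or the destination entry can be touched again.
	 */
	if (force_flush)
		flush_tlb_range(vma, old_end - len, old_end);
	if (new_ptl != old_ptl)
		spin_unlock(new_ptl);
	pte_unmap(new_pte - 1);

The same reordering is applied at PMD granularity in move_huge_pmd(), which is what lets the need_flush plumbing through move_page_tables() go away entirely.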

3 files changed: +18 −24 lines


include/linux/huge_mm.h

Lines changed: 1 addition & 1 deletion

@@ -43,7 +43,7 @@ extern int mincore_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
 			unsigned char *vec);
 extern bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr,
 			 unsigned long new_addr, unsigned long old_end,
-			 pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush);
+			 pmd_t *old_pmd, pmd_t *new_pmd);
 extern int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
 			unsigned long addr, pgprot_t newprot,
 			int prot_numa);

mm/huge_memory.c

Lines changed: 4 additions & 6 deletions

@@ -1780,7 +1780,7 @@ static pmd_t move_soft_dirty_pmd(pmd_t pmd)
 
 bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr,
 		  unsigned long new_addr, unsigned long old_end,
-		  pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush)
+		  pmd_t *old_pmd, pmd_t *new_pmd)
 {
 	spinlock_t *old_ptl, *new_ptl;
 	pmd_t pmd;
@@ -1811,7 +1811,7 @@ bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr,
 	if (new_ptl != old_ptl)
 		spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
 	pmd = pmdp_huge_get_and_clear(mm, old_addr, old_pmd);
-	if (pmd_present(pmd) && pmd_dirty(pmd))
+	if (pmd_present(pmd))
 		force_flush = true;
 	VM_BUG_ON(!pmd_none(*new_pmd));
 
@@ -1822,12 +1822,10 @@ bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr,
 	}
 	pmd = move_soft_dirty_pmd(pmd);
 	set_pmd_at(mm, new_addr, new_pmd, pmd);
-	if (new_ptl != old_ptl)
-		spin_unlock(new_ptl);
 	if (force_flush)
 		flush_tlb_range(vma, old_addr, old_addr + PMD_SIZE);
-	else
-		*need_flush = true;
+	if (new_ptl != old_ptl)
+		spin_unlock(new_ptl);
 	spin_unlock(old_ptl);
 	return true;
 }

mm/mremap.c

Lines changed: 13 additions & 17 deletions

@@ -115,7 +115,7 @@ static pte_t move_soft_dirty_pte(pte_t pte)
 static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
 		unsigned long old_addr, unsigned long old_end,
 		struct vm_area_struct *new_vma, pmd_t *new_pmd,
-		unsigned long new_addr, bool need_rmap_locks, bool *need_flush)
+		unsigned long new_addr, bool need_rmap_locks)
 {
 	struct mm_struct *mm = vma->vm_mm;
 	pte_t *old_pte, *new_pte, pte;
@@ -163,29 +163,29 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
 
 		pte = ptep_get_and_clear(mm, old_addr, old_pte);
 		/*
-		 * If we are remapping a dirty PTE, make sure
+		 * If we are remapping a valid PTE, make sure
 		 * to flush TLB before we drop the PTL for the
-		 * old PTE or we may race with page_mkclean().
+		 * PTE.
 		 *
-		 * This check has to be done after we removed the
-		 * old PTE from page tables or another thread may
-		 * dirty it after the check and before the removal.
+		 * NOTE! Both old and new PTL matter: the old one
+		 * for racing with page_mkclean(), the new one to
+		 * make sure the physical page stays valid until
+		 * the TLB entry for the old mapping has been
+		 * flushed.
 		 */
-		if (pte_present(pte) && pte_dirty(pte))
+		if (pte_present(pte))
 			force_flush = true;
 		pte = move_pte(pte, new_vma->vm_page_prot, old_addr, new_addr);
 		pte = move_soft_dirty_pte(pte);
 		set_pte_at(mm, new_addr, new_pte, pte);
 	}
 
 	arch_leave_lazy_mmu_mode();
+	if (force_flush)
+		flush_tlb_range(vma, old_end - len, old_end);
 	if (new_ptl != old_ptl)
 		spin_unlock(new_ptl);
 	pte_unmap(new_pte - 1);
-	if (force_flush)
-		flush_tlb_range(vma, old_end - len, old_end);
-	else
-		*need_flush = true;
 	pte_unmap_unlock(old_pte - 1, old_ptl);
 	if (need_rmap_locks)
 		drop_rmap_locks(vma);
@@ -198,7 +198,6 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
 {
 	unsigned long extent, next, old_end;
 	pmd_t *old_pmd, *new_pmd;
-	bool need_flush = false;
 	unsigned long mmun_start;	/* For mmu_notifiers */
 	unsigned long mmun_end;	/* For mmu_notifiers */
 
@@ -229,8 +228,7 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
 		if (need_rmap_locks)
 			take_rmap_locks(vma);
 		moved = move_huge_pmd(vma, old_addr, new_addr,
-				    old_end, old_pmd, new_pmd,
-				    &need_flush);
+				    old_end, old_pmd, new_pmd);
 		if (need_rmap_locks)
 			drop_rmap_locks(vma);
 		if (moved)
@@ -246,10 +244,8 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
 		if (extent > next - new_addr)
 			extent = next - new_addr;
 		move_ptes(vma, old_pmd, old_addr, old_addr + extent, new_vma,
-			 new_pmd, new_addr, need_rmap_locks, &need_flush);
+			 new_pmd, new_addr, need_rmap_locks);
 	}
-	if (need_flush)
-		flush_tlb_range(vma, old_end-len, old_addr);
 
 	mmu_notifier_invalidate_range_end(vma->vm_mm, mmun_start, mmun_end);
 
