
Commit b6ebaed

aagit authored and torvalds committed
userfaultfd: avoid mmap_sem read recursion in mcopy_atomic
If the rwsem starves writers it wasn't strictly a bug, but lockdep doesn't like it and this avoids depending on low-level implementation details of the lock.

[akpm@linux-foundation.org: delete weird BUILD_BUG_ON()]
Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Acked-by: Pavel Emelyanov <xemul@parallels.com>
Cc: Sanidhya Kashyap <sanidhya.gatech@gmail.com>
Cc: zhang.zhanghailiang@huawei.com
Cc: "Kirill A. Shutemov" <kirill@shutemov.name>
Cc: Andres Lagar-Cavilla <andreslc@google.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Hugh Dickins <hughd@google.com>
Cc: Peter Feiner <pfeiner@google.com>
Cc: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: "Huangpeng (Peter)" <peter.huangpeng@huawei.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
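In short: the first copy_from_user() attempt now runs under kmap_atomic(), so it cannot sleep or take a page fault (and therefore cannot re-take mmap_sem for read); if the source page isn't resident, that copy fails, mmap_sem is dropped, the copy is redone outside the lock into the already-allocated page, and the operation retries with the prefilled page passed back in through *pagep. The following is a condensed sketch of the new control flow in __mcopy_atomic(), using names and calls taken from the diff below; it is an illustration, not a verbatim excerpt, and it omits the zeropage path and the other error handling:

        /* sketch: __mcopy_atomic() loop, mmap_sem held for read */
        while (src_addr < src_start + len) {
                /*
                 * mcopy_atomic_pte() maps the freshly allocated page with
                 * kmap_atomic(), so copy_from_user() cannot sleep or fault;
                 * a non-resident source page yields -EFAULT and the page is
                 * handed back through *pagep instead of being freed.
                 */
                err = mcopy_atomic_pte(dst_mm, dst_pmd, dst_vma,
                                       dst_addr, src_addr, &page);

                if (unlikely(err == -EFAULT)) {
                        void *page_kaddr;

                        /* fallback: fault the source in outside mmap_sem */
                        up_read(&dst_mm->mmap_sem);
                        page_kaddr = kmap(page);
                        err = copy_from_user(page_kaddr,
                                             (const void __user *) src_addr,
                                             PAGE_SIZE);
                        kunmap(page);
                        if (unlikely(err)) {
                                err = -EFAULT; /* the range really is bad */
                                goto out;
                        }
                        /* retake mmap_sem, revalidate the vma, reuse *pagep */
                        goto retry;
                }
                /* ... advance src_addr/dst_addr and continue as before ... */
        }

The net effect is that the only copy_from_user() that may sleep or fault runs with mmap_sem released, so the read side of the semaphore is never acquired recursively.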
1 parent: c1a4de9 · commit: b6ebaed

File tree

1 file changed, +65 −26 lines

mm/userfaultfd.c

Lines changed: 65 additions & 26 deletions
@@ -21,26 +21,39 @@ static int mcopy_atomic_pte(struct mm_struct *dst_mm,
                             pmd_t *dst_pmd,
                             struct vm_area_struct *dst_vma,
                             unsigned long dst_addr,
-                            unsigned long src_addr)
+                            unsigned long src_addr,
+                            struct page **pagep)
 {
         struct mem_cgroup *memcg;
         pte_t _dst_pte, *dst_pte;
         spinlock_t *ptl;
-        struct page *page;
         void *page_kaddr;
         int ret;
+        struct page *page;
 
-        ret = -ENOMEM;
-        page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, dst_vma, dst_addr);
-        if (!page)
-                goto out;
-
-        page_kaddr = kmap(page);
-        ret = -EFAULT;
-        if (copy_from_user(page_kaddr, (const void __user *) src_addr,
-                           PAGE_SIZE))
-                goto out_kunmap_release;
-        kunmap(page);
+        if (!*pagep) {
+                ret = -ENOMEM;
+                page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, dst_vma, dst_addr);
+                if (!page)
+                        goto out;
+
+                page_kaddr = kmap_atomic(page);
+                ret = copy_from_user(page_kaddr,
+                                     (const void __user *) src_addr,
+                                     PAGE_SIZE);
+                kunmap_atomic(page_kaddr);
+
+                /* fallback to copy_from_user outside mmap_sem */
+                if (unlikely(ret)) {
+                        ret = -EFAULT;
+                        *pagep = page;
+                        /* don't free the page */
+                        goto out;
+                }
+        } else {
+                page = *pagep;
+                *pagep = NULL;
+        }
 
         /*
          * The memory barrier inside __SetPageUptodate makes sure that
@@ -82,9 +95,6 @@ static int mcopy_atomic_pte(struct mm_struct *dst_mm,
 out_release:
         page_cache_release(page);
         goto out;
-out_kunmap_release:
-        kunmap(page);
-        goto out_release;
 }
 
 static int mfill_zeropage_pte(struct mm_struct *dst_mm,
@@ -139,7 +149,8 @@ static __always_inline ssize_t __mcopy_atomic(struct mm_struct *dst_mm,
         ssize_t err;
         pmd_t *dst_pmd;
         unsigned long src_addr, dst_addr;
-        long copied = 0;
+        long copied;
+        struct page *page;
 
         /*
          * Sanitize the command parameters:
@@ -151,6 +162,11 @@ static __always_inline ssize_t __mcopy_atomic(struct mm_struct *dst_mm,
         BUG_ON(src_start + len <= src_start);
         BUG_ON(dst_start + len <= dst_start);
 
+        src_addr = src_start;
+        dst_addr = dst_start;
+        copied = 0;
+        page = NULL;
+retry:
         down_read(&dst_mm->mmap_sem);
 
         /*
@@ -160,10 +176,10 @@ static __always_inline ssize_t __mcopy_atomic(struct mm_struct *dst_mm,
         err = -EINVAL;
         dst_vma = find_vma(dst_mm, dst_start);
         if (!dst_vma || (dst_vma->vm_flags & VM_SHARED))
-                goto out;
+                goto out_unlock;
         if (dst_start < dst_vma->vm_start ||
             dst_start + len > dst_vma->vm_end)
-                goto out;
+                goto out_unlock;
 
         /*
          * Be strict and only allow __mcopy_atomic on userfaultfd
@@ -175,14 +191,14 @@ static __always_inline ssize_t __mcopy_atomic(struct mm_struct *dst_mm,
          * belonging to the userfaultfd and not syscalls.
          */
         if (!dst_vma->vm_userfaultfd_ctx.ctx)
-                goto out;
+                goto out_unlock;
 
         /*
          * FIXME: only allow copying on anonymous vmas, tmpfs should
          * be added.
          */
         if (dst_vma->vm_ops)
-                goto out;
+                goto out_unlock;
 
         /*
          * Ensure the dst_vma has a anon_vma or this page
@@ -191,12 +207,13 @@ static __always_inline ssize_t __mcopy_atomic(struct mm_struct *dst_mm,
          */
         err = -ENOMEM;
         if (unlikely(anon_vma_prepare(dst_vma)))
-                goto out;
+                goto out_unlock;
 
-        for (src_addr = src_start, dst_addr = dst_start;
-             src_addr < src_start + len; ) {
+        while (src_addr < src_start + len) {
                 pmd_t dst_pmdval;
+
                 BUG_ON(dst_addr >= dst_start + len);
+
                 dst_pmd = mm_alloc_pmd(dst_mm, dst_addr);
                 if (unlikely(!dst_pmd)) {
                         err = -ENOMEM;
@@ -229,13 +246,32 @@ static __always_inline ssize_t __mcopy_atomic(struct mm_struct *dst_mm,
 
                 if (!zeropage)
                         err = mcopy_atomic_pte(dst_mm, dst_pmd, dst_vma,
-                                               dst_addr, src_addr);
+                                               dst_addr, src_addr, &page);
                 else
                         err = mfill_zeropage_pte(dst_mm, dst_pmd, dst_vma,
                                                  dst_addr);
 
                 cond_resched();
 
+                if (unlikely(err == -EFAULT)) {
+                        void *page_kaddr;
+
+                        up_read(&dst_mm->mmap_sem);
+                        BUG_ON(!page);
+
+                        page_kaddr = kmap(page);
+                        err = copy_from_user(page_kaddr,
+                                             (const void __user *) src_addr,
+                                             PAGE_SIZE);
+                        kunmap(page);
+                        if (unlikely(err)) {
+                                err = -EFAULT;
+                                goto out;
+                        }
+                        goto retry;
+                } else
+                        BUG_ON(page);
+
                 if (!err) {
                         dst_addr += PAGE_SIZE;
                         src_addr += PAGE_SIZE;
@@ -248,8 +284,11 @@ static __always_inline ssize_t __mcopy_atomic(struct mm_struct *dst_mm,
                         break;
                 }
 
-out:
+out_unlock:
         up_read(&dst_mm->mmap_sem);
+out:
+        if (page)
+                page_cache_release(page);
         BUG_ON(copied < 0);
         BUG_ON(err > 0);
         BUG_ON(!copied && !err);
