Skip to content

Commit 1b2ee12

Browse files
hansendcIngo Molnar
authored andcommitted
mm/core: Do not enforce PKEY permissions on remote mm access
We try to enforce protection keys in software the same way that we do in hardware. (See long example below). But, we only want to do this when accessing our *own* process's memory. If GDB set PKRU[6].AD=1 (disable access to PKEY 6), then tried to PTRACE_POKE a target process which just happened to have some mprotect_pkey(pkey=6) memory, we do *not* want to deny the debugger access to that memory. PKRU is fundamentally a thread-local structure and we do not want to enforce it on access to _another_ thread's data. This gets especially tricky when we have workqueues or other delayed-work mechanisms that might run in a random process's context. We can check that we only enforce pkeys when operating on our *own* mm, but delayed work gets performed when a random user context is active. We might end up with a situation where a delayed-work gup fails when running randomly under its "own" task but succeeds when running under another process. We want to avoid that. To avoid that, we use the new GUP flag: FOLL_REMOTE and add a fault flag: FAULT_FLAG_REMOTE. They indicate that we are walking an mm which is not guranteed to be the same as current->mm and should not be subject to protection key enforcement. Thanks to Jerome Glisse for pointing out this scenario. Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com> Reviewed-by: Thomas Gleixner <tglx@linutronix.de> Cc: Alexey Kardashevskiy <aik@ozlabs.ru> Cc: Andrea Arcangeli <aarcange@redhat.com> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: Andy Lutomirski <luto@amacapital.net> Cc: Andy Lutomirski <luto@kernel.org> Cc: Arnd Bergmann <arnd@arndb.de> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: Boaz Harrosh <boaz@plexistor.com> Cc: Borislav Petkov <bp@alien8.de> Cc: Brian Gerst <brgerst@gmail.com> Cc: Dan Williams <dan.j.williams@intel.com> Cc: Dave Chinner <dchinner@redhat.com> Cc: Dave Hansen <dave.hansen@linux.intel.com> Cc: David Gibson <david@gibson.dropbear.id.au> Cc: Denys Vlasenko <dvlasenk@redhat.com> Cc: Dominik Dingel <dingel@linux.vnet.ibm.com> Cc: Dominik Vogt <vogt@linux.vnet.ibm.com> Cc: Eric B Munson <emunson@akamai.com> Cc: Geliang Tang <geliangtang@163.com> Cc: Guan Xuetao <gxt@mprc.pku.edu.cn> Cc: H. Peter Anvin <hpa@zytor.com> Cc: Heiko Carstens <heiko.carstens@de.ibm.com> Cc: Hugh Dickins <hughd@google.com> Cc: Jan Kara <jack@suse.cz> Cc: Jason Low <jason.low2@hp.com> Cc: Jerome Marchand <jmarchan@redhat.com> Cc: Joerg Roedel <joro@8bytes.org> Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com> Cc: Konstantin Khlebnikov <koct9i@gmail.com> Cc: Laurent Dufour <ldufour@linux.vnet.ibm.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Martin Schwidefsky <schwidefsky@de.ibm.com> Cc: Matthew Wilcox <willy@linux.intel.com> Cc: Mel Gorman <mgorman@suse.de> Cc: Michael Ellerman <mpe@ellerman.id.au> Cc: Michal Hocko <mhocko@suse.com> Cc: Mikulas Patocka <mpatocka@redhat.com> Cc: Minchan Kim <minchan@kernel.org> Cc: Oleg Nesterov <oleg@redhat.com> Cc: Paul Mackerras <paulus@samba.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Rik van Riel <riel@redhat.com> Cc: Sasha Levin <sasha.levin@oracle.com> Cc: Shachar Raindel <raindel@mellanox.com> Cc: Vlastimil Babka <vbabka@suse.cz> Cc: Xie XiuQi <xiexiuqi@huawei.com> Cc: iommu@lists.linux-foundation.org Cc: linux-arch@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: linux-mm@kvack.org Cc: linux-s390@vger.kernel.org Cc: linuxppc-dev@lists.ozlabs.org Signed-off-by: Ingo Molnar <mingo@kernel.org>
1 parent 9d95b17 commit 1b2ee12

File tree

10 files changed

+33
-14
lines changed

10 files changed

+33
-14
lines changed

arch/powerpc/include/asm/mmu_context.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -148,7 +148,8 @@ static inline void arch_bprm_mm_init(struct mm_struct *mm,
148148
{
149149
}
150150

151-
static inline bool arch_vma_access_permitted(struct vm_area_struct *vma, bool write)
151+
static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
152+
bool write, bool foreign)
152153
{
153154
/* by default, allow everything */
154155
return true;

arch/s390/include/asm/mmu_context.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -130,7 +130,8 @@ static inline void arch_bprm_mm_init(struct mm_struct *mm,
130130
{
131131
}
132132

133-
static inline bool arch_vma_access_permitted(struct vm_area_struct *vma, bool write)
133+
static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
134+
bool write, bool foreign)
134135
{
135136
/* by default, allow everything */
136137
return true;

arch/unicore32/include/asm/mmu_context.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,8 @@ static inline void arch_bprm_mm_init(struct mm_struct *mm,
9797
{
9898
}
9999

100-
static inline bool arch_vma_access_permitted(struct vm_area_struct *vma, bool write)
100+
static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
101+
bool write, bool foreign)
101102
{
102103
/* by default, allow everything */
103104
return true;

arch/x86/include/asm/mmu_context.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -322,10 +322,11 @@ static inline bool vma_is_foreign(struct vm_area_struct *vma)
322322
return false;
323323
}
324324

325-
static inline bool arch_vma_access_permitted(struct vm_area_struct *vma, bool write)
325+
static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
326+
bool write, bool foreign)
326327
{
327328
/* allow access if the VMA is not one from this process */
328-
if (vma_is_foreign(vma))
329+
if (foreign || vma_is_foreign(vma))
329330
return true;
330331
return __pkru_allows_pkey(vma_pkey(vma), write);
331332
}

drivers/iommu/amd_iommu_v2.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -526,6 +526,7 @@ static void do_fault(struct work_struct *work)
526526
flags |= FAULT_FLAG_USER;
527527
if (fault->flags & PPR_FAULT_WRITE)
528528
flags |= FAULT_FLAG_WRITE;
529+
flags |= FAULT_FLAG_REMOTE;
529530

530531
down_read(&mm->mmap_sem);
531532
vma = find_extend_vma(mm, address);

include/asm-generic/mm_hooks.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,8 @@ static inline void arch_bprm_mm_init(struct mm_struct *mm,
2626
{
2727
}
2828

29-
static inline bool arch_vma_access_permitted(struct vm_area_struct *vma, bool write)
29+
static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
30+
bool write, bool foreign)
3031
{
3132
/* by default, allow everything */
3233
return true;

include/linux/mm.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -251,6 +251,7 @@ extern pgprot_t protection_map[16];
251251
#define FAULT_FLAG_KILLABLE 0x10 /* The fault task is in SIGKILL killable region */
252252
#define FAULT_FLAG_TRIED 0x20 /* Second try */
253253
#define FAULT_FLAG_USER 0x40 /* The fault originated in userspace */
254+
#define FAULT_FLAG_REMOTE 0x80 /* faulting for non current tsk/mm */
254255

255256
/*
256257
* vm_fault is filled by the the pagefault handler and passed to the vma's

mm/gup.c

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -365,6 +365,8 @@ static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma,
365365
return -ENOENT;
366366
if (*flags & FOLL_WRITE)
367367
fault_flags |= FAULT_FLAG_WRITE;
368+
if (*flags & FOLL_REMOTE)
369+
fault_flags |= FAULT_FLAG_REMOTE;
368370
if (nonblocking)
369371
fault_flags |= FAULT_FLAG_ALLOW_RETRY;
370372
if (*flags & FOLL_NOWAIT)
@@ -415,11 +417,13 @@ static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma,
415417
static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags)
416418
{
417419
vm_flags_t vm_flags = vma->vm_flags;
420+
int write = (gup_flags & FOLL_WRITE);
421+
int foreign = (gup_flags & FOLL_REMOTE);
418422

419423
if (vm_flags & (VM_IO | VM_PFNMAP))
420424
return -EFAULT;
421425

422-
if (gup_flags & FOLL_WRITE) {
426+
if (write) {
423427
if (!(vm_flags & VM_WRITE)) {
424428
if (!(gup_flags & FOLL_FORCE))
425429
return -EFAULT;
@@ -445,7 +449,7 @@ static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags)
445449
if (!(vm_flags & VM_MAYREAD))
446450
return -EFAULT;
447451
}
448-
if (!arch_vma_access_permitted(vma, (gup_flags & FOLL_WRITE)))
452+
if (!arch_vma_access_permitted(vma, write, foreign))
449453
return -EFAULT;
450454
return 0;
451455
}
@@ -615,17 +619,18 @@ EXPORT_SYMBOL(__get_user_pages);
615619

616620
bool vma_permits_fault(struct vm_area_struct *vma, unsigned int fault_flags)
617621
{
618-
bool write = !!(fault_flags & FAULT_FLAG_WRITE);
622+
bool write = !!(fault_flags & FAULT_FLAG_WRITE);
623+
bool foreign = !!(fault_flags & FAULT_FLAG_REMOTE);
619624
vm_flags_t vm_flags = write ? VM_WRITE : VM_READ;
620625

621626
if (!(vm_flags & vma->vm_flags))
622627
return false;
623628

624629
/*
625630
* The architecture might have a hardware protection
626-
* mechanism other than read/write that can deny access
631+
* mechanism other than read/write that can deny access.
627632
*/
628-
if (!arch_vma_access_permitted(vma, write))
633+
if (!arch_vma_access_permitted(vma, write, foreign))
629634
return false;
630635

631636
return true;

mm/ksm.c

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -359,6 +359,10 @@ static inline bool ksm_test_exit(struct mm_struct *mm)
359359
* in case the application has unmapped and remapped mm,addr meanwhile.
360360
* Could a ksm page appear anywhere else? Actually yes, in a VM_PFNMAP
361361
* mmap of /dev/mem or /dev/kmem, where we would not want to touch it.
362+
*
363+
* FAULT_FLAG/FOLL_REMOTE are because we do this outside the context
364+
* of the process that owns 'vma'. We also do not want to enforce
365+
* protection keys here anyway.
362366
*/
363367
static int break_ksm(struct vm_area_struct *vma, unsigned long addr)
364368
{
@@ -367,12 +371,14 @@ static int break_ksm(struct vm_area_struct *vma, unsigned long addr)
367371

368372
do {
369373
cond_resched();
370-
page = follow_page(vma, addr, FOLL_GET | FOLL_MIGRATION);
374+
page = follow_page(vma, addr,
375+
FOLL_GET | FOLL_MIGRATION | FOLL_REMOTE);
371376
if (IS_ERR_OR_NULL(page))
372377
break;
373378
if (PageKsm(page))
374379
ret = handle_mm_fault(vma->vm_mm, vma, addr,
375-
FAULT_FLAG_WRITE);
380+
FAULT_FLAG_WRITE |
381+
FAULT_FLAG_REMOTE);
376382
else
377383
ret = VM_FAULT_WRITE;
378384
put_page(page);

mm/memory.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3379,7 +3379,8 @@ static int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
33793379
pmd_t *pmd;
33803380
pte_t *pte;
33813381

3382-
if (!arch_vma_access_permitted(vma, flags & FAULT_FLAG_WRITE))
3382+
if (!arch_vma_access_permitted(vma, flags & FAULT_FLAG_WRITE,
3383+
flags & FAULT_FLAG_REMOTE))
33833384
return VM_FAULT_SIGSEGV;
33843385

33853386
if (unlikely(is_vm_hugetlb_page(vma)))

0 commit comments

Comments
 (0)