Commit 33a709b

hansendc authored and Ingo Molnar committed
mm/gup, x86/mm/pkeys: Check VMAs and PTEs for protection keys
Today, for normal faults and page table walks, we check the VMA and/or PTE to ensure that it is compatible with the action.  For instance, if we get a write fault on a non-writeable VMA, we SIGSEGV.

We try to do the same thing for protection keys.  Basically, we try to make sure that if a user does this:

        mprotect(ptr, size, PROT_NONE);
        *ptr = foo;

they see the same effects with protection keys when they do this:

        mprotect(ptr, size, PROT_READ|PROT_WRITE);
        set_pkey(ptr, size, 4);
        wrpkru(0xffffff3f); // access disable pkey 4
        *ptr = foo;

The state to do that checking is in the VMA, but we also sometimes have to do it on the page tables only, like when doing a get_user_pages_fast() where we have no VMA.

We add two functions and expose them to generic code:

        arch_pte_access_permitted(pte_flags, write)
        arch_vma_access_permitted(vma, write)

These are, of course, backed up in x86 arch code with checks against the PTE or VMA's protection key.

But, there are also cases where we do not want to respect protection keys.  When we ptrace(), for instance, we do not want to apply the tracer's PKRU permissions to the PTEs from the process being traced.

Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Alexey Kardashevskiy <aik@ozlabs.ru>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Boaz Harrosh <boaz@plexistor.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Dave Hansen <dave@sr71.net>
Cc: David Gibson <david@gibson.dropbear.id.au>
Cc: David Hildenbrand <dahi@linux.vnet.ibm.com>
Cc: David Vrabel <david.vrabel@citrix.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: Dominik Dingel <dingel@linux.vnet.ibm.com>
Cc: Dominik Vogt <vogt@linux.vnet.ibm.com>
Cc: Guan Xuetao <gxt@mprc.pku.edu.cn>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Jason Low <jason.low2@hp.com>
Cc: Jerome Marchand <jmarchan@redhat.com>
Cc: Juergen Gross <jgross@suse.com>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Laurent Dufour <ldufour@linux.vnet.ibm.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Matthew Wilcox <willy@linux.intel.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mikulas Patocka <mpatocka@redhat.com>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Sasha Levin <sasha.levin@oracle.com>
Cc: Shachar Raindel <raindel@mellanox.com>
Cc: Stephen Smalley <sds@tycho.nsa.gov>
Cc: Toshi Kani <toshi.kani@hpe.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: linux-arch@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Cc: linux-mm@kvack.org
Cc: linux-s390@vger.kernel.org
Cc: linuxppc-dev@lists.ozlabs.org
Link: http://lkml.kernel.org/r/20160212210219.14D5D715@viggo.jf.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
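To make the intended equivalence concrete, here is a minimal userspace sketch of the changelog's second scenario. It is illustrative only: wrpkru() is assumed to be a thin wrapper around the WRPKRU instruction, set_pkey() is the changelog's placeholder for whatever interface tags the mapping with pkey 4 (so it stays a comment here), and the PKRU value is copied verbatim from the text above (note that it access-disables every pkey except pkey 3, pkey 4 included).

#include <stdint.h>
#include <sys/mman.h>

/* Assumed helper: load EAX into PKRU via WRPKRU (ECX and EDX must be zero). */
static inline void wrpkru(uint32_t pkru)
{
        __asm__ __volatile__(".byte 0x0f,0x01,0xef"
                             : : "a" (pkru), "c" (0), "d" (0) : "memory");
}

int main(void)
{
        char *ptr = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
                         MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);

        /* Plain mprotect(): the commented-out write would SIGSEGV. */
        mprotect(ptr, 4096, PROT_NONE);
        /* *ptr = 1; */

        /* Protection keys: the goal is the same observable effect. */
        mprotect(ptr, 4096, PROT_READ | PROT_WRITE);
        /* set_pkey(ptr, 4096, 4);   hypothetical: tag the range with pkey 4 */
        wrpkru(0xffffff3f);          /* access disable pkey 4 */
        *ptr = 1;                    /* once tagged, this should also fault */

        return 0;
}

With the checks added by this series, that final write is reported as a protection-key fault (SEGV_PKUERR in the arch/x86/mm/fault.c hunk below) rather than being treated like an ordinary access error.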
1 parent 1874f68 commit 33a709b

10 files changed, +166 -4 lines changed

arch/powerpc/include/asm/mmu_context.h

Lines changed: 11 additions & 0 deletions
@@ -148,5 +148,16 @@ static inline void arch_bprm_mm_init(struct mm_struct *mm,
 {
 }
 
+static inline bool arch_vma_access_permitted(struct vm_area_struct *vma, bool write)
+{
+        /* by default, allow everything */
+        return true;
+}
+
+static inline bool arch_pte_access_permitted(pte_t pte, bool write)
+{
+        /* by default, allow everything */
+        return true;
+}
 #endif /* __KERNEL__ */
 #endif /* __ASM_POWERPC_MMU_CONTEXT_H */

arch/s390/include/asm/mmu_context.h

Lines changed: 11 additions & 0 deletions
@@ -130,4 +130,15 @@ static inline void arch_bprm_mm_init(struct mm_struct *mm,
 {
 }
 
+static inline bool arch_vma_access_permitted(struct vm_area_struct *vma, bool write)
+{
+        /* by default, allow everything */
+        return true;
+}
+
+static inline bool arch_pte_access_permitted(pte_t pte, bool write)
+{
+        /* by default, allow everything */
+        return true;
+}
 #endif /* __S390_MMU_CONTEXT_H */

arch/unicore32/include/asm/mmu_context.h

Lines changed: 11 additions & 0 deletions
@@ -97,4 +97,15 @@ static inline void arch_bprm_mm_init(struct mm_struct *mm,
 {
 }
 
+static inline bool arch_vma_access_permitted(struct vm_area_struct *vma, bool write)
+{
+        /* by default, allow everything */
+        return true;
+}
+
+static inline bool arch_pte_access_permitted(pte_t pte, bool write)
+{
+        /* by default, allow everything */
+        return true;
+}
 #endif

arch/x86/include/asm/mmu_context.h

Lines changed: 49 additions & 0 deletions
@@ -286,4 +286,53 @@ static inline int vma_pkey(struct vm_area_struct *vma)
         return pkey;
 }
 
+static inline bool __pkru_allows_pkey(u16 pkey, bool write)
+{
+        u32 pkru = read_pkru();
+
+        if (!__pkru_allows_read(pkru, pkey))
+                return false;
+        if (write && !__pkru_allows_write(pkru, pkey))
+                return false;
+
+        return true;
+}
+
+/*
+ * We only want to enforce protection keys on the current process
+ * because we effectively have no access to PKRU for other
+ * processes or any way to tell *which* PKRU in a threaded
+ * process we could use.
+ *
+ * So do not enforce things if the VMA is not from the current
+ * mm, or if we are in a kernel thread.
+ */
+static inline bool vma_is_foreign(struct vm_area_struct *vma)
+{
+        if (!current->mm)
+                return true;
+        /*
+         * Should PKRU be enforced on the access to this VMA?  If
+         * the VMA is from another process, then PKRU has no
+         * relevance and should not be enforced.
+         */
+        if (current->mm != vma->vm_mm)
+                return true;
+
+        return false;
+}
+
+static inline bool arch_vma_access_permitted(struct vm_area_struct *vma, bool write)
+{
+        /* allow access if the VMA is not one from this process */
+        if (vma_is_foreign(vma))
+                return true;
+        return __pkru_allows_pkey(vma_pkey(vma), write);
+}
+
+static inline bool arch_pte_access_permitted(pte_t pte, bool write)
+{
+        return __pkru_allows_pkey(pte_flags_pkey(pte_flags(pte)), write);
+}
+
 #endif /* _ASM_X86_MMU_CONTEXT_H */

arch/x86/include/asm/pgtable.h

Lines changed: 29 additions & 0 deletions
@@ -919,6 +919,35 @@ static inline pte_t pte_swp_clear_soft_dirty(pte_t pte)
 }
 #endif
 
+#define PKRU_AD_BIT 0x1
+#define PKRU_WD_BIT 0x2
+
+static inline bool __pkru_allows_read(u32 pkru, u16 pkey)
+{
+        int pkru_pkey_bits = pkey * 2;
+        return !(pkru & (PKRU_AD_BIT << pkru_pkey_bits));
+}
+
+static inline bool __pkru_allows_write(u32 pkru, u16 pkey)
+{
+        int pkru_pkey_bits = pkey * 2;
+        /*
+         * Access-disable disables writes too so we need to check
+         * both bits here.
+         */
+        return !(pkru & ((PKRU_AD_BIT|PKRU_WD_BIT) << pkru_pkey_bits));
+}
+
+static inline u16 pte_flags_pkey(unsigned long pte_flags)
+{
+#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
+        /* ifdef to avoid doing 59-bit shift on 32-bit values */
+        return (pte_flags & _PAGE_PKEY_MASK) >> _PAGE_BIT_PKEY_BIT0;
+#else
+        return 0;
+#endif
+}
+
 #include <asm-generic/pgtable.h>
 #endif /* __ASSEMBLY__ */
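As a worked example of the bit layout these helpers decode, the sketch below copies the same logic into a standalone userspace program (names shortened to drop the leading underscores) and evaluates it for pkey 4. It is purely illustrative and never reads the real PKRU register.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Same layout as above: each pkey owns two PKRU bits, Access-Disable at
 * bit 2*pkey and Write-Disable at bit 2*pkey + 1. */
#define PKRU_AD_BIT 0x1
#define PKRU_WD_BIT 0x2

static bool pkru_allows_read(uint32_t pkru, uint16_t pkey)
{
        int pkru_pkey_bits = pkey * 2;

        return !(pkru & (PKRU_AD_BIT << pkru_pkey_bits));
}

static bool pkru_allows_write(uint32_t pkru, uint16_t pkey)
{
        int pkru_pkey_bits = pkey * 2;

        /* Access-disable disables writes too, so check both bits. */
        return !(pkru & ((PKRU_AD_BIT | PKRU_WD_BIT) << pkru_pkey_bits));
}

int main(void)
{
        uint16_t pkey = 4;
        uint32_t wd_only = PKRU_WD_BIT << (pkey * 2); /* write-disable pkey 4 */
        uint32_t ad_only = PKRU_AD_BIT << (pkey * 2); /* access-disable pkey 4 */

        printf("WD: read=%d write=%d\n",
               pkru_allows_read(wd_only, pkey), pkru_allows_write(wd_only, pkey));
        printf("AD: read=%d write=%d\n",
               pkru_allows_read(ad_only, pkey), pkru_allows_write(ad_only, pkey));
        return 0;
}

Expected output is "WD: read=1 write=0" and "AD: read=0 write=0", matching the comment above: access-disable refuses writes as well as reads.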

arch/x86/mm/fault.c

Lines changed: 20 additions & 1 deletion
@@ -897,6 +897,16 @@ bad_area(struct pt_regs *regs, unsigned long error_code, unsigned long address)
         __bad_area(regs, error_code, address, NULL, SEGV_MAPERR);
 }
 
+static inline bool bad_area_access_from_pkeys(unsigned long error_code,
+                struct vm_area_struct *vma)
+{
+        if (!boot_cpu_has(X86_FEATURE_OSPKE))
+                return false;
+        if (error_code & PF_PK)
+                return true;
+        return false;
+}
+
 static noinline void
 bad_area_access_error(struct pt_regs *regs, unsigned long error_code,
                       unsigned long address, struct vm_area_struct *vma)
@@ -906,7 +916,7 @@ bad_area_access_error(struct pt_regs *regs, unsigned long error_code,
          * But, doing it this way allows compiler optimizations
          * if pkeys are compiled out.
          */
-        if (boot_cpu_has(X86_FEATURE_OSPKE) && (error_code & PF_PK))
+        if (bad_area_access_from_pkeys(error_code, vma))
                 __bad_area(regs, error_code, address, vma, SEGV_PKUERR);
         else
                 __bad_area(regs, error_code, address, vma, SEGV_ACCERR);
@@ -1081,6 +1091,15 @@ int show_unhandled_signals = 1;
 static inline int
 access_error(unsigned long error_code, struct vm_area_struct *vma)
 {
+        /*
+         * Access or read was blocked by protection keys.  We do
+         * this check before any others because we do not want
+         * to, for instance, confuse a protection-key-denied
+         * write with one for which we should do a COW.
+         */
+        if (error_code & PF_PK)
+                return 1;
+
         if (error_code & PF_WRITE) {
                 /* write, present and write, not present: */
                 if (unlikely(!(vma->vm_flags & VM_WRITE)))
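For completeness, a hedged userspace sketch of what the SEGV_PKUERR distinction above enables: a SIGSEGV handler can tell a protection-key denial apart from an ordinary access error by inspecting si_code. The fallback #define mirrors the kernel's uapi value and is only there in case older userspace headers lack it; fprintf() in a signal handler is fine for a demo but not async-signal-safe.

#define _GNU_SOURCE
#include <signal.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

#ifndef SEGV_PKUERR
#define SEGV_PKUERR 4 /* assumed to match the kernel uapi definition */
#endif

static void segv_handler(int sig, siginfo_t *si, void *ctx)
{
        if (si->si_code == SEGV_PKUERR)
                fprintf(stderr, "SIGSEGV: blocked by a protection key at %p\n",
                        si->si_addr);
        else
                fprintf(stderr, "SIGSEGV: ordinary access error at %p\n",
                        si->si_addr);
        _exit(1);
}

int main(void)
{
        struct sigaction sa;

        memset(&sa, 0, sizeof(sa));
        sa.sa_flags = SA_SIGINFO;
        sa.sa_sigaction = segv_handler;
        sigaction(SIGSEGV, &sa, NULL);

        /* ... touch pkey-protected memory here to trigger the handler ... */
        return 0;
}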

arch/x86/mm/gup.c

Lines changed: 5 additions & 0 deletions
@@ -11,6 +11,7 @@
 #include <linux/swap.h>
 #include <linux/memremap.h>
 
+#include <asm/mmu_context.h>
 #include <asm/pgtable.h>
 
 static inline pte_t gup_get_pte(pte_t *ptep)
@@ -89,6 +90,10 @@ static inline int pte_allows_gup(unsigned long pteval, int write)
         if ((pteval & need_pte_bits) != need_pte_bits)
                 return 0;
 
+        /* Check memory protection keys permissions. */
+        if (!__pkru_allows_pkey(pte_flags_pkey(pteval), write))
+                return 0;
+
         return 1;
 }

include/asm-generic/mm_hooks.h

Lines changed: 11 additions & 0 deletions
@@ -26,4 +26,15 @@ static inline void arch_bprm_mm_init(struct mm_struct *mm,
 {
 }
 
+static inline bool arch_vma_access_permitted(struct vm_area_struct *vma, bool write)
+{
+        /* by default, allow everything */
+        return true;
+}
+
+static inline bool arch_pte_access_permitted(pte_t pte, bool write)
+{
+        /* by default, allow everything */
+        return true;
+}
 #endif /* _ASM_GENERIC_MM_HOOKS_H */

mm/gup.c

Lines changed: 15 additions & 3 deletions
@@ -15,6 +15,7 @@
 #include <linux/rwsem.h>
 #include <linux/hugetlb.h>
 
+#include <asm/mmu_context.h>
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
 
@@ -444,6 +445,8 @@ static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags)
                 if (!(vm_flags & VM_MAYREAD))
                         return -EFAULT;
         }
+        if (!arch_vma_access_permitted(vma, (gup_flags & FOLL_WRITE)))
+                return -EFAULT;
         return 0;
 }
 
@@ -612,13 +615,19 @@ EXPORT_SYMBOL(__get_user_pages);
 
 bool vma_permits_fault(struct vm_area_struct *vma, unsigned int fault_flags)
 {
-        vm_flags_t vm_flags;
-
-        vm_flags = (fault_flags & FAULT_FLAG_WRITE) ? VM_WRITE : VM_READ;
+        bool write = !!(fault_flags & FAULT_FLAG_WRITE);
+        vm_flags_t vm_flags = write ? VM_WRITE : VM_READ;
 
         if (!(vm_flags & vma->vm_flags))
                 return false;
 
+        /*
+         * The architecture might have a hardware protection
+         * mechanism other than read/write that can deny access
+         */
+        if (!arch_vma_access_permitted(vma, write))
+                return false;
+
         return true;
 }
 
@@ -1172,6 +1181,9 @@ static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
                     pte_protnone(pte) || (write && !pte_write(pte)))
                         goto pte_unmap;
 
+                if (!arch_pte_access_permitted(pte, write))
+                        goto pte_unmap;
+
                 VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
                 page = pte_page(pte);
                 head = compound_head(page);

mm/memory.c

Lines changed: 4 additions & 0 deletions
@@ -65,6 +65,7 @@
 #include <linux/userfaultfd_k.h>
 
 #include <asm/io.h>
+#include <asm/mmu_context.h>
 #include <asm/pgalloc.h>
 #include <asm/uaccess.h>
 #include <asm/tlb.h>
@@ -3378,6 +3379,9 @@ static int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
         pmd_t *pmd;
         pte_t *pte;
 
+        if (!arch_vma_access_permitted(vma, flags & FAULT_FLAG_WRITE))
+                return VM_FAULT_SIGSEGV;
+
         if (unlikely(is_vm_hugetlb_page(vma)))
                 return hugetlb_fault(mm, vma, address, flags);
