Skip to content

Commit e8c24d3

Browse files
hansendcKAGA-KOKO
authored and committed
x86/pkeys: Allocation/free syscalls
This patch adds two new system calls: int pkey_alloc(unsigned long flags, unsigned long init_access_rights); int pkey_free(int pkey); These implement an "allocator" for the protection keys themselves, which can be thought of as analogous to the allocator that the kernel has for file descriptors. The kernel tracks which numbers are in use, and only allows operations on keys that are valid. A key which was not obtained by pkey_alloc() may not, for instance, be passed to pkey_mprotect(). These system calls are also very important given the kernel's use of pkeys to implement execute-only support. These help ensure that userspace can never assume that it has control of a key unless it first asks the kernel. The kernel does not promise to preserve PKRU (rights register) contents except for allocated pkeys. The 'init_access_rights' argument to pkey_alloc() specifies the rights that will be established for the returned pkey. For instance: pkey = pkey_alloc(flags, PKEY_DISABLE_WRITE); will allocate 'pkey', and will also set the bits in PKRU[1] such that writing to 'pkey' is already denied. The kernel does not prevent pkey_free() from successfully freeing in-use pkeys (those still assigned to a memory range by pkey_mprotect()). It would be expensive to implement the checks for this, so we instead say, "Just don't do it" since sane software will never do it anyway. Any piece of userspace calling pkey_alloc() needs to be prepared for it to fail. Why? pkey_alloc() returns the same error code (ENOSPC) when there are no pkeys left to allocate and when pkeys are unsupported. They can be unsupported for a whole host of reasons, so apps must be prepared for this. Also, libraries or LD_PRELOADs might steal keys before an application gets access to them. This allocation mechanism could be implemented in userspace. Even if we did it in userspace, we would still need additional user/kernel interfaces to tell userspace which keys are being used by the kernel internally (such as for execute-only mappings). 
Having the kernel provide this facility completely removes the need for these additional interfaces, or for having an implementation of this in userspace at all. Note that we have to make changes to all of the architectures that do not use mman-common.h because we use the new PKEY_DISABLE_ACCESS/WRITE macros in arch-independent code. [1] PKRU is the Protection Key Rights User register. It is a usermode-accessible register that controls whether writes and/or access to each individual pkey is allowed or denied. Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com> Acked-by: Mel Gorman <mgorman@techsingularity.net> Cc: linux-arch@vger.kernel.org Cc: Dave Hansen <dave@sr71.net> Cc: arnd@arndb.de Cc: linux-api@vger.kernel.org Cc: linux-mm@kvack.org Cc: luto@kernel.org Cc: akpm@linux-foundation.org Cc: torvalds@linux-foundation.org Link: http://lkml.kernel.org/r/20160729163015.444FE75F@viggo.jf.intel.com Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
1 parent a8502b6 commit e8c24d3

File tree

12 files changed

+221
-27
lines changed

12 files changed

+221
-27
lines changed

arch/alpha/include/uapi/asm/mman.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,4 +78,9 @@
7878
#define MAP_HUGE_SHIFT 26
7979
#define MAP_HUGE_MASK 0x3f
8080

81+
#define PKEY_DISABLE_ACCESS 0x1
82+
#define PKEY_DISABLE_WRITE 0x2
83+
#define PKEY_ACCESS_MASK (PKEY_DISABLE_ACCESS |\
84+
PKEY_DISABLE_WRITE)
85+
8186
#endif /* __ALPHA_MMAN_H__ */

arch/mips/include/uapi/asm/mman.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,4 +105,9 @@
105105
#define MAP_HUGE_SHIFT 26
106106
#define MAP_HUGE_MASK 0x3f
107107

108+
#define PKEY_DISABLE_ACCESS 0x1
109+
#define PKEY_DISABLE_WRITE 0x2
110+
#define PKEY_ACCESS_MASK (PKEY_DISABLE_ACCESS |\
111+
PKEY_DISABLE_WRITE)
112+
108113
#endif /* _ASM_MMAN_H */

arch/parisc/include/uapi/asm/mman.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,4 +75,9 @@
7575
#define MAP_HUGE_SHIFT 26
7676
#define MAP_HUGE_MASK 0x3f
7777

78+
#define PKEY_DISABLE_ACCESS 0x1
79+
#define PKEY_DISABLE_WRITE 0x2
80+
#define PKEY_ACCESS_MASK (PKEY_DISABLE_ACCESS |\
81+
PKEY_DISABLE_WRITE)
82+
7883
#endif /* __PARISC_MMAN_H__ */

arch/x86/include/asm/mmu.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,14 @@ typedef struct {
2323
const struct vdso_image *vdso_image; /* vdso image in use */
2424

2525
atomic_t perf_rdpmc_allowed; /* nonzero if rdpmc is allowed */
26+
#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
27+
/*
28+
* One bit per protection key says whether userspace can
29+
* use it or not. protected by mmap_sem.
30+
*/
31+
u16 pkey_allocation_map;
32+
s16 execute_only_pkey;
33+
#endif
2634
} mm_context_t;
2735

2836
#ifdef CONFIG_SMP

arch/x86/include/asm/mmu_context.h

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,16 @@ static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
108108
static inline int init_new_context(struct task_struct *tsk,
109109
struct mm_struct *mm)
110110
{
111+
#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
112+
if (cpu_feature_enabled(X86_FEATURE_OSPKE)) {
113+
/* pkey 0 is the default and always allocated */
114+
mm->context.pkey_allocation_map = 0x1;
115+
/* -1 means unallocated or invalid */
116+
mm->context.execute_only_pkey = -1;
117+
}
118+
#endif
111119
init_new_context_ldt(tsk, mm);
120+
112121
return 0;
113122
}
114123
static inline void destroy_context(struct mm_struct *mm)
@@ -263,5 +272,4 @@ static inline bool arch_pte_access_permitted(pte_t pte, bool write)
263272
{
264273
return __pkru_allows_pkey(pte_flags_pkey(pte_flags(pte)), write);
265274
}
266-
267275
#endif /* _ASM_X86_MMU_CONTEXT_H */

arch/x86/include/asm/pkeys.h

Lines changed: 67 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,7 @@
11
#ifndef _ASM_X86_PKEYS_H
22
#define _ASM_X86_PKEYS_H
33

4-
#define PKEY_DEDICATED_EXECUTE_ONLY 15
5-
/*
6-
* Consider the PKEY_DEDICATED_EXECUTE_ONLY key unavailable.
7-
*/
8-
#define arch_max_pkey() (boot_cpu_has(X86_FEATURE_OSPKE) ? \
9-
PKEY_DEDICATED_EXECUTE_ONLY : 1)
4+
#define arch_max_pkey() (boot_cpu_has(X86_FEATURE_OSPKE) ? 16 : 1)
105

116
extern int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
127
unsigned long init_val);
@@ -40,4 +35,70 @@ extern int __arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
4035

4136
#define ARCH_VM_PKEY_FLAGS (VM_PKEY_BIT0 | VM_PKEY_BIT1 | VM_PKEY_BIT2 | VM_PKEY_BIT3)
4237

38+
#define mm_pkey_allocation_map(mm) (mm->context.pkey_allocation_map)
39+
#define mm_set_pkey_allocated(mm, pkey) do { \
40+
mm_pkey_allocation_map(mm) |= (1U << pkey); \
41+
} while (0)
42+
#define mm_set_pkey_free(mm, pkey) do { \
43+
mm_pkey_allocation_map(mm) &= ~(1U << pkey); \
44+
} while (0)
45+
46+
static inline
47+
bool mm_pkey_is_allocated(struct mm_struct *mm, int pkey)
48+
{
49+
return mm_pkey_allocation_map(mm) & (1U << pkey);
50+
}
51+
52+
/*
53+
* Returns a positive, 4-bit key on success, or -1 on failure.
54+
*/
55+
static inline
56+
int mm_pkey_alloc(struct mm_struct *mm)
57+
{
58+
/*
59+
* Note: this is the one and only place we make sure
60+
* that the pkey is valid as far as the hardware is
61+
* concerned. The rest of the kernel trusts that
62+
* only good, valid pkeys come out of here.
63+
*/
64+
u16 all_pkeys_mask = ((1U << arch_max_pkey()) - 1);
65+
int ret;
66+
67+
/*
68+
* Are we out of pkeys? We must handle this specially
69+
* because ffz() behavior is undefined if there are no
70+
* zeros.
71+
*/
72+
if (mm_pkey_allocation_map(mm) == all_pkeys_mask)
73+
return -1;
74+
75+
ret = ffz(mm_pkey_allocation_map(mm));
76+
77+
mm_set_pkey_allocated(mm, ret);
78+
79+
return ret;
80+
}
81+
82+
static inline
83+
int mm_pkey_free(struct mm_struct *mm, int pkey)
84+
{
85+
/*
86+
* pkey 0 is special, always allocated and can never
87+
* be freed.
88+
*/
89+
if (!pkey)
90+
return -EINVAL;
91+
if (!mm_pkey_is_allocated(mm, pkey))
92+
return -EINVAL;
93+
94+
mm_set_pkey_free(mm, pkey);
95+
96+
return 0;
97+
}
98+
99+
extern int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
100+
unsigned long init_val);
101+
extern int __arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
102+
unsigned long init_val);
103+
43104
#endif /*_ASM_X86_PKEYS_H */

arch/x86/kernel/fpu/xstate.c

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
*/
66
#include <linux/compat.h>
77
#include <linux/cpu.h>
8+
#include <linux/mman.h>
89
#include <linux/pkeys.h>
910

1011
#include <asm/fpu/api.h>
@@ -866,9 +867,10 @@ const void *get_xsave_field_ptr(int xsave_state)
866867
return get_xsave_addr(&fpu->state.xsave, xsave_state);
867868
}
868869

870+
#ifdef CONFIG_ARCH_HAS_PKEYS
871+
869872
#define NR_VALID_PKRU_BITS (CONFIG_NR_PROTECTION_KEYS * 2)
870873
#define PKRU_VALID_MASK (NR_VALID_PKRU_BITS - 1)
871-
872874
/*
873875
* This will go out and modify PKRU register to set the access
874876
* rights for @pkey to @init_val.
@@ -914,6 +916,7 @@ int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
914916

915917
return 0;
916918
}
919+
#endif /* CONFIG_ARCH_HAS_PKEYS */
917920

918921
/*
919922
* This is similar to user_regset_copyout(), but will not add offset to

arch/x86/mm/pkeys.c

Lines changed: 30 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,19 @@
2121

2222
int __execute_only_pkey(struct mm_struct *mm)
2323
{
24+
bool need_to_set_mm_pkey = false;
25+
int execute_only_pkey = mm->context.execute_only_pkey;
2426
int ret;
2527

28+
/* Do we need to assign a pkey for mm's execute-only maps? */
29+
if (execute_only_pkey == -1) {
30+
/* Go allocate one to use, which might fail */
31+
execute_only_pkey = mm_pkey_alloc(mm);
32+
if (execute_only_pkey < 0)
33+
return -1;
34+
need_to_set_mm_pkey = true;
35+
}
36+
2637
/*
2738
* We do not want to go through the relatively costly
2839
* dance to set PKRU if we do not need to. Check it
@@ -32,30 +43,41 @@ int __execute_only_pkey(struct mm_struct *mm)
3243
* can make fpregs inactive.
3344
*/
3445
preempt_disable();
35-
if (fpregs_active() &&
36-
!__pkru_allows_read(read_pkru(), PKEY_DEDICATED_EXECUTE_ONLY)) {
46+
if (!need_to_set_mm_pkey &&
47+
fpregs_active() &&
48+
!__pkru_allows_read(read_pkru(), execute_only_pkey)) {
3749
preempt_enable();
38-
return PKEY_DEDICATED_EXECUTE_ONLY;
50+
return execute_only_pkey;
3951
}
4052
preempt_enable();
41-
ret = arch_set_user_pkey_access(current, PKEY_DEDICATED_EXECUTE_ONLY,
53+
54+
/*
55+
* Set up PKRU so that it denies access for everything
56+
* other than execution.
57+
*/
58+
ret = arch_set_user_pkey_access(current, execute_only_pkey,
4259
PKEY_DISABLE_ACCESS);
4360
/*
4461
* If the PKRU-set operation failed somehow, just return
4562
* -1 and effectively disable execute-only support.
4663
*/
47-
if (ret)
48-
return 0;
64+
if (ret) {
65+
mm_set_pkey_free(mm, execute_only_pkey);
66+
return -1;
67+
}
4968

50-
return PKEY_DEDICATED_EXECUTE_ONLY;
69+
/* We got one, store it and use it from here on out */
70+
if (need_to_set_mm_pkey)
71+
mm->context.execute_only_pkey = execute_only_pkey;
72+
return execute_only_pkey;
5173
}
5274

5375
static inline bool vma_is_pkey_exec_only(struct vm_area_struct *vma)
5476
{
5577
/* Do this check first since the vm_flags should be hot */
5678
if ((vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC)) != VM_EXEC)
5779
return false;
58-
if (vma_pkey(vma) != PKEY_DEDICATED_EXECUTE_ONLY)
80+
if (vma_pkey(vma) != vma->vm_mm->context.execute_only_pkey)
5981
return false;
6082

6183
return true;

arch/xtensa/include/uapi/asm/mman.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,4 +117,9 @@
117117
#define MAP_HUGE_SHIFT 26
118118
#define MAP_HUGE_MASK 0x3f
119119

120+
#define PKEY_DISABLE_ACCESS 0x1
121+
#define PKEY_DISABLE_WRITE 0x2
122+
#define PKEY_ACCESS_MASK (PKEY_DISABLE_ACCESS |\
123+
PKEY_DISABLE_WRITE)
124+
120125
#endif /* _XTENSA_MMAN_H */

include/linux/pkeys.h

Lines changed: 23 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,6 @@
44
#include <linux/mm_types.h>
55
#include <asm/mmu_context.h>
66

7-
#define PKEY_DISABLE_ACCESS 0x1
8-
#define PKEY_DISABLE_WRITE 0x2
9-
#define PKEY_ACCESS_MASK (PKEY_DISABLE_ACCESS |\
10-
PKEY_DISABLE_WRITE)
11-
127
#ifdef CONFIG_ARCH_HAS_PKEYS
138
#include <asm/pkeys.h>
149
#else /* ! CONFIG_ARCH_HAS_PKEYS */
@@ -17,6 +12,29 @@
1712
#define arch_override_mprotect_pkey(vma, prot, pkey) (0)
1813
#define PKEY_DEDICATED_EXECUTE_ONLY 0
1914
#define ARCH_VM_PKEY_FLAGS 0
15+
16+
static inline bool mm_pkey_is_allocated(struct mm_struct *mm, int pkey)
17+
{
18+
return (pkey == 0);
19+
}
20+
21+
static inline int mm_pkey_alloc(struct mm_struct *mm)
22+
{
23+
return -1;
24+
}
25+
26+
static inline int mm_pkey_free(struct mm_struct *mm, int pkey)
27+
{
28+
WARN_ONCE(1, "free of protection key when disabled");
29+
return -EINVAL;
30+
}
31+
32+
static inline int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
33+
unsigned long init_val)
34+
{
35+
return 0;
36+
}
37+
2038
#endif /* ! CONFIG_ARCH_HAS_PKEYS */
2139

2240
#endif /* _LINUX_PKEYS_H */

include/uapi/asm-generic/mman-common.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,4 +72,9 @@
7272
#define MAP_HUGE_SHIFT 26
7373
#define MAP_HUGE_MASK 0x3f
7474

75+
#define PKEY_DISABLE_ACCESS 0x1
76+
#define PKEY_DISABLE_WRITE 0x2
77+
#define PKEY_ACCESS_MASK (PKEY_DISABLE_ACCESS |\
78+
PKEY_DISABLE_WRITE)
79+
7580
#endif /* __ASM_GENERIC_MMAN_COMMON_H */

0 commit comments

Comments
 (0)