Skip to content

Commit 8eb8284

Browse files
dwindsor authored and kees committed
usercopy: Prepare for usercopy whitelisting
This patch prepares the slab allocator to handle caches having annotations (useroffset and usersize) defining usercopy regions. This patch is modified from Brad Spengler/PaX Team's PAX_USERCOPY whitelisting code in the last public patch of grsecurity/PaX based on my understanding of the code. Changes or omissions from the original code are mine and don't reflect the original grsecurity/PaX code. Currently, hardened usercopy performs dynamic bounds checking on slab cache objects. This is good, but still leaves a lot of kernel memory available to be copied to/from userspace in the face of bugs. To further restrict what memory is available for copying, this creates a way to whitelist specific areas of a given slab cache object for copying to/from userspace, allowing much finer granularity of access control. Slab caches that are never exposed to userspace can declare no whitelist for their objects, thereby keeping them unavailable to userspace via dynamic copy operations. (Note, an implicit form of whitelisting is the use of constant sizes in usercopy operations and get_user()/put_user(); these bypass hardened usercopy checks since these sizes cannot change at runtime.) To support this whitelist annotation, usercopy region offset and size members are added to struct kmem_cache. The slab allocator receives a new function, kmem_cache_create_usercopy(), that creates a new cache with a usercopy region defined, suitable for declaring spans of fields within the objects that get copied to/from userspace. In this patch, the default kmem_cache_create() marks the entire allocation as whitelisted, leaving it semantically unchanged. Once all fine-grained whitelists have been added (in subsequent patches), this will be changed to a usersize of 0, making caches created with kmem_cache_create() not copyable to/from userspace. 
After the entire usercopy whitelist series is applied, less than 15%
of the slab cache memory remains exposed to potential usercopy bugs
after a fresh boot:

Total Slab Memory:           48074720
Usercopyable Memory:          6367532  13.2%

         task_struct                    0.2%         4480/1630720
         RAW                            0.3%          300/96000
         RAWv6                          2.1%         1408/64768
         ext4_inode_cache               3.0%       269760/8740224
         dentry                        11.1%       585984/5273856
         mm_struct                     29.1%        54912/188448
         kmalloc-8                    100.0%        24576/24576
         kmalloc-16                   100.0%        28672/28672
         kmalloc-32                   100.0%        81920/81920
         kmalloc-192                  100.0%        96768/96768
         kmalloc-128                  100.0%       143360/143360
         names_cache                  100.0%       163840/163840
         kmalloc-64                   100.0%       167936/167936
         kmalloc-256                  100.0%       339968/339968
         kmalloc-512                  100.0%       350720/350720
         kmalloc-96                   100.0%       455616/455616
         kmalloc-8192                 100.0%       655360/655360
         kmalloc-1024                 100.0%       812032/812032
         kmalloc-4096                 100.0%       819200/819200
         kmalloc-2048                 100.0%      1310720/1310720

After some kernel build workloads, the percentage (mainly driven by
dentry and inode caches expanding) drops under 10%:

Total Slab Memory:           95516184
Usercopyable Memory:          8497452   8.8%

         task_struct                    0.2%         4000/1456000
         RAW                            0.3%          300/96000
         RAWv6                          2.1%         1408/64768
         ext4_inode_cache               3.0%      1217280/39439872
         dentry                        11.1%      1623200/14608800
         mm_struct                     29.1%        73216/251264
         kmalloc-8                    100.0%        24576/24576
         kmalloc-16                   100.0%        28672/28672
         kmalloc-32                   100.0%        94208/94208
         kmalloc-192                  100.0%        96768/96768
         kmalloc-128                  100.0%       143360/143360
         names_cache                  100.0%       163840/163840
         kmalloc-64                   100.0%       245760/245760
         kmalloc-256                  100.0%       339968/339968
         kmalloc-512                  100.0%       350720/350720
         kmalloc-96                   100.0%       563520/563520
         kmalloc-8192                 100.0%       655360/655360
         kmalloc-1024                 100.0%       794624/794624
         kmalloc-4096                 100.0%       819200/819200
         kmalloc-2048                 100.0%      1257472/1257472

Signed-off-by: David Windsor <dave@nullcore.net>
[kees: adjust commit log, split out a few extra kmalloc hunks]
[kees: add field names to function declarations]
[kees: convert BUGs to WARNs and fail closed]
[kees: add attack surface reduction analysis to commit log]
Cc: Pekka Enberg <penberg@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: linux-mm@kvack.org
Cc: linux-xfs@vger.kernel.org
Signed-off-by: Kees Cook <keescook@chromium.org>
Acked-by: Christoph Lameter <cl@linux.com>
1 parent 4229a47 commit 8eb8284

File tree

7 files changed

+79
-18
lines changed

7 files changed

+79
-18
lines changed

include/linux/slab.h

Lines changed: 21 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -135,9 +135,13 @@ struct mem_cgroup;
135135
void __init kmem_cache_init(void);
136136
bool slab_is_available(void);
137137

138-
struct kmem_cache *kmem_cache_create(const char *, size_t, size_t,
139-
slab_flags_t,
140-
void (*)(void *));
138+
struct kmem_cache *kmem_cache_create(const char *name, size_t size,
139+
size_t align, slab_flags_t flags,
140+
void (*ctor)(void *));
141+
struct kmem_cache *kmem_cache_create_usercopy(const char *name,
142+
size_t size, size_t align, slab_flags_t flags,
143+
size_t useroffset, size_t usersize,
144+
void (*ctor)(void *));
141145
void kmem_cache_destroy(struct kmem_cache *);
142146
int kmem_cache_shrink(struct kmem_cache *);
143147

@@ -153,9 +157,20 @@ void memcg_destroy_kmem_caches(struct mem_cgroup *);
153157
* f.e. add ____cacheline_aligned_in_smp to the struct declaration
154158
* then the objects will be properly aligned in SMP configurations.
155159
*/
156-
#define KMEM_CACHE(__struct, __flags) kmem_cache_create(#__struct,\
157-
sizeof(struct __struct), __alignof__(struct __struct),\
158-
(__flags), NULL)
160+
#define KMEM_CACHE(__struct, __flags) \
161+
kmem_cache_create(#__struct, sizeof(struct __struct), \
162+
__alignof__(struct __struct), (__flags), NULL)
163+
164+
/*
165+
* To whitelist a single field for copying to/from usercopy, use this
166+
* macro instead for KMEM_CACHE() above.
167+
*/
168+
#define KMEM_CACHE_USERCOPY(__struct, __flags, __field) \
169+
kmem_cache_create_usercopy(#__struct, \
170+
sizeof(struct __struct), \
171+
__alignof__(struct __struct), (__flags), \
172+
offsetof(struct __struct, __field), \
173+
sizeof_field(struct __struct, __field), NULL)
159174

160175
/*
161176
* Common kmalloc functions provided by all allocators

include/linux/slab_def.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,9 @@ struct kmem_cache {
8585
unsigned int *random_seq;
8686
#endif
8787

88+
size_t useroffset; /* Usercopy region offset */
89+
size_t usersize; /* Usercopy region size */
90+
8891
struct kmem_cache_node *node[MAX_NUMNODES];
8992
};
9093

include/linux/slub_def.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,9 @@ struct kmem_cache {
135135
struct kasan_cache kasan_info;
136136
#endif
137137

138+
size_t useroffset; /* Usercopy region offset */
139+
size_t usersize; /* Usercopy region size */
140+
138141
struct kmem_cache_node *node[MAX_NUMNODES];
139142
};
140143

mm/slab.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1281,7 +1281,7 @@ void __init kmem_cache_init(void)
12811281
create_boot_cache(kmem_cache, "kmem_cache",
12821282
offsetof(struct kmem_cache, node) +
12831283
nr_node_ids * sizeof(struct kmem_cache_node *),
1284-
SLAB_HWCACHE_ALIGN);
1284+
SLAB_HWCACHE_ALIGN, 0, 0);
12851285
list_add(&kmem_cache->list, &slab_caches);
12861286
slab_state = PARTIAL;
12871287

mm/slab.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@ struct kmem_cache {
2222
unsigned int size; /* The aligned/padded/added on size */
2323
unsigned int align; /* Alignment as calculated */
2424
slab_flags_t flags; /* Active flags on the slab */
25+
size_t useroffset; /* Usercopy region offset */
26+
size_t usersize; /* Usercopy region size */
2527
const char *name; /* Slab name for sysfs */
2628
int refcount; /* Use counter */
2729
void (*ctor)(void *); /* Called on object slot creation */
@@ -97,7 +99,8 @@ int __kmem_cache_create(struct kmem_cache *, slab_flags_t flags);
9799
extern struct kmem_cache *create_kmalloc_cache(const char *name, size_t size,
98100
slab_flags_t flags);
99101
extern void create_boot_cache(struct kmem_cache *, const char *name,
100-
size_t size, slab_flags_t flags);
102+
size_t size, slab_flags_t flags, size_t useroffset,
103+
size_t usersize);
101104

102105
int slab_unmergeable(struct kmem_cache *s);
103106
struct kmem_cache *find_mergeable(size_t size, size_t align,

mm/slab_common.c

Lines changed: 38 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -281,6 +281,9 @@ int slab_unmergeable(struct kmem_cache *s)
281281
if (s->ctor)
282282
return 1;
283283

284+
if (s->usersize)
285+
return 1;
286+
284287
/*
285288
* We may have set a slab to be unmergeable during bootstrap.
286289
*/
@@ -366,12 +369,16 @@ unsigned long calculate_alignment(slab_flags_t flags,
366369

367370
static struct kmem_cache *create_cache(const char *name,
368371
size_t object_size, size_t size, size_t align,
369-
slab_flags_t flags, void (*ctor)(void *),
372+
slab_flags_t flags, size_t useroffset,
373+
size_t usersize, void (*ctor)(void *),
370374
struct mem_cgroup *memcg, struct kmem_cache *root_cache)
371375
{
372376
struct kmem_cache *s;
373377
int err;
374378

379+
if (WARN_ON(useroffset + usersize > object_size))
380+
useroffset = usersize = 0;
381+
375382
err = -ENOMEM;
376383
s = kmem_cache_zalloc(kmem_cache, GFP_KERNEL);
377384
if (!s)
@@ -382,6 +389,8 @@ static struct kmem_cache *create_cache(const char *name,
382389
s->size = size;
383390
s->align = align;
384391
s->ctor = ctor;
392+
s->useroffset = useroffset;
393+
s->usersize = usersize;
385394

386395
err = init_memcg_params(s, memcg, root_cache);
387396
if (err)
@@ -406,11 +415,13 @@ static struct kmem_cache *create_cache(const char *name,
406415
}
407416

408417
/*
409-
* kmem_cache_create - Create a cache.
418+
* kmem_cache_create_usercopy - Create a cache.
410419
* @name: A string which is used in /proc/slabinfo to identify this cache.
411420
* @size: The size of objects to be created in this cache.
412421
* @align: The required alignment for the objects.
413422
* @flags: SLAB flags
423+
* @useroffset: Usercopy region offset
424+
* @usersize: Usercopy region size
414425
* @ctor: A constructor for the objects.
415426
*
416427
* Returns a ptr to the cache on success, NULL on failure.
@@ -430,8 +441,9 @@ static struct kmem_cache *create_cache(const char *name,
430441
* as davem.
431442
*/
432443
struct kmem_cache *
433-
kmem_cache_create(const char *name, size_t size, size_t align,
434-
slab_flags_t flags, void (*ctor)(void *))
444+
kmem_cache_create_usercopy(const char *name, size_t size, size_t align,
445+
slab_flags_t flags, size_t useroffset, size_t usersize,
446+
void (*ctor)(void *))
435447
{
436448
struct kmem_cache *s = NULL;
437449
const char *cache_name;
@@ -462,7 +474,13 @@ kmem_cache_create(const char *name, size_t size, size_t align,
462474
*/
463475
flags &= CACHE_CREATE_MASK;
464476

465-
s = __kmem_cache_alias(name, size, align, flags, ctor);
477+
/* Fail closed on bad usersize of useroffset values. */
478+
if (WARN_ON(!usersize && useroffset) ||
479+
WARN_ON(size < usersize || size - usersize < useroffset))
480+
usersize = useroffset = 0;
481+
482+
if (!usersize)
483+
s = __kmem_cache_alias(name, size, align, flags, ctor);
466484
if (s)
467485
goto out_unlock;
468486

@@ -474,7 +492,7 @@ kmem_cache_create(const char *name, size_t size, size_t align,
474492

475493
s = create_cache(cache_name, size, size,
476494
calculate_alignment(flags, align, size),
477-
flags, ctor, NULL, NULL);
495+
flags, useroffset, usersize, ctor, NULL, NULL);
478496
if (IS_ERR(s)) {
479497
err = PTR_ERR(s);
480498
kfree_const(cache_name);
@@ -500,6 +518,15 @@ kmem_cache_create(const char *name, size_t size, size_t align,
500518
}
501519
return s;
502520
}
521+
EXPORT_SYMBOL(kmem_cache_create_usercopy);
522+
523+
struct kmem_cache *
524+
kmem_cache_create(const char *name, size_t size, size_t align,
525+
slab_flags_t flags, void (*ctor)(void *))
526+
{
527+
return kmem_cache_create_usercopy(name, size, align, flags, 0, size,
528+
ctor);
529+
}
503530
EXPORT_SYMBOL(kmem_cache_create);
504531

505532
static void slab_caches_to_rcu_destroy_workfn(struct work_struct *work)
@@ -612,6 +639,7 @@ void memcg_create_kmem_cache(struct mem_cgroup *memcg,
612639
s = create_cache(cache_name, root_cache->object_size,
613640
root_cache->size, root_cache->align,
614641
root_cache->flags & CACHE_CREATE_MASK,
642+
root_cache->useroffset, root_cache->usersize,
615643
root_cache->ctor, memcg, root_cache);
616644
/*
617645
* If we could not create a memcg cache, do not complain, because
@@ -879,13 +907,15 @@ bool slab_is_available(void)
879907
#ifndef CONFIG_SLOB
880908
/* Create a cache during boot when no slab services are available yet */
881909
void __init create_boot_cache(struct kmem_cache *s, const char *name, size_t size,
882-
slab_flags_t flags)
910+
slab_flags_t flags, size_t useroffset, size_t usersize)
883911
{
884912
int err;
885913

886914
s->name = name;
887915
s->size = s->object_size = size;
888916
s->align = calculate_alignment(flags, ARCH_KMALLOC_MINALIGN, size);
917+
s->useroffset = useroffset;
918+
s->usersize = usersize;
889919

890920
slab_init_memcg_params(s);
891921

@@ -906,7 +936,7 @@ struct kmem_cache *__init create_kmalloc_cache(const char *name, size_t size,
906936
if (!s)
907937
panic("Out of memory when creating slab %s\n", name);
908938

909-
create_boot_cache(s, name, size, flags);
939+
create_boot_cache(s, name, size, flags, 0, size);
910940
list_add(&s->list, &slab_caches);
911941
memcg_link_cache(s);
912942
s->refcount = 1;

mm/slub.c

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4183,7 +4183,7 @@ void __init kmem_cache_init(void)
41834183
kmem_cache = &boot_kmem_cache;
41844184

41854185
create_boot_cache(kmem_cache_node, "kmem_cache_node",
4186-
sizeof(struct kmem_cache_node), SLAB_HWCACHE_ALIGN);
4186+
sizeof(struct kmem_cache_node), SLAB_HWCACHE_ALIGN, 0, 0);
41874187

41884188
register_hotmemory_notifier(&slab_memory_callback_nb);
41894189

@@ -4193,7 +4193,7 @@ void __init kmem_cache_init(void)
41934193
create_boot_cache(kmem_cache, "kmem_cache",
41944194
offsetof(struct kmem_cache, node) +
41954195
nr_node_ids * sizeof(struct kmem_cache_node *),
4196-
SLAB_HWCACHE_ALIGN);
4196+
SLAB_HWCACHE_ALIGN, 0, 0);
41974197

41984198
kmem_cache = bootstrap(&boot_kmem_cache);
41994199

@@ -5063,6 +5063,12 @@ static ssize_t cache_dma_show(struct kmem_cache *s, char *buf)
50635063
SLAB_ATTR_RO(cache_dma);
50645064
#endif
50655065

5066+
static ssize_t usersize_show(struct kmem_cache *s, char *buf)
5067+
{
5068+
return sprintf(buf, "%zu\n", s->usersize);
5069+
}
5070+
SLAB_ATTR_RO(usersize);
5071+
50665072
static ssize_t destroy_by_rcu_show(struct kmem_cache *s, char *buf)
50675073
{
50685074
return sprintf(buf, "%d\n", !!(s->flags & SLAB_TYPESAFE_BY_RCU));
@@ -5437,6 +5443,7 @@ static struct attribute *slab_attrs[] = {
54375443
#ifdef CONFIG_FAILSLAB
54385444
&failslab_attr.attr,
54395445
#endif
5446+
&usersize_attr.attr,
54405447

54415448
NULL
54425449
};

0 commit comments

Comments
 (0)