anolis: kfence: support kfence pool per numa node
OpenAnolis Bug Tracker: 0000278

The original KFENCE is designed for high efficiency, so its pool size is small. Since we enlarged the maximum value of num_objects to the million level, it is necessary to support NUMA. To avoid confusion, the boot command-line parameter kfence.num_objects is renamed to kfence.num_objects_pernode.

Signed-off-by: Tianchen Ding <dtcccc@linux.alibaba.com>
Reviewed-by: Xunlei Pang <xlpang@linux.alibaba.com>
commit c6d8e4ce9b
parent 1b171f3246
@@ -55,6 +55,11 @@ The total memory dedicated to the KFENCE memory pool can be computed as::
 
 Using the default config, and assuming a page size of 4 KiB, results in
 dedicating 2 MiB to the KFENCE memory pool.
 
+You can change the KFENCE memory pool size by setting ``kfence.num_objects_pernode``
+on the boot command line, and the pool size of each node will be computed and updated
+in the same way as above. This value can be set very large, so please be careful
+not to use up all memory.
+
 Note: On architectures that support huge pages, KFENCE will ensure that the
 pool is using pages of size ``PAGE_SIZE``. This will result in additional page
 tables being allocated.
@@ -10,7 +10,7 @@
 
 #include <asm/cacheflush.h>
 
-static inline bool arch_kfence_init_pool(void) { return true; }
+static inline bool arch_kfence_init_pool(int node) { return true; }
 
 static inline bool kfence_protect_page(unsigned long addr, bool protect)
 {
@@ -19,11 +19,12 @@
 #include <asm/tlbflush.h>
 
 /* Force 4K pages for __kfence_pool. */
-static inline bool arch_kfence_init_pool(void)
+static inline bool arch_kfence_init_pool(int node)
 {
 	unsigned long addr;
+	char *__kfence_pool = __kfence_pool_node[node];
 
-	for (addr = (unsigned long)__kfence_pool; is_kfence_address((void *)addr);
+	for (addr = (unsigned long)__kfence_pool; is_kfence_address_node((void *)addr, node);
 	     addr += PAGE_SIZE) {
 		unsigned int level;
 
@@ -21,7 +21,7 @@
  */
 #define KFENCE_POOL_SIZE ((CONFIG_KFENCE_NUM_OBJECTS + 1) * 2 * PAGE_SIZE)
 extern unsigned long kfence_pool_size;
-extern char *__kfence_pool;
+extern char **__kfence_pool_node;
 
 #ifdef CONFIG_KFENCE_STATIC_KEYS
 #include <linux/static_key.h>
@@ -31,6 +31,22 @@ DECLARE_STATIC_KEY_FALSE(kfence_allocation_key);
 extern atomic_t kfence_allocation_gate;
 #endif
 
+/**
+ * is_kfence_address_node() - check if an address belongs to the KFENCE pool on a given node
+ * @addr: address to check
+ * @node: node to check
+ *
+ * Return: true or false depending on whether the address is within the KFENCE
+ * object range on the given node.
+ *
+ * This function is used when you already know the node.
+ */
+static __always_inline bool is_kfence_address_node(const void *addr, const int node)
+{
+	return unlikely((unsigned long)((char *)addr - __kfence_pool_node[node]) <
+			kfence_pool_size && __kfence_pool_node[node]);
+}
+
 /**
  * is_kfence_address() - check if an address belongs to KFENCE pool
  * @addr: address to check
@@ -51,13 +67,10 @@ extern atomic_t kfence_allocation_gate;
  */
 static __always_inline bool is_kfence_address(const void *addr)
 {
-	/*
-	 * The __kfence_pool != NULL check is required to deal with the case
-	 * where __kfence_pool == NULL && addr < kfence_pool_size. Keep it in
-	 * the slow-path after the range-check!
-	 */
-	return unlikely((unsigned long)((char *)addr - __kfence_pool) <
-			kfence_pool_size && __kfence_pool);
+	if (unlikely(!virt_addr_valid(addr)))
+		return false;
+
+	return unlikely(is_kfence_address_node(addr, page_to_nid(virt_to_page(addr))));
 }
 
 /**
@@ -96,9 +109,9 @@ void kfence_shutdown_cache(struct kmem_cache *s);
 
 /*
  * Allocate a KFENCE object. Allocators must not call this function directly,
- * use kfence_alloc() instead.
+ * use kfence_alloc() or kfence_alloc_node() instead.
  */
-void *__kfence_alloc(struct kmem_cache *s, size_t size, gfp_t flags);
+void *__kfence_alloc(struct kmem_cache *s, size_t size, gfp_t flags, int node);
 
 /**
  * kfence_alloc() - allocate a KFENCE object with a low probability
@@ -123,7 +136,36 @@ static __always_inline void *kfence_alloc(struct kmem_cache *s, size_t size, gfp
 #else
 	if (unlikely(!atomic_read(&kfence_allocation_gate)))
 #endif
-		return __kfence_alloc(s, size, flags);
+		return __kfence_alloc(s, size, flags, NUMA_NO_NODE);
+	return NULL;
+}
+
+/**
+ * kfence_alloc_node() - allocate a KFENCE object with a low probability
+ * @s:     struct kmem_cache with object requirements
+ * @size:  exact size of the object to allocate (can be less than @s->size
+ *         e.g. for kmalloc caches)
+ * @flags: GFP flags
+ * @node:  node whose KFENCE pool to allocate from
+ *
+ * Return:
+ * * NULL     - must proceed with allocating as usual,
+ * * non-NULL - pointer to a KFENCE object.
+ *
+ * kfence_alloc_node() should be inserted into the heap allocation fast path,
+ * allowing it to transparently return KFENCE-allocated objects with a low
+ * probability using a static branch (the probability is controlled by the
+ * kfence.sample_interval boot parameter).
+ */
+static __always_inline void *kfence_alloc_node(struct kmem_cache *s, size_t size, gfp_t flags,
+					       int node)
+{
+#ifdef CONFIG_KFENCE_STATIC_KEYS
+	if (static_branch_unlikely(&kfence_allocation_key))
+#else
+	if (unlikely(!atomic_read(&kfence_allocation_gate)))
+#endif
+		return __kfence_alloc(s, size, flags, node);
 	return NULL;
 }
 
@@ -205,11 +247,16 @@ bool __must_check kfence_handle_page_fault(unsigned long addr, bool is_write, st
 
 #else /* CONFIG_KFENCE */
 
+static inline bool is_kfence_address_node(const void *addr, const int node) { return false; }
 static inline bool is_kfence_address(const void *addr) { return false; }
 static inline void kfence_alloc_pool(void) { }
 static inline void kfence_init(void) { }
 static inline void kfence_shutdown_cache(struct kmem_cache *s) { }
 static inline void *kfence_alloc(struct kmem_cache *s, size_t size, gfp_t flags) { return NULL; }
+static inline void *kfence_alloc_node(struct kmem_cache *s, size_t size, gfp_t flags, int node)
+{
+	return NULL;
+}
 static inline size_t kfence_ksize(const void *addr) { return 0; }
 static inline void *kfence_object_start(const void *addr) { return NULL; }
 static inline void __kfence_free(void *addr) { }
@@ -52,10 +52,10 @@ config KFENCE_NUM_OBJECTS
 	range 1 10000000
 	default 255
 	help
-	  The number of guarded objects available. For each KFENCE object, 2
-	  pages are required; with one containing the object and two adjacent
-	  ones used as guard pages. May be overridden via boot parameter
-	  "kfence.num_objects".
+	  The number of guarded objects available per NUMA node. For each
+	  KFENCE object, 2 pages are required; with one containing the object
+	  and two adjacent ones used as guard pages. May be overridden via boot
+	  parameter "kfence.num_objects_pernode".
 
 config KFENCE_STRESS_TEST_FAULTS
 	int "Stress testing of fault handling and error reporting" if EXPERT
mm/kfence/core.c (230 lines changed)
@@ -114,21 +114,28 @@ static const struct kernel_param_ops num_objects_param_ops = {
 	.set = param_set_num_objects,
 	.get = param_get_num_objects,
 };
-module_param_cb(num_objects, &num_objects_param_ops, &kfence_num_objects, 0600);
+module_param_cb(num_objects_pernode, &num_objects_param_ops, &kfence_num_objects, 0600);
 
 /* The pool of pages used for guard pages and objects. */
-char *__kfence_pool __ro_after_init;
-EXPORT_SYMBOL(__kfence_pool); /* Export for test modules. */
+char **__kfence_pool_node __ro_after_init;
+EXPORT_SYMBOL(__kfence_pool_node);
 
 /*
  * Per-object metadata, with one-to-one mapping of object metadata to
  * backing pages (in __kfence_pool).
  */
-struct kfence_metadata *kfence_metadata;
+struct kfence_metadata **kfence_metadata_node;
 
 /* Freelist with available objects. */
-static struct list_head kfence_freelist = LIST_HEAD_INIT(kfence_freelist);
-static DEFINE_RAW_SPINLOCK(kfence_freelist_lock); /* Lock protecting freelist. */
+struct kfence_freelist_node {
+	struct list_head freelist;
+	raw_spinlock_t lock;
+};
+
+struct kfence_freelist {
+	struct kfence_freelist_node *node;
+};
+static struct kfence_freelist freelist;
 
 #ifdef CONFIG_KFENCE_STATIC_KEYS
 /* The static key to set up a KFENCE allocation. */
@@ -172,11 +179,20 @@ static bool kfence_unprotect(unsigned long addr)
 static inline struct kfence_metadata *addr_to_metadata(unsigned long addr)
 {
 	long index;
+	int node;
+	char *__kfence_pool;
+	struct kfence_metadata *kfence_metadata;
 
 	/* The checks do not affect performance; only called from slow-paths. */
 
-	if (!is_kfence_address((void *)addr))
+	if (!virt_addr_valid(addr))
 		return NULL;
+	node = virt_to_nid(addr);
+	if (!is_kfence_address_node((void *)addr, node))
+		return NULL;
+
+	__kfence_pool = __kfence_pool_node[node];
+	kfence_metadata = kfence_metadata_node[node];
 
 	/*
 	 * May be an invalid index if called with an address at the edge of
@@ -190,8 +206,10 @@ static inline struct kfence_metadata *addr_to_metadata(unsigned long addr)
 	return &kfence_metadata[index];
 }
 
-static inline unsigned long metadata_to_pageaddr(const struct kfence_metadata *meta)
+static inline unsigned long metadata_to_pageaddr(const struct kfence_metadata *meta, int node)
 {
+	char *__kfence_pool = __kfence_pool_node[node];
+	struct kfence_metadata *kfence_metadata = kfence_metadata_node[node];
 	unsigned long offset = (meta - kfence_metadata + 1) * PAGE_SIZE * 2;
 	unsigned long pageaddr = (unsigned long)&__kfence_pool[offset];
 
@@ -290,20 +308,24 @@ static __always_inline void for_each_canary(const struct kfence_metadata *meta,
 	}
 }
 
-static void *kfence_guarded_alloc(struct kmem_cache *cache, size_t size, gfp_t gfp)
+static void *kfence_guarded_alloc(struct kmem_cache *cache, size_t size, gfp_t gfp, int node)
 {
 	struct kfence_metadata *meta = NULL;
+	struct kfence_freelist_node *kfence_freelist = &freelist.node[node];
 	unsigned long flags;
 	struct page *page;
 	void *addr;
 
+	if (unlikely(!__kfence_pool_node[node]))
+		return NULL;
+
 	/* Try to obtain a free object. */
-	raw_spin_lock_irqsave(&kfence_freelist_lock, flags);
-	if (!list_empty(&kfence_freelist)) {
-		meta = list_entry(kfence_freelist.next, struct kfence_metadata, list);
+	raw_spin_lock_irqsave(&kfence_freelist->lock, flags);
+	if (!list_empty(&kfence_freelist->freelist)) {
+		meta = list_entry(kfence_freelist->freelist.next, struct kfence_metadata, list);
 		list_del_init(&meta->list);
 	}
-	raw_spin_unlock_irqrestore(&kfence_freelist_lock, flags);
+	raw_spin_unlock_irqrestore(&kfence_freelist->lock, flags);
 	if (!meta)
 		return NULL;
 
@@ -317,15 +339,15 @@ static void *kfence_guarded_alloc(struct kmem_cache *cache, size_t size, gfp_t g
 		 * report that there is a possibility of deadlock. Fix it by
 		 * using trylock and bailing out gracefully.
 		 */
-		raw_spin_lock_irqsave(&kfence_freelist_lock, flags);
+		raw_spin_lock_irqsave(&kfence_freelist->lock, flags);
 		/* Put the object back on the freelist. */
-		list_add_tail(&meta->list, &kfence_freelist);
-		raw_spin_unlock_irqrestore(&kfence_freelist_lock, flags);
+		list_add_tail(&meta->list, &kfence_freelist->freelist);
+		raw_spin_unlock_irqrestore(&kfence_freelist->lock, flags);
 
 		return NULL;
 	}
 
-	meta->addr = metadata_to_pageaddr(meta);
+	meta->addr = metadata_to_pageaddr(meta, node);
 	/* Unprotect if we're reusing this page. */
 	if (meta->state == KFENCE_OBJECT_FREED)
 		kfence_unprotect(meta->addr);
@@ -386,7 +408,9 @@ static void *kfence_guarded_alloc(struct kmem_cache *cache, size_t size, gfp_t g
 
 static void kfence_guarded_free(void *addr, struct kfence_metadata *meta, bool zombie)
 {
+	int node = virt_to_nid(addr);
 	struct kcsan_scoped_access assert_page_exclusive;
+	struct kfence_freelist_node *kfence_freelist = &freelist.node[node];
 	unsigned long flags;
 
 	raw_spin_lock_irqsave(&meta->lock, flags);
@@ -437,10 +461,10 @@ static void kfence_guarded_free(void *addr, struct kfence_metadata *meta, bool z
 	kcsan_end_scoped_access(&assert_page_exclusive);
 	if (!zombie) {
 		/* Add it to the tail of the freelist for reuse. */
-		raw_spin_lock_irqsave(&kfence_freelist_lock, flags);
+		raw_spin_lock_irqsave(&kfence_freelist->lock, flags);
 		KFENCE_WARN_ON(!list_empty(&meta->list));
-		list_add_tail(&meta->list, &kfence_freelist);
-		raw_spin_unlock_irqrestore(&kfence_freelist_lock, flags);
+		list_add_tail(&meta->list, &kfence_freelist->freelist);
+		raw_spin_unlock_irqrestore(&kfence_freelist->lock, flags);
 
 		atomic_long_dec(&counters[KFENCE_COUNTER_ALLOCATED]);
 		atomic_long_inc(&counters[KFENCE_COUNTER_FREES]);
@@ -457,16 +481,20 @@ static void rcu_guarded_free(struct rcu_head *h)
 	kfence_guarded_free((void *)meta->addr, meta, false);
 }
 
-static bool __init kfence_init_pool(void)
+static bool __init kfence_init_pool_node(int node)
 {
+	char *__kfence_pool = __kfence_pool_node[node];
+	struct kfence_metadata *kfence_metadata = kfence_metadata_node[node];
+	struct kfence_freelist_node *kfence_freelist = &freelist.node[node];
 	unsigned long addr = (unsigned long)__kfence_pool;
+	phys_addr_t metadata_size = sizeof(struct kfence_metadata) * kfence_num_objects;
 	struct page *pages;
 	int i;
 
 	if (!__kfence_pool)
 		return false;
 
-	if (!arch_kfence_init_pool())
+	if (!arch_kfence_init_pool(node))
 		goto err;
 
 	pages = virt_to_page(addr);
@@ -511,7 +539,7 @@ static bool __init kfence_init_pool(void)
 		raw_spin_lock_init(&meta->lock);
 		meta->state = KFENCE_OBJECT_UNUSED;
 		meta->addr = addr; /* Initialize for validation in metadata_to_pageaddr(). */
-		list_add_tail(&meta->list, &kfence_freelist);
+		list_add_tail(&meta->list, &kfence_freelist->freelist);
 
 		/* Protect the right redzone. */
 		if (unlikely(!kfence_protect(addr + PAGE_SIZE)))
@@ -526,6 +554,7 @@ static bool __init kfence_init_pool(void)
 	 * otherwise overlap with allocations returned by kfence_alloc(), which
 	 * are registered with kmemleak through the slab post-alloc hook.
 	 */
+	kmemleak_free(kfence_metadata);
 	kmemleak_free(__kfence_pool);
 
 	return true;
@@ -539,10 +568,31 @@ err:
 	 * most failure cases.
 	 */
 	memblock_free_late(__pa(addr), kfence_pool_size - (addr - (unsigned long)__kfence_pool));
-	__kfence_pool = NULL;
+	memblock_free_late(__pa(kfence_metadata), metadata_size);
+	__kfence_pool_node[node] = NULL;
+	kfence_metadata_node[node] = NULL;
+
 	return false;
 }
 
+static bool __init kfence_init_pool(void)
+{
+	int node;
+	bool ret = false;
+
+	for_each_node(node) {
+		if (kfence_init_pool_node(node))
+			ret = true;
+		else
+			pr_err("failed to init kfence pool on node %d\n", node);
+	}
+
+	kmemleak_free(kfence_metadata_node);
+	kmemleak_free(__kfence_pool_node);
+
+	return ret;
+}
+
 /* === DebugFS Interface ==================================================== */
 
 static int stats_show(struct seq_file *seq, void *v)
@@ -564,7 +614,7 @@ DEFINE_SHOW_ATTRIBUTE(stats);
  */
 static void *start_object(struct seq_file *seq, loff_t *pos)
 {
-	if (*pos < kfence_num_objects)
+	if (*pos < kfence_num_objects * nr_node_ids)
 		return (void *)((long)*pos + 1);
 	return NULL;
 }
@@ -576,21 +626,28 @@ static void stop_object(struct seq_file *seq, void *v)
 static void *next_object(struct seq_file *seq, void *v, loff_t *pos)
 {
 	++*pos;
-	if (*pos < kfence_num_objects)
+	if (*pos < kfence_num_objects * nr_node_ids)
 		return (void *)((long)*pos + 1);
 	return NULL;
 }
 
 static int show_object(struct seq_file *seq, void *v)
 {
-	struct kfence_metadata *meta = &kfence_metadata[(long)v - 1];
+	long pos = (long)v - 1;
+	int node = pos / kfence_num_objects;
+	struct kfence_metadata *meta;
 	unsigned long flags;
+	char buf[20];
 
-	if (!kfence_metadata)
+	if (!kfence_metadata_node[node])
 		return 0;
 
+	pos %= kfence_num_objects;
+	sprintf(buf, "node %d:\n", node);
+	seq_puts(seq, buf);
+	meta = &kfence_metadata_node[node][pos];
 	raw_spin_lock_irqsave(&meta->lock, flags);
-	kfence_print_object(seq, meta);
+	kfence_print_object(seq, meta, node);
 	raw_spin_unlock_irqrestore(&meta->lock, flags);
 	seq_puts(seq, "---------------------------------\n");
 
|
@ -684,57 +741,93 @@ static DECLARE_DELAYED_WORK(kfence_timer, toggle_allocation_gate);
|
||||||
|
|
||||||
void __init kfence_alloc_pool(void)
|
void __init kfence_alloc_pool(void)
|
||||||
{
|
{
|
||||||
|
int node;
|
||||||
phys_addr_t metadata_size = sizeof(struct kfence_metadata) * kfence_num_objects;
|
phys_addr_t metadata_size = sizeof(struct kfence_metadata) * kfence_num_objects;
|
||||||
|
|
||||||
if (!kfence_sample_interval)
|
kfence_metadata_node = memblock_alloc(sizeof(struct kfence_metadata *) *
|
||||||
return;
|
nr_node_ids, PAGE_SIZE);
|
||||||
|
__kfence_pool_node = memblock_alloc(sizeof(char *) * nr_node_ids, PAGE_SIZE);
|
||||||
|
|
||||||
kfence_metadata = memblock_alloc(metadata_size, PAGE_SIZE);
|
/* Setting kfence_sample_interval or kfence_num_objects to 0 on boot disables KFENCE. */
|
||||||
|
if (!READ_ONCE(kfence_sample_interval) || !kfence_metadata_node || !__kfence_pool_node) {
|
||||||
if (!kfence_metadata) {
|
WRITE_ONCE(kfence_sample_interval, 0);
|
||||||
pr_err("failed to allocate metadata\n");
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
__kfence_pool = memblock_alloc(READ_ONCE(kfence_pool_size), PAGE_SIZE);
|
for_each_node(node) {
|
||||||
|
kfence_metadata_node[node] = memblock_alloc_node(metadata_size, PAGE_SIZE, node);
|
||||||
|
if (!kfence_metadata_node[node]) {
|
||||||
|
pr_err("kfence alloc metadata on node %d failed\n", node);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
if (!__kfence_pool) {
|
__kfence_pool_node[node] = memblock_alloc_node(kfence_pool_size, PAGE_SIZE, node);
|
||||||
memblock_free(__pa(kfence_metadata), metadata_size);
|
if (!__kfence_pool_node[node]) {
|
||||||
kfence_metadata = NULL;
|
memblock_free(__pa(kfence_metadata_node[node]), metadata_size);
|
||||||
pr_err("failed to allocate pool\n");
|
kfence_metadata_node[node] = NULL;
|
||||||
|
pr_err("kfence alloc pool on node %d failed\n", node);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void __init kfence_init(void)
|
void __init kfence_init(void)
|
||||||
{
|
{
|
||||||
|
int node;
|
||||||
phys_addr_t metadata_size = sizeof(struct kfence_metadata) * kfence_num_objects;
|
phys_addr_t metadata_size = sizeof(struct kfence_metadata) * kfence_num_objects;
|
||||||
|
|
||||||
/* Setting kfence_sample_interval to 0 on boot disables KFENCE. */
|
if (!READ_ONCE(kfence_sample_interval))
|
||||||
if (!kfence_sample_interval)
|
|
||||||
return;
|
return;
|
||||||
|
|
||||||
|
freelist.node = kmalloc_array(nr_node_ids, sizeof(struct kfence_freelist_node),
|
||||||
|
GFP_KERNEL);
|
||||||
|
|
||||||
|
if (!freelist.node)
|
||||||
|
goto fail;
|
||||||
|
|
||||||
|
for_each_node(node) {
|
||||||
|
INIT_LIST_HEAD(&freelist.node[node].freelist);
|
||||||
|
raw_spin_lock_init(&freelist.node[node].lock);
|
||||||
|
}
|
||||||
|
|
||||||
if (!kfence_init_pool()) {
|
if (!kfence_init_pool()) {
|
||||||
memblock_free_late(__pa(kfence_metadata), metadata_size);
|
pr_err("%s failed on all nodes!\n", __func__);
|
||||||
kfence_metadata = NULL;
|
goto fail;
|
||||||
pr_err("%s failed\n", __func__);
|
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
WRITE_ONCE(kfence_enabled, true);
|
WRITE_ONCE(kfence_enabled, true);
|
||||||
queue_delayed_work(system_unbound_wq, &kfence_timer, 0);
|
queue_delayed_work(system_unbound_wq, &kfence_timer, 0);
|
||||||
pr_info("initialized - using %lu bytes for %lu objects", kfence_pool_size,
|
for_each_node(node) {
|
||||||
kfence_num_objects);
|
if (!__kfence_pool_node[node])
|
||||||
if (IS_ENABLED(CONFIG_DEBUG_KERNEL))
|
continue;
|
||||||
pr_cont(" at 0x%px-0x%px\n", (void *)__kfence_pool,
|
pr_info("initialized - using %lu bytes for %lu objects on node %d",
|
||||||
(void *)(__kfence_pool + kfence_pool_size));
|
kfence_pool_size, kfence_num_objects, node);
|
||||||
else
|
if (IS_ENABLED(CONFIG_DEBUG_KERNEL))
|
||||||
pr_cont("\n");
|
pr_cont(" at 0x%px-0x%px\n", (void *)__kfence_pool_node[node],
|
||||||
|
(void *)(__kfence_pool_node[node] + kfence_pool_size));
|
||||||
|
else
|
||||||
|
pr_cont("\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
return;
|
||||||
|
|
||||||
|
fail:
|
||||||
|
for_each_node(node) {
|
||||||
|
if (__kfence_pool_node[node]) {
|
||||||
|
memblock_free_late(__pa(kfence_metadata_node[node]), metadata_size);
|
||||||
|
kfence_metadata_node[node] = NULL;
|
||||||
|
memblock_free_late(__pa(__kfence_pool_node[node]), kfence_pool_size);
|
||||||
|
__kfence_pool_node[node] = NULL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
kfree(freelist.node);
|
||||||
|
freelist.node = NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
void kfence_shutdown_cache(struct kmem_cache *s)
|
static void kfence_shutdown_cache_node(struct kmem_cache *s, int node)
|
||||||
{
|
{
|
||||||
unsigned long flags;
|
unsigned long flags;
|
||||||
struct kfence_metadata *meta;
|
struct kfence_metadata *meta, *kfence_metadata = kfence_metadata_node[node];
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
if (!kfence_metadata)
|
if (!kfence_metadata)
|
||||||
|
@ -793,7 +886,18 @@ void kfence_shutdown_cache(struct kmem_cache *s)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void *__kfence_alloc(struct kmem_cache *s, size_t size, gfp_t flags)
|
void kfence_shutdown_cache(struct kmem_cache *s)
|
||||||
|
{
|
||||||
|
int node;
|
||||||
|
|
||||||
|
if (!kfence_metadata_node)
|
||||||
|
return;
|
||||||
|
|
||||||
|
for_each_node(node)
|
||||||
|
kfence_shutdown_cache_node(s, node);
|
||||||
|
}
|
||||||
|
|
||||||
|
void *__kfence_alloc(struct kmem_cache *s, size_t size, gfp_t flags, int node)
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
* Perform size check before switching kfence_allocation_gate, so that
|
* Perform size check before switching kfence_allocation_gate, so that
|
||||||
|
@ -835,7 +939,10 @@ void *__kfence_alloc(struct kmem_cache *s, size_t size, gfp_t flags)
|
||||||
if (!READ_ONCE(kfence_enabled))
|
if (!READ_ONCE(kfence_enabled))
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
return kfence_guarded_alloc(s, size, flags);
|
if (node == NUMA_NO_NODE)
|
||||||
|
node = numa_node_id();
|
||||||
|
|
||||||
|
return kfence_guarded_alloc(s, size, flags, node);
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t kfence_ksize(const void *addr)
|
size_t kfence_ksize(const void *addr)
|
||||||
|
@@ -878,12 +985,15 @@ void __kfence_free(void *addr)
 
 bool kfence_handle_page_fault(unsigned long addr, bool is_write, struct pt_regs *regs)
 {
-	const int page_index = (addr - (unsigned long)__kfence_pool) / PAGE_SIZE;
+	int node, page_index;
 	struct kfence_metadata *to_report = NULL;
 	enum kfence_error_type error_type;
 	unsigned long flags;
 
-	if (!is_kfence_address((void *)addr))
+	if (!virt_addr_valid(addr))
+		return false;
+	node = virt_to_nid(addr);
+	if (!is_kfence_address_node((void *)addr, node))
 		return false;
 
 	if (!READ_ONCE(kfence_enabled)) /* If disabled at runtime ... */
@@ -891,6 +1001,8 @@ bool kfence_handle_page_fault(unsigned long addr, bool is_write, struct pt_regs
 
 	atomic_long_inc(&counters[KFENCE_COUNTER_BUGS]);
 
+	page_index = (addr - (unsigned long)__kfence_pool_node[node]) / PAGE_SIZE;
+
 	if (page_index % 2) {
 		/* This is a redzone, report a buffer overflow. */
 		struct kfence_metadata *meta;
@@ -29,6 +29,7 @@
  * probability, where similar constants are used.
  */
 #define KFENCE_CANARY_PATTERN(addr) ((u8)0xaa ^ (u8)((unsigned long)(addr) & 0x7))
+#define virt_to_nid(addr) page_to_nid(virt_to_page((unsigned long)addr))
 
 /* Maximum stack depth for reports. */
 #define KFENCE_STACK_DEPTH 64
@@ -97,7 +98,7 @@ struct kfence_metadata {
 };
 
 extern unsigned long kfence_num_objects;
-extern struct kfence_metadata *kfence_metadata;
+extern struct kfence_metadata **kfence_metadata_node;
 
 /* KFENCE error types for report generation. */
 enum kfence_error_type {
@@ -111,6 +112,6 @@ enum kfence_error_type {
 void kfence_report_error(unsigned long address, bool is_write, struct pt_regs *regs,
 			 const struct kfence_metadata *meta, enum kfence_error_type type);
 
-void kfence_print_object(struct seq_file *seq, const struct kfence_metadata *meta);
+void kfence_print_object(struct seq_file *seq, const struct kfence_metadata *meta, int node);
 
 #endif /* MM_KFENCE_KFENCE_H */
@@ -591,7 +591,7 @@ static void test_gfpzero(struct kunit *test)
 	char *buf1, *buf2;
 	int i;
 
-	if (CONFIG_KFENCE_SAMPLE_INTERVAL > 100 || kfence_num_objects > 255) {
+	if (CONFIG_KFENCE_SAMPLE_INTERVAL > 100 || kfence_num_objects * nr_node_ids > 255) {
 		kunit_warn(test, "skipping ... would take too long\n");
 		return;
 	}
@@ -628,11 +628,11 @@ static void test_invalid_access(struct kunit *test)
 	const struct expect_report expect = {
 		.type = KFENCE_ERROR_INVALID,
 		.fn = test_invalid_access,
-		.addr = &__kfence_pool[10],
+		.addr = &__kfence_pool_node[0][10],
 		.is_write = false,
 	};
 
-	READ_ONCE(__kfence_pool[10]);
+	READ_ONCE(__kfence_pool_node[0][10]);
 	KUNIT_EXPECT_TRUE(test, report_matches(&expect));
 }
 
@@ -117,8 +117,9 @@ static void kfence_print_stack(struct seq_file *seq, const struct kfence_metadat
 	}
 }
 
-void kfence_print_object(struct seq_file *seq, const struct kfence_metadata *meta)
+void kfence_print_object(struct seq_file *seq, const struct kfence_metadata *meta, int node)
 {
+	struct kfence_metadata *kfence_metadata = kfence_metadata_node[node];
 	const int size = abs(meta->size);
 	const unsigned long start = meta->addr;
 	const struct kmem_cache *const cache = meta->cache;
@@ -178,8 +179,9 @@ static const char *get_access_type(bool is_write)
 void kfence_report_error(unsigned long address, bool is_write, struct pt_regs *regs,
 			 const struct kfence_metadata *meta, enum kfence_error_type type)
 {
+	int node;
 	unsigned long stack_entries[KFENCE_STACK_DEPTH] = { 0 };
-	const ptrdiff_t object_index = meta ? meta - kfence_metadata : -1;
+	ptrdiff_t object_index = -1;
 	int num_stack_entries;
 	int skipnr = 0;
 
@@ -194,8 +196,12 @@ void kfence_report_error(unsigned long address, bool is_write, struct pt_regs *r
 	if (WARN_ON(type != KFENCE_ERROR_INVALID && !meta))
 		return;
 
-	if (meta)
+	if (meta) {
 		lockdep_assert_held(&meta->lock);
+		node = virt_to_nid(meta->addr);
+		object_index = meta - kfence_metadata_node[node];
+	}
 
 	/*
 	 * Because we may generate reports in printk-unfriendly parts of the
 	 * kernel, such as scheduler code, the use of printk() could deadlock.
@@ -251,7 +257,7 @@ void kfence_report_error(unsigned long address, bool is_write, struct pt_regs *r
 
 	if (meta) {
 		pr_err("\n");
-		kfence_print_object(NULL, meta);
+		kfence_print_object(NULL, meta, node);
 	}
 
 	/* Print report footer. */
@@ -3221,7 +3221,7 @@ slab_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid, size_t orig_
 	if (unlikely(!cachep))
 		return NULL;
 
-	ptr = kfence_alloc(cachep, orig_size, flags);
+	ptr = kfence_alloc_node(cachep, orig_size, flags, nodeid);
 	if (unlikely(ptr))
 		goto out_hooks;
 
@@ -2828,7 +2828,7 @@ static __always_inline void *slab_alloc_node(struct kmem_cache *s,
 	if (!s)
 		return NULL;
 
-	object = kfence_alloc(s, orig_size, gfpflags);
+	object = kfence_alloc_node(s, orig_size, gfpflags, node);
 	if (unlikely(object))
 		goto out;
 