anolis: mm: support allocating page table pages bound to the local NUMA node
ANBZ: #6618 Currently page table pages can not be migrated through NUMA balancing. If most of the page table pages are located on a remote NUMA node, performance can be degraded in some scenarios. Thus this patch provides a way to allocate page table pages on the local NUMA node from a reserved memory range. To switch on the pgtable bind feature globally: echo 2 > /sys/kernel/mm/pgtable_bind/enabled For misplaced page table page statistics only: echo 1 > /sys/kernel/mm/pgtable_bind/enabled To enable the pgtable bind feature at cgroup level: echo 1 > /sys/fs/cgroup/memory/<online_memcg>/memory.pgtable_bind To get the number of misplaced page table pages: cat /sys/fs/cgroup/memory/<online_memcg>/memory.pgtable_misplaced Besides, if you need to reset the value of pgtable_misplaced: echo 0 > /sys/fs/cgroup/memory/<online_memcg>/memory.pgtable_misplaced Signed-off-by: Kaihao Bai <carlo.bai@linux.alibaba.com> Reviewed-by: Xu Yu <xuyu@linux.alibaba.com> Reviewed-by: Guixin Liu <kanie@linux.alibaba.com> Link: https://gitee.com/anolis/cloud-kernel/pulls/2254
This commit is contained in:
parent
e17630f874
commit
deaf2108f0
|
@ -10,6 +10,8 @@
|
|||
#include <linux/gfp.h>
|
||||
#include <linux/highmem.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/memcontrol.h>
|
||||
#include <linux/pgtable_bind.h>
|
||||
|
||||
#include <asm/pgalloc.h>
|
||||
#include <asm/page.h>
|
||||
|
@ -21,6 +23,27 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
|
|||
{
|
||||
gfp_t gfp = GFP_PGTABLE_USER;
|
||||
|
||||
#ifdef CONFIG_PGTABLE_BIND
|
||||
if (pgtable_stat_enabled()) {
|
||||
struct mem_cgroup *memcg;
|
||||
bool pgtable_alloc = false;
|
||||
|
||||
memcg = get_mem_cgroup_from_mm(mm);
|
||||
if (memcg) {
|
||||
pgtable_alloc = memcg->allow_pgtable_bind;
|
||||
css_put(&memcg->css);
|
||||
}
|
||||
|
||||
/* Only target on user processes */
|
||||
if (pgtable_alloc) {
|
||||
gfp |= __GFP_PGTABLE;
|
||||
|
||||
if (pgtable_bind_enabled())
|
||||
gfp |= __GFP_HIGH | __GFP_THISNODE;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
if (PGD_SIZE == PAGE_SIZE)
|
||||
return (pgd_t *)__get_free_page(gfp);
|
||||
else
|
||||
|
|
|
@ -2,6 +2,9 @@
|
|||
#ifndef __ASM_GENERIC_PGALLOC_H
|
||||
#define __ASM_GENERIC_PGALLOC_H
|
||||
|
||||
#include <linux/memcontrol.h>
|
||||
#include <linux/pgtable_bind.h>
|
||||
|
||||
#ifdef CONFIG_MMU
|
||||
|
||||
#define GFP_PGTABLE_KERNEL (GFP_KERNEL | __GFP_ZERO | __GFP_NOKFENCE)
|
||||
|
@ -60,6 +63,27 @@ static inline pgtable_t __pte_alloc_one(struct mm_struct *mm, gfp_t gfp)
|
|||
{
|
||||
struct page *pte;
|
||||
|
||||
#ifdef CONFIG_PGTABLE_BIND
|
||||
if (pgtable_stat_enabled()) {
|
||||
struct mem_cgroup *memcg;
|
||||
bool pgtable_alloc = false;
|
||||
|
||||
memcg = get_mem_cgroup_from_mm(mm);
|
||||
if (memcg) {
|
||||
pgtable_alloc = memcg->allow_pgtable_bind;
|
||||
css_put(&memcg->css);
|
||||
}
|
||||
|
||||
/* Only target on user processes */
|
||||
if (pgtable_alloc) {
|
||||
gfp |= __GFP_PGTABLE;
|
||||
|
||||
if (pgtable_bind_enabled())
|
||||
gfp |= __GFP_HIGH | __GFP_THISNODE;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
pte = alloc_page(gfp);
|
||||
if (!pte)
|
||||
return NULL;
|
||||
|
@ -121,6 +145,27 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
|
|||
struct page *page;
|
||||
gfp_t gfp = GFP_PGTABLE_USER;
|
||||
|
||||
#ifdef CONFIG_PGTABLE_BIND
|
||||
if (pgtable_stat_enabled()) {
|
||||
struct mem_cgroup *memcg;
|
||||
bool pgtable_alloc = false;
|
||||
|
||||
memcg = get_mem_cgroup_from_mm(mm);
|
||||
if (memcg) {
|
||||
pgtable_alloc = memcg->allow_pgtable_bind;
|
||||
css_put(&memcg->css);
|
||||
}
|
||||
|
||||
/* Only target on user processes */
|
||||
if (pgtable_alloc) {
|
||||
gfp |= __GFP_PGTABLE;
|
||||
|
||||
if (pgtable_bind_enabled())
|
||||
gfp |= __GFP_HIGH | __GFP_THISNODE;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
if (mm == &init_mm)
|
||||
gfp = GFP_PGTABLE_KERNEL;
|
||||
page = alloc_pages(gfp, 0);
|
||||
|
@ -161,6 +206,27 @@ static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
|
|||
{
|
||||
gfp_t gfp = GFP_PGTABLE_USER;
|
||||
|
||||
#ifdef CONFIG_PGTABLE_BIND
|
||||
if (pgtable_stat_enabled()) {
|
||||
struct mem_cgroup *memcg;
|
||||
bool pgtable_alloc = false;
|
||||
|
||||
memcg = get_mem_cgroup_from_mm(mm);
|
||||
if (memcg) {
|
||||
pgtable_alloc = memcg->allow_pgtable_bind;
|
||||
css_put(&memcg->css);
|
||||
}
|
||||
|
||||
/* Only target on user processes */
|
||||
if (pgtable_alloc) {
|
||||
gfp |= __GFP_PGTABLE;
|
||||
|
||||
if (pgtable_bind_enabled())
|
||||
gfp |= __GFP_HIGH | __GFP_THISNODE;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
if (mm == &init_mm)
|
||||
gfp = GFP_PGTABLE_KERNEL;
|
||||
return (pud_t *)get_zeroed_page(gfp);
|
||||
|
|
|
@ -44,6 +44,7 @@ struct vm_area_struct;
|
|||
#else
|
||||
#define ___GFP_NOLOCKDEP 0
|
||||
#endif
|
||||
#define ___GFP_PGTABLE 0x4000000u
|
||||
#define ___GFP_NOKFENCE 0x8000000u
|
||||
/* If the above are modified, __GFP_BITS_SHIFT may need updating */
|
||||
|
||||
|
@ -87,6 +88,8 @@ struct vm_area_struct;
|
|||
*
|
||||
* %__GFP_ACCOUNT causes the allocation to be accounted to kmemcg.
|
||||
*
|
||||
* %__GFP_PGTABLE indicates the allocation of page table pages.
|
||||
*
|
||||
* %__GFP_NOKFENCE informs DO NOT try to alloc page from kfence pool.
|
||||
*/
|
||||
#define __GFP_RECLAIMABLE ((__force gfp_t)___GFP_RECLAIMABLE)
|
||||
|
@ -94,6 +97,7 @@ struct vm_area_struct;
|
|||
#define __GFP_HARDWALL ((__force gfp_t)___GFP_HARDWALL)
|
||||
#define __GFP_THISNODE ((__force gfp_t)___GFP_THISNODE)
|
||||
#define __GFP_ACCOUNT ((__force gfp_t)___GFP_ACCOUNT)
|
||||
#define __GFP_PGTABLE ((__force gfp_t)___GFP_PGTABLE)
|
||||
#define __GFP_NOKFENCE ((__force gfp_t)___GFP_NOKFENCE)
|
||||
|
||||
/**
|
||||
|
|
|
@ -520,6 +520,11 @@ struct mem_cgroup {
|
|||
struct lru_gen_mm_list mm_list;
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_PGTABLE_BIND
|
||||
unsigned long pgtable_misplaced;
|
||||
bool allow_pgtable_bind;
|
||||
#endif
|
||||
|
||||
CK_KABI_RESERVE(1)
|
||||
CK_KABI_RESERVE(2)
|
||||
CK_KABI_RESERVE(3)
|
||||
|
|
|
@ -0,0 +1,32 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_PGTABLE_BIND_H_
#define _LINUX_PGTABLE_BIND_H_

#include <linux/types.h>
#include <linux/jump_label.h>

#ifdef CONFIG_PGTABLE_BIND
DECLARE_STATIC_KEY_FALSE(pgtable_bind_enabled_key);
DECLARE_STATIC_KEY_FALSE(pgtable_stat_enabled_key);

/*
 * True when page table pages should be allocated from the local NUMA
 * node (sysfs "enabled" written with 2).  These helpers sit on the page
 * table allocation paths, so use static_branch_unlikely() to get the
 * jump-label fast path: with the key disabled the check is a patched
 * no-op instead of a runtime load of the key's enabled count.
 */
static inline bool pgtable_bind_enabled(void)
{
	return static_branch_unlikely(&pgtable_bind_enabled_key);
}

/*
 * True when misplaced page table page statistics are being collected
 * (sysfs "enabled" written with 1 or 2).
 */
static inline bool pgtable_stat_enabled(void)
{
	return static_branch_unlikely(&pgtable_stat_enabled_key);
}
#else
static inline bool pgtable_bind_enabled(void)
{
	return false;
}

static inline bool pgtable_stat_enabled(void)
{
	return false;
}
#endif

#endif /* _LINUX_PGTABLE_BIND_H_ */
|
|
@ -48,7 +48,8 @@
|
|||
{(unsigned long)__GFP_WRITE, "__GFP_WRITE"}, \
|
||||
{(unsigned long)__GFP_RECLAIM, "__GFP_RECLAIM"}, \
|
||||
{(unsigned long)__GFP_DIRECT_RECLAIM, "__GFP_DIRECT_RECLAIM"},\
|
||||
{(unsigned long)__GFP_KSWAPD_RECLAIM, "__GFP_KSWAPD_RECLAIM"}\
|
||||
{(unsigned long)__GFP_KSWAPD_RECLAIM, "__GFP_KSWAPD_RECLAIM"},\
|
||||
{(unsigned long)__GFP_PGTABLE, "__GFP_PGTABLE"} \
|
||||
|
||||
#define show_gfp_flags(flags) \
|
||||
(flags) ? __print_flags(flags, "|", \
|
||||
|
|
12
mm/Kconfig
12
mm/Kconfig
|
@ -982,6 +982,18 @@ config PAGECACHE_LIMIT
|
|||
|
||||
If unsure, say N.
|
||||
|
||||
config PGTABLE_BIND
|
||||
bool "Enable page table page allocation bound to local CPUs and misplaced statistics"
|
||||
depends on MEMCG
|
||||
default n
|
||||
help
|
||||
This feature is used to solve the problem that the page table pages can not be
|
||||
migrated through numa balancing. If pages are located on remote numa node, the
|
||||
performance can be degraded in some scenarios. Thus this configuration provides a
|
||||
way to allocate page table pages in local numa node with reserved memory range.
|
||||
|
||||
If unsure, say N.
|
||||
|
||||
# multi-gen LRU {
|
||||
config LRU_GEN
|
||||
bool "Multi-Gen LRU"
|
||||
|
|
|
@ -136,3 +136,4 @@ obj-$(CONFIG_ASYNC_FORK) += async_fork.o
|
|||
obj-$(CONFIG_PAGE_PREZERO) += prezero.o
|
||||
obj-$(CONFIG_PAGECACHE_LIMIT) += pagecache_limit.o
|
||||
obj-$(CONFIG_HAVE_BOOTMEM_INFO_NODE) += bootmem_info.o
|
||||
obj-$(CONFIG_PGTABLE_BIND) += pgtable_bind.o
|
||||
|
|
|
@ -6615,6 +6615,51 @@ static int mem_cgroup_allow_pgcache_sync_write(struct cgroup_subsys_state *css,
|
|||
}
|
||||
#endif /* CONFIG_PAGECACHE_LIMIT */
|
||||
|
||||
#ifdef CONFIG_PGTABLE_BIND
/* memory.pgtable_bind: whether this memcg opts in to page table binding. */
static u64 memcg_pgtable_bind_read(struct cgroup_subsys_state *css,
				struct cftype *cft)
{
	struct mem_cgroup *memcg = mem_cgroup_from_css(css);

	return READ_ONCE(memcg->allow_pgtable_bind);
}

static int memcg_pgtable_bind_write(struct cgroup_subsys_state *css,
				struct cftype *cft, u64 val)
{
	struct mem_cgroup *memcg = mem_cgroup_from_css(css);

	/*
	 * Any nonzero value enables binding.  WRITE_ONCE() pairs with the
	 * READ_ONCE() on the lockless read side (allocation paths).
	 */
	WRITE_ONCE(memcg->allow_pgtable_bind, !!val);

	return 0;
}

/*
 * memory.pgtable_misplaced: number of page table pages that were
 * allocated on a node other than the preferred one.
 */
static u64 memcg_pgtable_misplaced_read(struct cgroup_subsys_state *css,
				struct cftype *cft)
{
	struct mem_cgroup *memcg = mem_cgroup_from_css(css);

	return READ_ONCE(memcg->pgtable_misplaced);
}

static int memcg_pgtable_misplaced_write(struct cgroup_subsys_state *css,
				struct cftype *cft, u64 val)
{
	struct mem_cgroup *memcg = mem_cgroup_from_css(css);

	/* The counter can only be reset; any nonzero write is invalid. */
	if (val)
		return -EINVAL;

	/* Pairs with the READ_ONCE() in memcg_pgtable_misplaced_read(). */
	WRITE_ONCE(memcg->pgtable_misplaced, 0);

	return 0;
}
#endif /* CONFIG_PGTABLE_BIND */
|
||||
|
||||
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
|
||||
static int memcg_thp_reclaim_show(struct seq_file *m, void *v)
|
||||
{
|
||||
|
@ -7208,7 +7253,18 @@ static struct cftype mem_cgroup_legacy_files[] = {
|
|||
.write = memcg_thp_control_write,
|
||||
},
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_PGTABLE_BIND
|
||||
{
|
||||
.name = "pgtable_bind",
|
||||
.write_u64 = memcg_pgtable_bind_write,
|
||||
.read_u64 = memcg_pgtable_bind_read,
|
||||
},
|
||||
{
|
||||
.name = "pgtable_misplaced",
|
||||
.write_u64 = memcg_pgtable_misplaced_write,
|
||||
.read_u64 = memcg_pgtable_misplaced_read,
|
||||
},
|
||||
#endif
|
||||
{ }, /* terminate */
|
||||
};
|
||||
|
||||
|
|
|
@ -4196,6 +4196,24 @@ try_this_zone:
|
|||
if (page) {
|
||||
prep_new_page(page, order, gfp_mask, alloc_flags);
|
||||
|
||||
#ifdef CONFIG_PGTABLE_BIND
|
||||
/*
|
||||
* If allocated page belongs to remote numa node,
|
||||
* accumulate memcg->pgtable_misplaced to show how many pages
|
||||
* are from remote node.
|
||||
*/
|
||||
if ((gfp_mask & __GFP_PGTABLE) &&
|
||||
(zone_to_nid(ac->preferred_zoneref->zone) != zone_to_nid(zone))) {
|
||||
struct mem_cgroup *memcg;
|
||||
|
||||
memcg = get_mem_cgroup_from_mm(current->mm);
|
||||
if (memcg) {
|
||||
memcg->pgtable_misplaced++;
|
||||
css_put(&memcg->css);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* If this is a high-order atomic allocation then check
|
||||
* if the pageblock should be reserved for the future
|
||||
|
@ -5280,6 +5298,16 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, int preferred_nid,
|
|||
*/
|
||||
ac.nodemask = nodemask;
|
||||
|
||||
/*
|
||||
* Restore the __GFP_THISNODE restriction if current allocation is page
|
||||
* table.
|
||||
*/
|
||||
if (gfp_mask & __GFP_PGTABLE) {
|
||||
gfp_mask &= ~__GFP_THISNODE;
|
||||
alloc_mask &= ~__GFP_THISNODE;
|
||||
ac.zonelist = node_zonelist(preferred_nid, gfp_mask);
|
||||
}
|
||||
|
||||
page = __alloc_pages_slowpath(alloc_mask, order, &ac);
|
||||
|
||||
out:
|
||||
|
|
|
@ -0,0 +1,70 @@
|
|||
// SPDX-License-Identifier: GPL-2.0
/*
 * /sys/kernel/mm/pgtable_bind/enabled controls the page table NUMA-bind
 * feature:
 *   0 - disabled
 *   1 - only collect misplaced page table page statistics
 *   2 - also bind page table allocations to the local NUMA node
 */
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/sysfs.h>
#include <linux/kobject.h>
#include <linux/mm.h>
#include <linux/mm_types.h>
#include <linux/pgtable_bind.h>

#ifdef CONFIG_PGTABLE_BIND
/*
 * The keys are referenced by the inline helpers in <linux/pgtable_bind.h>
 * independently of sysfs, so they must be defined outside the
 * CONFIG_SYSFS block or a CONFIG_SYSFS=n build fails to link.
 */
DEFINE_STATIC_KEY_FALSE(pgtable_bind_enabled_key);
DEFINE_STATIC_KEY_FALSE(pgtable_stat_enabled_key);

#ifdef CONFIG_SYSFS
static ssize_t pgtable_bind_enabled_show(struct kobject *kobj,
					 struct kobj_attribute *attr, char *buf)
{
	/* Report 0, 1 or 2, mirroring the values accepted by _store(). */
	return sprintf(buf, "%d\n",
		       !!static_key_enabled(&pgtable_bind_enabled_key) +
		       !!static_key_enabled(&pgtable_stat_enabled_key));
}

static ssize_t pgtable_bind_enabled_store(struct kobject *kobj,
					  struct kobj_attribute *attr,
					  const char *buf, size_t count)
{
	static DEFINE_MUTEX(mutex);
	unsigned int mode;
	int err;

	/*
	 * Parse the whole value instead of strncmp() on the first byte:
	 * the old prefix match treated "10" as "1" and silently accepted
	 * arbitrary garbage as success.
	 */
	err = kstrtouint(buf, 10, &mode);
	if (err)
		return err;
	if (mode > 2)
		return -EINVAL;

	/* Serialize writers so the two keys always change consistently. */
	mutex_lock(&mutex);

	if (mode == 2)
		static_branch_enable(&pgtable_bind_enabled_key);
	else
		static_branch_disable(&pgtable_bind_enabled_key);

	if (mode >= 1)
		static_branch_enable(&pgtable_stat_enabled_key);
	else
		static_branch_disable(&pgtable_stat_enabled_key);

	mutex_unlock(&mutex);

	return count;
}

static struct kobj_attribute pgtable_bind_enabled_attr =
	__ATTR(enabled, 0644, pgtable_bind_enabled_show,
	       pgtable_bind_enabled_store);
static struct attribute *pgtable_bind_attrs[] = {
	&pgtable_bind_enabled_attr.attr,
	NULL,
};
static const struct attribute_group pgtable_bind_attr_group = {
	.attrs = pgtable_bind_attrs,
	.name = "pgtable_bind",
};

static int __init pgtable_bind_init(void)
{
	int ret;

	ret = sysfs_create_group(mm_kobj, &pgtable_bind_attr_group);
	if (ret)
		pr_err("pgtable_bind: register sysfs failed\n");

	return ret;
}
subsys_initcall(pgtable_bind_init);
#endif /* CONFIG_SYSFS */
#endif /* CONFIG_PGTABLE_BIND */
|
|
@ -660,6 +660,7 @@ static const struct {
|
|||
{ "__GFP_RECLAIM", "R" },
|
||||
{ "__GFP_DIRECT_RECLAIM", "DR" },
|
||||
{ "__GFP_KSWAPD_RECLAIM", "KR" },
|
||||
{ "__GFP_PGTABLE", "PT" },
|
||||
};
|
||||
|
||||
static size_t max_gfp_len;
|
||||
|
|
Loading…
Reference in New Issue