anolis: mm: pagecache_limit: add memcg asynchronous reclaim support

ANBZ: #3907

This patch adds memcg pagecache limit asynchronous reclaim support. Since
pagecache limit asynchronous reclaim is scheduled on a workqueue, it can
do more work than synchronous reclaim, e.g. write out dirty pages. In
order to minimize performance jitter when dirty pages are reclaimed, we
only enable dirty pages to be reclaimed when the priority value is
smaller than DEF_PRIORITY - 2 and the reclaim is in the asynchronous
scenario.

Signed-off-by: Xin Hao <xhao@linux.alibaba.com>
Reviewed-by: Xu Yu <xuyu@linux.alibaba.com>
Reviewed-by: Rongwei Wang <rongwei.wang@linux.alibaba.com>
Reviewed-by: Kaihao Bai <carlo.bai@linux.alibaba.com>
Link: https://gitee.com/anolis/cloud-kernel/pulls/1157

@@ -471,6 +471,8 @@ struct mem_cgroup {
#ifdef CONFIG_PAGECACHE_LIMIT
bool allow_pgcache_limit;
unsigned long pgcache_limit_size;
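/* Reclaim mode: PGCACHE_RECLAIM_DIRECT (sync) or PGCACHE_RECLAIM_ASYNC. */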
bool pgcache_limit_sync;
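/* Deferred pagecache-limit reclaim, queued on memcg_pgcache_limit_wq. */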
struct work_struct pgcache_limit_work;
#endif
#if IS_ENABLED(CONFIG_RECLAIM_COLDPGS)

@@ -6,6 +6,7 @@
#ifdef CONFIG_PAGECACHE_LIMIT
DECLARE_STATIC_KEY_FALSE(pagecache_limit_enabled_key);
extern struct workqueue_struct *memcg_pgcache_limit_wq;
enum pgcache_limit_reclaim_type {
/* per-memcg or global pagecache reclaim defaults to async */
@@ -24,6 +25,7 @@ unsigned long memcg_get_pgcache_overflow_size(struct mem_cgroup *memcg);
void __memcg_pagecache_shrink(struct mem_cgroup *memcg,
bool may_unmap, gfp_t gfp_mask);
void memcg_pagecache_shrink(struct mem_cgroup *memcg, gfp_t gfp_mask);
void memcg_pgcache_limit_work_func(struct work_struct *work);
#else
static inline bool pagecache_limit_enabled(void)
@@ -50,5 +52,8 @@ static inline void memcg_pagecache_shrink(struct mem_cgroup *memcg,
gfp_t gfp_mask)
{
}
static inline void memcg_pgcache_limit_work_func(struct work_struct *work)
{
}
#endif
#endif
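
The hunk above cuts off the enum body. A plausible reconstruction, assuming the async mode is the zero value (an assumption, but consistent with the read/write handlers below, which store 0 for async and 1 for direct reclaim):

enum pgcache_limit_reclaim_type {
	/* per-memcg or global pagecache reclaim defaults to async */
	PGCACHE_RECLAIM_ASYNC = 0,	/* assumed value, not shown in the hunk */
	PGCACHE_RECLAIM_DIRECT,
};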

@@ -6395,6 +6395,32 @@ static ssize_t mem_cgroup_pgcache_limit_size_write(struct kernfs_open_file *of,
return nbytes;
}
static u64 mem_cgroup_allow_pgcache_sync_read(struct cgroup_subsys_state *css,
struct cftype *cft)
{
struct mem_cgroup *memcg = mem_cgroup_from_css(css);
return READ_ONCE(memcg->pgcache_limit_sync);
}
static int mem_cgroup_allow_pgcache_sync_write(struct cgroup_subsys_state *css,
struct cftype *cft, u64 val)
{
struct mem_cgroup *memcg = mem_cgroup_from_css(css);
if (val > 1)
return -EINVAL;
if (memcg->pgcache_limit_sync == val)
return 0;
if (val)
memcg->pgcache_limit_sync = PGCACHE_RECLAIM_DIRECT;
else
memcg->pgcache_limit_sync = PGCACHE_RECLAIM_ASYNC;
return 0;
}
#endif /* CONFIG_PAGECACHE_LIMIT */
static struct cftype mem_cgroup_legacy_files[] = {
@@ -6689,6 +6715,11 @@ static struct cftype mem_cgroup_legacy_files[] = {
.read_u64 = mem_cgroup_pgcache_limit_size_read,
.write = mem_cgroup_pgcache_limit_size_write,
},
{
.name = "pagecache_limit.sync",
.read_u64 = mem_cgroup_allow_pgcache_sync_read,
.write_u64 = mem_cgroup_allow_pgcache_sync_write,
},
#endif
{ }, /* terminate */
};
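
For illustration, a minimal userspace sketch of flipping the new knob, assuming the v1 memory controller is mounted at /sys/fs/cgroup/memory and a group named "example" exists (both the mount point and group name are hypothetical; the legacy file name follows from the "pagecache_limit.sync" cftype above):

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	/* Hypothetical path; depends on where the v1 memory controller is mounted. */
	const char *knob = "/sys/fs/cgroup/memory/example/memory.pagecache_limit.sync";
	int fd = open(knob, O_WRONLY);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	/* "1" selects synchronous (direct) reclaim, "0" the default async path. */
	if (write(fd, "1", 1) != 1)
		perror("write");
	close(fd);
	return 0;
}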
@@ -6897,6 +6928,9 @@ static struct mem_cgroup *mem_cgroup_alloc(void)
INIT_WORK(&memcg->high_work, high_work_func);
INIT_WORK(&memcg->wmark_work, wmark_work_func);
#ifdef CONFIG_PAGECACHE_LIMIT
INIT_WORK(&memcg->pgcache_limit_work, memcg_pgcache_limit_work_func);
#endif
INIT_LIST_HEAD(&memcg->oom_notify);
mutex_init(&memcg->thresholds_lock);
spin_lock_init(&memcg->move_lock);
@@ -7097,6 +7131,9 @@ static void mem_cgroup_css_free(struct cgroup_subsys_state *css)
vmpressure_cleanup(&memcg->vmpressure);
cancel_work_sync(&memcg->high_work);
cancel_work_sync(&memcg->wmark_work);
#ifdef CONFIG_PAGECACHE_LIMIT
cancel_work_sync(&memcg->pgcache_limit_work);
#endif
mem_cgroup_remove_from_trees(memcg);
memcg_free_shrinker_maps(memcg);
memcg_free_kmem(memcg);
@@ -8937,6 +8974,15 @@ static int __init mem_cgroup_init(void)
if (!memcg_wmark_wq)
return -ENOMEM;
#ifdef CONFIG_PAGECACHE_LIMIT
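/*
 * WQ_MEM_RECLAIM guarantees a rescuer thread so this workqueue can make
 * forward progress under memory pressure; WQ_UNBOUND lets the work run
 * on any CPU; WQ_FREEZABLE parks it across system suspend.
 */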
memcg_pgcache_limit_wq = alloc_workqueue("memcg_pgcache_limit",
WQ_FREEZABLE |
WQ_UNBOUND | WQ_MEM_RECLAIM,
WQ_UNBOUND_MAX_ACTIVE);
if (!memcg_pgcache_limit_wq)
return -ENOMEM;
#endif
cpuhp_setup_state_nocalls(CPUHP_MM_MEMCQ_DEAD, "mm/memctrl:dead", NULL,
memcg_hotplug_cpu_dead);

@@ -12,6 +12,7 @@
#include <linux/pagecache_limit.h>
DEFINE_STATIC_KEY_FALSE(pagecache_limit_enabled_key);
struct workqueue_struct *memcg_pgcache_limit_wq;
static int __init setup_pagecache_limit(char *s)
{
@@ -66,6 +67,19 @@ void memcg_add_pgcache_limit_reclaimed(struct mem_cgroup *memcg,
nr);
}
void memcg_pgcache_limit_work_func(struct work_struct *work)
{
struct mem_cgroup *memcg;
memcg = container_of(work, struct mem_cgroup, pgcache_limit_work);
if (!is_memcg_pgcache_limit_enabled(memcg))
return;
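/*
 * Borrow kswapd's task flags: PF_KSWAPD makes current_is_kswapd() true,
 * which unlocks the dirty-page writeback path in __memcg_pagecache_shrink,
 * and PF_MEMALLOC allows dipping into memory reserves.
 */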
current->flags |= PF_SWAPWRITE | PF_MEMALLOC | PF_KSWAPD;
__memcg_pagecache_shrink(memcg, true, GFP_KERNEL);
current->flags &= ~(PF_SWAPWRITE | PF_MEMALLOC | PF_KSWAPD);
}
void memcg_pagecache_shrink(struct mem_cgroup *memcg, gfp_t gfp_mask)
{
struct mem_cgroup *tmp_memcg = memcg;
@@ -88,7 +102,11 @@ void memcg_pagecache_shrink(struct mem_cgroup *memcg, gfp_t gfp_mask)
* traverses, we select the appropriate time to enable mapped pagecache
* to be reclaimed.
*/
__memcg_pagecache_shrink(tmp_memcg, false, gfp_mask);
if (tmp_memcg->pgcache_limit_sync == PGCACHE_RECLAIM_DIRECT)
__memcg_pagecache_shrink(tmp_memcg, false, gfp_mask);
else
queue_work(memcg_pgcache_limit_wq,
&tmp_memcg->pgcache_limit_work);
} while ((tmp_memcg = parent_mem_cgroup(tmp_memcg)) &&
is_memcg_pgcache_limit_enabled(tmp_memcg));
}

@@ -4800,6 +4800,16 @@ void __memcg_pagecache_shrink(struct mem_cgroup *memcg,
(sc.priority < DEF_PRIORITY - 4))
sc.may_unmap = 1;
/*
* We only enable dirty pages to be reclaimed when the priority
* value is smaller than DEF_PRIORITY - 2 and the reclaim is in
* the asynchronous scenario, in order to minimize performance
* jitter when dirty pages are reclaimed.
*/
if (current_is_kswapd() && !memcg->pgcache_limit_sync &&
(sc.priority < DEF_PRIORITY - 2))
sc.may_writepage = 1;
if (__pagecache_shrink(memcg, &sc) < 0)
break;
} while (--sc.priority >= 0);
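
A worked example of the gating, assuming sc.priority starts at DEF_PRIORITY (12 in this kernel series; the initialization is not shown in the hunk): dirty-page writeback, restricted to the async worker via current_is_kswapd(), is enabled once sc.priority drops below 10, i.e. from the fourth pass of this loop, while mapped pages only become eligible for reclaim once sc.priority drops below 8, from the sixth pass.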