anolis: mm: pagecache_limit: reclaim pagecache smoothly

ANBZ: #20248

In the implementation of the previous version of the pagecache limit,
when the pagecache generated by processes in a memory cgroup reaches
the configured upper limit, the pagecache reclaim logic is triggered
immediately to reduce the pagecache usage to below the limit.

In some scenarios, the pagecache generated by the process will
repeatedly reach the pagecache limit in a short period of time. In
this case, whether the pagecache limit uses synchronous or
asynchronous reclaim, a large amount of CPU resources will be
consumed for pagecache reclaim, resulting in a decline in overall
system performance.

This patch adds two parameters to the memory cgroup:
`pagecache_limit.reclaim_interval_ms` and `pagecache_limit.reclaim_bytes`.
When the `reclaim_interval` parameter is not configured or equals zero,
the pagecache limit reclaim behavior remains the same as before.

After setting the `reclaim_interval` parameter to a non-zero integer,
when the pagecache generated by the process reaches the configured
limit, the pagecache reclaim work will start after waiting for the
specified number of milliseconds instead of starting the reclaim
immediately. If the reclaim_bytes parameter is set at the same time,
the number of bytes reclaimed each time will not exceed the value
specified by this parameter. If the amount of pagecache in the memcg
is still greater than the pagecache limit after the reclaim
is completed, the reclaim work will be executed again after waiting
for `reclaim_interval` milliseconds.

Signed-off-by: hr567 <hr567@linux.alibaba.com>
Reviewed-by: Baolin Wang <baolin.wang@linux.alibaba.com>
Link: https://gitee.com/anolis/cloud-kernel/pulls/5040
This commit is contained in:
hr567 2025-04-08 19:48:28 +08:00 committed by 小龙
parent 6d3fcc04dd
commit 0d2c921909
4 changed files with 75 additions and 6 deletions

View File

@ -23,6 +23,8 @@
#include <linux/writeback.h>
#include <linux/page-flags.h>
#include <linux/kidled.h>
#include <linux/timer.h>
#include <linux/workqueue.h>
struct mem_cgroup;
struct obj_cgroup;
@ -503,7 +505,9 @@ struct mem_cgroup {
bool allow_pgcache_limit;
unsigned long pgcache_limit_size;
bool pgcache_limit_sync;
struct work_struct pgcache_limit_work;
struct delayed_work pgcache_limit_work;
unsigned long pgcache_limit_reclaim_interval; /* jiffies of millisecond */
size_t pgcache_limit_reclaim_bytes;
#endif
#if IS_ENABLED(CONFIG_RECLAIM_COLDPGS)

View File

@ -6689,6 +6689,50 @@ static int mem_cgroup_allow_pgcache_sync_write(struct cgroup_subsys_state *css,
return 0;
}
/*
 * Read handler for "pagecache_limit.reclaim_interval_ms".
 *
 * Returns the configured reclaim delay, converted from jiffies back to
 * milliseconds for userspace.
 */
static u64
mem_cgroup_pgcache_reclaim_interval_read(struct cgroup_subsys_state *css,
					 struct cftype *cft)
{
	struct mem_cgroup *memcg = mem_cgroup_from_css(css);

	/*
	 * The field can be updated concurrently by the write handler with
	 * no lock held, so read it with READ_ONCE() — same convention as
	 * the pgcache_limit_reclaim_bytes reader.
	 */
	return jiffies_to_msecs(READ_ONCE(memcg->pgcache_limit_reclaim_interval));
}
/*
 * Write handler for "pagecache_limit.reclaim_interval_ms".
 *
 * Accepts a delay in milliseconds; zero restores the original
 * reclaim-immediately behavior. Returns nbytes on success, -EINVAL on a
 * malformed or out-of-range value.
 */
static ssize_t
mem_cgroup_pgcache_reclaim_interval_write(struct kernfs_open_file *of,
					  char *buf, size_t nbytes, loff_t off)
{
	struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
	unsigned long long interval;

	if (kstrtou64(strstrip(buf), 0, &interval))
		return -EINVAL;

	/*
	 * msecs_to_jiffies() takes an unsigned int; reject values that
	 * would be silently truncated instead of storing a surprising
	 * interval.
	 */
	if (interval > UINT_MAX)
		return -EINVAL;

	/* Readers (read handler and reclaim path) access this locklessly. */
	WRITE_ONCE(memcg->pgcache_limit_reclaim_interval,
		   msecs_to_jiffies(interval));

	return nbytes;
}
static u64
mem_cgroup_pgcache_reclaim_bytes_read(struct cgroup_subsys_state *css,
struct cftype *cft)
{
struct mem_cgroup *memcg = mem_cgroup_from_css(css);
return READ_ONCE(memcg->pgcache_limit_reclaim_bytes);
}
/*
 * Write handler for "pagecache_limit.reclaim_bytes".
 *
 * Accepts a byte count with optional K/M/G suffix (memparse); zero means
 * "no cap". Always consumes the full write, so returns nbytes.
 */
static ssize_t
mem_cgroup_pgcache_reclaim_bytes_write(struct kernfs_open_file *of, char *buf,
				       size_t nbytes, loff_t off)
{
	struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));

	/*
	 * Paired with the READ_ONCE() in the read handler: the field is
	 * accessed locklessly, so publish the new value with WRITE_ONCE().
	 * Cast to size_t to match the field's declared type.
	 */
	WRITE_ONCE(memcg->pgcache_limit_reclaim_bytes,
		   (size_t)memparse(strstrip(buf), NULL));

	return nbytes;
}
#endif /* CONFIG_PAGECACHE_LIMIT */
#ifdef CONFIG_PGTABLE_BIND
@ -7397,6 +7441,16 @@ static struct cftype mem_cgroup_legacy_files[] = {
.read_u64 = mem_cgroup_allow_pgcache_sync_read,
.write_u64 = mem_cgroup_allow_pgcache_sync_write,
},
{
.name = "pagecache_limit.reclaim_interval_ms",
.read_u64 = mem_cgroup_pgcache_reclaim_interval_read,
.write = mem_cgroup_pgcache_reclaim_interval_write,
},
{
.name = "pagecache_limit.reclaim_bytes",
.read_u64 = mem_cgroup_pgcache_reclaim_bytes_read,
.write = mem_cgroup_pgcache_reclaim_bytes_write,
},
#endif
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
{
@ -7674,7 +7728,7 @@ static struct mem_cgroup *mem_cgroup_alloc(void)
INIT_WORK(&memcg->high_work, high_work_func);
INIT_WORK(&memcg->wmark_work, wmark_work_func);
#ifdef CONFIG_PAGECACHE_LIMIT
INIT_WORK(&memcg->pgcache_limit_work, memcg_pgcache_limit_work_func);
INIT_DELAYED_WORK(&memcg->pgcache_limit_work, memcg_pgcache_limit_work_func);
#endif
INIT_LIST_HEAD(&memcg->oom_notify);
mutex_init(&memcg->thresholds_lock);
@ -7762,6 +7816,8 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
#ifdef CONFIG_PAGECACHE_LIMIT
memcg->allow_pgcache_limit = parent->allow_pgcache_limit;
memcg->pgcache_limit_sync = parent->pgcache_limit_sync;
memcg->pgcache_limit_reclaim_interval = parent->pgcache_limit_reclaim_interval;
memcg->pgcache_limit_reclaim_bytes = parent->pgcache_limit_reclaim_bytes;
#endif
}
if (!parent) {
@ -7910,7 +7966,7 @@ static void mem_cgroup_css_free(struct cgroup_subsys_state *css)
cancel_work_sync(&memcg->high_work);
cancel_work_sync(&memcg->wmark_work);
#ifdef CONFIG_PAGECACHE_LIMIT
cancel_work_sync(&memcg->pgcache_limit_work);
cancel_delayed_work_sync(&memcg->pgcache_limit_work);
#endif
mem_cgroup_remove_from_trees(memcg);
memcg_free_shrinker_maps(memcg);

View File

@ -73,15 +73,20 @@ void memcg_add_pgcache_limit_reclaimed(struct mem_cgroup *memcg,
/*
 * Deferred pagecache-limit reclaim: workqueue callback that shrinks a
 * memcg's pagecache and, when a reclaim interval is configured, re-arms
 * itself until the memcg is back under its limit.
 */
void memcg_pgcache_limit_work_func(struct work_struct *work)
{
/* Recover the containing delayed_work, then the owning memcg. */
struct delayed_work *dwork = to_delayed_work(work);
struct mem_cgroup *memcg;
/* NOTE(review): the next two lines are diff-rendering residue — the
 * removed (container_of(work, ...)) and added (container_of(dwork, ...))
 * lines of the patch both appear here; only the dwork-based one is the
 * new code. */
memcg = container_of(work, struct mem_cgroup, pgcache_limit_work);
memcg = container_of(dwork, struct mem_cgroup, pgcache_limit_work);
if (!is_memcg_pgcache_limit_enabled(memcg))
return;
/* Borrow kswapd-style flags so reclaim may write back and dip into
 * reserves for the duration of the shrink. */
current->flags |= PF_SWAPWRITE | PF_MEMALLOC | PF_KSWAPD;
__memcg_pagecache_shrink(memcg, true, GFP_KERNEL);
current->flags &= ~(PF_SWAPWRITE | PF_MEMALLOC | PF_KSWAPD);
/* Still over the limit and throttling is enabled: try again after the
 * configured interval instead of looping immediately. */
if (memcg->pgcache_limit_reclaim_interval != 0 &&
memcg_get_pgcache_overflow_size(memcg))
queue_delayed_work(memcg_pgcache_limit_wq, dwork,
memcg->pgcache_limit_reclaim_interval);
}
void memcg_pagecache_shrink(struct mem_cgroup *memcg, gfp_t gfp_mask)
@ -109,8 +114,9 @@ void memcg_pagecache_shrink(struct mem_cgroup *memcg, gfp_t gfp_mask)
if (tmp_memcg->pgcache_limit_sync == PGCACHE_RECLAIM_DIRECT)
__memcg_pagecache_shrink(tmp_memcg, false, gfp_mask);
else
queue_work(memcg_pgcache_limit_wq,
&tmp_memcg->pgcache_limit_work);
queue_delayed_work(memcg_pgcache_limit_wq,
&tmp_memcg->pgcache_limit_work,
tmp_memcg->pgcache_limit_reclaim_interval);
} while ((tmp_memcg = parent_mem_cgroup(tmp_memcg)) &&
is_memcg_pgcache_limit_enabled(tmp_memcg));
}

View File

@ -7811,6 +7811,9 @@ void __memcg_pagecache_shrink(struct mem_cgroup *memcg,
nr_should_reclaim = memcg_get_pgcache_overflow_size(memcg);
if (!nr_should_reclaim)
return;
if (memcg->pgcache_limit_reclaim_bytes &&
memcg->pgcache_limit_reclaim_bytes < nr_should_reclaim)
nr_should_reclaim = memcg->pgcache_limit_reclaim_bytes;
sc.nr_to_reclaim = max(nr_should_reclaim, SWAP_CLUSTER_MAX);
do {