anolis: mm: pagecache_limit: reclaim pagecache smoothly

ANBZ: #20248

In the implementation of the previous version of the pagecache limit,
when the pagecache generated by processes in a memory cgroup reaches
the configured upper limit, the pagecache reclaim logic is triggered
immediately to reduce the pagecache usage to below the limit.

In some scenarios, the pagecache generated by the process will
repeatedly reach the pagecache limit in a short period of time. In
this case, whether the pagecache limit uses synchronous or
asynchronous reclaim, a large amount of CPU resources will be
consumed for pagecache reclaim, resulting in a decline in overall
system performance.

This patch adds two parameters to the memory cgroup:
`pagecache_limit.reclaim_interval_ms` and `pagecache_limit.reclaim_bytes`.
When the `reclaim_interval` parameter is not configured or equals zero,
the pagecache limit reclaim behavior remains the same as before.

After setting the `reclaim_interval` parameter to a non-zero integer,
when the pagecache generated by the process reaches the configured
limit, the pagecache reclaim work will start after waiting for the
specified number of milliseconds instead of starting the reclaim
immediately. If the reclaim_bytes parameter is set at the same time,
the number of bytes reclaimed each time will not exceed the value
specified by this parameter. If the amount of pagecache in the memcg
is still greater than the pagecache limit after the reclaim
is completed, the reclaim work will be executed again after waiting
for `reclaim_interval` milliseconds.

Signed-off-by: hr567 <hr567@linux.alibaba.com>
Reviewed-by: Baolin Wang <baolin.wang@linux.alibaba.com>
Link: https://gitee.com/anolis/cloud-kernel/pulls/5040
This commit is contained in:
hr567 2025-04-08 19:48:28 +08:00 committed by 小龙
parent 6d3fcc04dd
commit 0d2c921909
4 changed files with 75 additions and 6 deletions

View File

@ -23,6 +23,8 @@
#include <linux/writeback.h>
#include <linux/page-flags.h>
#include <linux/kidled.h>
#include <linux/timer.h>
#include <linux/workqueue.h>
struct mem_cgroup;
struct obj_cgroup;
@ -503,7 +505,9 @@ struct mem_cgroup {
bool allow_pgcache_limit;
unsigned long pgcache_limit_size;
bool pgcache_limit_sync;
struct work_struct pgcache_limit_work;
struct delayed_work pgcache_limit_work;
unsigned long pgcache_limit_reclaim_interval; /* jiffies of millisecond */
size_t pgcache_limit_reclaim_bytes;
#endif
#if IS_ENABLED(CONFIG_RECLAIM_COLDPGS)

View File

@ -6689,6 +6689,50 @@ static int mem_cgroup_allow_pgcache_sync_write(struct cgroup_subsys_state *css,
return 0;
}
/*
 * Read handler for "pagecache_limit.reclaim_interval_ms".
 *
 * Returns the configured reclaim delay, converted from jiffies back to
 * milliseconds for userspace.
 */
static u64
mem_cgroup_pgcache_reclaim_interval_read(struct cgroup_subsys_state *css,
					 struct cftype *cft)
{
	struct mem_cgroup *memcg = mem_cgroup_from_css(css);

	/*
	 * The field can be updated concurrently by the write handler with
	 * no lock held, so read it with READ_ONCE() — same convention as
	 * the pgcache_limit_reclaim_bytes reader.
	 */
	return jiffies_to_msecs(READ_ONCE(memcg->pgcache_limit_reclaim_interval));
}
/*
 * Write handler for "pagecache_limit.reclaim_interval_ms".
 *
 * Accepts a delay in milliseconds; zero restores the original
 * reclaim-immediately behavior. Returns nbytes on success, -EINVAL on a
 * malformed or out-of-range value.
 */
static ssize_t
mem_cgroup_pgcache_reclaim_interval_write(struct kernfs_open_file *of,
					  char *buf, size_t nbytes, loff_t off)
{
	struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
	unsigned long long interval;

	if (kstrtou64(strstrip(buf), 0, &interval))
		return -EINVAL;

	/*
	 * msecs_to_jiffies() takes an unsigned int; reject values that
	 * would be silently truncated instead of storing a surprising
	 * interval.
	 */
	if (interval > UINT_MAX)
		return -EINVAL;

	/* Readers (read handler and reclaim path) access this locklessly. */
	WRITE_ONCE(memcg->pgcache_limit_reclaim_interval,
		   msecs_to_jiffies(interval));

	return nbytes;
}
static u64
mem_cgroup_pgcache_reclaim_bytes_read(struct cgroup_subsys_state *css,
struct cftype *cft)
{
struct mem_cgroup *memcg = mem_cgroup_from_css(css);
return READ_ONCE(memcg->pgcache_limit_reclaim_bytes);
}
/*
 * Write handler for "pagecache_limit.reclaim_bytes".
 *
 * Accepts a byte count with optional K/M/G suffix (memparse); zero means
 * "no cap". Always consumes the full write, so returns nbytes.
 */
static ssize_t
mem_cgroup_pgcache_reclaim_bytes_write(struct kernfs_open_file *of, char *buf,
				       size_t nbytes, loff_t off)
{
	struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));

	/*
	 * Paired with the READ_ONCE() in the read handler: the field is
	 * accessed locklessly, so publish the new value with WRITE_ONCE().
	 * Cast to size_t to match the field's declared type.
	 */
	WRITE_ONCE(memcg->pgcache_limit_reclaim_bytes,
		   (size_t)memparse(strstrip(buf), NULL));

	return nbytes;
}
#endif /* CONFIG_PAGECACHE_LIMIT */
#ifdef CONFIG_PGTABLE_BIND
@ -7397,6 +7441,16 @@ static struct cftype mem_cgroup_legacy_files[] = {
.read_u64 = mem_cgroup_allow_pgcache_sync_read,
.write_u64 = mem_cgroup_allow_pgcache_sync_write,
},
{
.name = "pagecache_limit.reclaim_interval_ms",
.read_u64 = mem_cgroup_pgcache_reclaim_interval_read,
.write = mem_cgroup_pgcache_reclaim_interval_write,
},
{
.name = "pagecache_limit.reclaim_bytes",
.read_u64 = mem_cgroup_pgcache_reclaim_bytes_read,
.write = mem_cgroup_pgcache_reclaim_bytes_write,
},
#endif
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
{
@ -7674,7 +7728,7 @@ static struct mem_cgroup *mem_cgroup_alloc(void)
INIT_WORK(&memcg->high_work, high_work_func);
INIT_WORK(&memcg->wmark_work, wmark_work_func);
#ifdef CONFIG_PAGECACHE_LIMIT
INIT_WORK(&memcg->pgcache_limit_work, memcg_pgcache_limit_work_func);
INIT_DELAYED_WORK(&memcg->pgcache_limit_work, memcg_pgcache_limit_work_func);
#endif
INIT_LIST_HEAD(&memcg->oom_notify);
mutex_init(&memcg->thresholds_lock);
@ -7762,6 +7816,8 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
#ifdef CONFIG_PAGECACHE_LIMIT
memcg->allow_pgcache_limit = parent->allow_pgcache_limit;
memcg->pgcache_limit_sync = parent->pgcache_limit_sync;
memcg->pgcache_limit_reclaim_interval = parent->pgcache_limit_reclaim_interval;
memcg->pgcache_limit_reclaim_bytes = parent->pgcache_limit_reclaim_bytes;
#endif
}
if (!parent) {
@ -7910,7 +7966,7 @@ static void mem_cgroup_css_free(struct cgroup_subsys_state *css)
cancel_work_sync(&memcg->high_work);
cancel_work_sync(&memcg->wmark_work);
#ifdef CONFIG_PAGECACHE_LIMIT
cancel_work_sync(&memcg->pgcache_limit_work);
cancel_delayed_work_sync(&memcg->pgcache_limit_work);
#endif
mem_cgroup_remove_from_trees(memcg);
memcg_free_shrinker_maps(memcg);

View File

@ -73,15 +73,20 @@ void memcg_add_pgcache_limit_reclaimed(struct mem_cgroup *memcg,
/*
 * Deferred pagecache-limit reclaim: workqueue callback that shrinks a
 * memcg's pagecache and, when a reclaim interval is configured, re-arms
 * itself until the memcg is back under its limit.
 */
void memcg_pgcache_limit_work_func(struct work_struct *work)
{
/* Recover the containing delayed_work, then the owning memcg. */
struct delayed_work *dwork = to_delayed_work(work);
struct mem_cgroup *memcg;
/* NOTE(review): the next two lines are diff-rendering residue — the
 * removed (container_of(work, ...)) and added (container_of(dwork, ...))
 * lines of the patch both appear here; only the dwork-based one is the
 * new code. */
memcg = container_of(work, struct mem_cgroup, pgcache_limit_work);
memcg = container_of(dwork, struct mem_cgroup, pgcache_limit_work);
if (!is_memcg_pgcache_limit_enabled(memcg))
return;
/* Borrow kswapd-style flags so reclaim may write back and dip into
 * reserves for the duration of the shrink. */
current->flags |= PF_SWAPWRITE | PF_MEMALLOC | PF_KSWAPD;
__memcg_pagecache_shrink(memcg, true, GFP_KERNEL);
current->flags &= ~(PF_SWAPWRITE | PF_MEMALLOC | PF_KSWAPD);
/* Still over the limit and throttling is enabled: try again after the
 * configured interval instead of looping immediately. */
if (memcg->pgcache_limit_reclaim_interval != 0 &&
memcg_get_pgcache_overflow_size(memcg))
queue_delayed_work(memcg_pgcache_limit_wq, dwork,
memcg->pgcache_limit_reclaim_interval);
}
void memcg_pagecache_shrink(struct mem_cgroup *memcg, gfp_t gfp_mask)
@ -109,8 +114,9 @@ void memcg_pagecache_shrink(struct mem_cgroup *memcg, gfp_t gfp_mask)
if (tmp_memcg->pgcache_limit_sync == PGCACHE_RECLAIM_DIRECT)
__memcg_pagecache_shrink(tmp_memcg, false, gfp_mask);
else
queue_work(memcg_pgcache_limit_wq,
&tmp_memcg->pgcache_limit_work);
queue_delayed_work(memcg_pgcache_limit_wq,
&tmp_memcg->pgcache_limit_work,
tmp_memcg->pgcache_limit_reclaim_interval);
} while ((tmp_memcg = parent_mem_cgroup(tmp_memcg)) &&
is_memcg_pgcache_limit_enabled(tmp_memcg));
}

View File

@ -7811,6 +7811,9 @@ void __memcg_pagecache_shrink(struct mem_cgroup *memcg,
nr_should_reclaim = memcg_get_pgcache_overflow_size(memcg);
if (!nr_should_reclaim)
return;
if (memcg->pgcache_limit_reclaim_bytes &&
memcg->pgcache_limit_reclaim_bytes < nr_should_reclaim)
nr_should_reclaim = memcg->pgcache_limit_reclaim_bytes;
sc.nr_to_reclaim = max(nr_should_reclaim, SWAP_CLUSTER_MAX);
do {