anolis: mm: pagecache_limit: add memcg asynchronous reclaim support

ANBZ: #3907

This patch adds memcg pagecache limit asynchronous reclaim support. Since
pagecache limit asynchronous reclaim is scheduled on a workqueue, it can
do more work than synchronous reclaim, e.g. write out dirty pages. In
order to minimize performance jitter when dirty pages are reclaimed, we
only enable dirty pages to be reclaimed when the priority value is
smaller than DEF_PRIORITY - 2 and the reclaim is in the asynchronous
scenario.

Signed-off-by: Xin Hao <xhao@linux.alibaba.com>
Reviewed-by: Xu Yu <xuyu@linux.alibaba.com>
Reviewed-by: Rongwei Wang <rongwei.wang@linux.alibaba.com>
Reviewed-by: Kaihao Bai <carlo.bai@linux.alibaba.com>
Link: https://gitee.com/anolis/cloud-kernel/pulls/1157

@@ -471,6 +471,8 @@ struct mem_cgroup {
#ifdef CONFIG_PAGECACHE_LIMIT
bool allow_pgcache_limit;
unsigned long pgcache_limit_size;
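/* Reclaim mode: PGCACHE_RECLAIM_DIRECT (sync) or PGCACHE_RECLAIM_ASYNC. */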
bool pgcache_limit_sync;
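/* Deferred pagecache-limit reclaim, queued on memcg_pgcache_limit_wq. */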
struct work_struct pgcache_limit_work;
#endif
#if IS_ENABLED(CONFIG_RECLAIM_COLDPGS)

@@ -6,6 +6,7 @@
#ifdef CONFIG_PAGECACHE_LIMIT
DECLARE_STATIC_KEY_FALSE(pagecache_limit_enabled_key);
extern struct workqueue_struct *memcg_pgcache_limit_wq;
enum pgcache_limit_reclaim_type {
/* per-memcg or global pagecache reclaim defaults to async */
@@ -24,6 +25,7 @@ unsigned long memcg_get_pgcache_overflow_size(struct mem_cgroup *memcg);
void __memcg_pagecache_shrink(struct mem_cgroup *memcg,
bool may_unmap, gfp_t gfp_mask);
void memcg_pagecache_shrink(struct mem_cgroup *memcg, gfp_t gfp_mask);
void memcg_pgcache_limit_work_func(struct work_struct *work);
#else
static inline bool pagecache_limit_enabled(void)
@@ -50,5 +52,8 @@ static inline void memcg_pagecache_shrink(struct mem_cgroup *memcg,
gfp_t gfp_mask)
{
}
static inline void memcg_pgcache_limit_work_func(struct work_struct *work)
{
}
#endif
#endif
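
The hunk above cuts off the enum body. A plausible reconstruction, assuming the async mode is the zero value (an assumption, but consistent with the read/write handlers below, which store 0 for async and 1 for direct reclaim):

enum pgcache_limit_reclaim_type {
	/* per-memcg or global pagecache reclaim defaults to async */
	PGCACHE_RECLAIM_ASYNC = 0,	/* assumed value, not shown in the hunk */
	PGCACHE_RECLAIM_DIRECT,
};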

@@ -6395,6 +6395,32 @@ static ssize_t mem_cgroup_pgcache_limit_size_write(struct kernfs_open_file *of,
return nbytes;
}
static u64 mem_cgroup_allow_pgcache_sync_read(struct cgroup_subsys_state *css,
struct cftype *cft)
{
struct mem_cgroup *memcg = mem_cgroup_from_css(css);
return READ_ONCE(memcg->pgcache_limit_sync);
}
static int mem_cgroup_allow_pgcache_sync_write(struct cgroup_subsys_state *css,
struct cftype *cft, u64 val)
{
struct mem_cgroup *memcg = mem_cgroup_from_css(css);
if (val > 1)
return -EINVAL;
if (memcg->pgcache_limit_sync == val)
return 0;
if (val)
memcg->pgcache_limit_sync = PGCACHE_RECLAIM_DIRECT;
else
memcg->pgcache_limit_sync = PGCACHE_RECLAIM_ASYNC;
return 0;
}
#endif /* CONFIG_PAGECACHE_LIMIT */
static struct cftype mem_cgroup_legacy_files[] = {
@@ -6689,6 +6715,11 @@ static struct cftype mem_cgroup_legacy_files[] = {
.read_u64 = mem_cgroup_pgcache_limit_size_read,
.write = mem_cgroup_pgcache_limit_size_write,
},
{
.name = "pagecache_limit.sync",
.read_u64 = mem_cgroup_allow_pgcache_sync_read,
.write_u64 = mem_cgroup_allow_pgcache_sync_write,
},
#endif
{ }, /* terminate */
};
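
For illustration, a minimal userspace sketch of flipping the new knob, assuming the v1 memory controller is mounted at /sys/fs/cgroup/memory and a group named "example" exists (both the mount point and group name are hypothetical; the legacy file name follows from the "pagecache_limit.sync" cftype above):

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	/* Hypothetical path; depends on where the v1 memory controller is mounted. */
	const char *knob = "/sys/fs/cgroup/memory/example/memory.pagecache_limit.sync";
	int fd = open(knob, O_WRONLY);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	/* "1" selects synchronous (direct) reclaim, "0" the default async path. */
	if (write(fd, "1", 1) != 1)
		perror("write");
	close(fd);
	return 0;
}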
@@ -6897,6 +6928,9 @@ static struct mem_cgroup *mem_cgroup_alloc(void)
INIT_WORK(&memcg->high_work, high_work_func);
INIT_WORK(&memcg->wmark_work, wmark_work_func);
#ifdef CONFIG_PAGECACHE_LIMIT
INIT_WORK(&memcg->pgcache_limit_work, memcg_pgcache_limit_work_func);
#endif
INIT_LIST_HEAD(&memcg->oom_notify);
mutex_init(&memcg->thresholds_lock);
spin_lock_init(&memcg->move_lock);
@@ -7097,6 +7131,9 @@ static void mem_cgroup_css_free(struct cgroup_subsys_state *css)
vmpressure_cleanup(&memcg->vmpressure);
cancel_work_sync(&memcg->high_work);
cancel_work_sync(&memcg->wmark_work);
#ifdef CONFIG_PAGECACHE_LIMIT
cancel_work_sync(&memcg->pgcache_limit_work);
#endif
mem_cgroup_remove_from_trees(memcg);
memcg_free_shrinker_maps(memcg);
memcg_free_kmem(memcg);
@@ -8937,6 +8974,15 @@ static int __init mem_cgroup_init(void)
if (!memcg_wmark_wq)
return -ENOMEM;
#ifdef CONFIG_PAGECACHE_LIMIT
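/*
 * WQ_MEM_RECLAIM guarantees a rescuer thread so this workqueue can make
 * forward progress under memory pressure; WQ_UNBOUND lets the work run
 * on any CPU; WQ_FREEZABLE parks it across system suspend.
 */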
memcg_pgcache_limit_wq = alloc_workqueue("memcg_pgcache_limit",
WQ_FREEZABLE |
WQ_UNBOUND | WQ_MEM_RECLAIM,
WQ_UNBOUND_MAX_ACTIVE);
if (!memcg_pgcache_limit_wq)
return -ENOMEM;
#endif
cpuhp_setup_state_nocalls(CPUHP_MM_MEMCQ_DEAD, "mm/memctrl:dead", NULL,
memcg_hotplug_cpu_dead);

@@ -12,6 +12,7 @@
#include <linux/pagecache_limit.h>
DEFINE_STATIC_KEY_FALSE(pagecache_limit_enabled_key);
struct workqueue_struct *memcg_pgcache_limit_wq;
static int __init setup_pagecache_limit(char *s)
{
@@ -66,6 +67,19 @@ void memcg_add_pgcache_limit_reclaimed(struct mem_cgroup *memcg,
nr);
}
void memcg_pgcache_limit_work_func(struct work_struct *work)
{
struct mem_cgroup *memcg;
memcg = container_of(work, struct mem_cgroup, pgcache_limit_work);
if (!is_memcg_pgcache_limit_enabled(memcg))
return;
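/*
 * Borrow kswapd's task flags: PF_KSWAPD makes current_is_kswapd() true,
 * which unlocks the dirty-page writeback path in __memcg_pagecache_shrink,
 * and PF_MEMALLOC allows dipping into memory reserves.
 */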
current->flags |= PF_SWAPWRITE | PF_MEMALLOC | PF_KSWAPD;
__memcg_pagecache_shrink(memcg, true, GFP_KERNEL);
current->flags &= ~(PF_SWAPWRITE | PF_MEMALLOC | PF_KSWAPD);
}
void memcg_pagecache_shrink(struct mem_cgroup *memcg, gfp_t gfp_mask)
{
struct mem_cgroup *tmp_memcg = memcg;
@@ -88,7 +102,11 @@ void memcg_pagecache_shrink(struct mem_cgroup *memcg, gfp_t gfp_mask)
* traverses, we select the appropriate time to enable mapped pagecache
* to be reclaimed.
*/
__memcg_pagecache_shrink(tmp_memcg, false, gfp_mask);
if (tmp_memcg->pgcache_limit_sync == PGCACHE_RECLAIM_DIRECT)
__memcg_pagecache_shrink(tmp_memcg, false, gfp_mask);
else
queue_work(memcg_pgcache_limit_wq,
&tmp_memcg->pgcache_limit_work);
} while ((tmp_memcg = parent_mem_cgroup(tmp_memcg)) &&
is_memcg_pgcache_limit_enabled(tmp_memcg));
}

@@ -4800,6 +4800,16 @@ void __memcg_pagecache_shrink(struct mem_cgroup *memcg,
(sc.priority < DEF_PRIORITY - 4))
sc.may_unmap = 1;
/*
* We only enable dirty pages to be reclaimed when the priority
* value is smaller than DEF_PRIORITY - 2 and the reclaim is in
* the asynchronous scenario, in order to minimize performance
* jitter when dirty pages are reclaimed.
*/
if (current_is_kswapd() && !memcg->pgcache_limit_sync &&
(sc.priority < DEF_PRIORITY - 2))
sc.may_writepage = 1;
if (__pagecache_shrink(memcg, &sc) < 0)
break;
} while (--sc.priority >= 0);
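
A worked example of the gating, assuming sc.priority starts at DEF_PRIORITY (12 in this kernel series; the initialization is not shown in the hunk): dirty-page writeback, restricted to the async worker via current_is_kswapd(), is enabled once sc.priority drops below 10, i.e. from the fourth pass of this loop, while mapped pages only become eligible for reclaim once sc.priority drops below 8, from the sixth pass.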