mm/memcg: Introduce watermark stepping

Increase global watermark and watermark stepping based on request:

1. If wmark_step_enable=0, the global watermark is increased by 80M.
2. If wmark_step_enable=1, the global watermark is increased by 80M and
then stepped based on the allocation request type (each step lowers it by 16M):
  - Maintain the current watermark for ALLOC_RESERVES
  - Apply wmark-step=5 for irq (original min wmark, highest priority)
  - Apply wmark-step=3 for kthread (can be set via sysfs)
  - Apply wmark-step=0 for process (default, can be set to 0-5)

Signed-off-by: Winston Wen <wentao@uniontech.com>
Signed-off-by: Fan Jie <fanjie@uniontech.com>
This commit is contained in:
Winston Wen 2024-11-19 09:23:55 +08:00 committed by Avenger-285714
parent 6be40decae
commit 92c796f851
5 changed files with 219 additions and 0 deletions

View File

@ -256,6 +256,10 @@ struct mem_cgroup {
/* OOM-Killer disable */
int oom_kill_disable;
#ifdef CONFIG_WMARK_STEP
int wmark_step;
#endif
/* memory.events and memory.events.local */
struct cgroup_file events_file;
struct cgroup_file events_local_file;

View File

@ -1282,6 +1282,20 @@ config LOCK_MM_AND_FIND_VMA
bool
depends on !STACK_GROWSUP
config WMARK_STEP
bool "WaterMark step"
depends on MEMCG
help
Increase global watermark and watermark stepping based on request:
1. If wmark_step_enable=0, the global watermark is increased by 80M.
2. If wmark_step_enable=1, the global watermark is increased by 80M and
then stepped based on the allocation request type (each step lowers it by 16M):
- Maintain the current watermark for ALLOC_RESERVES
- Apply wmark-step=5 for irq (original min wmark, highest priority)
- Apply wmark-step=3 for kthread (can be set via sysfs)
- Apply wmark-step=0 for process (default, can be set to 0-5)
source "mm/damon/Kconfig"
endmenu

View File

@ -913,9 +913,23 @@ unsigned int reclaim_clean_pages_from_list(struct zone *zone,
#define ALLOC_HIGHATOMIC 0x200 /* Allows access to MIGRATE_HIGHATOMIC */
#define ALLOC_KSWAPD 0x800 /* allow waking of kswapd, __GFP_KSWAPD_RECLAIM set */
#ifdef CONFIG_WMARK_STEP
#define ALLOC_WMARK_STEP 0x10000 /* allow the min watermark to be stepped down (see __zone_watermark_ok()) */
#endif
/* Flags that allow allocations below the min watermark. */
#define ALLOC_RESERVES (ALLOC_NON_BLOCK|ALLOC_MIN_RESERVE|ALLOC_HIGHATOMIC|ALLOC_OOM)
#ifdef CONFIG_WMARK_STEP
/* Watermark stepping tunables, defined in mm/page_alloc.c. */
extern bool wmark_step_enable;		/* master switch, toggled via sysfs */
extern unsigned int wmark_step_max;	/* upper bound for any step value */
extern unsigned int wmark_step_irq;	/* step used when !in_task() */
extern unsigned int wmark_step_kthread;	/* step used for PF_KTHREAD tasks */
extern unsigned int wmark_step_default;	/* default step for user tasks */
extern unsigned int wmark_step_size;	/* size of one step in bytes (SZ_16M) */
int get_wmark_step_value(void);
#endif
enum ttu_flags;
struct tlbflush_unmap_batch;

View File

@ -5463,6 +5463,9 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
if (parent) {
WRITE_ONCE(memcg->swappiness, mem_cgroup_swappiness(parent));
WRITE_ONCE(memcg->oom_kill_disable, READ_ONCE(parent->oom_kill_disable));
#ifdef CONFIG_WMARK_STEP
WRITE_ONCE(memcg->wmark_step, READ_ONCE(parent->wmark_step));
#endif
page_counter_init(&memcg->memory, &parent->memory);
page_counter_init(&memcg->swap, &parent->swap);
@ -5474,6 +5477,9 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
page_counter_init(&memcg->swap, NULL);
page_counter_init(&memcg->kmem, NULL);
page_counter_init(&memcg->tcpmem, NULL);
#ifdef CONFIG_WMARK_STEP
memcg->wmark_step = wmark_step_default;
#endif
root_mem_cgroup = memcg;
return &memcg->css;
@ -6863,6 +6869,67 @@ static ssize_t memory_oom_group_write(struct kernfs_open_file *of,
return nbytes;
}
#ifdef CONFIG_WMARK_STEP
/*
 * get_wmark_step_value - resolve the watermark step for the current context.
 *
 * Priority order:
 *   - interrupt context gets wmark_step_irq (highest priority);
 *   - kernel threads get wmark_step_kthread;
 *   - user tasks get their memcg's wmark_step, falling back to
 *     wmark_step_default when memcg is disabled or unavailable.
 */
int get_wmark_step_value(void)
{
	struct mem_cgroup *memcg;
	int wmark_step = wmark_step_default;

	/* Interrupts do not belong to any task/memcg; use the fixed irq step. */
	if (!in_task())
		return wmark_step_irq;

	if (current->flags & PF_KTHREAD)
		return wmark_step_kthread;

	if (mem_cgroup_disabled())
		return wmark_step_default;

	rcu_read_lock();
	memcg = mem_cgroup_from_task(current);
	if (memcg)
		/* Pairs with WRITE_ONCE() in memory_wmark_step_write(). */
		wmark_step = READ_ONCE(memcg->wmark_step);
	rcu_read_unlock();

	return wmark_step;
}
static int memory_wmark_step_show(struct seq_file *m, void *v)
{
struct mem_cgroup *memcg = mem_cgroup_from_seq(m);
seq_printf(m, "%d\n", READ_ONCE(memcg->wmark_step));
return 0;
}
/*
 * Write handler for memory.wmark_step: parse an integer in
 * [0, wmark_step_max] and propagate it to this memcg and all of its
 * descendants.  The root memcg is read-only (-EPERM).
 */
static ssize_t memory_wmark_step_write(struct kernfs_open_file *of,
		char *buf, size_t nbytes, loff_t off)
{
	struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
	struct mem_cgroup *iter;
	int ret, wmark_step;

	if (mem_cgroup_is_root(memcg))
		return -EPERM;

	/*
	 * strstrip() never returns NULL (it returns a pointer into the
	 * buffer it was given), so no NULL check is needed; kstrtoint()
	 * already rejects an empty string with -EINVAL.
	 */
	buf = strstrip(buf);
	ret = kstrtoint(buf, 0, &wmark_step);
	if (ret)
		return ret;

	if (wmark_step < 0 || wmark_step > wmark_step_max)
		return -EINVAL;

	/* Apply hierarchically so descendants stay consistent. */
	for_each_mem_cgroup_tree(iter, memcg)
		WRITE_ONCE(iter->wmark_step, wmark_step);

	return nbytes;
}
#endif
static ssize_t memory_reclaim(struct kernfs_open_file *of, char *buf,
size_t nbytes, loff_t off)
{
@ -6968,6 +7035,14 @@ static struct cftype memory_files[] = {
.seq_show = memory_oom_group_show,
.write = memory_oom_group_write,
},
#ifdef CONFIG_WMARK_STEP
{
.name = "wmark_step",
.flags = CFTYPE_NOT_ON_ROOT,
.seq_show = memory_wmark_step_show,
.write = memory_wmark_step_write,
},
#endif
{
.name = "reclaim",
.flags = CFTYPE_NS_DELEGATABLE,
@ -7550,6 +7625,63 @@ static int __init cgroup_memory(char *s)
}
__setup("cgroup.memory=", cgroup_memory);
#ifdef CONFIG_WMARK_STEP
/* sysfs read: current watermark step applied to kernel threads. */
static ssize_t wmark_step_kthread_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
{
	return sysfs_emit(buf, "%u\n", wmark_step_kthread);
}
/*
 * sysfs write: set the watermark step used by kernel threads.
 * Accepts values in [0, wmark_step_max].
 */
static ssize_t wmark_step_kthread_store(struct kobject *kobj, struct kobj_attribute *attr,
		const char *buf, size_t len)
{
	unsigned int value;
	int err;

	/* Propagate the parser's error (-EINVAL or -ERANGE) instead of flattening it. */
	err = kstrtouint(buf, 0, &value);
	if (err)
		return err;

	if (value > wmark_step_max)
		return -EINVAL;

	wmark_step_kthread = value;

	return len;
}
static struct kobj_attribute wmark_step_kthread_attr = __ATTR_RW(wmark_step_kthread);
/* sysfs read: whether watermark stepping is enabled (0 or 1). */
static ssize_t wmark_step_enabled_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
{
	return sysfs_emit(buf, "%d\n", wmark_step_enable);
}
/*
 * sysfs write: toggle watermark stepping.  Accepts the usual boolean
 * spellings understood by kstrtobool() ("0"/"1", "y"/"n", "on"/"off", ...).
 */
static ssize_t wmark_step_enabled_store(struct kobject *kobj, struct kobj_attribute *attr,
		const char *buf, size_t len)
{
	bool enable;
	int err;

	err = kstrtobool(buf, &enable);
	if (err)
		return -EINVAL;

	wmark_step_enable = enable;

	return len;
}
/* Exposed as /sys/kernel/mm/wmark_step/enabled (0644). */
static struct kobj_attribute wmark_step_enabled_attr = __ATTR(enabled, 0644,
	wmark_step_enabled_show, wmark_step_enabled_store);

/* All wmark_step sysfs knobs, registered in mem_cgroup_init(). */
static struct attribute *wmark_step_attrs[] = {
	&wmark_step_enabled_attr.attr,
	&wmark_step_kthread_attr.attr,
	NULL
};

/* Group directory name: /sys/kernel/mm/wmark_step/. */
static const struct attribute_group wmark_step_attr_group = {
	.name = "wmark_step",
	.attrs = wmark_step_attrs,
};
#endif
/*
* subsys_initcall() for memory controller.
*
@ -7588,6 +7720,11 @@ static int __init mem_cgroup_init(void)
soft_limit_tree.rb_tree_per_node[node] = rtpn;
}
#ifdef CONFIG_WMARK_STEP
if (sysfs_create_group(mm_kobj, &wmark_step_attr_group))
pr_warn("wmark-step: failed to create sysfs group\n");
#endif
return 0;
}
subsys_initcall(mem_cgroup_init);

View File

@ -205,6 +205,15 @@ EXPORT_SYMBOL(node_states);
gfp_t gfp_allowed_mask __read_mostly = GFP_BOOT_MASK;
#ifdef CONFIG_WMARK_STEP
/*
 * Watermark stepping tunables (see CONFIG_WMARK_STEP help).
 * wmark_step_enable and wmark_step_kthread are writable via
 * /sys/kernel/mm/wmark_step/; the rest are compile-time defaults.
 */
bool wmark_step_enable __read_mostly = false;
unsigned int wmark_step_max __read_mostly = 5;		/* maximum step value */
unsigned int wmark_step_irq __read_mostly = 5;		/* step for interrupt context */
unsigned int wmark_step_kthread __read_mostly = 3;	/* step for kernel threads */
unsigned int wmark_step_default __read_mostly = 0;	/* default step for user tasks */
unsigned int wmark_step_size __read_mostly = SZ_16M;	/* size of one step in bytes */
#endif
/*
* A cached value of the page's pageblock's migratetype, used when the page is
* put on a pcplist. Used to avoid the pageblock migratetype lookup when
@ -2982,10 +2991,37 @@ bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark,
{
long min = mark;
int o;
#ifdef CONFIG_WMARK_STEP
long pages_wmark_step = (wmark_step_max * wmark_step_size) >> PAGE_SHIFT;
int wmark_step;
#endif
/* free_pages may go negative - that's OK */
free_pages -= __zone_watermark_unusable_free(z, order, alloc_flags);
#ifdef CONFIG_WMARK_STEP
if (zone_idx(z) == ZONE_NORMAL && wmark_step_enable) {
if (unlikely(min <= pages_wmark_step)) {
pr_err_once("uosste: min[%ld] < pages_wmark_step[%ld]\n",
min, pages_wmark_step);
goto skip;
}
if (unlikely(alloc_flags & ALLOC_RESERVES))
min -= pages_wmark_step;
else if (alloc_flags & ALLOC_WMARK_STEP) {
wmark_step = get_wmark_step_value();
if (wmark_step < 0 || wmark_step > wmark_step_max) {
pr_err_once("uosste: wmark_step error: %d\n", wmark_step);
goto skip;
}
min -= (wmark_step * wmark_step_size) >> PAGE_SHIFT;
}
}
skip:
#endif
if (unlikely(alloc_flags & ALLOC_RESERVES)) {
/*
* __GFP_HIGH allows access to 50% of the min reserve as well
@ -3854,6 +3890,11 @@ static inline unsigned int
gfp_to_alloc_flags(gfp_t gfp_mask, unsigned int order)
{
unsigned int alloc_flags = ALLOC_WMARK_MIN | ALLOC_CPUSET;
#ifdef CONFIG_WMARK_STEP
/* Only enabled when ALLOC_WMARK_MIN */
if (wmark_step_enable)
alloc_flags |= ALLOC_WMARK_STEP;
#endif
/*
* __GFP_HIGH is assumed to be the same as ALLOC_MIN_RESERVE
@ -5856,6 +5897,9 @@ static void setup_per_zone_lowmem_reserve(void)
static void __setup_per_zone_wmarks(void)
{
unsigned long pages_min = min_free_kbytes >> (PAGE_SHIFT - 10);
#ifdef CONFIG_WMARK_STEP
unsigned long pages_wmark_step = (wmark_step_max * wmark_step_size) >> PAGE_SHIFT;
#endif
unsigned long lowmem_pages = 0;
struct zone *zone;
unsigned long flags;
@ -5893,6 +5937,12 @@ static void __setup_per_zone_wmarks(void)
* proportionate to the zone's size.
*/
zone->_watermark[WMARK_MIN] = tmp;
#ifdef CONFIG_WMARK_STEP
/* Put all wmark step in zone normal */
if (zone_idx(zone) == ZONE_NORMAL)
zone->_watermark[WMARK_MIN] += pages_wmark_step;
#endif
}
/*