anolis: sched: don't account util_est for each cfs_rq when group_balancer disabled

ANBZ: #8765

Accounting util_est for each cfs_rq while group balancer is disabled
introduces unnecessary overhead. To avoid it, only account util_est per
cfs_rq when group balancer is enabled, and recalculate it for every
cfs_rq at once when the switch is toggled.

Signed-off-by: Cruz Zhao <CruzZhao@linux.alibaba.com>
Reviewed-by: Tianchen Ding <dtcccc@linux.alibaba.com>
Link: https://gitee.com/anolis/cloud-kernel/pulls/5529
Cruz Zhao 2025-07-18 14:55:23 +08:00 committed by 小龙
parent c44eb4e82c
commit 98170019bf
3 changed files with 91 additions and 10 deletions
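
In outline, the patch keeps util_est only at &rq->cfs while group balancer is off, backfills every cfs_rq level when the switch is turned on, and clears the non-root levels when it is turned off. The standalone C sketch below illustrates that toggle pattern with simplified stand-in names (toy_rq, toy_est_*, NR_LEVELS are made up for illustration); it is a sketch of the idea only, not the kernel code in the diff that follows.

/*
 * Toy model of the accounting toggle: estimates are tracked only at
 * level 0 (the stand-in for &rq->cfs) while the switch is off; turning
 * it on backfills the other levels at once, turning it off clears them.
 */
#include <stdbool.h>
#include <stdio.h>

#define NR_LEVELS 3	/* depth of the toy cfs_rq hierarchy */

struct toy_rq {
	bool balancer_enabled;			/* stands in for rq->group_balancer_enabled */
	unsigned long level_est[NR_LEVELS];	/* per-level util_est sums */
};

/* Enqueue a task's estimate: every level when enabled, root level only otherwise. */
static void toy_est_enqueue(struct toy_rq *rq, unsigned long est)
{
	if (rq->balancer_enabled) {
		for (int l = 0; l < NR_LEVELS; l++)
			rq->level_est[l] += est;
	} else {
		rq->level_est[0] += est;
	}
}

/* Switch on: backfill the non-root levels from the currently queued tasks. */
static void toy_est_reenqueue_all(struct toy_rq *rq, const unsigned long *tasks, int nr)
{
	for (int i = 0; i < nr; i++)
		for (int l = 1; l < NR_LEVELS; l++)
			rq->level_est[l] += tasks[i];
	rq->balancer_enabled = true;
}

/* Switch off: stop per-level accounting and clear the non-root sums. */
static void toy_est_clear_all(struct toy_rq *rq)
{
	rq->balancer_enabled = false;
	for (int l = 1; l < NR_LEVELS; l++)
		rq->level_est[l] = 0;
}

int main(void)
{
	unsigned long tasks[] = { 100, 200 };
	struct toy_rq rq = { 0 };

	toy_est_enqueue(&rq, tasks[0]);		/* disabled: only level 0 is updated */
	toy_est_reenqueue_all(&rq, tasks, 1);	/* enable: levels 1..N catch up at once */
	toy_est_enqueue(&rq, tasks[1]);		/* enabled: every level is updated */
	toy_est_clear_all(&rq);			/* disable: non-root levels are reset */

	printf("level0=%lu level1=%lu\n", rq.level_est[0], rq.level_est[1]);
	return 0;
}

The kernel-side disable path in the diff similarly just zeroes each task group's cfs_rq->avg.util_est.enqueued rather than dequeueing tasks one by one, which keeps disabling cheap.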


@@ -99,10 +99,17 @@ static void group_balancer_enable(void)
{
	sched_init_group_balancer_sched_domains();
	static_branch_enable(&__group_balancer_enabled);
	/*
	 * Ensure all previous instances of raw_spin_rq_*lock() have finished
	 * and future ones will observe group_balancer_enabled().
	 */
	synchronize_rcu();
	util_est_reenqueue_all();
}

static void group_balancer_disable(void)
{
	util_est_clear_all();
	static_branch_disable(&__group_balancer_enabled);
	sched_clear_group_balancer_sched_domains();
}
@@ -112,6 +119,11 @@ bool group_balancer_enabled(void)
	return static_branch_unlikely(&__group_balancer_enabled);
}

bool group_balancer_rq_enabled(struct rq *rq)
{
	return static_branch_unlikely(&__group_balancer_enabled) && rq->group_balancer_enabled;
}

int sched_group_balancer_enable_handler(struct ctl_table *table, int write,
		void __user *buffer, size_t *lenp, loff_t *ppos)
{


@@ -6038,6 +6038,64 @@ static inline void util_est_dequeue(struct cfs_rq *cfs_rq,
	trace_sched_util_est_cfs_tp(cfs_rq);
}

#ifdef CONFIG_GROUP_BALANCER
void util_est_reenqueue_all(void)
{
	int cpu;
	struct rq *rq;
	struct rq_flags rf;
	struct cfs_rq *cfs_rq;
	struct sched_entity *se;
	struct task_struct *p;

	cpus_read_lock();
	for_each_online_cpu(cpu) {
		rq = cpu_rq(cpu);
		rq_lock_irqsave(rq, &rf);
		list_for_each_entry(p, &rq->cfs_tasks, se.group_node) {
			se = &p->se;
			for_each_sched_entity(se) {
				cfs_rq = cfs_rq_of(se);
				if (cfs_rq != &rq->cfs)
					util_est_enqueue(cfs_rq, p);
			}
		}
		rq->group_balancer_enabled = true;
		rq_unlock_irqrestore(rq, &rf);
	}
	cpus_read_unlock();
}

static int tg_util_est_clear_down(struct task_group *tg, void *data)
{
	int cpu;
	struct rq *rq;
	struct rq_flags rf;
	struct cfs_rq *cfs_rq;

	if (tg == &root_task_group)
		return 0;

	cpus_read_lock();
	for_each_online_cpu(cpu) {
		rq = cpu_rq(cpu);
		cfs_rq = tg->cfs_rq[cpu];
		rq_lock_irqsave(rq, &rf);
		WRITE_ONCE(cfs_rq->avg.util_est.enqueued, 0);
		rq->group_balancer_enabled = false;
		rq_unlock_irqrestore(rq, &rf);
	}
	cpus_read_unlock();

	return 0;
}

void util_est_clear_all(void)
{
	walk_tg_tree_from(&root_task_group, tg_util_est_clear_down, tg_nop, NULL);
}
#endif
#define UTIL_EST_MARGIN (SCHED_CAPACITY_SCALE / 100)
/*
@@ -8221,13 +8279,16 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
	 * Let's add the task's estimated utilization to the cfs_rq's
	 * estimated utilization, before we update schedutil.
	 */
	for_each_sched_entity(se) {
		cfs_rq = cfs_rq_of(se);
		util_est_enqueue(cfs_rq, p);
	if (group_balancer_rq_enabled(rq)) {
		for_each_sched_entity(se) {
			cfs_rq = cfs_rq_of(se);
			util_est_enqueue(cfs_rq, p);
		}
		se = &p->se;
	} else {
		util_est_enqueue(&rq->cfs, p);
	}
	se = &p->se;

	/*
	 * If in_iowait is set, the code below may not trigger any cpufreq
	 * utilization updates, so do it here explicitly with the IOWAIT flag
@@ -8351,13 +8412,16 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
	int idle_h_nr_running = task_has_idle_policy(p);
	bool was_sched_idle = sched_idle_rq(rq);

	for_each_sched_entity(se) {
		cfs_rq = cfs_rq_of(se);
		util_est_dequeue(cfs_rq, p);
	if (group_balancer_rq_enabled(rq)) {
		for_each_sched_entity(se) {
			cfs_rq = cfs_rq_of(se);
			util_est_dequeue(cfs_rq, p);
		}
		se = &p->se;
	} else {
		util_est_dequeue(&rq->cfs, p);
	}
	se = &p->se;

	for_each_sched_entity(se) {
		cfs_rq = cfs_rq_of(se);
		dequeue_entity(cfs_rq, se, flags);


@@ -1542,6 +1542,7 @@ struct rq {
#ifdef CONFIG_GROUP_BALANCER
	struct group_balancer_sched_domain *gb_sd;
	bool group_balancer_enabled;
#endif

	CK_KABI_RESERVE(1)
@@ -3705,6 +3706,7 @@ extern void sched_dynamic_update(int mode);
#ifdef CONFIG_GROUP_BALANCER
extern bool group_balancer_enabled(void);
extern bool group_balancer_rq_enabled(struct rq *rq);

static inline const struct cpumask *task_allowed_cpu(struct task_struct *p)
{
	if (group_balancer_enabled()) {
@@ -3746,7 +3748,10 @@ extern unsigned long cfs_h_load(struct cfs_rq *cfs_rq);
extern bool gb_cpu_overutilized(int cpu);
extern void gb_load_balance(struct lb_env *env);
extern void task_tick_gb(struct task_struct *p);
extern void util_est_reenqueue_all(void);
extern void util_est_clear_all(void);
#else
static inline bool group_balancer_rq_enabled(struct rq *rq) { return false; }
static inline const struct cpumask *task_allowed_cpu(struct task_struct *p)
{
	return p->cpus_ptr;