anolis: sched: introduce ID_BOOK_CPU

ANBZ: #22143

In high-concurrency scenarios, highclass tasks often select the same
idle CPU when waking up, which can cause scheduling delays. The reason
is that several wakers may pick the same idle CPU before any of the
woken tasks is actually enqueued, so all but one of them end up waiting
on a busy CPU. To reduce the scheduling delays caused by this
situation, we introduce ID_BOOK_CPU:

When a highclass task is selecting an idle cpu, check whether the cpu
has been booked by other tasks and whether it is still idle before we
select it for wakeup. If the idle cpu we found has been booked by
other tasks, select again, until we book an idle cpu successfully or
reach the retry limit. If the idle cpu has not been booked by any other
task, set rq->booked to true to mark that the cpu is booked, and set
rq->booked back to false after the highclass task actually enqueues
into the rq of that cpu.

To enable ID_BOOK_CPU,
echo ID_BOOK_CPU > /sys/kernel/debug/sched_features.

To set retry limit, modify /proc/sys/kernel/sched_id_book_cpu_nr_tries.

Signed-off-by: Cruz Zhao <CruzZhao@linux.alibaba.com>
Reviewed-by: Tianchen Ding <dtcccc@linux.alibaba.com>
Link: https://gitee.com/anolis/cloud-kernel/pulls/5455
This commit is contained in:
Cruz Zhao 2025-02-10 17:28:16 +08:00
parent f55326b5b3
commit ad071c796e
6 changed files with 90 additions and 1 deletions

View File

@ -80,6 +80,7 @@ extern unsigned int sysctl_sched_cfs_bw_burst_enabled;
#ifdef CONFIG_GROUP_IDENTITY
extern unsigned int sysctl_sched_bvt_place_epsilon;
extern unsigned int sysctl_sched_idle_saver_wmark;
extern unsigned int sysctl_sched_id_book_cpu_nr_tries;
extern unsigned int sysctl_sched_group_indentity_enabled;
extern int sched_group_identity_enable_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp,

View File

@ -8657,6 +8657,7 @@ void __init sched_init(void)
rq->expel_start = rq->clock;
rq->expel_sum = 0;
seqcount_init(&rq->expel_seq);
rq->booked = false;
#endif
#ifdef CONFIG_SCHED_CORE

View File

@ -157,6 +157,7 @@ unsigned int sysctl_sched_bvt_place_epsilon = 1000000UL;
* Default: 0 msec, units: nanoseconds
*/
unsigned int sysctl_sched_idle_saver_wmark;
unsigned int sysctl_sched_id_book_cpu_nr_tries = 5;
#ifdef CONFIG_SCHED_SMT
/*
@ -1008,6 +1009,50 @@ static inline u64 get_avg_idle(struct rq *rq)
return rq->avg_idle;
}
/* Per-CPU flag set around the idle-CPU search to record whether an
 * identity-idle CPU was found. */
DEFINE_PER_CPU(bool, has_id_idle_cpu);

/*
 * Did the last idle-CPU search on this CPU find an identity-idle CPU?
 * Always false unless group identity and ID_BOOK_CPU are both enabled.
 */
static inline bool found_id_idle_cpu(void)
{
	if (group_identity_disabled() || !sched_feat(ID_BOOK_CPU))
		return false;

	return this_cpu_read(has_id_idle_cpu);
}
/*
 * Record on this CPU whether an identity-idle CPU was found; a no-op
 * unless group identity and ID_BOOK_CPU are both enabled.
 */
static inline void set_has_id_idle_cpu(bool has)
{
	if (!group_identity_disabled() && sched_feat(ID_BOOK_CPU))
		this_cpu_write(has_id_idle_cpu, has);
}
/*
 * Has @rq already been booked by a waking task?  Always false unless
 * group identity is enabled for the rq and ID_BOOK_CPU is set.
 */
static inline bool rq_booked(struct rq *rq)
{
	if (group_identity_enabled(rq) && sched_feat(ID_BOOK_CPU))
		return rq->booked;

	return false;
}
/*
 * Mark or clear @rq's booked state; a no-op unless group identity is
 * enabled for the rq and ID_BOOK_CPU is set.
 */
static inline void set_rq_booked(struct rq *rq, bool booked)
{
	if (group_identity_enabled(rq) && sched_feat(ID_BOOK_CPU))
		rq->booked = booked;
}
/*
 * Retry budget for booking an idle CPU, taken from
 * sysctl_sched_id_book_cpu_nr_tries; zero (no retries) unless group
 * identity and ID_BOOK_CPU are both enabled.
 */
static inline int get_id_book_cpu_nr_tries(void)
{
	if (!group_identity_disabled() && sched_feat(ID_BOOK_CPU))
		return sysctl_sched_id_book_cpu_nr_tries;

	return 0;
}
static noinline bool
id_idle_cpu(struct task_struct *p, int cpu, bool expellee, bool *idle)
{
@ -1054,6 +1099,9 @@ id_idle_cpu(struct task_struct *p, int cpu, bool expellee, bool *idle)
if (need_expel)
return false;
if (is_highclass_task(p) && rq_booked(rq))
return false;
/* CPU full of underclass is idle for highclass */
if (!is_idle) {
/*
@ -2324,6 +2372,12 @@ id_idle_cpu(struct task_struct *p, int cpu, bool expellee, bool *idle)
return is_idle;
}
/*
 * No-op stubs for builds without the identity-scheduling support above
 * (presumably the !CONFIG_GROUP_IDENTITY branch — the enclosing #ifdef
 * lies outside this hunk; confirm against the full file).
 */
static inline bool found_id_idle_cpu(void)
{
	return false;
}

static inline void set_has_id_idle_cpu(bool has)
{
}

static inline bool rq_booked(struct rq *rq)
{
	return false;
}

static inline void set_rq_booked(struct rq *rq, bool booked)
{
}

static inline int get_id_book_cpu_nr_tries(void)
{
	return 0;
}
static inline void identity_init_cfs_rq(struct cfs_rq *cfs_rq)
{
}
@ -8276,6 +8330,9 @@ enqueue_throttle:
assert_list_leaf_cfs_rq(rq);
hrtick_update(rq);
if (is_highclass_task(p))
set_rq_booked(rq, false);
}
static void set_next_buddy(struct sched_entity *se);
@ -9156,6 +9213,7 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
if ((unsigned int)recent_used_cpu < nr_cpumask_bits)
return recent_used_cpu;
set_has_id_idle_cpu(false);
return target;
}
@ -9571,6 +9629,8 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f
int new_cpu = prev_cpu;
int want_affine = 0;
int sync = (wake_flags & WF_SYNC) && !(current->flags & PF_EXITING);
struct rq *rq;
struct rq_flags rf;
/* Endow LS task the ability to balance at fork */
if (is_highclass_task(p) && (sd_flag & SD_BALANCE_FORK))
@ -9619,9 +9679,27 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f
/* Slow path */
new_cpu = find_idlest_cpu(sd, p, cpu, prev_cpu, sd_flag);
} else if (sd_flag & SD_BALANCE_WAKE) { /* XXX always ? */
int nr_tries = get_id_book_cpu_nr_tries();
/* Fast path */
select:
set_has_id_idle_cpu(true);
new_cpu = select_idle_sibling(p, prev_cpu, new_cpu);
if (is_highclass_task(p) && found_id_idle_cpu()) {
rq = cpu_rq(new_cpu);
rq_lock(rq, &rf);
if (!id_idle_cpu(p, new_cpu, false, NULL)) {
if (nr_tries > 0) {
nr_tries--;
rq_unlock(rq, &rf);
goto select;
} else {
rq_unlock(rq, &rf);
}
} else {
set_rq_booked(rq, true);
rq_unlock(rq, &rf);
}
}
if (want_affine)
current->recent_used_cpu = cpu;

View File

@ -107,6 +107,7 @@ SCHED_FEAT(ID_EXPELLER_SHARE_CORE, true)
SCHED_FEAT(ID_ABSOLUTE_EXPEL, false)
SCHED_FEAT(ID_LOAD_BALANCE, false)
SCHED_FEAT(ID_PUSH_EXPELLEE, false)
SCHED_FEAT(ID_BOOK_CPU, false)
#endif
#ifdef CONFIG_SCHED_CORE

View File

@ -1340,6 +1340,7 @@ struct rq {
u64 under_exec_sum;
u64 under_exec_stamp;
u64 avg_id_idle;
bool booked;
#ifdef CONFIG_SCHED_SMT
unsigned long next_expel_ib;
unsigned long next_expel_update;

View File

@ -2032,6 +2032,13 @@ static struct ctl_table kern_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
{
.procname = "sched_id_book_cpu_nr_tries",
.data = &sysctl_sched_id_book_cpu_nr_tries,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{
.procname = "sched_expel_update_interval",
.data = &sysctl_sched_expel_update_interval,