anolis: sched: support cpu.exstat

ANBZ: #11863

Use cgroup rstat as the cpuusage data source: on the default hierarchy, per-CPU usage is read from the rstat base stats via __cgroup_get_usage(), while cgroups on the legacy hierarchy keep the cpuacct-based path. This also lets the cpu controller expose cpu.exstat and enable_sli without relying on the cpuacct controller being co-mounted.

Signed-off-by: Yi Tao <escape@linux.alibaba.com>
Acked-by: Tianchen Ding <dtcccc@linux.alibaba.com>
Link: https://gitee.com/anolis/cloud-kernel/pulls/4121
Yi Tao 2024-11-12 19:03:04 +08:00 committed by 小龙
parent e725cd87e9
commit d980cb19ad
6 changed files with 219 additions and 92 deletions
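
As a reading aid rather than part of the patch: a minimal userspace sketch of how the new interface could be consumed. The cgroup mount point and group name are assumptions; the field names follow cpu_exstat_show() in the cpu controller hunk below.

/*
 * Hypothetical consumer of cpu.exstat. The path assumes a cpu hierarchy
 * mounted at /sys/fs/cgroup/cpu with a child cgroup named "test"; adjust
 * it for the system at hand.
 */
#include <stdio.h>

int main(void)
{
	const char *path = "/sys/fs/cgroup/cpu/test/cpu.exstat";	/* assumed path */
	char line[256];
	FILE *fp = fopen(path, "r");

	if (!fp) {
		perror("fopen");
		return 1;
	}
	/* Each line is "<field> <value>", e.g. "user 1234" or "nr_running 2". */
	while (fgets(line, sizeof(line), fp))
		fputs(line, stdout);
	fclose(fp);
	return 0;
}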


@@ -781,6 +781,9 @@ void cgroup_rstat_flush(struct cgroup *cgrp);
void cgroup_rstat_flush_irqsafe(struct cgroup *cgrp);
void cgroup_rstat_flush_hold(struct cgroup *cgrp);
void cgroup_rstat_flush_release(void);
bool cgroup_on_dfl(const struct cgroup *cgrp);
void __cgroup_get_usage(struct cgroup *cgrp, int cpu,
			struct cpuacct_usage_result *res);

/*
 * Basic resource stats.


@@ -222,7 +222,6 @@ static inline void get_css_set(struct css_set *cset)
}
bool cgroup_ssid_enabled(int ssid);
bool cgroup_on_dfl(const struct cgroup *cgrp);
bool cgroup_is_thread_root(struct cgroup *cgrp);
bool cgroup_is_threaded(struct cgroup *cgrp);


@@ -562,3 +562,17 @@ void cgroup_base_stat_cputime_show(struct seq_file *seq)
	seq_printf(seq, "sibidle_task_usec %llu\n", sibidle_task_time);
#endif
}

void __cgroup_get_usage(struct cgroup *cgrp, int cpu,
			struct cpuacct_usage_result *res)
{
	struct cgroup_rstat_cpu *rstatc;

	cgroup_rstat_flush_hold(cgrp);
	rstatc = cgroup_rstat_cpu(cgrp, cpu);
	res->user = rstatc->bstat.cputime.utime;
	res->system = rstatc->bstat.cputime.stime;
	cgroup_rstat_flush_release();
}
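
Not part of the patch: __cgroup_get_usage() above follows the usual rstat read protocol, i.e. flush while holding the rstat lock, read the per-CPU base stat, then release. A hypothetical variant in the same file (cgroup_rstat_cpu() is file-local to rstat.c) that reads the summed runtime instead would look like:

static u64 cgroup_cpu_runtime(struct cgroup *cgrp, int cpu)	/* illustrative only */
{
	struct cgroup_rstat_cpu *rstatc;
	u64 sum;

	cgroup_rstat_flush_hold(cgrp);
	rstatc = cgroup_rstat_cpu(cgrp, cpu);
	sum = rstatc->bstat.cputime.sum_exec_runtime;
	cgroup_rstat_flush_release();

	return sum;
}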


@@ -10684,6 +10684,144 @@ out_show:
	return 0;
}

void __cgroup_get_usage_result(struct cgroup_subsys_state *css, int cpu,
			struct cpuacct_usage_result *res)
{
	struct sched_entity *se;
	struct cgroup *cgrp = css->cgroup;
	struct task_group *tg = cgroup_tg(cgrp);

	memset(res, 0, sizeof(*res));
	if (unlikely(!tg))
		return;

	if (cgroup_on_dfl(cgrp))
		__cgroup_get_usage(cgrp, cpu, res);
	else
		__cpuacct_get_usage(css, cpu, res);

	se = tg->se[cpu];
	if (se && schedstat_enabled()) {
		unsigned int seq;
		unsigned long flags;
		u64 idle_start, ineff, ineff_start, elapse, complement;
		u64 clock, iowait_start;

		do {
			seq = read_seqcount_begin(&se->idle_seqcount);
			res->idle = schedstat_val(se->cg_idle_sum);
			idle_start = schedstat_val(se->cg_idle_start);

			clock = cpu_clock(cpu);
			if (idle_start && clock > idle_start)
				res->idle += clock - idle_start;
		} while (read_seqcount_retry(&se->idle_seqcount, seq));

		ineff = schedstat_val(se->cg_ineffective_sum);
		ineff_start = schedstat_val(se->cg_ineffective_start);
		if (ineff_start)
			__schedstat_add(ineff, clock - ineff_start);

		spin_lock_irqsave(&se->iowait_lock, flags);
		res->iowait = schedstat_val(se->cg_iowait_sum);
		iowait_start = schedstat_val(se->cg_iowait_start);
		if (iowait_start)
			__schedstat_add(res->iowait, clock - iowait_start);
		spin_unlock_irqrestore(&se->iowait_lock, flags);

		res->steal = 0;

		elapse = clock - schedstat_val(se->cg_init_time);
		complement = res->idle + se->sum_exec_runtime + ineff;
		if (elapse > complement)
			res->steal = elapse - complement;

		res->idle -= res->iowait;
	} else {
		res->idle = res->iowait = res->steal = 0;
	}
}

static int cpu_exstat_show(struct seq_file *sf, void *v)
{
	struct cgroup_subsys_state *css = seq_css(sf);
	struct task_group *tg = css_tg(css);
	u64 user, nice, system, idle, iowait, irq, softirq, steal, guest;
	u64 nr_migrations = 0;
	struct cpu_alistats *alistats;
	unsigned long load, avnrun[3], avnrun_r[3];
	unsigned long nr_run = 0, nr_uninter = 0;
	int cpu;
	struct cpuacct_usage_result res;

	user = nice = system = idle = iowait =
		irq = softirq = steal = guest = 0;

	for_each_possible_cpu(cpu) {
		if (!housekeeping_cpu(cpu, HK_FLAG_DOMAIN))
			continue;

		rcu_read_lock();
		__cgroup_get_usage_result(css, cpu, &res);
		rcu_read_unlock();

		user += res.user;
		nice += res.nice;
		system += res.system;
		irq += res.irq;
		softirq += res.softirq;
		steal += res.steal;
		guest += res.guest;
		guest += res.guest_nice;
		iowait += res.iowait;
		idle += res.idle;

		alistats = per_cpu_ptr(tg->alistats, cpu);
		nr_migrations += alistats->nr_migrations;
		nr_run += tg_running(tg, cpu);
		nr_uninter += tg_uninterruptible(tg, cpu);
	}

	__get_cgroup_avenrun(tg, avnrun, FIXED_1/200, 0, false);
	__get_cgroup_avenrun(tg, avnrun_r, FIXED_1/200, 0, true);

	seq_printf(sf, "user %lld\n", nsec_to_clock_t(user));
	seq_printf(sf, "nice %lld\n", nsec_to_clock_t(nice));
	seq_printf(sf, "system %lld\n", nsec_to_clock_t(system));
	seq_printf(sf, "idle %lld\n", nsec_to_clock_t(idle));
	seq_printf(sf, "iowait %lld\n", nsec_to_clock_t(iowait));
	seq_printf(sf, "irq %lld\n", nsec_to_clock_t(irq));
	seq_printf(sf, "softirq %lld\n", nsec_to_clock_t(softirq));
	seq_printf(sf, "steal %lld\n", nsec_to_clock_t(steal));
	seq_printf(sf, "guest %lld\n", nsec_to_clock_t(guest));

	load = LOAD_INT(avnrun[0]) * 100 + LOAD_FRAC(avnrun[0]);
	seq_printf(sf, "load average(1min) %lld\n", (u64)load);
	load = LOAD_INT(avnrun[1]) * 100 + LOAD_FRAC(avnrun[1]);
	seq_printf(sf, "load average(5min) %lld\n", (u64)load);
	load = LOAD_INT(avnrun[2]) * 100 + LOAD_FRAC(avnrun[2]);
	seq_printf(sf, "load average(15min) %lld\n", (u64)load);

	seq_printf(sf, "nr_running %lld\n", (u64)nr_run);
	if ((long) nr_uninter < 0)
		nr_uninter = 0;
	seq_printf(sf, "nr_uninterruptible %lld\n", (u64)nr_uninter);
	seq_printf(sf, "nr_migrations %lld\n", (u64)nr_migrations);

	load = LOAD_INT(avnrun_r[0]) * 100 + LOAD_FRAC(avnrun_r[0]);
	seq_printf(sf, "running load average(1min) %lld\n", (u64)load);
	load = LOAD_INT(avnrun_r[1]) * 100 + LOAD_FRAC(avnrun_r[1]);
	seq_printf(sf, "running load average(5min) %lld\n", (u64)load);
	load = LOAD_INT(avnrun_r[2]) * 100 + LOAD_FRAC(avnrun_r[2]);
	seq_printf(sf, "running load average(15min) %lld\n", (u64)load);

	return 0;
}
#endif
static struct cftype cpu_files[] = {
@@ -10812,6 +10950,18 @@ static struct cftype cpu_files[] = {
		.write_u64 = sched_lat_stat_write,
		.seq_show = sched_lat_stat_show
	},
	{
		.name = "exstat",
		.flags = CFTYPE_NOT_ON_ROOT,
		.seq_show = cpu_exstat_show,
	},
	{
		.name = "enable_sli",
		.flags = CFTYPE_NOT_ON_ROOT,
		.read_u64 = enable_sli_read,
		.write_u64 = enable_sli_write
	},
#endif
#if defined(CONFIG_SCHED_CORE) && defined(CONFIG_CFS_BANDWIDTH)
{
@@ -10843,11 +10993,23 @@ static struct cftype cpu_files[] = {
	{ }	/* terminate */
};

#ifdef CONFIG_SCHED_SLI
static void cpu_cgroup_css_offline(struct cgroup_subsys_state *css)
{
	struct task_group *tg = css_tg(css);

	tg_enable_sli(tg, false);
}
#endif

struct cgroup_subsys cpu_cgrp_subsys = {
	.css_alloc = cpu_cgroup_css_alloc,
	.css_online = cpu_cgroup_css_online,
	.css_released = cpu_cgroup_css_released,
	.css_free = cpu_cgroup_css_free,
#ifdef CONFIG_SCHED_SLI
	.css_offline = cpu_cgroup_css_offline,
#endif
	.css_extra_stat_show = cpu_extra_stat_show,
	.fork = cpu_cgroup_fork,
	.can_attach = cpu_cgroup_can_attach,

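Also not part of the patch: with exstat and enable_sli now registered on the cpu controller above, a hypothetical sequence for enabling SLI accounting on a cgroup before sampling cpu.exstat (path and group name assumed) could be:

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	/* Assumed cgroup path; "test" is a placeholder group name. */
	int fd = open("/sys/fs/cgroup/cpu/test/cpu.enable_sli", O_WRONLY);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	if (write(fd, "1", 1) != 1)	/* write_u64 knob: "1" enables, "0" disables */
		perror("write");
	close(fd);
	return 0;
}
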

@@ -115,7 +115,7 @@ void task_cpu_increase_nr_migrations(struct task_struct *tsk)
static DEFINE_SPINLOCK(sli_tg_lock);
LIST_HEAD(sli_tg_list);

static void tg_enable_sli(struct task_group *tg, bool val)
void tg_enable_sli(struct task_group *tg, bool val)
{
	spin_lock(&sli_tg_lock);
	if (val && !READ_ONCE(tg->sli_enabled))
@@ -146,7 +146,7 @@ void create_rich_container_reaper(struct task_struct *tsk)
	}
}

static int enable_sli_write(struct cgroup_subsys_state *css,
int enable_sli_write(struct cgroup_subsys_state *css,
			struct cftype *cft, u64 val)
{
	struct task_group *tg;
@@ -162,7 +162,7 @@ static int enable_sli_write(struct cgroup_subsys_state *css,
	return 0;
}

static u64 enable_sli_read(struct cgroup_subsys_state *css, struct cftype *cft)
u64 enable_sli_read(struct cgroup_subsys_state *css, struct cftype *cft)
{
	struct task_group *tg;
@@ -215,21 +215,6 @@ out:
	return ERR_PTR(-ENOMEM);
}

#ifdef CONFIG_SCHED_SLI
static void cpuacct_css_offline(struct cgroup_subsys_state *css)
{
	struct task_group *tg;

	tg = cgroup_tg(css->cgroup);
	if (unlikely(!tg)) {
		WARN_ONCE(1, "cgroup \"cpu,cpuacct\" are not bound together");
		return;
	}

	tg_enable_sli(tg, false);
}
#endif

/* Destroy an existing CPU accounting group */
static void cpuacct_css_free(struct cgroup_subsys_state *css)
{
@@ -439,8 +424,7 @@ static int cpuacct_stats_show(struct seq_file *sf, void *v)
}

#ifdef CONFIG_SCHED_SLI
static unsigned long tg_running(struct task_group *tg, int cpu);
static void __get_cgroup_avenrun(struct task_group *tg, unsigned long *loads,
void __get_cgroup_avenrun(struct task_group *tg, unsigned long *loads,
			unsigned long offset, int shift, bool running)
{
	unsigned long *avenrun;
@@ -491,7 +475,7 @@ static inline bool tg_rt_throttled(struct task_group *tg, int cpu)
}
#endif

static unsigned long tg_running(struct task_group *tg, int cpu)
unsigned long tg_running(struct task_group *tg, int cpu)
{
	unsigned long nr_running = 0;
	/* Make sure it is only called for non-root cpuacct */
@@ -511,7 +495,7 @@ static unsigned long tg_running(struct task_group *tg, int cpu)
	return nr_running;
}

static unsigned long tg_uninterruptible(struct task_group *tg, int cpu)
unsigned long tg_uninterruptible(struct task_group *tg, int cpu)
{
	unsigned long nr = 0;
@@ -771,23 +755,22 @@ void calc_cgroup_load(void)
	rcu_read_unlock();
}

static void __cpuacct_get_usage_result(struct cpuacct *ca, int cpu,
		struct task_group *tg, struct cpuacct_usage_result *res)
void __cpuacct_get_usage(struct cgroup_subsys_state *css, int cpu,
		struct cpuacct_usage_result *res)
{
	struct cpuacct *ca;
	struct kernel_cpustat *kcpustat;
	struct cpuacct_usage *cpuusage;
	struct task_cputime cputime;
	u64 tick_user, tick_nice, tick_sys, tick_irq, tick_softirq;
	u64 left, right, left2, right2;
	struct sched_entity *se;

	ca = cgroup_ca(css->cgroup);
	if (!ca)
		return;
	kcpustat = per_cpu_ptr(ca->cpustat, cpu);
	if (unlikely(!tg)) {
		memset(res, 0, sizeof(*res));
		return;
	}
	se = tg->se[cpu];
	cpuusage = per_cpu_ptr(ca->cpuusage, cpu);

	tick_user = kcpustat->cpustat[CPUTIME_USER];
	tick_nice = kcpustat->cpustat[CPUTIME_NICE];
@@ -825,53 +808,15 @@ static void __cpuacct_get_usage_result(struct cpuacct *ca, int cpu,
	res->irq = left;
	res->softirq = right;

	if (se && schedstat_enabled()) {
		unsigned int seq;
		unsigned long flags;
		u64 idle_start, ineff, ineff_start, elapse, complement;
		u64 clock, iowait_start;

		do {
			seq = read_seqcount_begin(&se->idle_seqcount);
			res->idle = schedstat_val(se->cg_idle_sum);
			idle_start = schedstat_val(se->cg_idle_start);

			clock = cpu_clock(cpu);
			if (idle_start && clock > idle_start)
				res->idle += clock - idle_start;
		} while (read_seqcount_retry(&se->idle_seqcount, seq));

		ineff = schedstat_val(se->cg_ineffective_sum);
		ineff_start = schedstat_val(se->cg_ineffective_start);
		if (ineff_start)
			__schedstat_add(ineff, clock - ineff_start);

		spin_lock_irqsave(&se->iowait_lock, flags);
		res->iowait = schedstat_val(se->cg_iowait_sum);
		iowait_start = schedstat_val(se->cg_iowait_start);
		if (iowait_start)
			__schedstat_add(res->iowait, clock - iowait_start);
		spin_unlock_irqrestore(&se->iowait_lock, flags);

		res->steal = 0;

		elapse = clock - schedstat_val(se->cg_init_time);
		complement = res->idle + se->sum_exec_runtime + ineff;
		if (elapse > complement)
			res->steal = elapse - complement;

		res->idle -= res->iowait;
	} else {
		res->idle = res->iowait = res->steal = 0;
	}

	res->guest = kcpustat->cpustat[CPUTIME_GUEST];
	res->guest_nice = kcpustat->cpustat[CPUTIME_GUEST_NICE];
}

static int cpuacct_proc_stats_show(struct seq_file *sf, void *v)
{
	struct cpuacct *ca = css_ca(seq_css(sf));
	struct cgroup *cgrp = seq_css(sf)->cgroup;
	struct cgroup_subsys_state *css = seq_css(sf);
	struct cpuacct *ca = css_ca(css);
	struct cgroup *cgrp = css->cgroup;
	struct task_group *tg;
	u64 user, nice, system, idle, iowait, irq, softirq, steal, guest;
	u64 nr_migrations = 0;
@@ -897,8 +842,7 @@ static int cpuacct_proc_stats_show(struct seq_file *sf, void *v)
			continue;

		rcu_read_lock();
		__cpuacct_get_usage_result(ca, cpu,
				cgroup_tg(cgrp), &res);
		__cgroup_get_usage_result(css, cpu, &res);
		rcu_read_unlock();

		user += res.user;
@@ -1165,9 +1109,6 @@ static void cpuacct_cgroup_attach(struct cgroup_taskset *tset)
struct cgroup_subsys cpuacct_cgrp_subsys = {
	.css_alloc = cpuacct_css_alloc,
	.css_free = cpuacct_css_free,
#ifdef CONFIG_SCHED_SLI
	.css_offline = cpuacct_css_offline,
#endif
	.attach = cpuacct_cgroup_attach,
	.legacy_cftypes = files,
	.early_init = true,
@@ -1506,41 +1447,35 @@ void rich_container_get_usage(enum rich_container_source from,
		struct cpuacct_usage_result *res)
{
	struct cgroup_subsys_state *css;
	struct cpuacct *ca_src;
	struct task_group *tg;

	rcu_read_lock();
	/* To avoid iterating css for every cpu */
	if (likely(from == RICH_CONTAINER_REAPER)) {
		ca_src = task_ca(reaper);
		css = task_css(reaper, cpu_cgrp_id);
		goto ok;
	} else if (from == RICH_CONTAINER_CURRENT) {
		ca_src = task_ca(current);
		css = task_css(current, cpu_cgrp_id);
		goto ok;
	} else if (from == RICH_CONTAINER_PARENT_CGROUP) {
		css = task_css(current, cpuacct_cgrp_id)->parent;
		css = task_css(current, cpu_cgrp_id)->parent;
		if (!css)
			ca_src = task_ca(current);
		else
			ca_src = css_ca(css);
			css = task_css(current, cpu_cgrp_id);
		goto ok;
	}

	css = task_css(current, cpuacct_cgrp_id);
	css = task_css(current, cpu_cgrp_id);
	while (css) {
		if (test_bit(CGRP_RICH_CONTAINER_SOURCE, &css->cgroup->flags))
			break;
		css = css->parent;
	}
	if (css)
		ca_src = css_ca(css);
	else
		ca_src = task_ca(reaper);
	if (!css)
		css = task_css(reaper, cpu_cgrp_id);

ok:
	tg = cgroup_tg(ca_src->css.cgroup);
	__cpuacct_get_usage_result(ca_src, cpu, tg, res);
	__cgroup_get_usage_result(css, cpu, res);
	rcu_read_unlock();
}


@@ -3434,6 +3434,17 @@ struct task_group *cgroup_tg(struct cgroup *cgrp);
int sched_lat_stat_show(struct seq_file *sf, void *v);
int sched_lat_stat_write(struct cgroup_subsys_state *css,
			struct cftype *cft, u64 val);
void __get_cgroup_avenrun(struct task_group *tg, unsigned long *loads,
			unsigned long offset, int shift, bool running);
unsigned long tg_running(struct task_group *tg, int cpu);
unsigned long tg_uninterruptible(struct task_group *tg, int cpu);
int enable_sli_write(struct cgroup_subsys_state *css,
			struct cftype *cft, u64 val);
u64 enable_sli_read(struct cgroup_subsys_state *css, struct cftype *cft);
void tg_enable_sli(struct task_group *tg, bool val);
void __cgroup_get_usage_result(struct cgroup_subsys_state *css, int cpu,
			struct cpuacct_usage_result *res);
#else
static inline void task_cpu_increase_nr_migrations(struct task_struct *tsk) { }
static inline void cpu_update_latency(struct sched_entity *se,
@@ -3447,6 +3458,9 @@ static inline bool async_load_calc_enabled(void)
}
#endif

void __cpuacct_get_usage(struct cgroup_subsys_state *css, int cpu,
			struct cpuacct_usage_result *res);
#ifdef CONFIG_PSI
#ifdef CONFIG_CGROUPS
extern struct cftype cgroup_v1_psi_files[];