ck: UKFEF: unified kernel fault event framework

to #34868789

There are various kernel errors/warnings. If users want to know
the effect of an error or warning, they have to look into the details
of the kernel. It is difficult for users to handle so many things.

This patch classifies kernel fault events and assigns each event to its
own module, such as sched, mem, io, net, etc. At the same time, it
reports the effect class of the current fault event.
There are three effect classes:
Slight - just a jitter, everything still works.
Normal - the current task may have exception.
Fatal  - system may be unstable.

According to this information, users can easily choose a suitable
action to ensure reliability.

This feature also can be used for checking the side effect of a
system-change.

This feature can be enabled/disabled via
/proc/sys/kernel/fault_event_enable.

Signed-off-by: Wetp Zhang <wetp.zy@linux.alibaba.com>
Signed-off-by: Meng Shen <shenmeng@linux.alibaba.com>
Acked-by: Xunlei Pang <xlpang@linux.alibaba.com>
This commit is contained in:
Meng Shen 2021-07-15 17:39:45 +08:00 committed by Qiao Ma
parent 3f3c1d3fdf
commit d2ffdd5d83
3 changed files with 246 additions and 0 deletions

View File

@ -0,0 +1,47 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _FAULT_EVENT_H
#define _FAULT_EVENT_H
#include <linux/sched.h>
/*
 * Effect class (severity) attached to every reported fault event.
 *
 * The values are used as array indices, so they must stay dense and
 * start at zero.
 */
enum FAULT_CLASS {
	SLIGHT_FAULT = 0,	/* just a jitter, everything still works */
	NORMAL_FAULT,		/* the current task may see an exception */
	FATAL_FAULT,		/* the system may be unstable */
	FAULT_CLASSS_MAX	/* number of classes; not a valid class */
};
/*
 * Kernel fault event identifiers.
 *
 * The values are used as array indices, so they must stay dense and
 * start at zero. New events go immediately before FE_MAX.
 */
enum FAULT_EVENT {
	FE_SOFTLOCKUP = 0,	/* soft lockup */
	FE_RCUSTALL,		/* rcu stall */
	FE_HUNGTASK,		/* hung task */
	FE_OOM_GLOBAL,		/* global oom */
	FE_OOM_CGROUP,		/* cgroup oom */
	FE_ALLOCFAIL,		/* alloc failed */
	FE_LIST_CORRUPT,	/* list corruption */
	FE_MM_STATE,		/* bad mm_struct */
	FE_IO_ERR,		/* io error */
	FE_EXT4_ERR,		/* ext4 fs error */
	FE_MCE,			/* mce */
	FE_SIGNAL,		/* fatal signal */
	FE_WARN,		/* warning */
	FE_PANIC,		/* panic */
	FE_MAX			/* number of events; not a valid event */
};
/*
 * Description and occurrence counter of one kind of fault event.
 *
 * @name and @module are const-qualified because they are meant to point
 * at string literals, which must never be written through.
 */
struct fault_event {
	enum FAULT_EVENT type;	/* identifier of this event */
	const char *name;	/* human readable event name */
	const char *module;	/* subsystem the event is attributed to */
	atomic_t count;		/* how many times the event was reported */
};
/* Tunables controlling fault event accounting, printing and panic policy. */
extern unsigned int sysctl_fault_event_enable;
extern unsigned int sysctl_fault_event_print;
extern unsigned int sysctl_panic_on_fatal_event;

/* Tells whether fault event reporting is currently switched on. */
bool fault_monitor_enable(void);

/*
 * Report one fault event of the given class.
 * @tsk and @msg are optional and may be NULL.
 */
void report_fault_event(int cpu, struct task_struct *tsk,
			enum FAULT_CLASS class, enum FAULT_EVENT event,
			const char *msg);
#endif

View File

@ -31,6 +31,9 @@
#include <linux/bug.h>
#include <linux/ratelimit.h>
#include <linux/debugfs.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/fault_event.h>
#include <asm/sections.h>
#define PANIC_TIMER_STEP 100
@ -568,6 +571,172 @@ void oops_exit(void)
kmsg_dump(KMSG_DUMP_OOPS);
}
/* Non-zero: fault events are accounted. Enabled by default. */
unsigned int sysctl_fault_event_enable = 1;
/* Non-zero: each accounted event is also printed (rate limited). */
unsigned int sysctl_fault_event_print;
/* Non-zero: a FATAL_FAULT event other than FE_PANIC triggers panic(). */
unsigned int sysctl_panic_on_fatal_event;
/* Number of fault events accounted so far, over all classes. */
static atomic_t tot_fault_cnt;
/* Number of fault events accounted so far, per effect class. */
static atomic_t class_fault_cnt[FAULT_CLASSS_MAX];
/*
 * Printable name of each effect class, indexed by enum FAULT_CLASS.
 * Both the table and the strings are read-only, hence the double const.
 */
static const char * const fault_class_name[FAULT_CLASSS_MAX] = {
	"Slight",
	"Normal",
	"Fatal"
};
/*
 * Per-event bookkeeping, indexed by enum FAULT_EVENT.
 *
 * Designated initializers tie every entry to its enum value. The
 * counters are not listed: static storage starts them at zero.
 */
static struct fault_event fevents[FE_MAX] = {
	[FE_SOFTLOCKUP] = {
		.type = FE_SOFTLOCKUP,
		.name = "soft lockup",
		.module = "general",
	},
	[FE_RCUSTALL] = {
		.type = FE_RCUSTALL,
		.name = "rcu stall",
		.module = "general",
	},
	[FE_HUNGTASK] = {
		.type = FE_HUNGTASK,
		.name = "hung task",
		.module = "general",
	},
	[FE_OOM_GLOBAL] = {
		.type = FE_OOM_GLOBAL,
		.name = "global oom",
		.module = "mem",
	},
	[FE_OOM_CGROUP] = {
		.type = FE_OOM_CGROUP,
		.name = "cgroup oom",
		.module = "mem",
	},
	[FE_ALLOCFAIL] = {
		.type = FE_ALLOCFAIL,
		.name = "alloc failed",
		.module = "mem",
	},
	[FE_LIST_CORRUPT] = {
		.type = FE_LIST_CORRUPT,
		.name = "list corruption",
		.module = "general",
	},
	[FE_MM_STATE] = {
		.type = FE_MM_STATE,
		.name = "bad mm_struct",
		.module = "mem",
	},
	[FE_IO_ERR] = {
		.type = FE_IO_ERR,
		.name = "io error",
		.module = "io",
	},
	[FE_EXT4_ERR] = {
		.type = FE_EXT4_ERR,
		.name = "ext4 fs error",
		.module = "fs",
	},
	[FE_MCE] = {
		.type = FE_MCE,
		.name = "mce",
		.module = "hardware",
	},
	[FE_SIGNAL] = {
		.type = FE_SIGNAL,
		.name = "fatal signal",
		.module = "general",
	},
	[FE_WARN] = {
		.type = FE_WARN,
		.name = "warning",
		.module = "general",
	},
	[FE_PANIC] = {
		.type = FE_PANIC,
		.name = "panic",
		.module = "general",
	},
};
/*
 * fault_monitor_enable - tell whether fault event reporting is switched on
 *
 * Return: true when sysctl_fault_event_enable is non-zero.
 */
bool fault_monitor_enable(void)
{
	return sysctl_fault_event_enable != 0;
}
/*
 * Copy @len bytes from user address @src into @dst without ever
 * servicing a page fault.
 *
 * __copy_from_user_inatomic() only avoids sleeping when page faults are
 * disabled around it, so the disable/enable pair is kept right here.
 *
 * Returns true when every byte was copied.
 */
static bool fault_event_read_user(void *dst, unsigned long src, int len)
{
	unsigned long left;

	pagefault_disable();
	left = __copy_from_user_inatomic(dst, (void *)src, len);
	pagefault_enable();

	return left == 0;
}

/*
 * get_task_cmdline - best-effort command line of @tsk for a fault report
 * @tsk:  task to describe, may be NULL
 * @buff: scratch buffer the command line is assembled in
 * @size: size of @buff in bytes
 *
 * Returns "nil" for a NULL task. Returns tsk->comm when @tsk is not in
 * the thread group of current, has no mm, is a kernel thread, or when
 * its argument area is empty or cannot be read. Otherwise returns @buff
 * holding up to @size - 1 bytes of the argument area, with the embedded
 * NUL separators turned into spaces.
 *
 * NOTE(review): the callers are not visible here; the user copies are
 * done with page faults disabled on the assumption that a fault may be
 * reported from a context that must not sleep - confirm against callers.
 */
static const char *get_task_cmdline(struct task_struct *tsk, char *buff,
				    int size)
{
	struct mm_struct *mm;
	char *p = buff, c;
	int i, len, count = 0;

	if (!tsk)
		return "nil";

	/* Only the address space of the current thread group is read. */
	if (tsk->tgid != current->tgid || !tsk->mm
	    || (tsk->flags & PF_KTHREAD))
		goto use_comm;

	mm = tsk->mm;
	len = mm->arg_end - mm->arg_start;
	len = min(len, size);
	if (len <= 0)
		goto use_comm;

	if (!fault_event_read_user(p, mm->arg_start, len))
		goto use_comm;

	/* The last byte of the argument area decides whether to go on. */
	if (!fault_event_read_user(&c, mm->arg_end - 1, 1))
		goto use_comm;

	count += len;
	if (c == '\0' || len == size)
		goto out;

	/*
	 * The argument area does not end in NUL and there is room left:
	 * append the start of the environment area as well.
	 * NOTE(review): presumably this covers tasks that rewrote their
	 * argv area past its end - confirm.
	 */
	p = buff + len;
	len = mm->env_end - mm->env_start;
	len = min(len, size - count);
	if (len <= 0)
		goto out;

	if (fault_event_read_user(p, mm->env_start, len))
		count += len;

out:
	/* Join the NUL separated strings into one printable line. */
	for (i = 0; i < count - 1; i++) {
		if (buff[i] == '\0')
			buff[i] = ' ';
	}
	/* count is at least 1 here; the last copied byte is sacrificed. */
	buff[count - 1] = '\0';
	return buff;

use_comm:
	return tsk->comm;
}
/**
 * report_fault_event - account for, and optionally print, a fault event
 * @cpu:   CPU number shown in the printed report
 * @tsk:   task the event is attributed to, may be NULL
 * @class: effect class of the event
 * @event: which event happened
 * @msg:   extra text for the printed report, may be NULL
 *
 * Does nothing when reporting is disabled or when @class or @event is
 * out of range. Otherwise bumps the per-event, per-class and total
 * counters, prints a rate limited KERN_EMERG line when
 * sysctl_fault_event_print is set, and finally panics when
 * sysctl_panic_on_fatal_event is set and the event is a FATAL_FAULT
 * other than FE_PANIC.
 */
void report_fault_event(int cpu, struct task_struct *tsk,
			enum FAULT_CLASS class, enum FAULT_EVENT event,
			const char *msg)
{
	unsigned int evt_cnt;
	char tsk_cmdline[256];

	if (!sysctl_fault_event_enable)
		return;

	if (class >= FAULT_CLASSS_MAX || event >= FE_MAX)
		return;

	evt_cnt = atomic_inc_return(&fevents[event].count);
	atomic_inc(&class_fault_cnt[class]);
	atomic_inc(&tot_fault_cnt);

	if (sysctl_fault_event_print) {
		/* evt_cnt is unsigned, so it is printed with %u. */
		printk_ratelimited(KERN_EMERG "%s fault event[%s:%s]: %s. "
			"At cpu %d task %d(%s). Total: %u\n",
			fault_class_name[class], fevents[event].module,
			fevents[event].name, msg ? msg : "", cpu,
			tsk ? tsk->pid : -1,
			get_task_cmdline(tsk, tsk_cmdline,
					 sizeof(tsk_cmdline)),
			evt_cnt);
	}

	if (sysctl_panic_on_fatal_event && class == FATAL_FAULT &&
	    event != FE_PANIC) {
		/* Switch reporting off before panicking. */
		sysctl_fault_event_enable = 0;
		panic("kernel fault event");
	}
}
EXPORT_SYMBOL(report_fault_event);
/*
 * seq_file show callback of the fault_events proc file.
 *
 * Emits the total number of accounted events, then one line per effect
 * class, then one line per event kind. The counters are unsigned and
 * are therefore printed with %u.
 *
 * Always returns 0.
 */
static int fault_events_show(struct seq_file *m, void *v)
{
	unsigned int evt_cnt, class_cnt, total;
	int i;

	total = atomic_read(&tot_fault_cnt);
	seq_printf(m, "\nTotal fault events: %u\n\n", total);

	for (i = 0; i < FAULT_CLASSS_MAX; i++) {
		class_cnt = atomic_read(&class_fault_cnt[i]);
		seq_printf(m, "%s: %u\n", fault_class_name[i], class_cnt);
	}
	seq_puts(m, "\n");

	for (i = 0; i < FE_MAX; i++) {
		evt_cnt = atomic_read(&fevents[i].count);
		seq_printf(m, "%s: %u\n", fevents[i].name, evt_cnt);
	}

	return 0;
}
/*
 * open handler of the fault_events proc file: sets the file up as a
 * single_open() seq_file rendered by fault_events_show(), with no
 * private data. Returns whatever single_open() returns.
 */
static int fault_events_open(struct inode *inode, struct file *filp)
{
return single_open(filp, fault_events_show, NULL);
}
/*
 * Operations of the fault_events proc file. There is no write handler,
 * so the file is read-only. The table is only referenced from this file
 * and is therefore static.
 */
static const struct proc_ops fault_events_fops = {
	.proc_open = fault_events_open,
	.proc_read = seq_read,
	.proc_lseek = seq_lseek,
	.proc_release = single_release,
};
static int fault_events_init(void)
{
proc_create("fault_events", 0644, NULL, &fault_events_fops);
return 0;
}
module_init(fault_events_init);
struct warn_args {
const char *fmt;
va_list args;

View File

@ -72,6 +72,7 @@
#include <linux/latencytop.h>
#include <linux/pid.h>
#include <linux/pid_namespace.h>
#include <linux/fault_event.h>
#include "../lib/kstrtox.h"
@ -2868,6 +2869,35 @@ static struct ctl_table kern_table[] = {
},
#endif
#endif
{
.procname = "fault_event_enable",
.data = &sysctl_fault_event_enable,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE,
},
#if defined CONFIG_PRINTK
{
.procname = "fault_event_print",
.data = &sysctl_fault_event_print,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE,
},
#endif
{
.procname = "panic_on_fatal_event",
.data = &sysctl_panic_on_fatal_event,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE,
},
{ }
};