bpf: refactor softirq and percpu metric

Signed-off-by: Tonghao Zhang <tonghao@bamaicloud.com>
This commit is contained in:
Tonghao Zhang 2025-07-13 05:52:28 -04:00
parent 6f91fb52b2
commit 42b2204de9
5 changed files with 164 additions and 235 deletions

View File

@ -24,4 +24,9 @@
#define COMPAT_BPF_EXIST 2 /* update existing element */
#define COMPAT_BPF_F_LOCK 4 /* spin_lock-ed map_lookup/map_update */
#define NR_SOFTIRQS_MAX 16
#define NSEC_PER_MSEC 1000000UL
#define NSEC_PER_USEC 1000UL
#endif /* __BPF_COMMON_H__ */

View File

@ -6,10 +6,6 @@
#include "bpf_common.h"
#define NSEC_PER_MSEC 1000000UL
#define NSEC_PER_USEC 1000UL
#define NR_SOFTIRQS_MAX 16 // must be 2^order
enum lat_zone {
LAT_ZONE0 = 0, // 0 ~ 10us
LAT_ZONE1, // 10us ~ 100us
@ -23,84 +19,58 @@ struct tp_softirq {
unsigned int vec;
};
// Because BPF array accesses are strictly checked,
// the array size must be a power of two, so we
// do not use NR_SOFTIRQS as the array size;
// NR_SOFTIRQS_MAX is used instead.
struct softirq_lat {
u64 silat[NR_SOFTIRQS_MAX][LAT_ZONE_MAX];
u64 timestamp;
u64 total_latency[LAT_ZONE_MAX];
};
struct {
__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
// key -> NR_SOFTIRQS
__type(key, u32);
// value -> ts, record softirq_raise start time
__type(value, u64);
__uint(max_entries, NR_SOFTIRQS);
} silat_map SEC(".maps"); // softirq latency map
struct {
__uint(type, BPF_MAP_TYPE_ARRAY);
__uint(key_size, sizeof(u32));
__uint(value_size, sizeof(struct softirq_lat));
__uint(max_entries, 1);
} softirq_lats SEC(".maps");
__uint(max_entries, NR_SOFTIRQS_MAX);
} softirq_percpu_lats SEC(".maps");
SEC("tracepoint/irq/softirq_raise")
void probe_softirq_raise(struct tp_softirq *ctx)
int probe_softirq_raise(struct tp_softirq *ctx)
{
u32 nr;
u64 now;
nr = ctx->vec;
struct softirq_lat lat = {
.timestamp = bpf_ktime_get_ns(),
};
u32 vec = ctx->vec;
now = bpf_ktime_get_ns();
bpf_map_update_elem(&silat_map, &nr, &now, COMPAT_BPF_ANY);
}
if (vec >= NR_SOFTIRQS)
return 0;
static void calc_softirq_latency(struct softirq_lat *lat_mc, u32 nr, u64 now)
{
u64 lat, *ts;
ts = bpf_map_lookup_elem(&silat_map, &nr);
if (!ts)
return;
lat = now - *ts;
// update to metrics
if (lat < 10 * NSEC_PER_USEC) { // 10us
__sync_fetch_and_add(
&lat_mc->silat[nr & (NR_SOFTIRQS_MAX - 1)][LAT_ZONE0], 1);
} else if (lat < 100 * NSEC_PER_USEC) { // 100us
__sync_fetch_and_add(
&lat_mc->silat[nr & (NR_SOFTIRQS_MAX - 1)][LAT_ZONE1], 1);
} else if (lat < 1 * NSEC_PER_MSEC) { // 1ms
__sync_fetch_and_add(
&lat_mc->silat[nr & (NR_SOFTIRQS_MAX - 1)][LAT_ZONE2], 1);
} else { // 1ms+
__sync_fetch_and_add(
&lat_mc->silat[nr & (NR_SOFTIRQS_MAX - 1)][LAT_ZONE3], 1);
}
bpf_map_update_elem(&softirq_percpu_lats, &vec, &lat, COMPAT_BPF_ANY);
return 0;
}
SEC("tracepoint/irq/softirq_entry")
void probe_softirq_entry(struct tp_softirq *ctx)
int probe_softirq_entry(struct tp_softirq *ctx)
{
u32 key = 0, nr;
u64 now;
struct softirq_lat *lat_mc;
struct softirq_lat *lat;
u32 vec = ctx->vec;
lat_mc = bpf_map_lookup_elem(&softirq_lats, &key);
if (!lat_mc)
return;
if (vec >= NR_SOFTIRQS)
return 0;
nr = ctx->vec;
lat = bpf_map_lookup_elem(&softirq_percpu_lats, &vec);
if (!lat)
return 0;
now = bpf_ktime_get_ns();
u64 latency = bpf_ktime_get_ns() - lat->timestamp;
// update softirq lat to lat metric
calc_softirq_latency(lat_mc, nr, now);
if (latency < 10 * NSEC_PER_USEC) {
__sync_fetch_and_add(&lat->total_latency[LAT_ZONE0], 1);
} else if (latency < 100 * NSEC_PER_USEC) {
__sync_fetch_and_add(&lat->total_latency[LAT_ZONE1], 1);
} else if (latency < 1 * NSEC_PER_MSEC) {
__sync_fetch_and_add(&lat->total_latency[LAT_ZONE2], 1);
} else {
__sync_fetch_and_add(&lat->total_latency[LAT_ZONE3], 1);
}
return 0;
}
char __license[] SEC("license") = "Dual MIT/GPL";

View File

@ -121,7 +121,6 @@ int sched_wakeup_entry(struct sched_wakeup_new_args *ctx)
return trace_enqueue(ctx->pid);
}
#define NSEC_PER_MSEC 1000000L
SEC("raw_tracepoint/sched_switch")
int sched_switch_entry(struct bpf_raw_tracepoint_args *ctx)
{

View File

@ -1,136 +0,0 @@
// Copyright 2025 The HuaTuo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package collector
import (
"huatuo-bamai/pkg/metric"
)
// Softirq type indexes, numbered consecutively from 0 via iota.
// siTypeName maps each index to its display name, and getMonsoftirqInfo
// treats the index as a bit position (1 << index) when it filters types
// against defaultSiTypeMask.
const (
softirqHi = iota
softirqTime
softirqNetTx
softirqNetRx
softirqBlock
softirqIrqPoll
softirqTasklet
softirqSched
softirqHrtimer
// NOTE(review): this identifier is spelled "sofirq" (missing "t");
// siTypeName refers to it by this exact name, so rename both together.
sofirqRcu
// softirqMax is the number of softirq types declared above.
softirqMax
)
// Latency zone indexes, numbered consecutively from 0 via iota.
// latZoneName maps each index to its display label.
const (
latZONE0 = iota // 0 ~ 10us
latZONE1 // 10us ~ 100us
latZONE2 // 100us ~ 1ms
latZONE3 // 1ms ~ inf
// latZoneMax is the number of latency zones declared above.
latZoneMax
)
const (
// Bit mask of softirq types to report; bit N corresponds to softirq
// index N (getMonsoftirqInfo tests 1 << index against this mask).
// HI:0x1
// TIMER:0x2
// NET_TX:0x4
// NET_RX:0x8
// BLOCK:0x10
// IRQ_POLL:0x20
// TASKLET:0x40
// SCHED:0x80
// HRTIMER:0x100
// RCU:0x200
// fullmask => 0x3ff
defaultSiTypeMask = 0x0c // default: only report NET_TX and NET_RX so far
// Because BPF array accesses are strictly checked,
// the array size must be a power of two, so we
// do not use softirqMax as the array size;
// softirqArrayMax is used instead.
softirqArrayMax = 16 // must be 2^order
)
// monTracerIsRunning gates Update: while it is false, Update returns no
// metrics and no error.
var monTracerIsRunning bool
// latZoneName returns the display label of a latency zone index, or
// "ERR_ZONE" when latZone is not one of the known zones.
func latZoneName(latZone int) string {
	// Labels indexed by zone constant.
	labels := [...]string{
		latZONE0: "0~10 us",
		latZONE1: "10us ~ 100us",
		latZONE2: "100us ~ 1ms",
		latZONE3: "1ms ~ inf",
	}

	if latZone < 0 || latZone >= len(labels) {
		return "ERR_ZONE"
	}
	return labels[latZone]
}
// siTypeName returns the display name of a softirq type index, or
// "ERR_TYPE" when siType is not one of the known types.
func siTypeName(siType int) string {
	// Start from the fallback and overwrite it for every known type.
	name := "ERR_TYPE"

	switch siType {
	case softirqHi:
		name = "HI"
	case softirqTime:
		name = "TIMER"
	case softirqNetTx:
		name = "NET_TX"
	case softirqNetRx:
		name = "NET_RX"
	case softirqBlock:
		name = "BLOCK"
	case softirqIrqPoll:
		name = "IRQ_POLL"
	case softirqTasklet:
		name = "TASKLET"
	case softirqSched:
		name = "SCHED"
	case softirqHrtimer:
		name = "HRTIMER"
	case sofirqRcu:
		name = "RCU"
	}

	return name
}
// getMonsoftirqInfo converts the latency counters held in monsoftirqData
// into gauge metrics, one per (softirq type, latency zone) pair. Only the
// softirq types whose bit is set in defaultSiTypeMask are reported.
// The returned error is always nil.
func getMonsoftirqInfo() ([]*metric.Data, error) {
	samples := []*metric.Data{}

	// NOTE(review): one label map is reused for every sample; this relies on
	// metric.NewGaugeData not retaining the map it is given - confirm.
	labels := map[string]string{}

	table := &monsoftirqData.SoftirqLat
	for irq := 0; irq < len(table); irq++ {
		// Skip softirq types that are not enabled in the report mask.
		if (1<<irq)&defaultSiTypeMask == 0 {
			continue
		}
		labels["softirqType"] = siTypeName(irq)

		zones := table[irq]
		for zone := 0; zone < len(zones); zone++ {
			labels["zone"] = latZoneName(zone)
			sample := metric.NewGaugeData("latency", float64(zones[zone]), "softirq latency", labels)
			samples = append(samples, sample)
		}
	}

	return samples, nil
}
// Update returns the current softirq latency metrics. It returns
// (nil, nil) while the tracer is not running, and (nil, err) when
// collecting the metrics fails.
func (c *monsoftirqTracing) Update() ([]*metric.Data, error) {
	if monTracerIsRunning {
		data, err := getMonsoftirqInfo()
		if err == nil {
			return data, nil
		}
		return nil, err
	}
	return nil, nil
}

View File

@ -19,42 +19,148 @@ import (
"context"
"encoding/binary"
"fmt"
"time"
"strconv"
"huatuo-bamai/internal/bpf"
"huatuo-bamai/internal/log"
"huatuo-bamai/internal/utils/bpfutil"
"huatuo-bamai/pkg/metric"
"huatuo-bamai/pkg/tracing"
"github.com/tklauser/numcpus"
)
func init() {
tracing.RegisterEventTracing("monsoftirq", newSoftirqCollector)
tracing.RegisterEventTracing("softirq", newSoftirq)
}
func newSoftirqCollector() (*tracing.EventTracingAttr, error) {
func newSoftirq() (*tracing.EventTracingAttr, error) {
num, err := numcpus.GetPossible()
if err != nil {
return nil, fmt.Errorf("fetch possible cpu num")
}
return &tracing.EventTracingAttr{
TracingData: &monsoftirqTracing{},
Internal: 10,
Flag: tracing.FlagTracing | tracing.FlagMetric,
TracingData: &softirqLatency{
bpf: nil,
isRunning: false,
cpu: num,
},
Internal: 10,
Flag: tracing.FlagTracing | tracing.FlagMetric,
}, nil
}
//go:generate $BPF_COMPILE $BPF_INCLUDE -s $BPF_DIR/monsoftirq_tracing.c -o $BPF_DIR/monsoftirq_tracing.o
type monsoftirqBpfData struct {
SoftirqLat [softirqArrayMax][latZoneMax]uint64
// softirqLatency holds the state of the softirq latency tracer.
type softirqLatency struct {
// bpf is the loaded BPF object; Start stores it and Update dumps the
// softirq_percpu_lats map through it.
bpf bpf.BPF
// isRunning is set to true by Start after the BPF object is stored and
// reset to false before Start returns; Update returns nothing while false.
isRunning bool
// cpu is the CPU count obtained from numcpus.GetPossible; Update uses it
// as the number of per-CPU values to decode for each map entry.
cpu int
}
type monsoftirqTracing struct{}
// softirqLatencyData is the layout Update decodes (little-endian, via
// binary.Read) for each per-CPU value of the softirq_percpu_lats map.
// NOTE(review): the field order appears to mirror struct softirq_lat in the
// BPF program (timestamp, then total_latency[]) - keep the two in sync.
type softirqLatencyData struct {
// Timestamp is decoded but not used by Update.
Timestamp uint64
// TotalLatency holds one counter per latency zone; Update exports each
// entry as a gauge labelled with its zone index.
TotalLatency [4]uint64
}
var monsoftirqData monsoftirqBpfData
// Softirq ids, numbered consecutively from 0 via iota. Update interprets
// the keys of the softirq_percpu_lats map as these ids when it calls
// irqAllowed and irqTypeName.
const (
softirqHi = iota
softirqTime
softirqNetTx
softirqNetRx
softirqBlock
softirqIrqPoll
softirqTasklet
softirqSched
softirqHrtimer
// NOTE(review): this identifier is spelled "sofirq" (missing "t");
// irqTypeName refers to it by this exact name, so rename both together.
sofirqRcu
// softirqMax is the number of softirq ids declared above.
softirqMax
)
// Start monsoftirq work, load bpf and wait data from perfevent
func (c *monsoftirqTracing) Start(ctx context.Context) error {
// load bpf.
// irqTypeName returns the display name of a softirq id, or "ERR_TYPE"
// when id is not one of the known softirq ids.
func irqTypeName(id int) string {
	// Names indexed by softirq id constant.
	names := [...]string{
		softirqHi:      "HI",
		softirqTime:    "TIMER",
		softirqNetTx:   "NET_TX",
		softirqNetRx:   "NET_RX",
		softirqBlock:   "BLOCK",
		softirqIrqPoll: "IRQ_POLL",
		softirqTasklet: "TASKLET",
		softirqSched:   "SCHED",
		softirqHrtimer: "HRTIMER",
		sofirqRcu:      "RCU",
	}

	if id < 0 || id >= len(names) {
		return "ERR_TYPE"
	}
	return names[id]
}
// irqAllowed reports whether metrics are exported for the given softirq
// id. Only NET_TX and NET_RX are currently allowed.
func irqAllowed(id int) bool {
	return id == softirqNetTx || id == softirqNetRx
}
// Update dumps the softirq_percpu_lats BPF map and converts it into gauge
// metrics, one per (softirq type, CPU, latency zone) triple. Only softirq
// ids accepted by irqAllowed are exported.
//
// It returns (nil, nil) while the tracer is not running, and a wrapped
// error when the map cannot be dumped or an entry cannot be decoded.
func (s *softirqLatency) Update() ([]*metric.Data, error) {
	if !s.isRunning {
		return nil, nil
	}

	items, err := s.bpf.DumpMapByName("softirq_percpu_lats")
	if err != nil {
		return nil, fmt.Errorf("dump map: %w", err)
	}

	// NOTE(review): one label map is reused for every sample; this relies on
	// metric.NewGaugeData not retaining the map it is given - confirm.
	labels := make(map[string]string)
	metricData := []*metric.Data{}

	// A single decode buffer is shared by all map entries: binary.Read
	// overwrites every element on success, and on failure we return
	// immediately, so stale values are never observed.
	latencyOnAllCPU := make([]softirqLatencyData, s.cpu)

	// Each item is one map entry: key = softirq id, value = per-CPU data.
	for _, item := range items {
		var irqVector uint32
		if err := binary.Read(bytes.NewReader(item.Key), binary.LittleEndian, &irqVector); err != nil {
			return nil, fmt.Errorf("read map key: %w", err)
		}

		if !irqAllowed(int(irqVector)) {
			continue
		}

		// The value is decoded as s.cpu consecutive softirqLatencyData records.
		if err := binary.Read(bytes.NewReader(item.Value), binary.LittleEndian, latencyOnAllCPU); err != nil {
			return nil, fmt.Errorf("read map value: %w", err)
		}

		labels["type"] = irqTypeName(int(irqVector))
		for cpuid := range latencyOnAllCPU {
			labels["cpuid"] = strconv.Itoa(cpuid)
			for zoneid, zone := range latencyOnAllCPU[cpuid].TotalLatency {
				labels["zone"] = strconv.Itoa(zoneid)
				metricData = append(metricData, metric.NewGaugeData("latency", float64(zone), "softirq latency", labels))
			}
		}
	}

	return metricData, nil
}
func (s *softirqLatency) Start(ctx context.Context) error {
b, err := bpf.LoadBpf(bpfutil.ThisBpfOBJ(), nil)
if err != nil {
return fmt.Errorf("failed to LoadBpf, err: %w", err)
return err
}
defer b.Close()
@ -62,31 +168,16 @@ func (c *monsoftirqTracing) Start(ctx context.Context) error {
return err
}
s.bpf = b
s.isRunning = true
childCtx, cancel := context.WithCancel(ctx)
defer cancel()
b.WaitDetachByBreaker(childCtx, cancel)
ticker := time.NewTicker(2 * time.Second)
defer ticker.Stop()
<-childCtx.Done()
monTracerIsRunning = true
defer func() { monTracerIsRunning = false }()
for {
select {
case <-childCtx.Done():
return nil
case <-ticker.C:
item, err := b.ReadMap(b.MapIDByName("softirq_lats"), []byte{0, 0, 0, 0})
if err != nil {
return fmt.Errorf("failed to read softirq_lats: %w", err)
}
buf := bytes.NewReader(item)
if err = binary.Read(buf, binary.LittleEndian, &monsoftirqData); err != nil {
log.Errorf("can't read softirq_lats: %v", err)
return err
}
}
}
s.isRunning = false
return nil
}