huatuo/internal/pod/container_css_default.go

314 lines
7.1 KiB
Go

// Copyright 2025 The HuaTuo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//go:build !didi
package pod
import (
"bufio"
"context"
"errors"
"fmt"
"io/fs"
"os"
"path/filepath"
"regexp"
"strings"
"sync"
"syscall"
"time"
"huatuo-bamai/internal/bpf"
"huatuo-bamai/internal/log"
"huatuo-bamai/internal/utils/cgrouputil"
"huatuo-bamai/pkg/types"
mapset "github.com/deckarep/golang-set"
)
// XXX go:generate go run -mod=mod github.com/cilium/ebpf/cmd/bpf2go -target amd64 cgroupCssGather $BPF_DIR/cgroup_css_gather.c -- $BPF_INCLUDE
// use the huatuo bpf framework:
//
//go:generate $BPF_COMPILE $BPF_INCLUDE -s $BPF_DIR/cgroup_css_gather.c -o $BPF_DIR/cgroup_css_gather.o
//go:generate $BPF_COMPILE $BPF_INCLUDE -s $BPF_DIR/cgroup_css_events.c -o $BPF_DIR/cgroup_css_events.o
// parseContainerCSS returns the CSS value of every subsystem recorded for
// containerID in the css cache, keyed by subsystem name.
//
// The map is empty when nothing is recorded for containerID. The returned
// error is always nil.
func parseContainerCSS(containerID string) (map[string]uint64, error) {
	entries := cgroupListCssDataByKnode(containerID)

	cssBySubSys := make(map[string]uint64)
	for i := range entries {
		cssBySubSys[entries[i].SubSys] = entries[i].CSS
	}

	return cssBySubSys, nil
}
// cgroupSubsysCount is the length of the CSS array carried by
// containerCssPerfEvent, one slot per cgroup subsystem index.
const cgroupSubsysCount = 13

// kubeletContainerIDKnodeMaxlen is the length a cgroup directory name must
// have for cgroupCssNotify to touch it; it also sizes the KnodeName buffer of
// containerCssPerfEvent.
const kubeletContainerIDKnodeMaxlen = 64
// FIXME:
// 1. only cgroup v1 is supported
// 2. the cgroup directory name is assumed to be the containerID

// kubeletContainerIDRegexp matches any run of characters that are not ASCII
// letters or digits; isValidKnodeName rejects names containing such a run.
var kubeletContainerIDRegexp = regexp.MustCompile(`[^a-zA-Z0-9]+`)

// cgroupv1SubSysName lists the cgroup v1 subsystems whose hierarchies are
// walked by cgroupCssNotify.
var cgroupv1SubSysName = []string{"cpu", "cpuacct", "cpuset", "memory", "blkio"}

// cgroupv1NotifyCgroupFile is the per-cgroup file that cgroupCssNotify reads.
var cgroupv1NotifyCgroupFile = "cgroup.clone_children"

// cgroupCssID2SubSysNameMap maps a subsystem index to its name; it is filled
// by cgroupInitSubSysIDs from the line order of /proc/cgroups.
var cgroupCssID2SubSysNameMap = make(map[int]string)

// cgroupCssMetaDataMap holds *containerCssMetaData values keyed by css.
var cgroupCssMetaDataMap sync.Map

// _cgroupCssBpfInternal keeps the events BPF object referenced (avoid GC).
var _cgroupCssBpfInternal *bpf.BPF
// isValidKnodeName reports whether name contains nothing that
// kubeletContainerIDRegexp matches, i.e. it is made only of ASCII letters and
// digits. The empty string is reported as valid.
func isValidKnodeName(name string) bool {
	return kubeletContainerIDRegexp.FindStringIndex(name) == nil
}
// containerCssMetaData is the record kept in cgroupCssMetaDataMap for one css
// value reported by a cgroup event.
type containerCssMetaData struct {
// CSS is the css value taken from the event; it is also the cache key.
CSS uint64
// SubSys is the subsystem name looked up in cgroupCssID2SubSysNameMap.
SubSys string
// Cgroup, CgroupRoot and CgroupLevel are copied unchanged from the event.
Cgroup uint64
CgroupRoot int32
CgroupLevel int32
// ContainerID is the knode name carried by the event.
ContainerID string
}
// containerCssPerfEvent is the record that cgroupCssEventSync decodes from the
// perf event reader.
// NOTE(review): field order and sizes presumably mirror the struct emitted by
// the BPF program — confirm before changing the layout.
type containerCssPerfEvent struct {
Cgroup uint64
// OpsType selects the handler: 0 is handled as cgroup mkdir, 1 as cgroup rmdir.
OpsType uint64
CgroupRoot int32
CgroupLevel int32
// CSS holds one value per subsystem index; zero entries are skipped.
CSS [cgroupSubsysCount]uint64
// KnodeName is the knode name in a fixed-size byte buffer; consumers strip
// the NUL padding before using it.
KnodeName [kubeletContainerIDKnodeMaxlen + 2]byte
}
// cgroupListCssDataByKnode returns every record of cgroupCssMetaDataMap whose
// ContainerID equals containerID. The result is never nil; it is empty when
// nothing matches.
func cgroupListCssDataByKnode(containerID string) []*containerCssMetaData {
	matched := make([]*containerCssMetaData, 0)

	collect := func(_, value any) bool {
		meta, ok := value.(*containerCssMetaData)
		if ok && meta.ContainerID == containerID {
			matched = append(matched, meta)
		}
		// Never stop early: the whole map has to be scanned.
		return true
	}
	cgroupCssMetaDataMap.Range(collect)

	return matched
}
// cgroupUpdateOrCreateCssData stores one containerCssMetaData per non-zero css
// of the event in cgroupCssMetaDataMap, keyed by the css value. An existing
// entry for the same css is overwritten.
//
// The knode name carried by the event is used as the container ID. When that
// name fails isValidKnodeName an error is returned and nothing is stored.
// css slots whose index is unknown to cgroupCssID2SubSysNameMap are skipped.
func cgroupUpdateOrCreateCssData(data *containerCssPerfEvent) error {
	// KnodeName is a C string: keep only the bytes before the first NUL so
	// that anything left behind the terminator cannot end up in the name.
	knodeName := string(data.KnodeName[:])
	if end := strings.IndexByte(knodeName, 0); end >= 0 {
		knodeName = knodeName[:end]
	}
	if !isValidKnodeName(knodeName) {
		return fmt.Errorf("knode name %q is not containerID", knodeName)
	}

	for index, css := range data.CSS {
		if css == 0 {
			continue
		}

		sysName, ok := cgroupCssID2SubSysNameMap[index]
		if !ok {
			continue
		}

		m := &containerCssMetaData{
			CSS:         css,
			Cgroup:      data.Cgroup,
			CgroupRoot:  data.CgroupRoot,
			CgroupLevel: data.CgroupLevel,
			ContainerID: knodeName,
			SubSys:      sysName,
		}
		log.Debugf("update container css data: %+v", m)
		cgroupCssMetaDataMap.Store(css, m)
	}

	return nil
}
// cgroupDeleteCssData removes from cgroupCssMetaDataMap the entry of every
// non-zero css carried by the event.
//
// When the knode name of the event fails isValidKnodeName an error is returned
// and nothing is removed. css slots whose index is unknown to
// cgroupCssID2SubSysNameMap are skipped.
func cgroupDeleteCssData(data *containerCssPerfEvent) error {
	// KnodeName is a C string: keep only the bytes before the first NUL so
	// that anything left behind the terminator cannot end up in the name.
	knodeName := string(data.KnodeName[:])
	if end := strings.IndexByte(knodeName, 0); end >= 0 {
		knodeName = knodeName[:end]
	}
	if !isValidKnodeName(knodeName) {
		return fmt.Errorf("knode name %q is not containerID", knodeName)
	}

	for index, css := range data.CSS {
		if css == 0 {
			continue
		}
		if _, ok := cgroupCssID2SubSysNameMap[index]; !ok {
			continue
		}

		if m, loaded := cgroupCssMetaDataMap.LoadAndDelete(css); loaded {
			log.Debugf("delete container css data: %+v", m)
		}
	}

	return nil
}
// cgroupCssEventSync starts a goroutine that decodes containerCssPerfEvent
// records from reader and applies them to the css cache.
//
// The goroutine exits when ctx is done (checked before each read) or when a
// read fails. A read error other than types.ErrExitByCancelCtx is logged
// before exiting.
func cgroupCssEventSync(ctx context.Context, reader bpf.PerfEventReader) {
	const (
		opsMkdir uint64 = 0 // mkdir cgroup
		opsRmdir uint64 = 1 // rmdir cgroup
	)

	loop := func() {
		for {
			select {
			case <-ctx.Done():
				return
			default:
			}

			var event containerCssPerfEvent
			if err := reader.ReadInto(&event); err != nil {
				if !errors.Is(err, types.ErrExitByCancelCtx) {
					log.Errorf("cgroup css sync read events: %v", err)
				}
				return
			}

			log.Debugf("sync container css data: %+v", event)

			// The handlers only fail on a knode name that does not pass
			// validation; such events are dropped on purpose.
			switch event.OpsType {
			case opsMkdir:
				_ = cgroupUpdateOrCreateCssData(&event)
			case opsRmdir:
				_ = cgroupDeleteCssData(&event)
			default:
				log.Errorf("css event opstype not supported: %+v", event)
			}
		}
	}

	go loop()
}
// cgroupCssNotify walks the hierarchy of every subsystem listed in
// cgroupv1SubSysName and reads cgroupv1NotifyCgroupFile in each directory
// whose name is kubeletContainerIDKnodeMaxlen characters long. The content of
// the file is discarded and read errors are ignored.
// NOTE(review): presumably the read itself is what makes the attached BPF
// program report the cgroup — confirm against cgroup_css_gather.c.
//
// Hierarchies that resolve to the same real path are walked once. The walk is
// best effort: directories that disappear while walking are skipped, and any
// other walk error is logged before moving on to the next hierarchy.
func cgroupCssNotify() {
	rootSet := mapset.NewSet()

	for _, subsys := range cgroupv1SubSysName {
		root := cgrouputil.CgroupRootFsFilePath(subsys)
		realRoot, err := filepath.EvalSymlinks(root)
		if err != nil {
			continue
		}

		if rootSet.Contains(realRoot) {
			continue
		}
		rootSet.Add(realRoot)

		err = filepath.WalkDir(realRoot, func(path string, d fs.DirEntry, err error) error {
			if err != nil {
				// cgroups come and go while we walk; a directory that has
				// vanished must not abort the rest of this hierarchy.
				if errors.Is(err, syscall.ENOENT) {
					return nil
				}
				return err
			}

			if !d.IsDir() || len(d.Name()) != kubeletContainerIDKnodeMaxlen {
				return nil
			}

			notifyPath := filepath.Join(path, cgroupv1NotifyCgroupFile)
			// Only the read matters; content and error are irrelevant.
			_, _ = os.ReadFile(notifyPath)
			log.Debugf("read cgroup path: %s", notifyPath)

			// Do not descend into the matched directory.
			return filepath.SkipDir
		})
		if err != nil {
			log.Errorf("walk cgroup root %s: %v", realRoot, err)
		}
	}
}
// cgroupInitSubSysIDs fills cgroupCssID2SubSysNameMap from /proc/cgroups.
//
// The first line of the file is treated as a header and skipped. Every
// following line adds one entry: the text before its first tab is the
// subsystem name, and the position of the line (counted from zero) is used as
// the subsystem index.
//
// An error is returned when the file cannot be opened or when reading it
// fails part-way through.
func cgroupInitSubSysIDs() error {
	file, err := os.Open("/proc/cgroups")
	if err != nil {
		return err
	}
	defer file.Close()

	scanner := bufio.NewScanner(file)

	// Skip the header line.
	if !scanner.Scan() {
		return scanner.Err()
	}

	ssid := 0
	for scanner.Scan() {
		name, _, _ := strings.Cut(scanner.Text(), "\t")
		cgroupCssID2SubSysNameMap[ssid] = name
		ssid++
	}

	// Scan returns false on both EOF and failure; only Err tells them apart.
	if err := scanner.Err(); err != nil {
		return fmt.Errorf("read /proc/cgroups: %w", err)
	}
	return nil
}
// cgroupInitEventCssWithoutCleanup loads cgroup_css_events.o, attaches it with
// an event pipe on "cgroup_perf_events" and starts cgroupCssEventSync on the
// resulting reader.
//
// As the name says, nothing is torn down on success: the BPF object is parked
// in _cgroupCssBpfInternal and the reader runs under a background context.
// When attaching fails the loaded object is closed before the error is
// returned.
func cgroupInitEventCssWithoutCleanup() error {
	cssBpf, err := bpf.LoadBpf("cgroup_css_events.o", nil)
	if err != nil {
		return fmt.Errorf("LoadBpf: %w", err)
	}

	childCtx := context.Background()
	reader, err := cssBpf.AttachAndEventPipe(childCtx, "cgroup_perf_events", 8192)
	if err != nil {
		log.Infof("AttachAndEventPipe: %v", err)
		// Do not leak the loaded object when it could not be attached.
		cssBpf.Close()
		return err
	}

	// Keep the object referenced for as long as the process runs (avoid GC).
	_cgroupCssBpfInternal = &cssBpf

	cgroupCssEventSync(childCtx, reader)
	return nil
}
// cgroupInitGatherCss loads cgroup_css_gather.o, attaches it with an event
// pipe on "cgroup_perf_events", starts cgroupCssEventSync on the reader and
// then calls cgroupCssNotify.
//
// Everything set up here is released on return: the reader is closed, the
// context is cancelled and the BPF object is closed, in that order.
func cgroupInitGatherCss() error {
	const (
		// NOTE(review): presumably gives the sync goroutine time to start
		// reading before the notify pass — confirm.
		beforeNotify = 100 * time.Millisecond
		// wait sync
		afterNotify = time.Second
	)

	gatherBpf, err := bpf.LoadBpf("cgroup_css_gather.o", nil)
	if err != nil {
		return fmt.Errorf("LoadBpf: %w", err)
	}
	defer gatherBpf.Close()

	syncCtx, stop := context.WithCancel(context.Background())
	defer stop()

	events, err := gatherBpf.AttachAndEventPipe(syncCtx, "cgroup_perf_events", 8192)
	if err != nil {
		log.Infof("AttachAndEventPipe: %v", err)
		return err
	}
	defer events.Close()

	cgroupCssEventSync(syncCtx, events)

	time.Sleep(beforeNotify)
	cgroupCssNotify()
	time.Sleep(afterNotify)

	return nil
}
// ContainerCgroupCssInit initialises the container css cache.
//
// It runs three steps in order and stops at the first failure:
//  1. cgroupInitSubSysIDs
//  2. cgroupInitGatherCss
//  3. cgroupInitEventCssWithoutCleanup
//
// Only cgroup v1 is supported. Every failure, including one in the first
// step, is reported through the returned error.
func ContainerCgroupCssInit() error {
	if err := cgroupInitSubSysIDs(); err != nil {
		return fmt.Errorf("init cgroup subsystem ids (only support cgroupv1 now): %w", err)
	}

	if err := cgroupInitGatherCss(); err != nil {
		return err
	}

	return cgroupInitEventCssWithoutCleanup()
}